diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index ccdfd6688..1ac36e39f 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -2,14 +2,14 @@ name: Build on: push: - branches: [master, devel] + branches: [master, devel, DPVS-1.8-LTS] release: branches: [master] types: [published] schedule: - cron: '30 2 * * 1' pull_request: - branches: [master, devel] + branches: [master, devel, DPVS-1.8-LTS] types: [labeled] jobs: diff --git a/.github/workflows/run.yaml b/.github/workflows/run.yaml index 41a77bcba..29b1a5423 100644 --- a/.github/workflows/run.yaml +++ b/.github/workflows/run.yaml @@ -2,14 +2,14 @@ name: Run on: push: - branches: [master, devel] + branches: [master, devel, DPVS-1.8-LTS] release: branches: [master] types: [published] schedule: - cron: '30 3 * * 1' pull_request: - branches: [master, devel] + branches: [master, devel, DPVS-1.8-LTS] types: [labeled] jobs: diff --git a/README.md b/README.md index b2bc6ed66..4a7c1fa7c 100644 --- a/README.md +++ b/README.md @@ -104,7 +104,7 @@ $ mkdir dpdkbuild # user desired build folder $ meson -Denable_kmods=true -Dprefix=dpdklib dpdkbuild $ ninja -C dpdkbuild $ cd dpdkbuild; ninja install -$ export PKG_CONFIG_PATH=$(pwd)/../dpdklib/lib64/pkgconfig/libdpdk.pc +$ export PKG_CONFIG_PATH=$(pwd)/../dpdklib/lib64/pkgconfig/ ``` > Tips: You can use script [dpdk-build.sh](./scripts/dpdk-build.sh) to facilitate dpdk build. Run `dpdk-build.sh -h` for the usage of the script. @@ -146,14 +146,14 @@ $ ./usertools/dpdk-devbind.py -b uio_pci_generic 0000:06:00.0 It's simple, just set `PKG_CONFIG_PATH` and build it. ```bash -$ export PKG_CONFIG_PATH= # normally located at dpdklib/lib64/pkgconfig/libdpdk.pc +$ export PKG_CONFIG_PATH= # normally located at dpdklib/lib64/pkgconfig/ $ cd $ make # or "make -j" to speed up $ make install ``` > Notes: -> 1. 
Build dependencies may be needed, such as `pkg-config`(version 0.29.2+),`automake`, `libnl3`, `libnl-genl-3.0`, `openssl`, `popt` and `numactl`. You can install the missing dependencies by using the package manager of the system, e.g., `yum install popt-devel` (CentOS). +> 1. Build dependencies may be needed, such as `pkg-config`(version 0.29.2+),`automake`, `libnl3`, `libnl-genl-3.0`, `openssl`, `popt` and `numactl`. You can install the missing dependencies by using the package manager of the system, e.g., `yum install popt-devel automake` (CentOS) or `apt install libpopt-dev automake` (Ubuntu). > 2. Early `pkg-config` versions (v0.29.2 before) may cause dpvs build failure. If so, please upgrade this tool. Output files are installed to `dpvs/bin`. @@ -253,6 +253,8 @@ Our test shows the forwarding speed (pps) of DPVS is several times than LVS and ![performance](./pic/performance.png) +Click [here](./test/release/v1.9.2/performance.md) for the latest performance data. + # License Please refer to the [License](./LICENSE.md) file for details. diff --git a/conf/dpvs.bond.conf.sample b/conf/dpvs.bond.conf.sample index aaea16e98..fb85b1532 100644 --- a/conf/dpvs.bond.conf.sample +++ b/conf/dpvs.bond.conf.sample @@ -248,17 +248,18 @@ worker_defs { } } - ! worker cpu9 { + ! worker cpu17 { ! type kni - ! cpu_id 9 + ! cpu_id 17 ! port bond0 { + ! rx_queue_ids 8 ! tx_queue_ids 8 ! } ! port bond1 { + ! rx_queue_ids 8 ! tx_queue_ids 8 ! } !} - } ! timer config @@ -273,6 +274,11 @@ neigh_defs { timeout 60 } +! dpvs ipset config +ipset_defs { + ipset_hash_pool_size 131072 +} + ! dpvs ipv4 config ipv4_defs { forwarding off diff --git a/conf/dpvs.conf.items b/conf/dpvs.conf.items index e23c0316a..f418525a8 100644 --- a/conf/dpvs.conf.items +++ b/conf/dpvs.conf.items @@ -11,11 +11,13 @@ ! 
global config global_defs { - #daemon - log_level INFO - log_file /var/log/dpvs.log - log_async_mode off - pdump off + #daemon + log_level INFO + log_file /var/log/dpvs.log + log_with_timestamp off # note: only effective for async log now + log_async_mode off + log_async_pool_size 16383 <16383, 1023-unlimited> + pdump off } ! netif config @@ -71,6 +73,12 @@ netif_defs { } ! worker config (lcores) +! notes: +! 1. rx(tx) queue ids MUST start from 0 and be continuous +! 2. cpu ids and rx(tx) queue ids MUST be unique, repeated ids are forbidden +! 3. cpu ids identify dpvs workers only, and do not correspond to physical cpu cores. +! If you are to specify cpu cores on which to run dpvs, please use dpdk eal options, +! such as "-c", "-l", "--lcores". Use "dpvs -- --help" for supported eal options. worker_defs { worker cpu0 { cpu_id 0 @@ -139,9 +147,11 @@ worker_defs { cpu_id 5 icmp_redirect_core port dpdk0 { + rx_queue_ids 4 tx_queue_ids 6 } port dpdk1 { + rx_queue_ids 4 tx_queue_ids 4 } } @@ -159,6 +169,11 @@ neigh_defs { timeout 60 <60, 1-3600> } +! dpvs ipset config +ipset_defs { + ipset_hash_pool_size 131072 <131072, 65536-524288> +} + ! dpvs ipv4 config ipv4_defs { forwarding off @@ -245,6 +260,7 @@ ipvs_defs { ! wscale ! timestamp } + close_client_window !defer_rs_syn rs_syn_max_retry 3 <3, 1-99> ack_storm_thresh 10 <10, 1-999> diff --git a/conf/dpvs.conf.sample b/conf/dpvs.conf.sample index 0585b80c4..4bd2a94d0 100644 --- a/conf/dpvs.conf.sample +++ b/conf/dpvs.conf.sample @@ -65,6 +65,12 @@ netif_defs { } ! worker config (lcores) +! notes: +! 1. rx(tx) queue ids MUST start from 0 and be continuous +! 2. cpu ids and rx(tx) queue ids MUST be unique, repeated ids are forbidden +! 3. cpu ids identify dpvs workers only, and do not correspond to physical cpu cores. +! If you are to specify cpu cores on which to run dpvs, please use dpdk eal options, +! such as "-c", "-l", "--lcores". Use "dpvs -- --help" for supported eal options. 
worker_defs { worker cpu0 { type master @@ -208,17 +214,18 @@ worker_defs { } } - ! worker cpu9 { + ! worker cpu17 { ! type kni - ! cpu_id 9 + ! cpu_id 17 ! port dpdk0 { + ! rx_queue_ids 8 ! tx_queue_ids 8 ! } ! port dpdk1 { + ! rx_queue_ids 8 ! tx_queue_ids 8 ! } !} - } ! timer config @@ -233,6 +240,11 @@ neigh_defs { timeout 60 } +! dpvs ipset config +ipset_defs { + ipset_hash_pool_size 131072 +} + ! dpvs ipv4 config ipv4_defs { forwarding off @@ -312,6 +324,7 @@ ipvs_defs { ! wscale ! timestamp } + close_client_window ! defer_rs_syn rs_syn_max_retry 3 ack_storm_thresh 10 diff --git a/conf/dpvs.conf.single-bond.sample b/conf/dpvs.conf.single-bond.sample index ce6a16562..52a6bf1fd 100644 --- a/conf/dpvs.conf.single-bond.sample +++ b/conf/dpvs.conf.single-bond.sample @@ -159,14 +159,14 @@ worker_defs { } } - ! worker cpu9 { + ! worker cpu17 { ! type kni - ! cpu_id 9 + ! cpu_id 17 ! port bond0 { + ! rx_queue_ids 8 ! tx_queue_ids 8 ! } !} - } ! timer config @@ -181,6 +181,11 @@ neigh_defs { timeout 60 } +! dpvs ipset config +ipset_defs { + ipset_hash_pool_size 131072 +} + ! dpvs ipv4 config ipv4_defs { forwarding off @@ -260,6 +265,7 @@ ipvs_defs { ! wscale ! timestamp } + close_client_window ! defer_rs_syn rs_syn_max_retry 3 ack_storm_thresh 10 diff --git a/conf/dpvs.conf.single-nic.sample b/conf/dpvs.conf.single-nic.sample index faa58b9e4..7547542af 100644 --- a/conf/dpvs.conf.single-nic.sample +++ b/conf/dpvs.conf.single-nic.sample @@ -134,14 +134,14 @@ worker_defs { } } - ! worker cpu9 { + ! worker cpu17 { ! type kni - ! cpu_id 9 + ! cpu_id 17 ! port dpdk0 { + ! rx_queue_ids 8 ! tx_queue_ids 8 ! } !} - } ! timer config @@ -156,6 +156,11 @@ neigh_defs { timeout 60 } +! dpvs ipset config +ipset_defs { + ipset_hash_pool_size 131072 +} + ! dpvs ipv4 config ipv4_defs { forwarding off @@ -235,6 +240,7 @@ ipvs_defs { ! wscale ! timestamp } + close_client_window ! 
defer_rs_syn rs_syn_max_retry 3 ack_storm_thresh 10 diff --git a/doc/IPset.md b/doc/IPset.md new file mode 100644 index 000000000..06a36b5e2 --- /dev/null +++ b/doc/IPset.md @@ -0,0 +1,743 @@ +DPVS IPset +------ + +* [Concepts](#concepts) +* [Set Types](#types) + * [bitmap:ip](#bitmap_ip) + * [bitmap:ip,mac](#bitmap_ip_mac) + * [bitmap:port](#bitmap_port) + * [hash:ip](#hash_ip) + * [hash:ip,port](#hash_ip_port) + * [hash:ip,port,ip](#hash_ip_port_ip) + * [hash:net](#hash_net) + * [hash:net,port](#hash_net_port) + * [hash:net,port,iface](#hash_net_port_iface) + * [hash:ip,port,net](#hash_ip_port_net) + * [hash:net,port,net](#hash_net_port_net) + * [hash:net,port,net,port](#hash_net_port_net_port) +* [For Developers](#developer) + + + +# Concepts + +DPVS ipset is derived from [Linux IP sets](https://ipset.netfilter.org/index.html). Depending on the type of the set, an IP set may store IP(v4/v6) addresses, (TCP/UDP) port numbers, IP and MAC address pairs, IP address and port number pairs, etc. + +Administrator may use `dpip` tool to create/destroy an ipset, add/delete entries to/from an existing ipset, test if an entry matches an ipset, or list/flush all entries of an ipset. Ipset intends to cooperate with DPVS's other modules, such as TC, ACL, to achieve flexible control over packet processing. + +According to storage methods, DPVS's ipset types can be divided into two categories: bitmap sets and hash sets. Bitmap sets support IPv4 only and have better performance, but an IPv4 or (TCP, UDP) port range must be specified when the set is created and corresponding memory is allocated even if there is no element yet. Hash sets store entries using hash tables with a maximum entry limit; their performance is not as good, but they can support more complicated entries. Note that complicated set types generally cost more CPU and memory resources, so it's advised to use the most suitable set types for your application case. 
+ +You can find the latest ipset usage with command `dpip ipset -h`. + +```bash +# ./bin/dpip ipset -h +Usage: + dpip ipset create SETNAME TYPE [ OPTIONS ] + dpip ipset destroy SETNAME + dpip ipset { add | del | test } SETNAME ENTRY [ ADTOPTS ] + dpip ipset { show | flush } [ SETNAME ] +Parameters: + TYPE := { bitmap:ip | bitmap:ip,mac | bitmap:port | hash:ip | hash:net | hash:ip,port + | hash:net,port | hash:net,port,iface | hash:ip,port,ip | hash:ip,port,net + | hash:net,port,net | hash:net,port,net,port } + ENTRY := combinations of one or more comma seperated tokens below, + { { IP | NET } | PORT | MAC | IFACE } + IP := ipv4 or ipv6 string literal + NET := "{ IP/prefix | IP(range from)[-IP(range end)] }" + MAC := 6 bytes MAC address string literal + PORT := "[{ tcp | udp | icmp | icmp6 }:]port1[-port2]" + OPTIONS := { comment | range NET | hashsize NUM | maxelem NUM } + ADTOPTS := { comment STRING | unmatch (for add only) } + flag := { -F(--force) | { -4 | -6 } | -v } +``` + +For example, the following command creates a hash:net,port type ipset named `foo`, whose hash table bucket size is 256, and can store 1000 entries at most with comment enabled. + +```bash +./bin/dpip ipset create foo hash:net,port hashsize 256 maxelem 1000 comment +``` + +Then we add some entries into the ipset foo. + +```bash + ./bin/dpip ipset add foo 10.132.0.0/16,udp:10240 + ./bin/dpip ipset add foo 192.168.88.0/24,tcp:8080-8082 + ./bin/dpip ipset add foo 192.168.88.200-192.168.88.255,tcp:8082 nomatch comment "bad guys" +``` + +As shown above, we add to the ipset foo the whole network range 10.132.0.0/16 with udp port 10240 firstly, and then add a network range 192.168.88.0/24 with tcp port 8080, 8081, 8082, and finally exclude the ip range 192.168.88.200-192.168.88.255 with tcp port 8082. + +Now let's have a look at what are stored in ipset. 
+ +```Bash +# ./bin/dpip -v ipset list +Name: foo +Type: hash:net,port +Header: family inet hashsize 256 maxelem 1000 comment +Size in memory: 5160 +References: 0 +Number of entries: 7 +Members: +192.168.88.200/29,tcp:8082 nomatch comment "bad guys" +192.168.88.208/28,tcp:8082 nomatch comment "bad guys" +192.168.88.224/27,tcp:8082 nomatch comment "bad guys" +192.168.88.0/24,tcp:8080 +192.168.88.0/24,tcp:8081 +192.168.88.0/24,tcp:8082 +10.132.0.0/16,udp:10240 +``` +Note that 7 ipset entries are created, and IPv4 range is transformed to CIDR range format when stored into ipset. The flag `-v` in the command above indicates to sort ipset entries if the ipset type supports `sort_compare` method in dpip. + +Finally, let's do some tests. + +```bash +# ./bin/dpip ipset -v test foo 10.132.1.2,udp:10240 +10.132.1.2,udp:10240 is in set foo +# ./bin/dpip ipset -v test foo 10.100.100.100,udp:10240 +10.100.100.100,udp:10240 is NOT in set foo +# ./bin/dpip ipset test foo 192.168.88.0,tcp:8080 +true +# ./bin/dpip ipset test foo 192.168.88.22,tcp:8082 +true +# ./bin/dpip ipset test foo 192.168.88.222,tcp:8082 +false +``` +The last test returned "false" because of the nomatch entry `192.168.88.200-192.168.88.255,tcp:8082 nomatch`. If we add one more specific entry `192.168.88.222/32,tcp:8082` and test again, the result would turn out to be "true". Note that hash types always match entries in descending order of net cidr. 
+ +```bash +# ./bin/dpip ipset add foo 192.168.88.222,tcp:8082 comment "you are an exception" +# ./bin/dpip ipset list -v +Name: foo +Type: hash:net,port +Header: family inet hashsize 256 maxelem 1000 comment +Size in memory: 5312 +References: 0 +Number of entries: 8 +Members: +192.168.88.222/32,tcp:8082 comment "you are an exception" +192.168.88.200/29,tcp:8082 nomatch comment "bad guys" +192.168.88.208/28,tcp:8082 nomatch comment "bad guys" +192.168.88.224/27,tcp:8082 nomatch comment "bad guys" +192.168.88.0/24,tcp:8080 +192.168.88.0/24,tcp:8081 +192.168.88.0/24,tcp:8082 +10.132.0.0/16,udp:10240 + +# ./bin/dpip ipset test foo 192.168.88.222,tcp:8082 +true +``` + + + +# Set Types + +> For more examples of ipset types, refer to [ipset test script](../test/ipset/dpip.sh) and the [test results](../test/ipset/dpip.log). + + + +#### bitmap:ip + +The bitmap:ip set type uses a memory range to store IPv4 addresses. An IPv4 range is supported when adding/deleting entries: ipset parses the range and derives all specific IPv4 addresses in the range, then saves them in the bitmap storage. Therefore, we can add an IPv4 range to bitmap:ip, and delete a subrange of IPv4 addresses from the ipset. IPv6 is not supported by the type. + +```bash +# ./bin/dpip ipset create foo bitmap:ip range 192.168.0.0/16 +# ./bin/dpip ipset add foo 192.168.1.0/29 +# ./bin/dpip ipset list foo +Name: foo +Type: bitmap:ip +Header: range 192.168.0.0/16 +Size in memory: 8240 +References: 0 +Number of entries: 8 +Members: +192.168.1.0 +192.168.1.1 +192.168.1.2 +192.168.1.3 +192.168.1.4 +192.168.1.5 +192.168.1.6 +192.168.1.7 +# ./bin/dpip ipset del foo 192.168.1.4-192.168.1.6 +# ./bin/dpip ipset list foo +Name: foo +Type: bitmap:ip +Header: range 192.168.0.0/16 +Size in memory: 8240 +References: 0 +Number of entries: 5 +Members: +192.168.1.0 +192.168.1.1 +192.168.1.2 +192.168.1.3 +192.168.1.7 +``` + + + +#### bitmap:ip,mac + +The bitmap:ip,mac set type uses a memory range to store IPv4 and MAC address pairs. 
If MAC address is not specified in the pair, it is ignored when matching against this entry, i.e., only IPv4 address is considered in this case. + +```bash +# ./bin/dpip ipset create foo bitmap:ip,mac range 192.168.100.0/24 comment +# ./bin/dpip ipset add foo 192.168.100.100,AA:BB:CC:DD:EE:FF comment "initial" +# ./bin/dpip ipset list foo +Name: foo +Type: bitmap:ip,mac +Header: range 192.168.100.0/24 comment +Size in memory: 9808 +References: 0 +Number of entries: 1 +Members: +192.168.100.100,AA:BB:CC:DD:EE:FF comment "initial" +# ./bin/dpip ipset add foo 192.168.100.100,11:22:33:44:55:66 comment "overwrite" -F +# ./bin/dpip ipset list foo +Name: foo +Type: bitmap:ip,mac +Header: range 192.168.100.0/24 comment +Size in memory: 9808 +References: 0 +Number of entries: 1 +Members: +192.168.100.100,11:22:33:44:55:66 comment "overwrite" +# ./bin/dpip ipset test foo 192.168.100.100,11:22:33:44:55:66 +true +# ./bin/dpip ipset test foo 192.168.100.100 +true +# ./bin/dpip ipset test foo 192.168.100.100,12:34:45:78:a9 +false +``` + + + +#### bitmap:port + +The bitmap:port set type uses a memory range to store port numbers. Only TCP and UDP protocols are supported. When matching against a bitmap:port ipset, protocol should be specified explicitly. A bitmap:port ipset can hold up to 65536 TCP ports and 65536 UDP ports. + +```bash +# ./bin/dpip ipset create foo bitmap:port range 0-65535 +# ./bin/dpip ipset add foo tcp:80 +# ./bin/dpip ipset add foo tcp:8080 +# ./bin/dpip ipset test foo tcp:80 +true +# ./bin/dpip ipset test foo tcp:8080 +true +# ./bin/dpip ipset test foo udp:80 +false +# ./bin/dpip ipset add foo udp:80 +# ./bin/dpip ipset test foo udp:80 +true +# ./bin/dpip ipset del foo tcp:8080 +# ./bin/dpip ipset test foo tcp:8080 +false +``` + + + +#### hash:ip + +The hash:ip set type uses a hash table to store IPv4 or IPv6 host addresses. 
IP range is supported when adding/deleting entries, but the range is parsed and transformed to specific host IP addresses before stored into hash table. + + +```bash +# ./bin/dpip ipset create foo hash:ip comment +# ./bin/dpip ipset add foo 10.100.100.100 comment "a single address" +# ./bin/dpip ipset add foo 192.168.1.0-192.168.1.9 comment "an ip range" +# ./bin/dpip ipset add foo 192.168.2.0/30 comment "a cidr range" +# ./bin/dpip ipset list foo +Name: foo +Type: hash:ip +Header: family inet hashsize 1024 maxelem 65535 comment +Size in memory: 18664 +References: 0 +Number of entries: 15 +Members: +10.100.100.100 comment "a single address" +192.168.1.0 comment "an ip range" +192.168.1.1 comment "an ip range" +192.168.1.2 comment "an ip range" +192.168.1.3 comment "an ip range" +192.168.1.4 comment "an ip range" +192.168.1.5 comment "an ip range" +192.168.1.6 comment "an ip range" +192.168.1.7 comment "an ip range" +192.168.1.8 comment "an ip range" +192.168.1.9 comment "an ip range" +192.168.2.0 comment "a cidr range" +192.168.2.1 comment "a cidr range" +192.168.2.2 comment "a cidr range" +192.168.2.3 comment "a cidr range" +# ./bin/dpip ipset test foo 192.168.1.6 +true +# ./bin/dpip ipset test foo 192.168.2.1 +true +``` + + + +#### hash:ip,port + +The hash:ip,port set type uses a hash table to store IP address and port number pairs. Both IPv4 and IPv6 address are supported. The port number is interpreted together with a protocol. Supported protocols include TCP, UDP, ICMP, and ICMPv6, any other protocols are interpreted as unspec type with a protocol number of zero. Range is allowed when adding/deleting entries, but is transformed to specific host IP and port entries when stored into hash table. 
+ + +```bash +# ./bin/dpip ipset -6 create bar hash:ip,port +# ./bin/dpip ipset add bar 2001::1,tcp:8080-8082 +# ./bin/dpip ipset add bar 2001::1,udp:80 +# ./bin/dpip ipset add bar 2001::2,0 +# ./bin/dpip ipset list bar +Name: bar +Type: hash:ip,port +Header: family inet6 hashsize 1024 maxelem 65535 +Size in memory: 17144 +References: 0 +Number of entries: 5 +Members: +2001::1,tcp:8081 +2001::1,tcp:8080 +2001::1,udp:80 +2001::1,tcp:8082 +2001::2,unspec:0 +# ./bin/dpip ipset test bar 2001::1,tcp:8081 +true +# ./bin/dpip ipset test bar 2001::1,udp:8081 +false +# ./bin/dpip ipset test bar 2001::1,udp:80 +true +# ./bin/dpip ipset test bar 2001::2 +true +``` + + + +#### hash:ip,port,ip + +The hash:ip,port,ip set type uses a hash table to store IP address, port number, and a second IP address triples. Both IPv4 and IPv6 address are supported, but the first and second IP should be of the same IP address family. The port number is interpreted together with a protocol. Supported protocols include TCP, UDP, ICMP, and ICMPv6, any other protocols are interpreted as unspec type with a protocol number of zero. Range is allowed when adding/deleting entries, but is transformed to specific host IP and port entries when stored into hash table. 
+ +```bash +# ./bin/dpip ipset create foo hash:ip,port,ip +# ./bin/dpip ipset add foo 192.168.1.16/31,tcp:8080-8081,192.168.2.100-192.168.2.102 +# ./bin/dpip ipset list foo +Name: foo +Type: hash:ip,port,ip +Header: family inet hashsize 1024 maxelem 65535 +Size in memory: 18208 +References: 0 +Number of entries: 12 +Members: +192.168.1.16,tcp:8081,192.168.2.100 +192.168.1.16,tcp:8081,192.168.2.101 +192.168.1.16,tcp:8081,192.168.2.102 +192.168.1.17,tcp:8081,192.168.2.100 +192.168.1.17,tcp:8081,192.168.2.101 +192.168.1.17,tcp:8081,192.168.2.102 +192.168.1.16,tcp:8080,192.168.2.100 +192.168.1.16,tcp:8080,192.168.2.101 +192.168.1.16,tcp:8080,192.168.2.102 +192.168.1.17,tcp:8080,192.168.2.100 +192.168.1.17,tcp:8080,192.168.2.101 +192.168.1.17,tcp:8080,192.168.2.102 +# ./bin/dpip ipset test foo 192.168.1.18,tcp:8081,192.168.2.101 +false +# ./bin/dpip ipset test foo 192.168.1.17,tcp:8081,192.168.2.101 +true +# ./bin/dpip ipset test foo 192.168.1.16,udp:8081,192.168.2.101 +false +# ./bin/dpip ipset test foo 192.168.1.16,tcp:8081,192.168.2.103 +false +``` + + + +#### hash:net + +The hash:net set type uses a hash table to store different sized IP network addresses. Both IPv4 and IPv6 address are supported. IPv4 supports both IP range and IP CIDR when adding/deleting entries, while IPv6 supports IP CIDR only. Network address with zero prefix size is not supported, and is interpreted as host prefix size, i.e., 32 for IPv4 and 128 for IPv6. Option "nomatch" can be used to set exceptions to the set when add/deleting entries. If a test is matched against with a "nomatch" entry, then the result would end with false. 
+ +```bash +# ./bin/dpip ipset create foo hash:net +# ./bin/dpip ipset add foo 192.168.1.123/24 +# ./bin/dpip ipset add foo 192.168.1.200-192.168.1.255 nomatch +# ./bin/dpip -v ipset list foo +Name: foo +Type: hash:net +Header: family inet hashsize 1024 maxelem 65535 +Size in memory: 16992 +References: 0 +Number of entries: 4 +Members: +192.168.1.200/29 nomatch +192.168.1.208/28 nomatch +192.168.1.224/27 nomatch +192.168.1.0/24 +# ./bin/dpip ipset test foo 192.168.1.0 +true +# ./bin/dpip ipset test foo 192.168.1.168 +true +# ./bin/dpip ipset test foo 192.168.1.200 +false +# ./bin/dpip ipset test foo 192.168.1.211 +false +``` + + + +#### hash:net,port + +The hash:net,port set type uses a hash table to store different sized IP network address and port pairs. Both IPv4 and IPv6 address are supported. IPv4 supports both IP range and IP CIDR when adding/deleting entries, while IPv6 supports IP CIDR only. Network address with zero prefix size is not supported, and is interpreted as host prefix size, i.e., 32 for IPv4 and 128 for IPv6. Option "nomatch" can be used to set exceptions to the set when add/deleting entries. If a test is matched against with a "nomatch" entry, then the result would end with false. The port number is interpreted together with a protocol. Supported protocols include TCP, UDP, ICMP, and ICMPv6, any other protocols are interpreted as unspec type with a protocol number of zero. 
+ +```bash +# ./bin/dpip ipset -6 create bar hash:net,port maxelem 1024 +# ./bin/dpip ipset add bar 2001::/64,tcp:80-82 +# ./bin/dpip -v ipset list bar +Name: bar +Type: hash:net,port +Header: family inet6 hashsize 1024 maxelem 1024 +Size in memory: 17296 +References: 0 +Number of entries: 6 +Members: +2001::1:2:0:0/96,tcp:80 nomatch +2001::1:2:0:0/96,tcp:81 nomatch +2001::1:2:0:0/96,tcp:82 nomatch +2001::/64,tcp:80 +2001::/64,tcp:81 +2001::/64,tcp:82 +# ./bin/dpip ipset test bar 2001::a:b:c:d,tcp:82 +true +# ./bin/dpip ipset test bar 2001::,tcp:80 +true +# ./bin/dpip ipset test bar 2001::1:2:3:4,tcp:80 +false +``` + + + +#### hash:net,port,iface + +The hash:net,port,iface set type uses a hash table to store different sized IP network address, port and interface name triples. Generally, it is similar to the hash:net,port set type, except that a interface name should be specified, which should be correspond to a valid DPVS interface. + + +```bash +# ./bin/dpip ipset create foo hash:net,port,iface +# ./bin/dpip ipset add foo 10.64.13.131/16,tcp:80-82,dpdk0 +# ./bin/dpip ipset add foo 10.64.88.100-10.64.88.200,tcp:82,dpdk0 nomatch +# ./bin/dpip ipset list foo +# ./bin/dpip -v ipset list foo +Name: foo +Type: hash:net,port,iface +Header: family inet hashsize 1024 maxelem 65535 +Size in memory: 17752 +References: 0 +Number of entries: 9 +Members: +10.64.88.200/32,tcp:82,dpdk0 nomatch +10.64.88.100/30,tcp:82,dpdk0 nomatch +10.64.88.104/29,tcp:82,dpdk0 nomatch +10.64.88.192/29,tcp:82,dpdk0 nomatch +10.64.88.112/28,tcp:82,dpdk0 nomatch +10.64.88.128/26,tcp:82,dpdk0 nomatch +10.64.0.0/16,tcp:80,dpdk0 +10.64.0.0/16,tcp:81,dpdk0 +10.64.0.0/16,tcp:82,dpdk0 +# ./bin/dpip ipset test foo 10.64.12.34,tcp:80,dpdk0 +true +# ./bin/dpip ipset test foo 10.64.88.123,tcp:80,dpdk0 +true +# ./bin/dpip ipset test foo 10.64.88.123,tcp:82,dpdk0 +false +# ./bin/dpip ipset test foo 10.64.88.123,tcp:82,dpdk2 +set test failed +bin/dpip: invalid parameter +``` + + + +#### hash:ip,port,net + +The 
hash:ip,port,net set type uses a hash table to store IP address, port number and IP network address triples. Both IPv4 and IPv6 address are supported. The IP address of the IP and net should be of the same family. When adding/deleting entries, ranges are allowed but is transformed to specific host IP and port entries when stored into hash table for the "ip" and "port" segments. IPv4 supports both IP range and IP CIDR, while IPv6 supports IP CIDR only. Network address with zero prefix size is not supported, and is interpreted as host prefix size, i.e., 32 for IPv4 and 128 for IPv6. Option "nomatch" can be used to set exceptions to the set when add/deleting entries. If a test is matched against with a "nomatch" entry, then the result would end with false. The port number is interpreted together with a protocol. Supported protocols include TCP, UDP, ICMP, and ICMPv6, any other protocols are interpreted as unspec type with a protocol number of zero. + +```bash +# ./bin/dpip ipset create bar hash:ip,port,net +# ./bin/dpip ipset add bar 2001::1,8080-8082,2002::/64 +# ./bin/dpip ipset add bar 2001::1,8080-8082,2002::aaaa:bbbb:ccc0:0/108 nomatch +# ./bin/dpip ipset -v list bar +Name: bar +Type: hash:ip,port,net +Header: family inet6 hashsize 1024 maxelem 65535 +Size in memory: 17296 +References: 0 +Number of entries: 6 +Members: +2001::1,unspec:8080,2002::aaaa:bbbb:ccc0:0/108 nomatch +2001::1,unspec:8081,2002::aaaa:bbbb:ccc0:0/108 nomatch +2001::1,unspec:8082,2002::aaaa:bbbb:ccc0:0/108 nomatch +2001::1,unspec:8080,2002::/64 +2001::1,unspec:8081,2002::/64 +2001::1,unspec:8082,2002::/64 +# ./bin/dpip ipset test bar 2001::1,8081,2002::1:2:3:4 +true +# ./bin/dpip ipset test bar 2001::1,8081,2002::1:2:3:4:5 +false +# ./bin/dpip ipset test bar 2001::1,8081,2002::aaaa:bbbb:ccc1:2345 +false +``` + + + +#### hash:net,port,net + +The hash:net,port,net set type uses a hash table to store two different sized IP network addresses and a port number triples. 
It is similar to the hash:ip,port,net set type, except that the first segment indicates a network address rather than a single IP address. Option "nomatch" also supported, but should be used with caution. The first "net" segment takes precedence over the second one when matching against the set in descending order of cidr, regardless of whether the entry is of "nomatch" option. + +```bash +# ./bin/dpip ipset create foo hash:net,port,net +# ./bin/dpip ipset add foo 10.60.0.0/16,tcp:10240-10242,10.130.0.0/16 +# ./bin/dpip ipset add foo 10.60.100.100-10.60.100.200,tcp:10242,10.130.100.0/24 nomatch +# ./bin/dpip -v ipset show foo +Name: foo +Type: hash:net,port,net +Header: family inet hashsize 1024 maxelem 65535 +Size in memory: 17752 +References: 0 +Number of entries: 9 +Members: +10.60.100.200/32,tcp:10242,10.130.100.0/24 nomatch +10.60.100.100/30,tcp:10242,10.130.100.0/24 nomatch +10.60.100.104/29,tcp:10242,10.130.100.0/24 nomatch +10.60.100.192/29,tcp:10242,10.130.100.0/24 nomatch +10.60.100.112/28,tcp:10242,10.130.100.0/24 nomatch +10.60.100.128/26,tcp:10242,10.130.100.0/24 nomatch +10.60.0.0/16,tcp:10240,10.130.0.0/16 +10.60.0.0/16,tcp:10241,10.130.0.0/16 +10.60.0.0/16,tcp:10242,10.130.0.0/16 +# ./bin/dpip ipset test foo 10.60.0.0,tcp:10242,10.130.255.255 +true +# ./bin/dpip ipset test foo 10.60.100.111,tcp:10240,10.130.3.4 +true +# ./bin/dpip ipset test foo 10.60.100.111,tcp:10240,10.130.100.222 +true +# ./bin/dpip ipset test foo 10.60.100.111,tcp:10242,10.130.3.4 +true +# ./bin/dpip ipset test foo 10.60.100.111,tcp:10242,10.130.100.222 +false +# ./bin/dpip ipset flush foo +# ./bin/dpip ipset add foo 192.168.100.0/24,udp:6000,192.168.200.0/25 +# ./bin/dpip ipset add foo 192.168.100.0/25,udp:6000,192.168.200.0/24 nomatch +# ./bin/dpip ipset list foo +Name: foo +Type: hash:net,port,net +Header: family inet hashsize 1024 maxelem 65535 +Size in memory: 16688 +References: 0 +Number of entries: 2 +Members: +192.168.100.0/24,udp:6000,192.168.200.0/25 
+192.168.100.0/25,udp:6000,192.168.200.0/24 nomatch +# ./bin/dpip ipset test foo 192.168.100.1,udp:6000,192.168.200.1 +false +# ./bin/dpip ipset flush foo +# ./bin/dpip ipset add foo 192.168.100.0/24,udp:6000,192.168.200.0/25 nomatch +# ./bin/dpip ipset add foo 192.168.100.0/25,udp:6000,192.168.200.0/24 +# ./bin/dpip ipset list foo +Name: foo +Type: hash:net,port,net +Header: family inet hashsize 1024 maxelem 65535 +Size in memory: 16688 +References: 0 +Number of entries: 2 +Members: +192.168.100.0/24,udp:6000,192.168.200.0/25 nomatch +192.168.100.0/25,udp:6000,192.168.200.0/24 +# ./bin/dpip ipset test foo 192.168.100.1,udp:6000,192.168.200.1 +true + +``` + + + +#### hash:net,port,net,port + +The hash:net,port,net,port set type uses a hash table to store tuples of two different sized IP network addresses and two port numbers. It is similar to the hash:net,port,net set type, except that an additional port number is added to the set entry. Both port numbers are interpreted together with protocols, which should always be of the same type. 
+ + +```bash +# ./bin/dpip ipset -6 create bar hash:net,port,net,port comment +# ./bin/dpip ipset add bar 2001::a:b:c:d/64,udp:8080-8081,2002::/64,udp:6000-6001 +# ./bin/dpip -v ipset list bar +Name: bar +Type: hash:net,port,net,port +Header: family inet6 hashsize 1024 maxelem 65535 comment +Size in memory: 16992 +References: 0 +Number of entries: 4 +Members: +2001::/64,udp:8080,2002::/64,udp:6000 +2001::/64,udp:8080,2002::/64,udp:6001 +2001::/64,udp:8081,2002::/64,udp:6000 +2001::/64,udp:8081,2002::/64,udp:6001 +# ./bin/dpip ipset test bar 2001::1:2:3:4,udp:8080,2002::1:2:3:4,udp:6001 +true +# ./bin/dpip ipset test bar 2001::1:2:3:4,udp:8080,2002::1:2:3:4:5,udp:6001 +false +# ./bin/dpip ipset test bar 2001::1:2:3:4,8080,2002::1:2:3:4,udp:6001 +Error: port protocol doesn't match +bin/dpip: parse: invalid parameter +# ./bin/dpip ipset add bar 2001::/64,udp:8080,2002::1:2:0:0/96,udp:6000-6001 nomatch comment bad-guys +# ./bin/dpip ipset -v show bar +Name: bar +Type: hash:net,port,net,port +Header: family inet6 hashsize 1024 maxelem 65535 comment +Size in memory: 17296 +References: 0 +Number of entries: 6 +Members: +2001::/64,udp:8080,2002::1:2:0:0/96,udp:6000 nomatch comment "bad-guys" +2001::/64,udp:8080,2002::1:2:0:0/96,udp:6001 nomatch comment "bad-guys" 2001::/64,udp:8080,2002::/64,udp:6000 +2001::/64,udp:8080,2002::/64,udp:6001 +2001::/64,udp:8081,2002::/64,udp:6000 +2001::/64,udp:8081,2002::/64,udp:6001 +# ./bin/dpip ipset test bar 2001::1:2:3:4,udp:8080,2002::1:2:3:4,udp:6001 +false +``` + + + +# For Developers + +Ipset module follows a "Deduction and Induction" architecture. The diagram below depicts the workflow of ipset "adt (add-delete-test)" operations. + +- Firstly, the correct ipset type registered in the system is found by a given set name. +- Secondly, a type specific implementation, for example, hash_ipport_adt4, is called. +- Then a common low-level function is invoked to finish the common, low-level work of bitmap or hash. 
+- Lastly come the type specific low-level functions, which are responsible for the dedicated work of the type. + +![ipset arch](pics/ipset-arch.png) + +Type specific operations in the second and the last steps are registered in the type's implementation, while common low-level functions in the third step are the basic routines of ipset:bitmap and ipset:hash. If the existing ipset types cannot satisfy your needs, it's convenient to create a new ipset type with the ipset framework. + +You may follow the procedures below to create a new ipset type. + +S1. Define a new ipset type `ipset_type` in a standalone C file. + +```C +struct ipset_type { + struct list_head l; + char name[IPSET_MAXNAMELEN]; + /* Create a set */ + int (*create)(struct ipset *set, struct ipset_param *param); + /* Destroy the set */ + void (*destroy)(struct ipset *set); + /* Flush the elements */ + void (*flush)(struct ipset *set); + /* List elements */ + void (*list)(struct ipset *set, struct ipset_info *info); + /* Low level test/add/del functions */ + ipset_adtfn *adtfn; +}; +``` + +For example, you can define a hash:ip,port type in file [ipset_hash_ipport.c](../src/ipset/ipset_hash_ipport.c). + +```C +struct ipset_type hash_ipport_type = { + .name = "hash:ip,port", + .create = hash_ipport_create, + .destroy = hash_destroy, + .flush = hash_flush, + .list = hash_list, + .adtfn = hash_adtfn, +}; +``` + +S2. Define and implement the type specific operations `ipset_type_variant` for both IPv4 and IPv6. 
+ +```C +struct ipset_type_variant { + /* test/add/del entries called by dpip */ + int (*adt)(int opcode, struct ipset *set, struct ipset_param *param); + /* Internal test function */ + int (*test)(struct ipset *set, struct ipset_test_param *param); + /* Basic functions that each ipset type should implement partially */ + union { + struct { + int (*do_del)(struct bitmap_elem *e, struct bitmap_map *map); + int (*do_test)(struct bitmap_elem *e, struct bitmap_map *map, size_t dsize); + void (*do_list)(struct ipset *set, struct ipset_bitmap_header *header, + struct ipset_member *members); + } bitmap; + struct { + /* Type that contains 'net' element must implement */ + void (*do_netmask)(void *elem, uint8_t cidr, bool inner); + int (*do_compare)(const void *adt_elem, const void *set_elem); + void (*do_list)(struct ipset_member *members, void *elem, bool comment); + uint32_t (*do_hash)(void *data, int len, uint32_t mask); + } hash; + }; +}; +``` + +As for hash:ip,port type, we define the `ipset_type_variant` for IPv4 as follows: + +```C +struct ipset_type_variant hash_ipport_variant4 = { + .adt = hash_ipport_adt4, + .test = hash_ipport_test4, + .hash.do_compare = hash_ipport_data_equal4, + .hash.do_list = hash_ipport_do_list4, + .hash.do_hash = hash_ipport_hashkey4 +}; +``` + +S3. Implement the methods of the new `ipset_type`. + +We can make use of the low-level common methods if no particular work needs to be done for this type. For example, `hash_ipport_type` uses the hash type's common methods "hash_destroy", "hash_flush", "hash_list", and "hash_adtfn", but adds a type specific "hash_ipport_create" method in which the `ipset_type_variant` is related to the new type. + +S4. Register the new type in `ipset_init`. + +For example, `hash_ipport_type` is registered as follows. Refer to [ipset_core.c](../src/ipset/ipset_core.c) for details. + +```C +extern struct ipset_type hash_ipport_type; + +int ipset_init(void) +{ + ... + ipset_type_register(&hash_ipport_type); + ... +} +``` + +S5. 
Add support for the `dpip` tool. + +Generally, add a new `ipset_type` object to dpip's ipset array, and implement required methods. + +```C +struct ipset_type { + char *name; + int (* parse)(char *arg); + int (* check)(void); + void (* dump_header)(char *buf, struct ipset_info *info); + int (* dump_member)(char *buf, struct ipset_member *m, int af); + sort_compare_func sort_compare; +}; +``` + +Note that the `check` and `sort_compare` methods are not compulsory. As for the hash:ip,port type, we implemented all the methods and registered the type object into dpip's ipset type array. Refer to [tools/dpip/ipset.c](../tools/dpip/ipset.c) for details. + +```C +struct ipset_type types[MAX_TYPE_NUM] = { + { + ... + }, + { + .name = "hash:ip,port", + .parse = ipport_parse, + .check = hash_ip_check, + .dump_header = hash_dump_header, + .dump_member = ipport_dump_member, + .sort_compare = ipport_sort_compare + }, + ... +}; +``` diff --git a/doc/Worker-Performance-Tuning.md b/doc/Worker-Performance-Tuning.md index 77d8d7d88..8616e2bf8 100644 --- a/doc/Worker-Performance-Tuning.md +++ b/doc/Worker-Performance-Tuning.md @@ -11,7 +11,7 @@ DPVS is a multi-thread DPDK application program. It is based on the "polling" fr * **Isolate Recieving Worker**: the optional workers used to take the responsibility of *Forwarding Worker* to receive packets to reduce NIC packets imiss. * **KNI Worker**: an optional worker used to do kni related jobs to avoid performance disturbance caused by work loads of *Master/Forwarding Worker*. -As all other DPDK applications, each DPVS Worker is bound to a distinct CPU core to avoid they interfere with each other. By default, the first N CPUs of the system are bound with DPVS Workers. The performance may not good enough when many other work loads are scheduled into these CPUs by the system. 
For example, CPU0, the first CPU core in the system, is generally a lot busier than other CPU cores, because many processes, interrupts, and kernel threads run on it by default. The following of this doc would tell you how to alleviate/offload irrelative work load on DPVS Workers. +Like other DPDK applications, each DPVS Worker is bound to a distinct CPU core so that the workers do not interfere with each other. By default, the first N CPUs of the system are bound with DPVS Workers. The performance may not be good enough when many other workloads are scheduled onto these CPUs by the system. For example, CPU0, the first CPU core in the system, is generally a lot busier than other CPU cores, because many processes, interrupts, and kernel threads run on it by default. The rest of this doc tells you how to alleviate/offload irrelevant workload on DPVS Workers. ### When do you need to consider this performance tuning? @@ -282,3 +282,70 @@ KiB Swap: 4194300 total, 4194300 free, 0 used. 16171432 avail Mem ``` +### Assign a dedicated worker for KNI + +As shown in the diagram below, KNI traffic is processed by default on Master and Forwarding Workers. But we can configure a dedicated worker for KNI traffic to avoid possible disturbances caused by overloaded dataplane. + +![kni-flow](pics/kni-flow-2.png) + +The configurations for KNI Worker are almost the same as those for the Forwarding Workers except that the `type` field should be set to `kni`. Rx/Tx queues should be configured for target NICs, receiving packets from network devices and transmitting to corresponding KNI devices, or vice versa. Note that we can configure either Rx or Tx queues only, which isolates processes of inbound or outbound traffic to/from KNI onto KNI worker, respectively. +Rx queues are required by DPVS's KNI address flow which directs KNI inbound traffic to the dedicated Rx queue using DPDK rte_flow. 
If Rx queue is not configured, Forwarding Workers are responsible for packet reception, handing over the received packets to KNI worker, and then the KNI Worker forwards the packets to KNI interfaces. If no Rx queue is configured, creating KNI address flow would fail. On the other hand, the Tx queues must be configured if KNI Worker is enabled, or the outbound traffic from KNI interfaces is dropped due to a lack of Tx queue, as shown in the diagram below. + +![kni-flow](pics/kni-flow-1.png) + +**The steps to use dedicated worker for KNI** + +* S1. Add KNI worker configurations to `dpvs.conf`. For example: + +``` + worker cpu9 { + type kni + cpu_id 9 + port bond0 { + rx_queue_ids 8 + tx_queue_ids 8 + } + } +``` +* S2. Boot up DPVS, and configure KNI interface up. For example, we configured a KNI interface on bond0.101. + +``` +55: bond0.101.kni: mtu 1500 qdisc pfifo_fast state UP qlen 1000 + link/ether 98:03:9b:1b:40:a4 brd ff:ff:ff:ff:ff:ff + inet 192.168.88.88/24 scope global bond0.101.kni + valid_lft forever preferred_lft forever + inet6 2001::88/64 scope global + valid_lft forever preferred_lft forever +``` +Now you can ping 192.168.88.88 and 2001::88, and both should respond. + +> Notes: If DPVS routes matched the KNI IPs, you should add `kni_host` routes for the KNI IPs. + +* S3. (If supported) Configure KNI address flow. + +``` +dpip flow add type kni 192.168.88.88 dev bond0.101 +dpip flow add type kni 2001::88 dev bond0.101 +``` +Now, all packets destined to 192.168.88.88 or 2001::88 are sent to Rxq8 on bond0. + + +**Performance tests** + +We designed 5 cases to examine the performance of KNI worker, and listed the test results below. 
+ +| Test Cases | ping (min/avg/max/mdev) | bandwidth (iperf tcp) | forwarding rate | +| ---------------------------------------------------- | ----------------------------- | --------------------- | --------------- | +| no kni worker (idle dataplane) | 0.468/3.196/6.893/1.547 ms | 2.64 Gbits/sec | 243K packets/s | +| with kni worker, no addr flow (idle dataplane) | 0.050/2.102/5.288/0.565 ms | 4.54 Gbits/sec | 413K packets/s | +| with kni worker, with addr flow (idle dataplane) | 0.409/2.346/11.650/1.179 ms | 4.57 Gbits/sec | 416K packets/s | +| with kni worker, no addr flow (overload dataplane) | 0.628/29.880/42.010/12.026 ms | 341 Mbits/sec | 29K packets/s | +| with kni worker, with addr flow (overload dataplane) | 0.544/2.139/3.554/0.406 ms | 4.53 Gbits/sec | 410K packets/s | + +*Notes: Overload dataplane is simulated by adding 1ms delay to each loop of forwarding workers.* + +We got the following conclusions from the test results. + +1. Dedicated KNI worker increases bandwidth of KNI interfaces. +2. KNI address flow protects KNI traffic from load disturbances of dataplane. 
+ diff --git a/doc/pics/ipset-arch.png b/doc/pics/ipset-arch.png new file mode 100644 index 000000000..93fdb5798 Binary files /dev/null and b/doc/pics/ipset-arch.png differ diff --git a/doc/pics/kni-flow-1.png b/doc/pics/kni-flow-1.png new file mode 100644 index 000000000..1be9f5a98 Binary files /dev/null and b/doc/pics/kni-flow-1.png differ diff --git a/doc/pics/kni-flow-2.png b/doc/pics/kni-flow-2.png new file mode 100644 index 000000000..eab192f47 Binary files /dev/null and b/doc/pics/kni-flow-2.png differ diff --git a/doc/pics/kni-flow.drawio b/doc/pics/kni-flow.drawio new file mode 100644 index 000000000..1bbe2f7f8 --- /dev/null +++ b/doc/pics/kni-flow.drawio @@ -0,0 +1 @@ +7Vtdc5s4FP01nj5tBwkE5tFxkjazu53OZHbaPHUwyEAjI6+QY3t//UpGMh/CDnXsmHrshwy6EgLdc8+RdEUG9ni2+sSCefI3jTAZQCtaDezbAYS+5Yq/0rAuDMiyCkPM0qgwgdLwmP6HlVE3W6QRzmsNOaWEp/O6MaRZhkNeswWM0WW92ZSS+lPnQYwNw2MYENP6LY14UliH0Cvtn3EaJ/rJwPWLmlmgG6uR5EkQ0WXFZN8N7DGjlBdXs9UYE+k77ZfivvsdtdsXYzjjXW6AyeLH6OYf72Hhj8Jo9PJ19uPbH0B1k/O1HjGOhANUkTKe0JhmAbkrrTeMLrIIy24tUSrb/EXpXBiBMP7EnK8VmsGCU2FK+IyoWvHGbP1d3b8pPMnCR6SLt6tq5e1alcwh69enCxbifeNUoROwGPM97VDRTvqg8gDl0E+YzrB4H9GAYRLw9KUeJIGKtXjbroRDXChEfgEdb/g7oTOlGVedAqDKY0oo27y5bW1+8q5Vyr+XPYrSk36yuC47l4X3Q95zewW96vclIAv1pC8PYyMa6lgvk5Tjx3mw8cdSyHEd150+fMGM49XeQetaR4mZUnPHtovysqKNqklSkUXbOpGboOGmP788yJsyjtlUeuLcLkPI7pfLfHgOUekqDocTHXUkug96RXRkRPA9XQYsSrN4AF0iRnIzYeIqlldLyp4xy98W1NOUkIrrp1PshqGw55zRZ1ypiTx/sh+U7jSArtsvGgydS5xbD6eP23We9HpFH3fHBFAw5Tckiu3Wp1gEWogCWohin2oRanjYJE4WjeReS5RCEuR5GtYdWff6Tie9GlIVF6AWrdC2zpGnnvCViuVCiUADAMfx6j0UVFI3VTdcjX6MxZLjftSrTN1XQTejrw1Q24G/Ya43sBPB/oy5OYOI6OR10OpRntEMNyihTAFJ40wij+WaSxhkrKdiBz1SFbM0ijbq2Ua3jqHRnT9uY7kFHYM+bkvswJOtUM2V/BYDmSQIEzF6aD1nqXx6FDGZpSCCS/1EiAQTTG7EAOKNvSKC95tffa4a7pmqcuGDMDneQttz6sAPTd0EVotwQnQq5G0D+YxePKxHQBIBVJ8CkWdACVs4DE7FYdvk8Br3VUP7BaXfWPb7cq17VjDR8ArmgUtTWJdYsTQ9N5iuycyrxnbBEup9XF80dniWtFR/9+Ne1w15v/LW/vXIYvDWI4vO0Pv9ysV4ZjKm2Naw1b8D+94IDJbQ2WSRv56I+TXl3ItkIfz6KPdIW49tlnLvxOi3pTfhqeQUnZeHJfWeqnVH4iGosLDk5NF56HXk
ITw2DTe3jhgL1pUGc5klyis9N/JNyK+nPhD0G2FU9HjUDBPQ33OcKdDA4BIEv2ug9U3wzdxwIfj8wgXf8V9P0r+r3GvqX9qhrj6sfX8JfhscZzlc7BEcPduUmFmCQqfUKeFFSxVo5MWhudEH+ivNqlY5pzpQ9M1ZI2IixA3GEJLO8125lQoEQT4vPv2cpivJn12Y6DRPAw7T7c2D32GI2w9+J0PkoGNl13QGRM8prrmFsL2WOcU7EU7Auh5gHAYlAn6dc7Z5CPmuyTVgOQaU16R3N/20G1h65lLvnbE0PxkrT5Tlty8RzT7IfWEoxn2FuIPyOg2IoQlx2zcDB0AsiuUX/8UevPy3Cfvufw==7Vxdl6I4EP01nn1qD0n4fOyPmdnZnd3xjDuzs097okRlGsVF7Nb+9ZtAQGLSii0B7Gl90ASDkFv3UlUU6aHb+eZDjJezPyKfhD1o+JseuutBCBCA9IP1bLMe27Gzjmkc+PxHu45h8ER4p8F714FPVsIPkygKk2Apdo6jxYKME6EPx3H0KP5sEoXivy7xlEgdwzEO5d6/Az+ZZb0udHb9v5JgOsv/GdhetmWO8x/zM1nNsB89lrrQux66jaMoyb7NN7ckZJOXz8vtj2vXHrsD//N/w9/ez58Wi+3Xq2xn708ZUpxCTBbJi3f9gP96/BR/e5r/O4KDDz8+f/nqfr/i5/qAwzWfL36uyTafwDhaL3zCdmL00M3jLEjIcInHbOsjNRnaN0vmIW0B+nUSLRJuA8ClbR+vZulYwBsDnCQkXqQ90IBsSBCGt1EYxenfId8irm/S/mmM/YCecGmb5/iG49BtqySO7klpiwtHyLbpFn4+JE7IZs8AjsweKCClXCDRnCTxlo7jezH5THEWmIgbxePOppy8b1ayJ2TzTszteFrseocV/cLhOgE6YEtQEZ+aPm8uogX9uBHRi+JkFk2jBQ4/RdGSw/KDJMmWg4bXSSQiKiBYhjdvl1C4dtlbhY+Rvgp82IG+AB16stE6HpMDv0NqFGMS4iR4EP9UBQkfOogCejgF+sBxBPgtdw/VBMdTkvBRe8AWh3EG1t4lYk0Rjbff+eGkjX/KjbuN0NrmrU2QpIP6JjJ5m40DfcMAvL0byhrlkQMSB3S2Scz7GrE3eklL8T/wu0xYzzDMqlpx6ChLMn9L0YwjNnAQYmo9dak+NDSKMEB7Kmy6kgoDF8oqbEJNEwusg8TccfDdrvd8nu6I1bdOoZYaJ33kMCuSw2qTG6bEja8rEq/qZcSekzOx2Fvt5IzSt0pY7fSl08mxgcAvZMpeTuEOl/nl1ODkqI9XdnJ+GvZYFdnjtMke4DTimbw2zLxWMXPfMHsBZsBoFbRmQoBXB9ozV7xmQLMk30IGMQyD5YocdynwapnlxybBhiFc2cd43o845H3o8jEsyxV9eFfhY9iyi5H31R8bHc6jqF348Tp+KCLl+kgHBMrtGFiRdLvQ2SoHzgeDZiHm181apyJrUVNSq8y2KEiqV2nhS6UWwHZAUyep2lRaR1LayaPPVCWK70l8JIjoSHoD5WHUfpaxnGRWRF+erugLHXbuL1MaYUe10bsIbURNa6NpnCSOaWs/Cdy0Yiqg7J5iyrf+Lk8xC+exM4p5OLR+U8w6aZaXGHRcMs1GJHNf414SC7QBWPeEMT/Ki1ZGy+uaMh7OX70pY61Eyyutuq2Mh2+eanAmnbMC7VJlgumVge8bhnUQ/DYqE1Q20EGxBa9AbB2jY2JrymFanQ5GPcnG9orE8prWo/KY1Y20xg0oceN+EfSgHdJjvhlRctjTJJ0uG8+Z6S9GK/ZxSdQBeWlOZ6gju+sXQJ2iGk87daoWvLVMHbniLecNA6PH6s3zybP/W0cZjRB22bvclTFsjlfswr27KmW7ooeW7S3n4SUwDlodYxyQK7C+DIfSZNJpSMQZw2EwZZXkYzodzK26YZMVjHF4zTfMA9/PKu7IKnjCo3RXbG6XrFA3PQ/rpmfdsX1R
0q64rycRjHui5RuseVdT4ZwpVmZZyOgbQMLNVuCGdOEGmylAr8N931NdZNkl3TX6nuue7L43Gs/BypmuNkUXyomTGcEhNU4qtzMyvq9XIPcfHMHEnYx7yqoGh2CbGKoLJz0JMppolVvx0YHigZByzbLqyRFLG28bD7uhaZeJe8W0yznC3mM3ci6j4AFWTsS0Slw5CL/5MKC7+jwcvP8ZSeuIzxl0gLOXVbLZkTgfVo3zQavBCmy+tNNEp8BbSYs7EqBWx7xdyZVzO7IR/GSloQhKQY4su4rSUFBDaai6ELDxCEdQXXCElpf2aG7VIAa2qsZIjmLemGmLNzgcFTMV/hDQlqWV5fP3Pz/Sjo8sFTRhcFxEMg7k6eZ8Yg0kT6zjyjOrb1kBaWK/0Bie9gxJ/PD2XGTadqFs/tqei1SiZCs0Skhv7wDKc9lsw1WW4rymPwBguZET3RS4e5JQkA1/+ZB+RGS1+CVNmNM5ZKcyitZJ1QS4nLOtmlitnttVWZxok+ryTw0qicRUj5OTW1BJSzaTOlK0ajLL17K7wTc5t175enZwfZdigRw5aOeoKmg9SV9sADW88axXzsvUD5HtihCZCEoQuZ6MkOnoQki++VE3k+eYzisFCRrZ3WPhXjE7J9+P6cckpNi98Vp1kTbER9YsoPJ+QIO8VmTac+hWS7x4udWQkKrAzgiynV2UEcRRQoOEiO0lrc7TZRTQdvpAdN4sA/Q9eUUShPoOVPnGfQPqsg/Zi6vHPoJJLiPFrXCDbIIVE5pXYjVsTReNWuJZfaP8QqIJmQr/HyikRav1KAJeHWmPc2/evDjloSMZeZCHR2swYVNFmOqHulTVMnsWMKV4L59lBV+9kZdY9Ipg6Fx3DfUtgR+upUoJKvhharv0NnMnpo47nRWs+4A5nGGMfGi+iN42lyxxRMYeac08afE9U8w97e9G89J7ynTg8UoynlWVLqA+TjDDIFt0zUhiPJkEY4VXLndJ7Qty1YXQUAway2FfVUWvdHU+ybbzOiu5pgqoEptQl7wgICHYVXk5tZCiSa2BXt8rvRwH1aI9x3arW4vkrPc5WjQu1oCU5ehNW7RpS25LqiSCItOvT2qeTz2dFySeU2m9l4AQze+1RJhIa2LC3buLbnpFqFjOSpiyqQGvyF+cYG20uVsEPZO63VLy6N3/ \ No newline at end of file diff --git a/doc/tc.md b/doc/tc.md index 086b0e177..9efef3cfe 100644 --- a/doc/tc.md +++ b/doc/tc.md @@ -10,14 +10,15 @@ DPVS Traffic Control (TC) - [Example 2. Traffic classification and flow control (Egress)](#example2) - [Example 3. Access control with TC (Ingress)](#example3) - [Example 4. Traffic policing for services (Ingress)](#example4) + - [Example 5. Dynamic allow/deny access list using TC ipset classifier](#example5) # Concepts -DPVS TC derives from [Linux Traffic Control](https://tldp.org/HOWTO/Traffic-Control-HOWTO/index.html), which encompasses the sets of mechanisms and operations by which packets are queued for transmission/reception on a network interface. The operations include enqueuing, policing, classifying, scheduling, shaping and dropping. 
+DPVS TC derives from [Linux Traffic Control](https://tldp.org/HOWTO/Traffic-Control-HOWTO/index.html), which encompasses the sets of mechanisms and operations by which packets are queued for transmission/reception on a network interface. The operations include enqueuing, policing, classifying, scheduling, shaping and dropping. -- Policing: the mechanism by which traffic can be limited. Policing is most frequently used on the network border to ensure that a peer is not consuming more than its allocated bandwidth. +- Policing: the mechanism by which traffic can be limited. Policing is most frequently used on the network border to ensure that a peer is not consuming more than its allocated bandwidth. - Classifying: the mechanism by which packets are separated for different treatment, possibly different output queues. - Scheduling: the mechanism by which packets are arranged (or rearranged) between input and output of a particular queue. - Shaping: the mechanism by which packets are delayed before transmission in an output queue to meet a desired output rate. @@ -106,7 +107,7 @@ dpip link set dpdk0 tc-ingress on # enable tc-ingress for dpdk0 You can verify if TC for dpdk0 is enabled by checking if "tc-egress" or "tc-ingress" flag exists in the output of the command `dpip link show dpdk0`. > It's safe to enable or disable TC of a device whenever you like, even if when TC is processing packets. - + **2. 
Add a root Qsch object.** ```bash @@ -132,7 +133,7 @@ dpip cls add dev dpdk0 qsch ingress match pattern 'icmp,iif=dpdk0' target 2: ``` # Check Qsch on dpdk0 -[root@dpvs-test]# dpip qsch show dev dpdk0 +[root@dpvs-test]# dpip qsch show dev dpdk0 qsch pfifo_fast root dev dpdk0 parent 0: flags 0x0 cls 1 bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1 qsch pfifo ingress dev dpdk0 parent 0: flags 0x1 cls 1 limit 65536 qsch bfifo 2: dev dpdk0 parent ingress flags 0x1 cls 0 limit 100000 @@ -584,7 +585,7 @@ Finally, disable `tc-ingress` for `dpdk0`, and ping from 2001::15, and succeed t ``` [root@dpvs-test]# dpip link set dpdk0 tc-ingress off -[root@client ~]# ping6 -c 3 2001::112 -m 1 -I 2001::15 +[root@client ~]# ping6 -c 3 2001::112 -m 1 -I 2001::15 PING 2001::112(2001::112) from 2001::15 : 56 data bytes 64 bytes from 2001::112: icmp_seq=1 ttl=64 time=0.178 ms 64 bytes from 2001::112: icmp_seq=2 ttl=64 time=0.054 ms @@ -680,9 +681,9 @@ IP Virtual Server version 0.0.0 (size=0) Prot LocalAddress:Port Scheduler Flags -> RemoteAddress:Port Forward Weight ActiveConn InActConn TCP 192.168.88.30:80 wlc - -> 192.168.88.215:80 FullNat 1 0 0 + -> 192.168.88.215:80 FullNat 1 0 0 TCP [2001::30]:8080 wlc - -> 192.168.88.215:80 FullNat 1 0 0 + -> 192.168.88.215:80 FullNat 1 0 0 ``` Next, we configure four client IP addresses on the Client to simulate four users. @@ -845,15 +846,15 @@ qsch tbf 2: dev dpdk0 parent ingress flags 0x1 cls 1 rate 200.00Mbps burst 20000 Firstly, change the route for network 2001::/64 on Client to use 2001::15 as the source IP address. 
``` -[root@client]# ip -6 route change 2001::/64 dev eth0 proto kernel src 2001::15 -[root@client]# ip -6 route show 2001::/64 +[root@client]# ip -6 route change 2001::/64 dev eth0 proto kernel src 2001::15 +[root@client]# ip -6 route show 2001::/64 2001::/64 dev eth0 proto kernel src 2001::15 metric 1024 ``` Then, try access service B with tool `curl`, ``` -[root@client]# curl -m 2 -g [2001::30]:8080 +[root@client]# curl -m 2 -g [2001::30]:8080 curl: (28) Connection timed out after 2001 milliseconds ``` and get *failed* no surprisingly. Request from 2001::15 is rejected by service B. @@ -861,10 +862,320 @@ and get *failed* no surprisingly. Request from 2001::15 is rejected by service B As a contrast, we turn off the `tc-ingress` switch of `dpdk0`, and redo the test. ``` -[root@dpvs-test]# dpip link set dpdk0 tc-ingress off +[root@dpvs-test]# dpip link set dpdk0 tc-ingress off [root@client]# curl -m 2 -g [2001::30]:8080 nginx 192.168.88.215 ``` As what we expect, request from 2001::15 is accepted by service B this time. + + + + +## Example 5. Dynamic allow/deny access list using TC ipset classifier + +This example builds an IPv4 allow access list and an IPv6 deny access list using tc ipset classifiers. We can modify the access lists dynamically without changing the tc classifiers, and gain performance advantages over the match type classifier. + +As the diagram below shows, the tc cls 0:1 uses ipset "allowset" of type hash:net, and matches against source address of IPv4 packets. A target network cidr which covers the whole test subject range should be added to the allowset firstly, and "nomatch" entries would be added later as the allow list member. On the other hand, the deny access list is much more straightforward. It uses tc cls 0:2 to classify IPv6 packets with ipset "denyset" of type hash:ip,port,net to match packets sent from specified network to target service ip:port, and the matched packets are dropped immediately. 
+ +``` +-+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+- + qsch ingress + (pfifo_fast) + | + ipv4 | ipv6 + ----------------------------- + | | + cls 0:1 | | cls 0:2 + match allowset,src | | match denyset,dst +(hash:net, "nomatch" entries)| | (hash:ip,port,net) + | | + Accept Reject +-+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+- +``` + +First, create the empty "allowset" and "denyset" ipsets. + +```bash +dpip ipset create allowset hash:net comment +dpip ipset -6 create denyset hash:ip,port,net +``` + +Then, create a pfifo_fast qsch object to get all received packets from port dpdk0. + +```bash +dpip qsch add dev dpdk0 ingress pfifo_fast +``` + +Next, create the qsch cls objects 0:1 and 0:2, which correspond to the IPv4 allowset and IPv6 denyset respectively. + +```bash +dpip cls add dev dpdk0 qsch ingress handle 0:1 pkttype ipv4 ipset match allowset target drop +dpip cls add dev dpdk0 qsch ingress handle 0:2 pkttype ipv6 ipset match denyset,dst target drop +``` +> Notes: +> 1. The target action of cls 0:1 is "drop" because the allow list is implemented by "nomatch" ipset entries. The idea is to add a large network that covers the whole test subjects and exclude the allowed ones with explicit nomatch ipset entries. +> 2. The "dst" in match configuration of cls 0:2 indicates that dst port number of packet is used in the denyset. In fact, the "ip", "port", "net" parts of denyset correspond to dst-ip, dst-port, src-ip in packets respectively. + +Finally, enable tc-ingress on port dpdk0. + +```bash +dpip link set dpdk0 tc-ingress on + +``` + +Check what we configured just now. 
+ +```bash +[root@dpvs-test]# dpip link show dpdk0 +1: dpdk0: socket 0 mtu 1500 rx-queue 8 tx-queue 8 + UP 10000 Mbps full-duplex auto-nego tc-ingress + addr A0:36:9F:74:EC:F0 OF_RX_IP_CSUM OF_TX_IP_CSUM OF_TX_TCP_CSUM OF_TX_UDP_CSUM +[root@dpvs-test]# dpip ipset list +Name: allowset +Type: hash:net +Header: family inet hashsize 1024 maxelem 65535 comment +Size in memory: 16384 +References: 1 +Number of entries: 0 +Members: + +Name: denyset +Type: hash:ip,port,net +Header: family inet6 hashsize 1024 maxelem 65535 +Size in memory: 16384 +References: 1 +Number of entries: 0 +Members: + +[root@dpvs-test]# dpip qsch show dev dpdk0 +qsch pfifo_fast ingress dev dpdk0 parent 0: flags 0x1 cls 2 bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1 +[root@dpvs-test]# dpip cls show dev dpdk0 qsch ingress +cls ipset 0:1 dev dpdk0 qsch ingress pkttype 0x0800 prio 0 ipset match allowset,src target drop +cls ipset 0:2 dev dpdk0 qsch ingress pkttype 0x86dd prio 0 ipset match denyset,dst target drop +``` + +Now let's begin the tests. + +**Test 1. IPv4 allow list test** + +Environment setup: + +- Add a test IP address 192.168.88.112/24 to DPVS. +- Add test IP addresses 192.168.88.15/24, 192.168.88.115/24 on Client to simulate two users. + +```bash +[root@dpvs-test]# ./bin/dpip addr add 192.168.88.112/24 dev dpdk0 +[root@client]# ip addr add 192.168.88.15/24 dev eth0 +[root@client]# ip addr add 192.168.88.115/24 dev eth0 +``` + +The allowset is empty now, so 192.168.88.112 is supposed to be accessible from both clients. + +```bash +[root@client]# ping -c 3 192.168.88.112 -I 192.168.88.15 +PING 192.168.88.112 (192.168.88.112) from 192.168.88.15 : 56(84) bytes of data. 
+64 bytes from 192.168.88.112: icmp_seq=1 ttl=64 time=0.129 ms +64 bytes from 192.168.88.112: icmp_seq=2 ttl=64 time=0.035 ms +64 bytes from 192.168.88.112: icmp_seq=3 ttl=64 time=0.036 ms + +--- 192.168.88.112 ping statistics --- +3 packets transmitted, 3 received, 0% packet loss, time 1999ms +rtt min/avg/max/mdev = 0.035/0.066/0.129/0.045 ms +[root@client]# ping -c 3 192.168.88.112 -I 192.168.88.115 +PING 192.168.88.112 (192.168.88.112) from 192.168.88.115 : 56(84) bytes of data. +64 bytes from 192.168.88.112: icmp_seq=1 ttl=64 time=0.078 ms +64 bytes from 192.168.88.112: icmp_seq=2 ttl=64 time=0.033 ms +64 bytes from 192.168.88.112: icmp_seq=3 ttl=64 time=0.031 ms + +--- 192.168.88.112 ping statistics --- +3 packets transmitted, 3 received, 0% packet loss, time 2002ms +rtt min/avg/max/mdev = 0.031/0.047/0.078/0.022 ms +``` + +Suppose all clients are from network 192.168.0.0/16, we can block all clients access to DPVS by adding the network cidr to allowset. + +```bash +[root@dpvs-test]# ./bin/dpip ipset add allowset 192.168.0.0/16 comment "target subjects" +[root@dpvs-test]# ./bin/dpip ipset list allowset +Name: allowset +Type: hash:net +Header: family inet hashsize 1024 maxelem 65535 comment +Size in memory: 16536 +References: 1 +Number of entries: 1 +Members: +192.168.0.0/16 comment "target subjects" +``` + +Check the two clients again, and find both cannot ping DPVS now. + +```bash +[root@client]# ping -c 3 192.168.88.112 -I 192.168.88.15 +PING 192.168.88.112 (192.168.88.112) from 192.168.88.15 : 56(84) bytes of data. + +--- 192.168.88.112 ping statistics --- +3 packets transmitted, 0 received, 100% packet loss, time 1999ms + +[root@client]# ping -c 3 192.168.88.112 -I 192.168.88.115 +PING 192.168.88.112 (192.168.88.112) from 192.168.88.115 : 56(84) bytes of data. + +--- 192.168.88.112 ping statistics --- +3 packets transmitted, 0 received, 100% packet loss, time 2003ms + +``` + +Actually, all clients from network 192.168.0.0/16 are rejected by DPVS. 
Then we can allow some clients by adding them to the allowset with the nomatch option. For example, the following command allows client 192.168.88.115 to access DPVS. + +```bash +[root@dpvs-test]# ./bin/dpip ipset add allowset 192.168.88.115/32 nomatch +[root@dpvs-test]# ./bin/dpip ipset list allowset +Name: allowset +Type: hash:net +Header: family inet hashsize 1024 maxelem 65535 comment +Size in memory: 16688 +References: 1 +Number of entries: 2 +Members: +192.168.0.0/16 comment "target subjects" +192.168.88.115/32 nomatch +``` +As expected, ping DPVS 192.168.88.112 from client 192.168.88.115 is OK, while client 192.168.88.15 is still rejected. + +```bash +[root@client]# ping -c 3 192.168.88.112 -I 192.168.88.115 +PING 192.168.88.112 (192.168.88.112) from 192.168.88.115 : 56(84) bytes of data. +64 bytes from 192.168.88.112: icmp_seq=1 ttl=64 time=0.103 ms +64 bytes from 192.168.88.112: icmp_seq=2 ttl=64 time=0.036 ms +64 bytes from 192.168.88.112: icmp_seq=3 ttl=64 time=0.032 ms + +--- 192.168.88.112 ping statistics --- +3 packets transmitted, 3 received, 0% packet loss, time 2002ms +rtt min/avg/max/mdev = 0.032/0.057/0.103/0.032 ms +``` + +Thanks to the hash:net ipset type, it's also possible to add network members. For example, the command below adds the whole subnet 192.168.88.0/28 to allowset. + +```bash +[root@dpvs-test]# ./bin/dpip ipset add allowset 192.168.88.0/28 nomatch +[root@dpvs-test]# ./bin/dpip ipset show allowset +Name: allowset +Type: hash:net +Header: family inet hashsize 1024 maxelem 65535 comment +Size in memory: 16840 +References: 1 +Number of entries: 3 +Members: +192.168.0.0/16 comment "target subjects" +192.168.88.0/28 nomatch +192.168.88.115/32 nomatch +``` +Not surprisingly, all clients from subnet 192.168.88.0/28 can ping DPVS now. + + +```bash +[root@client]# ping -c 3 192.168.88.112 -I 192.168.88.15 +PING 192.168.88.112 (192.168.88.112) from 192.168.88.15 : 56(84) bytes of data. 
+64 bytes from 192.168.88.112: icmp_seq=1 ttl=64 time=0.102 ms +64 bytes from 192.168.88.112: icmp_seq=2 ttl=64 time=0.036 ms +64 bytes from 192.168.88.112: icmp_seq=3 ttl=64 time=0.039 ms + +--- 192.168.88.112 ping statistics --- +3 packets transmitted, 3 received, 0% packet loss, time 1999ms +rtt min/avg/max/mdev = 0.036/0.059/0.102/0.030 ms +``` + +**Test 2. IPv6 deny list test** + +Environment setup: + +- Flush the allowset in Test 1, or fullnat outbound packets to local IP in this test would be blocked. +- Add a test IP address 192.168.88.112/24 to DPVS (already done in Test 1). +- Add test IP addresses 192.168.88.15/24, 192.168.88.115/24 on Client to simulate two users (already done in Test 1). +- Set up two fullnat services -- [2001::1]:80, [2001::1]:8080 + +```bash +[root@dpvs-test]# ./bin/dpip ipset flush allowset +[root@dpvs-test]# #dpip addr add 192.168.88.112/24 dev dpdk0 +[root@dpvs-test]# dpip -6 addr add 2001::1/64 dev dpdk0 +[root@dpvs-test]# ipvsadm -At [2001::1]:80 -s wrr +[root@dpvs-test]# ipvsadm -at [2001::1]:80 -r 192.168.88.215:80 -b +[root@dpvs-test]# ipvsadm -Pt [2001::1]:80 -z 192.168.88.241 -F dpdk0 +[root@dpvs-test]# ipvsadm -At [2001::1]:8080 -s wrr +[root@dpvs-test]# ipvsadm -at [2001::1]:8080 -r 192.168.88.215:80 -b +[root@dpvs-test]# ipvsadm -Pt [2001::1]:8080 -z 192.168.88.241 -F dpdk0 +[root@client]# #ip addr add 192.168.88.15/24 dev eth0 +[root@client]# #ip addr add 192.168.88.115/24 dev eth0 +[root@dpvs-test]# ./bin/dpip addr show +inet 192.168.88.112/24 scope global dpdk0 + valid_lft forever preferred_lft forever +inet6 2001::1/64 scope global dpdk0 + valid_lft forever preferred_lft forever +inet 192.168.88.241/32 scope global dpdk0 + valid_lft forever preferred_lft forever +[root@dpvs-test]# ./bin/dpip -6 route show +inet6 2001::1/128 dev dpdk0 mtu 1500 scope host +inet6 2001::/64 src 2001::1 dev dpdk0 mtu 1500 scope link +[root@dpvs-test]# ./bin/ipvsadm -ln +IP Virtual Server version 0.0.0 (size=0) +Prot LocalAddress:Port 
Scheduler Flags + -> RemoteAddress:Port Forward Weight ActiveConn InActConn +TCP [2001::1]:80 wrr + -> 192.168.88.215:80 FullNat 1 0 0 +TCP [2001::1]:8080 wrr + -> 192.168.88.215:80 FullNat 1 0 0 +``` + +The denyset is empty, so both fullnat services can be accessed by both clients. Note that we can switch between the two simulated clients by changing the route source. + +```bash +[root@client]# ip route change 2001::1/128 dev eth0 src 2001::15 +[root@client]# ip -6 route show 2001::1 +2001::1 dev eth0 src 2001::15 metric 1024 +[root@client]# curl -g http://[2001::1]:80/ +nginx 192.168.88.215 +[root@client]# curl -g http://[2001::1]:8080/ +nginx 192.168.88.215 +[root@client]# ip route change 2001::1/128 dev eth0 src 2001::1:15 +[root@client]# ip -6 route show 2001::1 +2001::1 dev eth0 src 2001::1:15 metric 1024 +[root@client]# curl -g http://[2001::1]:80/ +nginx 192.168.88.215 +[root@client]# curl -g http://[2001::1]:8080/ +nginx 192.168.88.215 +``` + +As shown above, http requests from clients 2001::15 and 2001::1:15 to services [2001::1]:80 and [2001::1]:8080 are allowed and answered by default. + +Now let's add a deny member to the denyset, which blocks clients in subnet 2001::1:0/112 from requesting service [2001::1]:80. + +```bash +[root@dpvs-test]# ./bin/dpip ipset add denyset 2001::1,tcp:80,2001::1:0/112 +[root@dpvs-test]# ./bin/dpip ipset list denyset +Name: denyset +Type: hash:ip,port,net +Header: family inet6 hashsize 1024 maxelem 65535 +Size in memory: 16536 +References: 1 +Number of entries: 1 +Members: +2001::1,tcp:80,2001::1:0/112 +``` + +Do the previous tests again, and we can see that the http request from client 2001::1:15 to service [2001::1]:80 is refused, but the request to service [2001::1]:8080 is still OK, while http requests from client 2001::15 to both services also remain OK. 
+ +```bash +[root@client]# ip route change 2001::1/128 dev eth0 src 2001::15 +[root@client]# ip -6 route show 2001::1 +2001::1 dev eth0 src 2001::15 metric 1024 +[root@client]# curl -g http://[2001::1]:80/ +nginx 192.168.88.215 +[root@client]# curl -g http://[2001::1]:8080/ +nginx 192.168.88.215 +[root@client]# ip route change 2001::1/128 dev eth0 src 2001::1:15 +[root@client]# curl -g http://[2001::1]:80/ +curl: (7) Failed connect to 2001::1:80; Connection timed out +[root@client]# curl -g http://[2001::1]:8080/ +nginx 192.168.88.215 +``` diff --git a/include/conf/ipset.h b/include/conf/ipset.h index 1ae1a639f..7a080a226 100644 --- a/include/conf/ipset.h +++ b/include/conf/ipset.h @@ -22,21 +22,104 @@ #ifndef __DPVS_IPSET_CONF_H__ #define __DPVS_IPSET_CONF_H__ +#include +#include "conf/inet.h" #include "conf/sockopts.h" -struct dp_vs_ipset_conf { - int af; - union inet_addr addr; +#define IPSET_MAXNAMELEN 32 +#define IPSET_MAXCOMLEN 32 + +#define IPSET_F_FORCE 0x0001 + +enum ipset_op { + IPSET_OP_ADD, + IPSET_OP_DEL, + IPSET_OP_TEST, + IPSET_OP_CREATE, + IPSET_OP_DESTROY, + IPSET_OP_FLUSH, + IPSET_OP_LIST, + IPSET_OP_MAX +}; + +struct ipset_option { + int family; + union { + struct { + bool comment; + int hashsize; + int maxelem; + } create; + struct { + bool nomatch; + } add; + }; +}; + +struct ipset_param { + char type[IPSET_MAXNAMELEN]; + char name[IPSET_MAXNAMELEN]; + char comment[IPSET_MAXCOMLEN]; + int opcode; + struct ipset_option option; + uint16_t flag; + + uint8_t proto; + uint8_t cidr; + struct inet_addr_range range; /* port in host byteorder */ + uint8_t mac[6]; + char iface[IFNAMSIZ]; + + /* for type with 2 nets */ + uint8_t cidr2; + struct inet_addr_range range2; + //uint8_t mac[2]; +}; + +struct ipset_member { + char comment[IPSET_MAXCOMLEN]; + + union inet_addr addr; + uint8_t cidr; + uint8_t proto; + uint16_t port; + uint8_t mac[6]; + char iface[IFNAMSIZ]; + bool nomatch; + + /* second net */ + union inet_addr addr2; + uint8_t cidr2; + uint16_t 
port2; }; -struct dp_vs_multi_ipset_conf { - int num; - struct dp_vs_ipset_conf ipset_conf[0]; +struct ipset_info { + char name[IPSET_MAXNAMELEN]; + char type[IPSET_MAXNAMELEN]; + bool comment; + + union { + struct ipset_bitmap_header { + struct inet_addr_range range; + uint8_t cidr; + } bitmap; + struct ipset_hash_header { + int hashsize; + int maxelem; + } hash; + }; + + int af; + size_t size; + int entries; + int references; + + void *members; }; -struct dp_vs_ipset_conf_array { - int nipset; - struct dp_vs_ipset_conf ips[0]; +struct ipset_info_array { + int nipset; + struct ipset_info infos[0]; } __attribute__((__packed__)); #endif /* __DPVS_IPSET_CONF_H__ */ diff --git a/include/conf/kni.h b/include/conf/kni.h new file mode 100644 index 000000000..88565ef02 --- /dev/null +++ b/include/conf/kni.h @@ -0,0 +1,51 @@ +/* + * DPVS is a software load balancer (Virtual Server) based on DPDK. + * + * Copyright (C) 2021 iQIYI (www.iqiyi.com). + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ +#ifndef __DPVS_KNI_CONF_H__ +#define __DPVS_KNI_CONF_H__ + +#include +#include "conf/inet.h" + +enum kni_data_type { + KNI_DTYPE_ADDR_FLOW = 1, +}; + +struct kni_addr_flow_entry { + int af; + union inet_addr addr; +}; + +struct kni_addr_flow_info { + int nentries; + struct kni_addr_flow_entry entries[0]; +} __attribute__((__packed__)); + +struct kni_conf_param { + enum kni_data_type type; + char ifname[IFNAMSIZ]; + union { + struct kni_addr_flow_entry flow; + } data; +} __attribute__((__packed__)); + +struct kni_info { + int len; + struct kni_conf_param entries[0]; +} __attribute__((__packed__)); + +#endif /* __DPVS_KNI_CONF_H__ */ diff --git a/include/conf/neigh.h b/include/conf/neigh.h index 618cad2a7..bb70c7363 100644 --- a/include/conf/neigh.h +++ b/include/conf/neigh.h @@ -45,7 +45,7 @@ struct dp_vs_neigh_conf { char ifname[IFNAMSIZ]; uint8_t flag; uint8_t cid; -}__attribute__((__packed__)); +}__attribute__((__packed__, aligned(2))); struct dp_vs_neigh_conf_array { int neigh_nums; diff --git a/include/conf/sockopts.h b/include/conf/sockopts.h index 487af7356..3313b9937 100644 --- a/include/conf/sockopts.h +++ b/include/conf/sockopts.h @@ -123,11 +123,16 @@ enum { SOCKOPT_SET_WHTLST_FLUSH, SOCKOPT_GET_WHTLST_GETALL = 1300, + /* kni */ + SOCKOPT_SET_KNI_ADD = 1400, + SOCKOPT_SET_KNI_DEL, + SOCKOPT_SET_KNI_FLUSH, + SOCKOPT_GET_KNI_LIST = 1400, + /* ipset */ - SOCKOPT_SET_IPSET_ADD = 3300, - SOCKOPT_SET_IPSET_DEL, - SOCKOPT_SET_IPSET_FLUSH, - SOCKOPT_GET_IPSET_SHOW = 3300, + SOCKOPT_SET_IPSET = 3300, + SOCKOPT_GET_IPSET_TEST = 3300, + SOCKOPT_GET_IPSET_LIST, /* route6 */ SOCKOPT_SET_ROUTE6_ADD_DEL = 6300, diff --git a/include/conf/tc.h b/include/conf/tc.h index d9a4981fd..38f17bd7f 100644 --- a/include/conf/tc.h +++ b/include/conf/tc.h @@ -74,6 +74,7 @@ struct tc_cls_param { union { struct tc_cls_match_copt match; + struct tc_cls_ipset_copt set; } copt; } __attribute__((__packed__)); diff --git a/include/ctrl.h b/include/ctrl.h index 72f7df02a..98f652395 
100644 --- a/include/ctrl.h +++ b/include/ctrl.h @@ -200,9 +200,6 @@ int msg_dump(const struct dpvs_msg *msg, char *buf, int len); #define MSG_TYPE_IPV6_STATS 16 #define MSG_TYPE_ROUTE6 17 #define MSG_TYPE_NEIGH_GET 18 -#define MSG_TYPE_IPSET_ADD 19 -#define MSG_TYPE_IPSET_DEL 20 -#define MSG_TYPE_IPSET_FLUSH 21 #define MSG_TYPE_IFA_GET 22 #define MSG_TYPE_IFA_SET 23 #define MSG_TYPE_IFA_SYNC 24 @@ -212,6 +209,7 @@ int msg_dump(const struct dpvs_msg *msg, char *buf, int len); #define MSG_TYPE_TC_QSCH_SET 28 #define MSG_TYPE_TC_CLS_GET 29 #define MSG_TYPE_TC_CLS_SET 30 +#define MSG_TYPE_IPSET_SET 40 #define MSG_TYPE_IPVS_RANGE_START 100 /* for svc per_core, refer to service.h*/ diff --git a/include/dpdk.h b/include/dpdk.h index 2fdcd418d..feb94f6e1 100644 --- a/include/dpdk.h +++ b/include/dpdk.h @@ -63,13 +63,15 @@ #include #endif +#ifdef CONFIG_DPVS_LOG #ifdef RTE_LOG -extern int dpvs_log(uint32_t level, uint32_t logtype, const char *func, int line, const char *format, ...); +extern int dpvs_log(uint32_t level, uint32_t logtype, const char *func, int line, + const char *format, ...) __rte_format_printf(5, 6); #undef RTE_LOG #define RTE_LOG(l, t, ...) \ dpvs_log(RTE_LOG_ ## l, \ RTE_LOGTYPE_ ## t, __func__, __LINE__, # t ": " __VA_ARGS__) #endif - +#endif #endif /* __DPVS_DPDK_H__ */ diff --git a/include/ipset.h b/include/ipset.h deleted file mode 100644 index 77e103cae..000000000 --- a/include/ipset.h +++ /dev/null @@ -1,58 +0,0 @@ -/* - * DPVS is a software load balancer (Virtual Server) based on DPDK. - * - * Copyright (C) 2021 iQIYI (www.iqiyi.com). - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - */ -#ifndef __DPVS_IPSET_H__ -#define __DPVS_IPSET_H__ - -#include -#include -#include - -#include "list.h" -#include "netif.h" -#include "conf/common.h" -#include "flow.h" - -#define RTE_LOGTYPE_IPSET RTE_LOGTYPE_USER1 - -#define IPSET_CFG_FILE_NAME "/etc/gfwip.conf" -#define IPSET_CFG_MEMBERS "members:" - -struct ipset_addr { - int af; - union inet_addr addr; -}; - -struct ipset_entry { - struct list_head list; - struct ipset_addr daddr; -}; - -int ipset_init(void); -int ipset_add(int af, union inet_addr *dest); -int ipset_del(int af, union inet_addr *dest); -int ipset_term(void); - -struct ipset_entry *ipset_addr_lookup(int af, union inet_addr *dest); - -#ifdef CONFIG_DPVS_IPSET_DEBUG -int ipset_list(void); -int ipset_test(void); -#endif - - -#endif diff --git a/include/ipset/bitops.h b/include/ipset/bitops.h new file mode 100644 index 000000000..05f2e4bf2 --- /dev/null +++ b/include/ipset/bitops.h @@ -0,0 +1,75 @@ +/* + * DPVS is a software load balancer (Virtual Server) based on DPDK. + * + * Copyright (C) 2021 iQIYI (www.iqiyi.com). + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ +#ifndef __DPVS_IPSET_BITOPS_H__ +#define __DPVS_IPSET_BITOPS_H__ + +#include +#include +#include + +/* Defines */ +#define BIT_PER_LONG (CHAR_BIT * sizeof(unsigned long)) +#define BIT_MASK(idx) (1UL << ((idx) % BIT_PER_LONG)) +#define BIT_WORD(idx) ((idx) / BIT_PER_LONG) + +#define DIV_ROUND_UP(x,y) (((x) + (y) - 1) / (y)) +#define BITS_TO_LONGS(n) DIV_ROUND_UP(n, BIT_PER_LONG) + +/* Helpers */ +static inline void set_bit(unsigned idx, unsigned long *bmap) +{ + bmap[BIT_WORD(idx)] |= BIT_MASK(idx); +} + +static inline void clear_bit(unsigned idx, unsigned long *bmap) +{ + bmap[BIT_WORD(idx)] &= ~BIT_MASK(idx); +} + +static inline bool test_bit(unsigned idx, const unsigned long *bmap) +{ + return !!(bmap[BIT_WORD(idx)] & BIT_MASK(idx)); +} + +static inline bool test_and_set_bit(unsigned idx, unsigned long *bmap) +{ + if (test_bit(idx, bmap)) + return true; + + set_bit(idx, bmap); + + return false; +} + +static inline bool test_and_clear_bit(unsigned idx, unsigned long *bmap) +{ + if (test_bit(idx, bmap)) { + clear_bit(idx, bmap); + return true; + } + + return false; +} + +static inline void bitmap_zero(unsigned long *dst, unsigned int nbits) +{ + unsigned int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long); + memset(dst, 0, len); +} + +#endif diff --git a/include/ipset/ipset.h b/include/ipset/ipset.h new file mode 100644 index 000000000..68ded707a --- /dev/null +++ b/include/ipset/ipset.h @@ -0,0 +1,154 @@ +/* + * DPVS is a software load balancer (Virtual Server) based on DPDK. + * + * Copyright (C) 2021 iQIYI (www.iqiyi.com). + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ +#ifndef __DPVS_IPSET_H__ +#define __DPVS_IPSET_H__ + +#include +#include +#include "list.h" +#include "netif.h" +#include "dpdk.h" +#include "conf/common.h" +#include "conf/ipset.h" +#include "ipvs/ipvs.h" + +#define IPSET +#define RTE_LOGTYPE_IPSET RTE_LOGTYPE_USER1 + +#define IPSET_ADT_MAX 3 + +struct ipset; + +struct bitmap_elem; +struct bitmap_map; + +/* add/del/test func prototype for ipset */ +typedef int (*ipset_adtfn)(struct ipset *set, void *value, uint16_t flag); + +struct ipset_type { + struct list_head l; + + char name[IPSET_MAXNAMELEN]; + + /* Create a set */ + int (*create)(struct ipset *set, struct ipset_param *param); + /* Destroy the set */ + void (*destroy)(struct ipset *set); + /* Flush the elements */ + void (*flush)(struct ipset *set); + /* List elements */ + void (*list)(struct ipset *set, struct ipset_info *info); + /* Low level test/add/del functions */ + ipset_adtfn *adtfn; +}; + +/* functions that are determined when the set is being created */ +struct ipset_type_variant { + /* test/add/del entries called by dpip */ + int (*adt)(int opcode, struct ipset *set, struct ipset_param *param); + /* Internal test function */ + int (*test)(struct ipset *set, struct rte_mbuf *mbuf, bool dst_match); + /* Basic functions that each ipset type should implement partially */ + union { + struct { + int (*do_del)(struct bitmap_elem *e, struct bitmap_map *map); + int (*do_test)(struct bitmap_elem *e, struct bitmap_map *map, size_t dsize); + void (*do_list)(struct ipset *set, struct ipset_bitmap_header *header, + struct ipset_member *members); + } bitmap; + struct { + /* Type that contains 'net' element must implement */ + void (*do_netmask)(void *elem, uint8_t cidr, bool inner); + int 
(*do_compare)(const void *adt_elem, const void *set_elem); + void (*do_list)(struct ipset_member *members, void *elem, bool comment); + uint32_t (*do_hash)(void *data, int len, uint32_t mask); + } hash; + }; +}; + +struct ipset { + struct list_head list; + + char name[IPSET_MAXNAMELEN]; + struct ipset_type *type; // Set type + struct ipset_type_variant *variant; // Type specific functions + + uint32_t elements; // Number of elements of this set + size_t dsize; // Size of each element + int hash_len; // Length of hash data + int family; // Address family + int net_count; // Number of net elements(<= 2) + int references; // Reference count + bool comment; // Is comment enabled + void *data; // Type specific data +}; + +/* IPset APIs */ + +/* + * Function name : ipset_get + * Description : Get the set pointer by name + * Parameter : + * @name name of the set + * Return : pointer to the set - success + * NULL - fail + */ +struct ipset *ipset_get(const char *name); + +/* + * Function name : ipset_put + * Description : Put back the set + * Parameter : + * @set pointer to the IPset + */ +static inline void +ipset_put(struct ipset *set) +{ + set->references--; +} + +/* + * Function name : elem_in_set + * Description : Judge if element 'mbuf' is in the set + * Parameter : + * @set pointer to the IPset + * @mbuf pointer to the mbuf + * @dst_match true if to match dst addr/port in mbuf, otherwise false + * Return : 1 - in set + * 0 - NOT in set + */ +static inline int +elem_in_set(struct ipset *set, struct rte_mbuf *mbuf, bool dst_match) +{ + assert(set->variant->test); + + return set->variant->test(set, mbuf, dst_match); +} + +int ipset_ctrl_init(void); +int ipset_ctrl_term(void); + +int ipset_hash_init(void); + +int ipset_init(void); +int ipset_term(void); + +int ipset_local_action(struct ipset_param * param); +int ipset_do_list(const void *conf, void **out, size_t *outsize); + +#endif diff --git a/include/ipset/ipset_bitmap.h b/include/ipset/ipset_bitmap.h new file mode 
100644 index 000000000..3636eb932 --- /dev/null +++ b/include/ipset/ipset_bitmap.h @@ -0,0 +1,47 @@ +/* + * DPVS is a software load balancer (Virtual Server) based on DPDK. + * + * Copyright (C) 2021 iQIYI (www.iqiyi.com). + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ +#ifndef __DPVS_IPSET_BITMAP_H__ +#define __DPVS_IPSET_BITMAP_H__ + +#include +#include +#include "ipset.h" + +#define get_elem(extensions, id, dsize) \ + (void *)(extensions + (id) * (dsize)) + +/* each bitmap type should follow this order */ +struct bitmap_map { + size_t size; + uint32_t elements; + unsigned long *members; + unsigned char *extensions; +}; + +/* common bitmap element definition */ +struct bitmap_elem { + uint32_t id; +}; + +extern ipset_adtfn bitmap_adtfn[IPSET_ADT_MAX]; + +void bitmap_flush(struct ipset *set); +void bitmap_destroy(struct ipset *set); +void bitmap_list(struct ipset *set, struct ipset_info *data); + +#endif diff --git a/include/ipset/ipset_hash.h b/include/ipset/ipset_hash.h new file mode 100644 index 000000000..022b5efac --- /dev/null +++ b/include/ipset/ipset_hash.h @@ -0,0 +1,69 @@ +/* + * DPVS is a software load balancer (Virtual Server) based on DPDK. + * + * Copyright (C) 2021 iQIYI (www.iqiyi.com). + * All Rights Reserved.
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ +#ifndef __DPVS_IPSET_HASH_H__ +#define __DPVS_IPSET_HASH_H__ + +#include +#include "ipset.h" +#include "linux_ipv6.h" + +/* return value for hash.do_compare */ +enum HASH_COMPARE_RESULT { + COMPARE_INEQUAL = 0, + COMPARE_EQUAL_ACCEPT, + COMPARE_EQUAL_REJECT, +}; + +struct hash_type { + struct list_head *htable; /* the hash table */ + uint32_t hashsize; /* size of the hash table */ + uint32_t mask; /* mask of the hash size */ + uint32_t maxelem; /* max elements in the hash */ + uint32_t initval; /* random jhash init value */ + uint32_t cidr_map[129][2]; /* cidr map */ +}; + +struct hash_entry { + struct list_head list; /* list node */ + + void *elem; /* type specific data */ +}; + +extern ipset_adtfn hash_adtfn[IPSET_ADT_MAX]; + +void install_ipset_hash_keywords(void); + +/* common hash type functions */ +int hash_create(struct ipset *set, struct ipset_param *param); +void hash_flush(struct ipset *set); +void hash_destroy(struct ipset *set); +void hash_list(struct ipset *set, struct ipset_info *info); + +void hash_data_netmask4(void *elem, uint8_t cidr, bool inner); +void hash_data_netmask6(void *elem, uint8_t cidr, bool inner); +uint32_t jhash_hashkey(void *data, int len, uint32_t mask); + +static inline int hash_proto_support(uint16_t proto) +{ + return proto == IPPROTO_TCP || + proto == IPPROTO_UDP || + proto == IPPROTO_ICMP || + proto == IPPROTO_ICMPV6; +} + +#endif diff --git a/include/ipset/pfxlen.h b/include/ipset/pfxlen.h new file mode 100644 
index 000000000..7b4b45995 --- /dev/null +++ b/include/ipset/pfxlen.h @@ -0,0 +1,73 @@ +/* + * DPVS is a software load balancer (Virtual Server) based on DPDK. + * + * Copyright (C) 2021 iQIYI (www.iqiyi.com). + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ +#ifndef __DPVS_IPSET_PFXLEN_H__ +#define __DPVS_IPSET_PFXLEN_H__ + +#include +#include +#include +#include "conf/inet.h" + +/* Prefixlen maps, by Jan Engelhardt */ +extern const union nf_inet_addr ip_set_netmask_map[]; +extern const union nf_inet_addr ip_set_hostmask_map[]; + +static inline __be32 +ip_set_netmask(__u8 pfxlen) +{ + return ip_set_netmask_map[pfxlen].ip; +} + +static inline const __be32 * +ip_set_netmask6(__u8 pfxlen) +{ + return &ip_set_netmask_map[pfxlen].ip6[0]; +} + +static inline __u32 +ip_set_hostmask(__u8 pfxlen) +{ + return (__u32) ip_set_hostmask_map[pfxlen].ip; +} + +static inline const __be32 * +ip_set_hostmask6(__u8 pfxlen) +{ + return &ip_set_hostmask_map[pfxlen].ip6[0]; +} + +extern __u32 ip_set_range_to_cidr(__u32 from, __u32 to, __u8 *cidr); + +#define ip_set_mask_from_to(from, to, cidr) \ +do { \ + from &= ip_set_hostmask(cidr); \ + to = from | ~ip_set_hostmask(cidr); \ +} while (0) + +static inline void +ip6_netmask(union inet_addr *ip, __u8 prefix) +{ + __be32 *ip6 = ip->in6.__in6_u.__u6_addr32; + + ip6[0] &= ip_set_netmask6(prefix)[0]; + ip6[1] &= ip_set_netmask6(prefix)[1]; + ip6[2] &= ip_set_netmask6(prefix)[2]; + ip6[3] &= ip_set_netmask6(prefix)[3]; +} + +#endif diff --git 
a/include/ipvs/conn.h b/include/ipvs/conn.h index b80acabef..3aee76b00 100644 --- a/include/ipvs/conn.h +++ b/include/ipvs/conn.h @@ -131,8 +131,10 @@ struct dp_vs_conn { union inet_addr in_nexthop; /* to rs*/ union inet_addr out_nexthop; /* to client*/ +#ifdef CONFIG_DPVS_IPVS_STATS_DEBUG /* statistics */ struct dp_vs_conn_stats stats; +#endif /* synproxy related members */ struct dp_vs_seq syn_proxy_seq; /* seq used in synproxy */ @@ -154,7 +156,9 @@ struct dp_vs_conn { /* controll members */ struct dp_vs_conn *control; /* master who controlls me */ rte_atomic32_t n_control; /* number of connections controlled by me*/ +#ifdef CONFIG_DPVS_IPVS_STATS_DEBUG uint64_t ctime; /* create time */ +#endif /* connection redirect in fnat/snat/nat modes */ struct dp_vs_redirect *redirect; diff --git a/include/ipvs/service.h b/include/ipvs/service.h index 8c1adeb14..0edba60e7 100644 --- a/include/ipvs/service.h +++ b/include/ipvs/service.h @@ -31,7 +31,7 @@ #include "conf/match.h" #include "conf/service.h" -#define RTE_LOGTYPE_SERVICE RTE_LOGTYPE_USER3 +#define RTE_LOGTYPE_SERVICE RTE_LOGTYPE_USER1 /* DP_VS_SVC_F_XXX should always be the same with IP_VS_SVC_F_XXX */ #define DP_VS_SVC_F_PERSISTENT IP_VS_SVC_F_PERSISTENT diff --git a/include/kni.h b/include/kni.h index d9fcd95d0..cab329cb3 100644 --- a/include/kni.h +++ b/include/kni.h @@ -28,6 +28,19 @@ #define __DPVS_KNI_H__ #include #include "netif.h" +#include "netif_flow.h" + +#define MAX_KNI_FLOW 2 + +struct kni_addr_flow { + struct list_head node; + int af; + int nflows; + lcoreid_t kni_worker; + struct netif_port *dev; + union inet_addr addr; + struct netif_flow_handler flows[MAX_KNI_FLOW]; +}; /* * @dev - real device kni attach to. 
@@ -37,6 +50,8 @@ int kni_add_dev(struct netif_port *dev, const char *kniname); int kni_del_dev(struct netif_port *dev); int kni_init(void); +int kni_ctrl_init(void); +int kni_ctrl_term(void); static inline bool kni_dev_exist(const struct netif_port *dev) { diff --git a/include/log.h b/include/log.h index cfac74fd2..6e4d5a4ae 100644 --- a/include/log.h +++ b/include/log.h @@ -18,34 +18,17 @@ #ifndef _DPVS_LOG_H_ #define _DPVS_LOG_H_ -#define DPVS_LOG_RING_SIZE_DEF 4096 -#define DPVS_LOG_RING_SIZE_MIN 256 -#define DPVS_LOG_RING_SIZE_MAX 524288 - - -#define TIMEZONE 0 -#define DAY (60*60*24) -#define YEARFIRST 2001 -#define YEARSTART (365*(YEARFIRST-1970) + 8) -#define YEAR400 (365*4*100 + (4*(100/4 - 1) + 1)) -#define YEAR100 (365*100 + (100/4 - 1)) -#define YEAR004 (365*4 + 1) -#define YEAR001 365 - #define LOG_SYS_TIME_LEN 20 - -#define LOG_INTERNAL_TIME 5 - -#define LOG_SLOW_INTERNAL_TIME (60*10) - #define DPVS_LOG_MAX_LINE_LEN 1024 - #define LOG_BUF_MAX_LEN 4096 -#define DPVS_LOG_POOL_SIZE_DEF 2097151 -#define DPVS_LOG_POOL_SIZE_MIN 65536 -#define DPVS_LOG_CACHE_SIZE_DEF 256 +#define DPVS_LOG_POOL_SIZE_DEF 16383 +#define DPVS_LOG_POOL_SIZE_MIN 1023 +#define DPVS_LOG_CACHE_SIZE_DEF 64 + +extern bool g_dpvs_log_async_mode; +extern uint8_t g_dpvs_log_tslen; struct dpvs_log { lcoreid_t cid; @@ -63,14 +46,16 @@ typedef struct log_buf { } log_buf_t; typedef struct log_stats{ - int log_hash; + unsigned int log_hash; uint64_t log_begin; int slow; uint64_t slow_begin; uint32_t missed; } log_stats_t; -int dpvs_log(uint32_t level, uint32_t logtype, const char *func, int line, const char *format, ...); +int dpvs_log(uint32_t level, uint32_t logtype, const char *func, int line, + const char *format, ...) 
__rte_format_printf(5, 6); int log_slave_init(void); +void dpvs_set_log_pool_size(int size); #endif diff --git a/include/mbuf.h b/include/mbuf.h index a08cf1bdc..a8ccde221 100644 --- a/include/mbuf.h +++ b/include/mbuf.h @@ -157,4 +157,49 @@ static inline void mbuf_userdata_reset(struct rte_mbuf *m) int mbuf_init(void); +/* + * Return a pointer to L2 header, and set mbuf->l2_len. + * The start of data in the mbuf should be L2 data. + * It assumes that L2 header is in the first seg if the mbuf is not continuous. + * Only support outer headers for tunnelling packets. + * */ +void *mbuf_header_l2(struct rte_mbuf *mbuf); + +/* + * Return a pointer to L3 header, and set mbuf->l3_len. + * The start of data in the mbuf should be L2 data. + * It assumes that L3 header is in the first seg if the mbuf is not continuous. + * Only support outer headers for tunnelling packets. + * */ +void *mbuf_header_l3(struct rte_mbuf *mbuf); + +/* + * Return a pointer to L4 header, and set mbuf->l4_len. + * The start of data in the mbuf should be L2 data. + * It assumes that L4 header is in the first seg if the mbuf is not continuous. + * Only support outer headers for tunnelling packets. + * */ +void *mbuf_header_l4(struct rte_mbuf *mbuf); + +/* + * Return ether type (ETHER_TYPE_XXX) in the mbuf. + * The start of data in the mbuf should be L2 data, + * and vlan is ignored. + * Only support outer headers for tunnelling packets. + * */ +uint16_t mbuf_ether_type(struct rte_mbuf *mbuf); + +/* + * Return socket address family (AF_INET | AF_INET6) derived from ether type + * in the mbuf. The function is based on "mbuf_ether_type". + * */ +int mbuf_address_family(struct rte_mbuf *mbuf); + +/* + * Return protocol type (IPPROTO_XX) in the mbuf. + * The start of data in the mbuf should be L2 data. + * Only support outer headers for tunnelling packets. 
+ * */ +uint8_t mbuf_protocol(struct rte_mbuf *mbuf); + #endif /* __DP_VS_MBUF_H__ */ diff --git a/include/neigh.h b/include/neigh.h index f29f6f30d..bbc449063 100644 --- a/include/neigh.h +++ b/include/neigh.h @@ -45,7 +45,7 @@ #include "netif.h" #include "linux_ipv6.h" -#define RTE_LOGTYPE_NEIGHBOUR RTE_LOGTYPE_USER2 +#define RTE_LOGTYPE_NEIGHBOUR RTE_LOGTYPE_USER1 #define NEIGH_TAB_BITS 8 #define NEIGH_TAB_SIZE (1 << NEIGH_TAB_BITS) #define NEIGH_TAB_MASK (NEIGH_TAB_SIZE - 1) diff --git a/include/netif.h b/include/netif.h index c80a1746c..544708ec1 100644 --- a/include/netif.h +++ b/include/netif.h @@ -173,6 +173,7 @@ struct netif_kni { struct dpvs_timer kni_rtnl_timer; int kni_rtnl_fd; struct rte_ring *rx_ring; + struct list_head kni_flows; } __rte_cache_aligned; union netif_bond { @@ -320,6 +321,11 @@ static inline void *netif_priv(struct netif_port *dev) return (char *)dev + __ALIGN_KERNEL(sizeof(struct netif_port), NETIF_ALIGN); } +static inline const void *netif_priv_const(const struct netif_port *dev) +{ + return (const char *)dev + __ALIGN_KERNEL(sizeof(struct netif_port), NETIF_ALIGN); +} + static inline struct netif_tc *netif_tc(struct netif_port *dev) { return &dev->tc[rte_lcore_id()]; diff --git a/include/netif_flow.h b/include/netif_flow.h index 372a40899..956afc8f1 100644 --- a/include/netif_flow.h +++ b/include/netif_flow.h @@ -83,6 +83,45 @@ int netif_sapool_flow_del(struct netif_port *dev, lcoreid_t cid, __be16 port_base, __be16 port_mask, netif_flow_handler_param_t *flows); +/* + * Add kni flow rules. + * @param dev [in] + * Target device for the flow rules, supporting bonding/physical ports. + * @param cid [in] + * Lcore id to which to route the target flow. + * @param af [in] + * IP address family. + * @param addr [in] + * Dedicated IP address of kni interface. + * @param flows [out] + * Containing netif flow handlers if success, undefined otherwise. + * + * @return + * DPVS error code.
+ */ +int netif_kni_flow_add(struct netif_port *dev, lcoreid_t cid, + int af, const union inet_addr *addr, + netif_flow_handler_param_t *flows); + +/* + * Delete kni flow rules. + * @param dev [in] + * Target device for the flow rules, supporting bonding/physical ports. + * @param cid [in] + * Lcore id to which to route the target flow. + * @param af [in] + * IP address family. + * @param addr [in] + * Dedicated IP address of kni interface. + * @param flows [in] + * Containing netif flow handlers to delete. + * + * @return + * DPVS error code. + */ +int netif_kni_flow_del(struct netif_port *dev, lcoreid_t cid, + int af, const union inet_addr *addr, + netif_flow_handler_param_t *flows); /* * Flush all flow rules on a port. * * @param dev diff --git a/include/tc/cls.h b/include/tc/cls.h index 419acea72..dc0750598 100644 --- a/include/tc/cls.h +++ b/include/tc/cls.h @@ -24,6 +24,7 @@ #define __DPVS_TC_CLS_H__ #include "conf/common.h" #include "conf/match.h" +#include "conf/ipset.h" #ifdef __DPVS__ #include "dpdk.h" #endif /* __DPVS__ */ @@ -39,6 +40,12 @@ struct tc_cls_match_copt { struct tc_cls_result result; } __attribute__((__packed__)); +struct tc_cls_ipset_copt { + char setname[IPSET_MAXNAMELEN]; + bool dst_match; + struct tc_cls_result result; +} __attribute__((__packed__)); + #ifdef __DPVS__ struct tc_cls; diff --git a/kmod/toa/toa.c b/kmod/toa/toa.c index f6fb108a5..c8935a826 100644 --- a/kmod/toa/toa.c +++ b/kmod/toa/toa.c @@ -21,6 +21,15 @@ * Address include ip+port, Now support IPV4 and IPV6 */ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5,7,0) +#include +static struct kprobe kp = { + .symbol_name = "kallsyms_lookup_name" +}; + +typedef unsigned long (*kallsyms_lookup_name_t)(const char *name); +#endif + unsigned long sk_data_ready_addr = 0; #define TOA_NIPQUAD_FMT "%u.%u.%u.%u" @@ -681,29 +690,34 @@ inet6_getname_toa(struct socket *sock, struct sockaddr *uaddr, static inline int get_kernel_ipv6_symbol(void) { - inet6_stream_ops_p = -
(struct proto_ops *)kallsyms_lookup_name("inet6_stream_ops"); - if (inet6_stream_ops_p == NULL) { - TOA_INFO("CPU [%u] kallsyms_lookup_name cannot find symbol inet6_stream_ops\n", - smp_processor_id()); +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5,7,0) + kallsyms_lookup_name_t kallsyms_lookup_name; + kallsyms_lookup_name = (kallsyms_lookup_name_t) kp.addr; +#endif - return -1; - } - ipv6_specific_p = - (struct inet_connection_sock_af_ops *)kallsyms_lookup_name("ipv6_specific"); - if (ipv6_specific_p == NULL) { - TOA_INFO("CPU [%u] kallsyms_lookup_name cannot find symbol ipv6_specific\n", - smp_processor_id()); - return -1; - } - tcp_v6_syn_recv_sock_org_pt = - (syn_recv_sock_func_pt)kallsyms_lookup_name("tcp_v6_syn_recv_sock"); - if (tcp_v6_syn_recv_sock_org_pt == NULL) { - TOA_INFO("CPU [%u] kallsyms_lookup_name cannot find symbol tcp_v6_syn_recv_sock\n", - smp_processor_id()); - return -1; + inet6_stream_ops_p = + (struct proto_ops *)kallsyms_lookup_name("inet6_stream_ops"); + if (inet6_stream_ops_p == NULL) { + TOA_INFO("CPU [%u] kallsyms_lookup_name cannot find symbol inet6_stream_ops\n", + smp_processor_id()); + + return -1; + } + ipv6_specific_p = + (struct inet_connection_sock_af_ops *)kallsyms_lookup_name("ipv6_specific"); + if (ipv6_specific_p == NULL) { + TOA_INFO("CPU [%u] kallsyms_lookup_name cannot find symbol ipv6_specific\n", + smp_processor_id()); + return -1; + } + tcp_v6_syn_recv_sock_org_pt = + (syn_recv_sock_func_pt)kallsyms_lookup_name("tcp_v6_syn_recv_sock"); + if (tcp_v6_syn_recv_sock_org_pt == NULL) { + TOA_INFO("CPU [%u] kallsyms_lookup_name cannot find symbol tcp_v6_syn_recv_sock\n", + smp_processor_id()); + return -1; } - return 0; + return 0; } #endif @@ -996,6 +1010,14 @@ static int toa_stats_seq_open(struct inode *inode, struct file *file) return single_open(file, toa_stats_show, NULL); } +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5,6,0) +static const struct proc_ops toa_stats_fops = { + .proc_open = toa_stats_seq_open, + .proc_read = 
seq_read, + .proc_lseek = seq_lseek, + .proc_release = single_release, +}; +#else static const struct file_operations toa_stats_fops = { .owner = THIS_MODULE, .open = toa_stats_seq_open, @@ -1003,6 +1025,7 @@ static const struct file_operations toa_stats_fops = { .llseek = seq_lseek, .release = single_release, }; +#endif #ifdef TOA_NAT64_ENABLE static struct nf_sockopt_ops toa_sockopts = { @@ -1019,13 +1042,21 @@ static struct nf_sockopt_ops toa_sockopts = { /* * TOA module init and destory */ -#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,9,0) +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5,6,0) +static struct proc_dir_entry *proc_net_fops_create(struct net *net, + const char *name, mode_t mode, const struct proc_ops *proc_ops) +{ + return proc_create(name, mode, net->proc_net, proc_ops); +} +#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3,9,0) static struct proc_dir_entry *proc_net_fops_create(struct net *net, const char *name, mode_t mode, const struct file_operations *fops) { return proc_create(name, mode, net->proc_net, fops); } +#endif +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,9,0) static void proc_net_remove(struct net *net, const char *name) { remove_proc_entry(name, net->proc_net); @@ -1036,6 +1067,15 @@ static void proc_net_remove(struct net *net, const char *name) static int __init toa_init(void) { +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5,7,0) + kallsyms_lookup_name_t kallsyms_lookup_name; + int ret = register_kprobe(&kp); + if (ret < 0) { + TOA_INFO("register_kprobe failed, returned %d\n", ret); + return 1; + } + kallsyms_lookup_name = (kallsyms_lookup_name_t) kp.addr; +#endif TOA_INFO("TOA " TOA_VERSION " by qlb of iqiyi.\n"); @@ -1110,6 +1150,10 @@ toa_exit(void) } #endif +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5,7,0) + unregister_kprobe(&kp); +#endif + proc_net_remove(&init_net, "toa_stats"); if (NULL != ext_stats) { free_percpu(ext_stats); diff --git a/kmod/toa/toa.h b/kmod/toa/toa.h index 34a94be8c..e99fae80a 100644 --- a/kmod/toa/toa.h +++ 
b/kmod/toa/toa.h @@ -38,6 +38,9 @@ #include #include #include +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5,7,0) +#include +#endif #define TOA_VERSION "2.0.0.0" diff --git a/kmod/uoa/example/udp_serv.c b/kmod/uoa/example/udp_serv.c index b67b05a88..5149d0b61 100644 --- a/kmod/uoa/example/udp_serv.c +++ b/kmod/uoa/example/udp_serv.c @@ -41,7 +41,8 @@ #define MAX_SUPP_AF 2 #define MAX_EPOLL_EVENTS 2 #define SA struct sockaddr -#define SERV_PORT 6000 + +static __u16 SERV_PORT = 6000; void handle_reply(int efd, int fd) { @@ -109,6 +110,7 @@ void handle_reply(int efd, int fd) len = sizeof(peer); sendto(fd, buff, n, 0, (SA *)&peer, len); } + fflush(stdout); } int main(int argc, char *argv[]) @@ -120,6 +122,10 @@ int main(int argc, char *argv[]) struct sockaddr_in local; struct sockaddr_in6 local6; + if (argc > 1) + SERV_PORT = atoi(argv[1]); + printf("start udp echo server on 0.0.0.0:%u\n", SERV_PORT); + if ((sockfd[0] = socket(AF_INET, SOCK_DGRAM, 0)) < 0) { perror("Fail to create INET socket!\n"); exit(1); diff --git a/kmod/uoa/uoa.c b/kmod/uoa/uoa.c index a71eb529b..476786620 100644 --- a/kmod/uoa/uoa.c +++ b/kmod/uoa/uoa.c @@ -94,6 +94,10 @@ static int uoa_map_tab_bits = 12; module_param_named(uoa_map_tab_bits, uoa_map_tab_bits, int, 0444); MODULE_PARM_DESC(uoa_map_tab_bits, "UOA mapping table hash size"); +static int uoa_hook_forward = 0; +module_param_named(uoa_hook_forward, uoa_hook_forward, int, 0444); +MODULE_PARM_DESC(uoa_hook_forward, "also parse UOA data in netfilter FORWARD chain (INPUT chain only by default)"); + static int uoa_map_tab_size __read_mostly; static int uoa_map_tab_mask __read_mostly; @@ -818,10 +822,9 @@ static struct uoa_map *uoa_opp_rcv(__be16 af, void *iph, struct sk_buff *skb) if (AF_INET == af) { if (((struct iphdr *)iph)->ihl + (opplen >> 2) < 16) { ((struct iphdr *)iph)->ihl += (opplen >> 2); - memset(opph, opplen, IPOPT_NOOP); - /* need change it to parse transport layer */ ((struct iphdr *)iph)->protocol = opph->protocol; + memset(opph, 
IPOPT_NOOP, opplen); } else { pr_warn("IP header has no room to convert uoa data into option.\n"); } @@ -907,19 +910,33 @@ static unsigned int uoa_ip_local_in(unsigned int hooknum, static struct nf_hook_ops uoa_nf_hook_ops[] __read_mostly = { { .hook = uoa_ip_local_in, - .pf = NFPROTO_IPV4, - .hooknum = NF_INET_LOCAL_IN, + .pf = NFPROTO_IPV4, + .hooknum = NF_INET_LOCAL_IN, .priority = NF_IP_PRI_NAT_SRC + 1, }, + { + // do NOT register unless module param `uoa_hook_forward` is enabled + .hook = uoa_ip_local_in, + .pf = NFPROTO_IPV4, + .hooknum = NF_INET_FORWARD, + .priority = NF_IP_PRI_LAST - 1, + }, }; static struct nf_hook_ops uoa_nf_hook_ops6[] __read_mostly = { { .hook = uoa_ip_local_in, - .pf = NFPROTO_IPV6, - .hooknum = NF_INET_LOCAL_IN, + .pf = NFPROTO_IPV6, + .hooknum = NF_INET_LOCAL_IN, .priority = NF_IP_PRI_NAT_SRC + 1, }, + { + // do NOT register unless module param `uoa_hook_forward` is enabled + .hook = uoa_ip_local_in, + .pf = NFPROTO_IPV6, + .hooknum = NF_INET_FORWARD, + .priority = NF_IP_PRI_LAST - 1, + }, }; static __init int uoa_init(void) @@ -942,20 +959,22 @@ static __init int uoa_init(void) */ #if LINUX_VERSION_CODE >= KERNEL_VERSION(4,3,0) err = nf_register_net_hooks(&init_net, uoa_nf_hook_ops, - ARRAY_SIZE(uoa_nf_hook_ops)); + uoa_hook_forward ? ARRAY_SIZE(uoa_nf_hook_ops) : ARRAY_SIZE(uoa_nf_hook_ops) - 1); if (err < 0) { pr_err("fail to register netfilter hooks.\n"); goto hook_failed; } err = nf_register_net_hooks(&init_net, uoa_nf_hook_ops6, - ARRAY_SIZE(uoa_nf_hook_ops6)); + uoa_hook_forward ? ARRAY_SIZE(uoa_nf_hook_ops6) : ARRAY_SIZE(uoa_nf_hook_ops6) - 1); #else - err = nf_register_hooks(uoa_nf_hook_ops, ARRAY_SIZE(uoa_nf_hook_ops)); + err = nf_register_hooks(uoa_nf_hook_ops, + uoa_hook_forward ? 
ARRAY_SIZE(uoa_nf_hook_ops) : ARRAY_SIZE(uoa_nf_hook_ops) - 1); if (err < 0) { pr_err("fail to register netfilter hooks.\n"); goto hook_failed; } - err = nf_register_hooks(uoa_nf_hook_ops6, ARRAY_SIZE(uoa_nf_hook_ops6)); + err = nf_register_hooks(uoa_nf_hook_ops6, + uoa_hook_forward ? ARRAY_SIZE(uoa_nf_hook_ops6) : ARRAY_SIZE(uoa_nf_hook_ops6) - 1); #endif if (err < 0) { pr_err("fail to register netfilter hooks.\n"); @@ -976,12 +995,14 @@ static __exit void uoa_exit(void) { #if LINUX_VERSION_CODE >= KERNEL_VERSION(4,3,0) nf_unregister_net_hooks(&init_net, uoa_nf_hook_ops, - ARRAY_SIZE(uoa_nf_hook_ops)); + uoa_hook_forward ? ARRAY_SIZE(uoa_nf_hook_ops) : ARRAY_SIZE(uoa_nf_hook_ops) - 1); nf_unregister_net_hooks(&init_net, uoa_nf_hook_ops6, - ARRAY_SIZE(uoa_nf_hook_ops6)); + uoa_hook_forward ? ARRAY_SIZE(uoa_nf_hook_ops6) : ARRAY_SIZE(uoa_nf_hook_ops6) - 1); #else - nf_unregister_hooks(uoa_nf_hook_ops, ARRAY_SIZE(uoa_nf_hook_ops)); - nf_unregister_hooks(uoa_nf_hook_ops6, ARRAY_SIZE(uoa_nf_hook_ops6)); + nf_unregister_hooks(uoa_nf_hook_ops, + uoa_hook_forward ? ARRAY_SIZE(uoa_nf_hook_ops) : ARRAY_SIZE(uoa_nf_hook_ops) - 1); + nf_unregister_hooks(uoa_nf_hook_ops6, + uoa_hook_forward ? 
ARRAY_SIZE(uoa_nf_hook_ops6) : ARRAY_SIZE(uoa_nf_hook_ops6) - 1); #endif synchronize_net(); diff --git a/scripts/dpdk-build.sh b/scripts/dpdk-build.sh index 9e7741292..8416120b0 100755 --- a/scripts/dpdk-build.sh +++ b/scripts/dpdk-build.sh @@ -106,5 +106,5 @@ echo -e "DPDK library installed successfully into directory: \033[32m$(pwd)/dpdk ## export dpdk lib echo -e "You can use this library in dpvs by running the command below:" echo -e "\033[32m" -echo -e "export PKG_CONFIG_PATH=$(pwd)/dpdklib/lib64/pkgconfig" +echo -e "export PKG_CONFIG_PATH=$(find $(pwd) -name pkgconfig)" echo -e "\033[0m" diff --git a/src/Makefile b/src/Makefile index 1ef63e5ea..64c9471b6 100644 --- a/src/Makefile +++ b/src/Makefile @@ -50,6 +50,7 @@ CFLAGS += -Wall -Werror -Wstrict-prototypes -Wmissing-prototypes -mcmodel=medium ifeq ($(shell test $(GCC_VERSION) -ge 70 && echo 1), 1) CFLAGS += -Wno-format-truncation CFLAGS += -Wno-stringop-truncation + CFLAGS += -Wno-address-of-packed-member CFLAGS += -Wstringop-overflow=0 endif diff --git a/src/VERSION b/src/VERSION index 8b6c402ce..c545d6e10 100755 --- a/src/VERSION +++ b/src/VERSION @@ -1,33 +1,53 @@ -#!/bin/sh - +#!/bin/sh # program: dpvs -# Jul 28, 2021 -# -# Major changes: -# - Adapt dpvs to dpdk 20.11 (dpdk-stable-20.11.1). -# - Create branch DPVS-1.8-LTS to support dpdk 18.11. -# - Obsolete supports for dpdk 17.11. -# -# Featurs: -# - Dpvs: Add netif_flow module using generic flow api (rte_flow), and replace flow director with rte_flow. -# - Dpvs: Replace mbuf userdata with mbuf dynfields. -# - Dpvs: Adapt dpvs to several renamed type names in dpdk 20.11. -# - Dpvs: Update Makefiles to support dpdk 20.11. -# - Dpvs: Add config option "dedicated_queues" for bonding mode 4 (802.3ad). -# - Dpdk: Add helper script to facilitate dpdk build. -# - Dpdk: Porting patches to dpdk 20.11 and remove patches of previous dpdk versions (18.11, 17.11). -# - Dpdk: Patch dpdk ixgbe pmd driver to support dpvs's flow api. 
-# - Dpdk: Patch dpdk bonding mode 4 for mlx5 to fix crash problem when debug. -# - Keeaplived: Add UDP_CHECK health checker. -# - Docs: Refine tutorial doc of section 'Full-NAT with Keepalived (one-arm)'. -# - Docs: Update docs for dpvs use with dpdk 20.11. -# - Ci: Update dpvs ci to support dpdk 20.11. -# -# Bugfix: -# - Dpvs: Fix ipvs rr/wrr/wlc problem of uneven load distribution across dests. -# - Dpvs: Fix bonding mode 4 problem caused by LACP failure. +# Jul 19, 2022 # export VERSION=1.9 -export RELEASE=0 +export RELEASE=2 echo $VERSION-$RELEASE + +## Features +#* Dpvs: Add ipset framework and 12 set types. +#* Dpvs: Add an ipset based tc classifier -- tc_cls_ipset. +#* Dpvs: Add l2/l3/l4 header parse apis for mbuf. +#* Dpvs: Add config option "dedicated_queues" for bonding mode 4 (802.3ad). +#* Dpvs: Isolate kni ingress traffic using kni address flow. +#* Dpvs: Update rss reta table according to configured workers after device bootup. +#* Dpvs: Expire quiescent connections after realserver was removed. +#* Dpvs: Make async log mempool size and log timestamp configurable. +#* Dpvs: Enable dpvs log only when macro CONFIG_DPVS_LOG is defined. +#* Dpvs: Make debug fields in dp_vs_conn configurable for memory optimization. +#* Toa: Support linux kernel version v5.7.0+. +#* Keepalived: Add UDP_CHECK health checker. +#* Test: Add flame graph scripts for performance tests. +#* Test: Add performance benchmark tests of DPVS v1.9.2. +#* Docs: Update some docs. +# +## Bugfix +#* Dpvs: Fix a crash problem when timer is scheduled from within another timer's callback. +#* Dpvs: Fix a crash problem caused by incorrect mbuf pointer in IPv4 fragmentation. +#* Dpvs: Fix a crash problem caused by using unsafe list macro in conhash. +#* Dpvs: Fix the fullnat tcp forwarding failure problem when defer_rs_syn enabled. +#* Dpvs: Fix the ipvs rr/wrr/wlc problem of uneven load distribution across dests. +#* Dpvs: Fix the weight ratio update problem in conhash schedule algorithm. 
+#* Dpvs: Send tcp rst to both ends when snat connection expired. +#* Dpvs: Use unified dest validation in mh scheduling algorithm. +#* Dpvs: Fix the icmp sending failure problem when no route cached in mbuf. +#* Dpvs: Fix the compiling failure problem when icmp debug is enabled. +#* Dpvs: Fix the icmpv6 sending failure problem caused by incorrect mtu. +#* Dpvs: Fix icmpv6 checksum error caused by incorrect payload length endian in ipv6 header. +#* Dpvs: Fix the checksum problem caused by incorrect netif interface. +#* Dpvs: Fix the bonding mode 4 problem caused by LACP failure. +#* Dpvs: Fix the ipv6 neighbour ring full problem to kni isolated lcore. +#* Dpvs: Fix the list/edit problem for MATCH type service (snat service). +#* Dpvs: Fix incorrect oifname typo in MATCH type. +#* Dpvs: Fix the dpvs worker blocking problem when async log is enabled. +#* Dpvs: Fix some memory overflow problems when log messages are truncated. +#* Dpvs: Fix the msg sequence duplicated problem in ipvs allow list. +#* Dpvs: Fix the incorrect uoa client source port problem in fnat64. +#* Uoa: Fix uoa data parse problem of ipv4 opp, and add a module parameter to parse uoa data in netfilter forward chain. +#* Keepalived: Fix an exit problem when reload. +#* Keepalived: Fix some compile problems found on ubuntu. +#* Ipvsadm: Use correct flag in listing ipvs connections. 
diff --git a/src/cfgfile.c b/src/cfgfile.c index b8b53d108..e6e88da20 100644 --- a/src/cfgfile.c +++ b/src/cfgfile.c @@ -33,6 +33,7 @@ #include "ipvs/proto_tcp.h" #include "ipvs/proto_udp.h" #include "ipvs/synproxy.h" +#include "ipset/ipset_hash.h" #include "scheduler.h" typedef void (*sighandler_t)(int); @@ -71,6 +72,8 @@ static vector_t install_keywords(void) install_ipv4_keywords(); install_ip4_frag_keywords(); + install_ipset_hash_keywords(); + install_control_keywords(); install_keyword_root("ipvs_defs", NULL); diff --git a/src/config.mk b/src/config.mk index 930353ac6..9c291739a 100644 --- a/src/config.mk +++ b/src/config.mk @@ -27,6 +27,9 @@ CONFIG_PDUMP=y CFLAGS += -D DPVS_MAX_SOCKET=2 CFLAGS += -D DPVS_MAX_LCORE=64 +CFLAGS += -D CONFIG_DPVS_LOG +#CFLAGS += -D CONFIG_ICMP_REDIRECT_CORE + #CFLAGS += -D CONFIG_DPVS_NEIGH_DEBUG #CFLAGS += -D CONFIG_RECORD_BIG_LOOP #CFLAGS += -D CONFIG_DPVS_SAPOOL_DEBUG @@ -44,7 +47,8 @@ CFLAGS += -D DPVS_MAX_LCORE=64 #CFLAGS += -D CONFIG_NDISC_DEBUG #CFLAGS += -D CONFIG_MSG_DEBUG #CFLAGS += -D CONFIG_DPVS_MP_DEBUG -#CFLAGS += -D CONFIG_ICMP_REDIRECT_CORE +#CFLAGS += -D CONFIG_DPVS_NETIF_DEBUG +#CFLAGS += -D CONFIG_DPVS_ICMP_DEBUG # for ixgbe nic ifeq ($(CONFIG_IXGEB_PMD), y) diff --git a/src/global_conf.c b/src/global_conf.c index fd7628117..d4255b3f2 100644 --- a/src/global_conf.c +++ b/src/global_conf.c @@ -18,8 +18,7 @@ #include #include #include "global_conf.h" - -extern bool g_dpvs_log_async_mode; +#include "log.h" bool g_dpvs_pdump = false; @@ -137,6 +136,42 @@ static void log_async_mode_handler(vector_t tokens) FREE_PTR(str); } +static void log_with_timestamp_handler(vector_t tokens) +{ + char *str = set_value(tokens); + assert(str); + if (strcasecmp(str, "on") == 0) + g_dpvs_log_tslen = LOG_SYS_TIME_LEN; + else if (strcasecmp(str, "off") == 0) + g_dpvs_log_tslen = 0; + else + RTE_LOG(WARNING, CFG_FILE, "invalid log_with_timestamp %s\n", str); + + RTE_LOG(INFO, CFG_FILE, "log_with_timestamp = %s\n", g_dpvs_log_tslen > 0 
? "on" : "off"); + + FREE_PTR(str); +} + +static void log_async_pool_size_handler(vector_t tokens) +{ + char *str = set_value(tokens); + int poolsize; + + assert(str); + poolsize = atoi(str); + if (poolsize < DPVS_LOG_POOL_SIZE_MIN) { + RTE_LOG(WARNING, CFG_FILE, "invalid log_async_pool_size %s, using default %d\n", + str, DPVS_LOG_POOL_SIZE_DEF); + dpvs_set_log_pool_size(DPVS_LOG_POOL_SIZE_DEF); + } else { + is_power2(poolsize, 1, &poolsize); + RTE_LOG(INFO, CFG_FILE, "log_async_pool_size = %d (round to 2^n-1)\n", poolsize); + dpvs_set_log_pool_size(poolsize - 1); + } + + FREE_PTR(str); +} + #ifdef CONFIG_DPVS_PDUMP static void pdump_handler(vector_t tokens) { @@ -161,6 +196,8 @@ void install_global_keywords(void) install_keyword("log_level", log_level_handler, KW_TYPE_NORMAL); install_keyword("log_file", log_file_handler, KW_TYPE_NORMAL); install_keyword("log_async_mode", log_async_mode_handler, KW_TYPE_INIT); + install_keyword("log_with_timestamp", log_with_timestamp_handler, KW_TYPE_NORMAL); + install_keyword("log_async_pool_size", log_async_pool_size_handler, KW_TYPE_INIT); #ifdef CONFIG_DPVS_PDUMP install_keyword("pdump", pdump_handler, KW_TYPE_INIT); #endif diff --git a/src/icmp.c b/src/icmp.c index 81d7598d4..67de0ea35 100644 --- a/src/icmp.c +++ b/src/icmp.c @@ -175,11 +175,6 @@ void icmp_send(struct rte_mbuf *imbuf, int type, int code, uint32_t info) uint16_t csum; int room, err; - if (!rt) { - RTE_LOG(DEBUG, ICMP, "%s: no route.\n", __func__); - return; - } - /* no replies to physical multicast/broadcast */ if (etype != ETH_PKT_HOST) { RTE_LOG(DEBUG, ICMP, "%s: phy-multi/broadcast.\n", __func__); @@ -220,7 +215,7 @@ void icmp_send(struct rte_mbuf *imbuf, int type, int code, uint32_t info) } /* determing source address */ - if (rt->flag & RTF_LOCALIN) { /* original pkt's dest is us ? */ + if (rt && rt->flag & RTF_LOCALIN) { /* original pkt's dest is us ? 
*/ saddr.s_addr = iph->dst_addr; } else { /* linux select IP of ingress iface only when param diff --git a/src/inet.c b/src/inet.c index b8494f6b8..d5ebcdea1 100644 --- a/src/inet.c +++ b/src/inet.c @@ -28,7 +28,6 @@ #include "icmp.h" #include "icmp6.h" #include "inetaddr.h" -#include "ipset.h" #define INET #define RTE_LOGTYPE_INET RTE_LOGTYPE_USER1 @@ -82,8 +81,6 @@ int inet_init(void) { int err; - if ((err = ipset_init()) != 0) - return err; if ((err = neigh_init()) != 0) return err; if ((err = route_init()) != 0) @@ -126,8 +123,6 @@ int inet_term(void) return err; if ((err = neigh_term()) != 0) return err; - if ((err = ipset_term()) != 0) - return err; return EDPVS_OK; } diff --git a/src/inetaddr.c b/src/inetaddr.c index 13081e4e4..981cc7d96 100644 --- a/src/inetaddr.c +++ b/src/inetaddr.c @@ -769,7 +769,7 @@ static int ifa_expire(void *arg) err = inet_addr_del(ifa->af, ifa->idev->dev, &ifa->addr, ifa->plen); if (err != EDPVS_OK) { - RTE_LOG(ERR, IFA, "inet_addr_del failed\n", __func__); + RTE_LOG(ERR, IFA, "%s: inet_addr_del failed\n", __func__); return DTIMER_OK; } @@ -1615,8 +1615,8 @@ static int ifaddr_get_verbose(struct inet_device *idev, struct inet_addr_data_ar sizeof(union inet_addr)) != 0) { RTE_LOG(WARNING, IFA, "%s: ifa addr does not match -- master=%X, " "slave[%02d]=%X\n", __func__, - array->addrs[ii].ifa_entry.addr, cur->cid, - arrmsg->addrs[ii].ifa_entry.addr); + array->addrs[ii].ifa_entry.addr.in.s_addr, cur->cid, + arrmsg->addrs[ii].ifa_entry.addr.in.s_addr); } if (off >= ifa_cnt) break; diff --git a/src/ipset.c b/src/ipset.c deleted file mode 100644 index 762d6a185..000000000 --- a/src/ipset.c +++ /dev/null @@ -1,555 +0,0 @@ -/* - * DPVS is a software load balancer (Virtual Server) based on DPDK. - * - * Copyright (C) 2021 iQIYI (www.iqiyi.com). - * All Rights Reserved. 
- * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - */ -#include -#include -#include -#include -#include -#include -#include "ipset.h" -#include "conf/ipset.h" -#include "ctrl.h" -#include "conf/common.h" -#include "parser/parser.h" - -#define IPSET_TAB_SIZE (1<<8) -#define IPSET_TAB_MASK (IPSET_TAB_SIZE - 1) - -#define this_ipset_lcore (RTE_PER_LCORE(ipset_lcore)) -#define this_ipset_table_lcore (this_ipset_lcore.ipset_table) -#define this_num_ipset (RTE_PER_LCORE(num_ipset)) - -struct ipset_lcore{ - struct list_head ipset_table[IPSET_TAB_SIZE]; -}; - -static RTE_DEFINE_PER_LCORE(struct ipset_lcore, ipset_lcore); -static RTE_DEFINE_PER_LCORE(uint32_t, num_ipset); - -static inline unsigned int ipset_addr_hash(int af, union inet_addr *addr) -{ - uint32_t addr_fold; - - addr_fold = inet_addr_fold(af, addr); - - if (!addr_fold) { - RTE_LOG(DEBUG, IPSET, "%s: IP proto not support.\n", __func__); - return 0; - } - - return rte_be_to_cpu_32(addr_fold)&IPSET_TAB_MASK; -} - - -static struct ipset_entry *ipset_new_entry(int af, union inet_addr *dest) -{ - struct ipset_entry *new_ipset=NULL; - if(!dest) - return NULL; - new_ipset = rte_zmalloc("new_ipset_entry", sizeof(struct ipset_entry), 0); - if (new_ipset == NULL){ - return NULL; - } - new_ipset->daddr.af = af; - memcpy(&new_ipset->daddr.addr, dest, sizeof(union inet_addr)); - return new_ipset; -} - - -int ipset_add(int af, union inet_addr *dest) -{ - unsigned int hashkey; - struct ipset_entry *ipset_node, *ipset_new; - - hashkey = ipset_addr_hash(af, dest); - - 
list_for_each_entry(ipset_node, &this_ipset_table_lcore[hashkey], list){ - if (ipset_node->daddr.af == af && inet_addr_equal(af, &ipset_node->daddr.addr, dest)) { - return EDPVS_EXIST; - } - } - - ipset_new = ipset_new_entry(af, dest); - if (!ipset_new){ - return EDPVS_NOMEM; - } - - list_add(&ipset_new->list, &this_ipset_table_lcore[hashkey]); - this_num_ipset++; - return EDPVS_OK; -} - -struct ipset_entry *ipset_addr_lookup(int af, union inet_addr *dest) -{ - unsigned int hashkey; - struct ipset_entry *ipset_node; - - hashkey = ipset_addr_hash(af, dest); - list_for_each_entry(ipset_node, &this_ipset_table_lcore[hashkey], list){ - if (ipset_node->daddr.af == af && inet_addr_equal(af, &ipset_node->daddr.addr, dest)) { - return ipset_node; - } - } - return NULL; -} - - -int ipset_del(int af, union inet_addr *dest) -{ - struct ipset_entry *ipset_node; - - ipset_node = ipset_addr_lookup(af, dest); - if (!ipset_node) - return EDPVS_NOTEXIST; - list_del(&ipset_node->list); - rte_free(ipset_node); - this_num_ipset--; - return EDPVS_OK; -} - -static int ipset_add_del(bool add, struct dp_vs_multi_ipset_conf *cf) -{ - lcoreid_t cid = rte_lcore_id(); - struct dpvs_msg *msg; - struct dp_vs_ipset_conf *ip_cf; - int err = 0; - int i, multi_ipset_msg_size; - - for (i = 0; i < cf->num; i++) { - ip_cf = &cf->ipset_conf[i]; - if (ip_cf->af != AF_INET && ip_cf->af != AF_INET6) - continue; - if (add) - err = ipset_add(ip_cf->af, &ip_cf->addr); - else - err = ipset_del(ip_cf->af, &ip_cf->addr); - } - - if (err != EDPVS_OK) { - return err; - } - - multi_ipset_msg_size = sizeof(struct dp_vs_multi_ipset_conf) - + cf->num*sizeof(struct dp_vs_ipset_conf); - if (add) - msg = msg_make(MSG_TYPE_IPSET_ADD, 0, DPVS_MSG_MULTICAST, - cid, multi_ipset_msg_size, cf); - else - msg = msg_make(MSG_TYPE_IPSET_DEL, 0, DPVS_MSG_MULTICAST, - cid, multi_ipset_msg_size, cf); - - err = multicast_msg_send(msg, 0/*DPVS_MSG_F_ASYNC*/, NULL); - if (err != EDPVS_OK) { - msg_destroy(&msg); - return err; - } - - 
msg_destroy(&msg); - return EDPVS_OK; -} - - -static int ipset_flush_lcore(void *arg) -{ - struct ipset_entry *ipset_node, *next; - int i; - if (!rte_lcore_is_enabled(rte_lcore_id())) - return EDPVS_DISABLED; - - for (i = 0; i < IPSET_TAB_SIZE; i++) { - list_for_each_entry_safe(ipset_node, next, &this_ipset_table_lcore[i], list){ - if (ipset_node) { - list_del(&ipset_node->list); - rte_free(ipset_node); - this_num_ipset--; - } - } - } - return 0; -} - -static int ipset_flush(void) -{ - lcoreid_t cid = rte_lcore_id(); - struct dpvs_msg *msg; - int err = 0; - - ipset_flush_lcore(NULL); - msg = msg_make(MSG_TYPE_IPSET_FLUSH, 0, DPVS_MSG_MULTICAST, - cid, 0, NULL); - - err = multicast_msg_send(msg, 0/*DPVS_MSG_F_ASYNC*/, NULL); - if (err != EDPVS_OK) { - msg_destroy(&msg); - return err; - } - msg_destroy(&msg); - - return EDPVS_OK; -} - -static int ipset_sockopt_set(sockoptid_t opt, const void *conf, size_t size) -{ - struct dp_vs_multi_ipset_conf *cf = (void *)conf; - int err; - - if (opt == SOCKOPT_SET_IPSET_FLUSH) - return ipset_flush(); - - if (!conf || size < sizeof(struct dp_vs_multi_ipset_conf) + sizeof(struct dp_vs_ipset_conf)) - return EDPVS_INVAL; - - switch (opt) { - case SOCKOPT_SET_IPSET_ADD: - err = ipset_add_del(true, cf); - break; - case SOCKOPT_SET_IPSET_DEL: - err = ipset_add_del(false, cf); - break; - default: - return EDPVS_NOTSUPP; - } - - return err; -} - -static int ipset_sockopt_get(sockoptid_t opt, const void *conf, size_t size, - void **out, size_t *outsize) -{ - size_t nips; - struct ipset_entry *ipset_node; - struct dp_vs_ipset_conf_array *array; - int i; - int off = 0; - - nips = this_num_ipset; - *outsize = sizeof(struct dp_vs_ipset_conf_array) + \ - nips * sizeof(struct dp_vs_ipset_conf); - *out = rte_calloc(NULL, 1, *outsize, 0); - if (!(*out)) - return EDPVS_NOMEM; - array = *out; - - for (i = 0; i < IPSET_TAB_SIZE; i++) { - list_for_each_entry(ipset_node, &this_ipset_table_lcore[i], list) { - if (off >= nips) - break; - 
memcpy(&array->ips[off].addr.in, &ipset_node->daddr.addr, sizeof(union inet_addr)); - array->ips[off++].af = ipset_node->daddr.af; - } - } - array->nipset = off; - - return 0; -} - -static int ipset_msg_process(bool add, struct dpvs_msg *msg) -{ - struct dp_vs_multi_ipset_conf *cf; - struct dp_vs_ipset_conf *ip_cf; - int err = 0; - int i; - - assert(msg); - - if (msg->len < sizeof(struct dp_vs_multi_ipset_conf) + sizeof(struct dp_vs_ipset_conf)) { - return EDPVS_INVAL; - } - - cf = (struct dp_vs_multi_ipset_conf *)msg->data; - - for (i = 0; i < cf->num; i++) { - ip_cf = &cf->ipset_conf[i]; - if (add) - err = ipset_add(ip_cf->af, &ip_cf->addr); - else - err = ipset_del(ip_cf->af, &ip_cf->addr); - } - - if (err != EDPVS_OK) - RTE_LOG(ERR, IPSET, "%s: fail to %s ipset.\n", __func__, add? "add":"del"); - - return err; - } - - -static int ipset_add_msg_cb(struct dpvs_msg *msg) -{ - return ipset_msg_process(true, msg); -} - -static int ipset_del_msg_cb(struct dpvs_msg *msg) -{ - return ipset_msg_process(false, msg); -} - -static int ipset_flush_msg_cb(struct dpvs_msg *msg) -{ - return ipset_flush_lcore(NULL); -} - -static int ipset_lcore_init(void *arg) -{ - int i; - - if (!rte_lcore_is_enabled(rte_lcore_id())) - return EDPVS_DISABLED; - - if (!netif_lcore_is_fwd_worker(rte_lcore_id())) - return EDPVS_NOTSUPP; - - for (i = 0; i < IPSET_TAB_SIZE; i++) - INIT_LIST_HEAD(&this_ipset_table_lcore[i]); - - return EDPVS_OK; -} - - -static struct dpvs_sockopts ipset_sockopts = { - .version = SOCKOPT_VERSION, - .set_opt_min = SOCKOPT_SET_IPSET_ADD, - .set_opt_max = SOCKOPT_SET_IPSET_FLUSH, - .set = ipset_sockopt_set, - .get_opt_min = SOCKOPT_GET_IPSET_SHOW, - .get_opt_max = SOCKOPT_GET_IPSET_SHOW, - .get = ipset_sockopt_get, -}; - -static int ipset_parse_conf_file(void) -{ - char *buf, ch; - struct dp_vs_multi_ipset_conf *ips = NULL; - int ip_num = 0, ipset_size = 0, ip_index = 0; - - buf = (char *) MALLOC(CFG_FILE_MAX_BUF_SZ); - if (buf == NULL) { - RTE_LOG(WARNING, IPSET, "no 
memory for ipset buf\n"); - return -1; - } - while (!feof(g_current_stream)) { - if ((ch=getc(g_current_stream)) == '\n') - ip_num++; - } - if (!ip_num) { - RTE_LOG(WARNING, IPSET, "no ip in the gfwip \n"); - FREE(buf); - return -1; - } - - RTE_LOG(DEBUG, IPSET, "gfwip list has %u ips\n", ip_num); - - fseek(g_current_stream, 0, SEEK_SET); - - ipset_size = sizeof(struct dp_vs_multi_ipset_conf) + ip_num*sizeof(struct dp_vs_ipset_conf); - ips = rte_calloc(NULL, 1, ipset_size, 0); - if (ips == NULL) { - RTE_LOG(WARNING, IPSET, "no memory for ipset conf\n"); - FREE(buf); - return -1; - } - ips->num = ip_num; - - while (read_line(buf, CFG_FILE_MAX_BUF_SZ)) { - if (inet_pton(AF_INET, buf, &ips->ipset_conf[ip_index].addr) <= 0) - ips->ipset_conf[ip_index].af = 0; - else - ips->ipset_conf[ip_index].af = AF_INET; - ip_index++; - } - if (ips != NULL) { - ipset_sockopt_set(SOCKOPT_SET_IPSET_ADD, ips, ipset_size); - rte_free(ips); - FREE(buf); - return 0; - } - - FREE(buf); - return -1; -} - -static void ipset_read_conf_file(char *conf_file) -{ - FILE *stream; - int i; - char *confpath; - char prev_path[CFG_FILE_MAX_BUF_SZ]; - - glob_t globbuf = { .gl_offs = 0, }; - glob(conf_file, 0, NULL, &globbuf); - - for (i = 0; i < globbuf.gl_pathc; i++) { - RTE_LOG(INFO, CFG_FILE, "Opening gfwip file '%s'.\n", globbuf.gl_pathv[i]); - stream = fopen(globbuf.gl_pathv[i], "r"); - if (!stream) { - RTE_LOG(WARNING, CFG_FILE, "Fail to open gfwip file '%s': %s.\n", - globbuf.gl_pathv[i], strerror(errno)); - return; - } - g_current_stream = stream; - if (getcwd(prev_path, CFG_FILE_MAX_BUF_SZ) != NULL) { - confpath= strdup(globbuf.gl_pathv[i]); - dirname(confpath); - if (chdir(confpath) == 0) { - if (ipset_parse_conf_file() < 0) { - RTE_LOG(ERR, IPSET, "Fail to parse gfwip conf\n"); - } - if (chdir(prev_path) != 0) - RTE_LOG(ERR, CFG_FILE, "Fail to chdir()\n"); - } - free(confpath); - } - fclose(stream); - } - - globfree(&globbuf); -} - -static int ipset_register_msg_cb(void) -{ - struct 
dpvs_msg_type msg_type; - int err; - - memset(&msg_type, 0, sizeof(struct dpvs_msg_type)); - msg_type.type = MSG_TYPE_IPSET_ADD; - msg_type.mode = DPVS_MSG_MULTICAST; - msg_type.cid = rte_lcore_id(); - msg_type.unicast_msg_cb = ipset_add_msg_cb; - err = msg_type_mc_register(&msg_type); - if (err != EDPVS_OK) { - RTE_LOG(ERR, IPSET, "%s: fail to register add msg.\n", __func__); - return err; - } - - memset(&msg_type, 0, sizeof(struct dpvs_msg_type)); - msg_type.type = MSG_TYPE_IPSET_DEL; - msg_type.mode = DPVS_MSG_MULTICAST; - msg_type.cid = rte_lcore_id(); - msg_type.unicast_msg_cb = ipset_del_msg_cb; - err = msg_type_mc_register(&msg_type); - if (err != EDPVS_OK) { - RTE_LOG(ERR, IPSET, "%s: fail to register del msg.\n", __func__); - return err; - } - - memset(&msg_type, 0, sizeof(struct dpvs_msg_type)); - msg_type.type = MSG_TYPE_IPSET_FLUSH; - msg_type.mode = DPVS_MSG_MULTICAST; - msg_type.cid = rte_lcore_id(); - msg_type.unicast_msg_cb = ipset_flush_msg_cb; - err = msg_type_mc_register(&msg_type); - if (err != EDPVS_OK) { - RTE_LOG(ERR, IPSET, "%s: fail to register flush msg.\n", __func__); - return err; - } - return EDPVS_OK; -} - -static int ipset_unregister_msg_cb(void) -{ - struct dpvs_msg_type msg_type; - int err; - - memset(&msg_type, 0, sizeof(struct dpvs_msg_type)); - msg_type.type = MSG_TYPE_IPSET_ADD; - msg_type.mode = DPVS_MSG_MULTICAST; - msg_type.cid = rte_lcore_id(); - msg_type.unicast_msg_cb = ipset_add_msg_cb; - err = msg_type_mc_unregister(&msg_type); - if (err != EDPVS_OK) { - RTE_LOG(ERR, IPSET, "%s: fail to register add msg.\n", __func__); - return err; - } - - memset(&msg_type, 0, sizeof(struct dpvs_msg_type)); - msg_type.type = MSG_TYPE_IPSET_DEL; - msg_type.mode = DPVS_MSG_MULTICAST; - msg_type.cid = rte_lcore_id(); - msg_type.unicast_msg_cb = ipset_del_msg_cb; - err = msg_type_mc_unregister(&msg_type); - if (err != EDPVS_OK) { - RTE_LOG(ERR, IPSET, "%s: fail to register del msg.\n", __func__); - return err; - } - - memset(&msg_type, 0, 
sizeof(struct dpvs_msg_type)); - msg_type.type = MSG_TYPE_IPSET_FLUSH; - msg_type.mode = DPVS_MSG_MULTICAST; - msg_type.cid = rte_lcore_id(); - msg_type.unicast_msg_cb = ipset_flush_msg_cb; - err = msg_type_mc_unregister(&msg_type); - if (err != EDPVS_OK) { - RTE_LOG(ERR, IPSET, "%s: fail to register flush msg.\n", __func__); - return err; - } - return EDPVS_OK; -} - -int ipset_init(void) -{ - int err, i; - lcoreid_t cid; - - this_num_ipset = 0; - - for (i = 0; i < IPSET_TAB_SIZE; i++) - INIT_LIST_HEAD(&this_ipset_table_lcore[i]); - - rte_eal_mp_remote_launch(ipset_lcore_init, NULL, CALL_MAIN); - RTE_LCORE_FOREACH_WORKER(cid) { - if ((err = rte_eal_wait_lcore(cid)) < 0) { - RTE_LOG(WARNING, IPSET, "%s: lcore %d: %s.\n", - __func__, cid, dpvs_strerror(err)); - } - } - - if ((err = ipset_register_msg_cb()) != EDPVS_OK) { - RTE_LOG(WARNING, IPSET, "fail to register ipset msg type.\n"); - ipset_unregister_msg_cb(); - return err; - } - - if ((err = sockopt_register(&ipset_sockopts)) != EDPVS_OK) { - ipset_unregister_msg_cb(); - return err; - } - ipset_read_conf_file(IPSET_CFG_FILE_NAME); - - return EDPVS_OK; -} - -int ipset_term(void) -{ - int err; - lcoreid_t cid; - - if ((err = ipset_unregister_msg_cb()) != EDPVS_OK) - return err; - if ((err = sockopt_unregister(&ipset_sockopts)) != EDPVS_OK) - return err; - - rte_eal_mp_remote_launch(ipset_flush_lcore, NULL, CALL_MAIN); - RTE_LCORE_FOREACH_WORKER(cid) { - if ((err = rte_eal_wait_lcore(cid)) < 0) { - RTE_LOG(WARNING, IPSET, "%s: lcore %d: %s.\n", - __func__, cid, dpvs_strerror(err)); - } - } - - return EDPVS_OK; -} - diff --git a/src/ipset/ipset_bitmap.c b/src/ipset/ipset_bitmap.c new file mode 100644 index 000000000..eaa6d376e --- /dev/null +++ b/src/ipset/ipset_bitmap.c @@ -0,0 +1,103 @@ +/* + * DPVS is a software load balancer (Virtual Server) based on DPDK. + * + * Copyright (C) 2021 iQIYI (www.iqiyi.com). + * All Rights Reserved. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ +#include "ipset/bitops.h" +#include "ipset/ipset_bitmap.h" + +#define do(adt, ...) set->variant->bitmap.do_##adt(__VA_ARGS__) + +static int +bitmap_add(struct ipset *set, void *value, uint16_t flag) +{ + struct bitmap_map *map = set->data; + struct bitmap_elem *e = value; + int ret = do(test, value, map, set->dsize); + + if (e->id >= map->elements) + return EDPVS_INVAL; + + /* To avoid same IP, different MAC or other elements */ + if (ret || test_bit(e->id, map->members)) { + if (flag & IPSET_F_FORCE) + return EDPVS_OK; + return EDPVS_EXIST; + } + + set_bit(e->id, map->members); + set->elements++; + return EDPVS_OK; +} + +static int +bitmap_del(struct ipset *set, void *value, uint16_t flag) +{ + struct bitmap_map *map = set->data; + struct bitmap_elem *e = value; + + if (e->id >= map->elements) + return EDPVS_INVAL; + + if (!do(del, value, map)) + return EDPVS_NOTEXIST; + + set->elements--; + return EDPVS_OK; +} + +static int +bitmap_test(struct ipset *set, void *value, uint16_t flag) +{ + struct bitmap_map *map = set->data; + struct bitmap_elem *e = value; + + if (e->id >= map->elements) + return 0; + + return do(test, value, map, set->dsize); +} + +ipset_adtfn bitmap_adtfn[IPSET_ADT_MAX] = { bitmap_add, bitmap_del, bitmap_test }; + +void +bitmap_flush(struct ipset *set) +{ + struct bitmap_map *map = set->data; + + bitmap_zero(map->members, map->elements); + set->elements = 0; +} + +void +bitmap_destroy(struct ipset *set) +{ + rte_free(set->data); +} 
+ +void +bitmap_list(struct ipset *set, struct ipset_info *info) +{ + struct bitmap_map *map = set->data; + + strcpy(info->name, set->name); + strcpy(info->type, set->type->name); + info->comment = set->comment? true : false; + info->af = AF_INET; + info->entries = set->elements; + info->size = map->size; + + do(list, set, &info->bitmap, info->members); +} diff --git a/src/ipset/ipset_bitmap_ip.c b/src/ipset/ipset_bitmap_ip.c new file mode 100644 index 000000000..b30c79b9d --- /dev/null +++ b/src/ipset/ipset_bitmap_ip.c @@ -0,0 +1,211 @@ +/* + * DPVS is a software load balancer (Virtual Server) based on DPDK. + * + * Copyright (C) 2021 iQIYI (www.iqiyi.com). + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ *
+ */
+#include "ipset/bitops.h"
+#include "ipset/ipset.h"
+#include "ipset/pfxlen.h"
+#include "ipset/ipset_bitmap.h"
+
+/*
+ * Private data of a "bitmap:ip" set: one bit per IPv4 address of the
+ * inclusive range [first_ip, last_ip], followed by one extension slot
+ * per address when comments are enabled.
+ */
+struct bitmap_ip {
+    size_t size;                /* total size */
+    uint32_t elements;          /* number of max elements in the set */
+    unsigned long *members;     /* the set members */
+    unsigned char *extensions;  /* the extensions for each member */
+
+    uint32_t first_ip;          /* host byte order, included in range */
+    uint32_t last_ip;           /* host byte order, included in range */
+    uint8_t cidr;               /* range cidr */
+};
+
+typedef struct bitmap_elem elem_t;
+
+/* Per-member extension data: only the optional comment. */
+typedef struct bitmap_ip_ext {
+    char comment[IPSET_MAXCOMLEN];
+} ext_t;
+
+/*
+ * Translate a host-order address into its bit index.
+ * The caller must make sure the address lies inside [first_ip, last_ip];
+ * otherwise the unsigned subtraction wraps and the result is not an index.
+ */
+static uint32_t
+ip_to_id(struct bitmap_ip *m, uint32_t ip)
+{
+    return ip - m->first_ip;
+}
+
+/* Tell whether a host-order address is covered by the set's range. */
+static inline bool
+ip_in_range(const struct bitmap_ip *m, uint32_t ip)
+{
+    return ip >= m->first_ip && ip <= m->last_ip;
+}
+
+/* Clear the member bit of e->id and hand back test_and_clear_bit()'s result. */
+static int
+bitmap_ip_do_del(struct bitmap_elem *e, struct bitmap_map *map)
+{
+    return test_and_clear_bit(e->id, map->members);
+}
+
+/* Report whether the member bit of e->id is set; dsize is not needed here. */
+static int
+bitmap_ip_do_test(struct bitmap_elem *e, struct bitmap_map *map, size_t dsize)
+{
+    return test_bit(e->id, map->members);
+}
+
+/*
+ * Fill the list header with the set's range (network byte order) and copy
+ * every present member, plus its comment when enabled, into `members`.
+ */
+static void
+bitmap_ip_do_list(struct ipset *set, struct ipset_bitmap_header *header,
+                  struct ipset_member *members)
+{
+    struct bitmap_ip *map = set->data;
+    struct ipset_member *member;
+    uint32_t id;    /* same type as map->elements: no signed/unsigned compare */
+    ext_t *ext;
+
+    header->range.min_addr.in.s_addr = htonl(map->first_ip);
+    if (map->cidr)
+        header->cidr = map->cidr;
+    else
+        header->range.max_addr.in.s_addr = htonl(map->last_ip);
+
+    member = members;
+    for (id = 0; id < map->elements; id++) {
+        if (test_bit(id, map->members)) {
+            member->addr.in.s_addr = htonl(map->first_ip + id);
+            if (set->comment) {
+                ext = get_elem(map->extensions, id, set->dsize);
+                rte_strlcpy(member->comment, ext->comment, IPSET_MAXCOMLEN);
+            }
+            member++;
+        }
+    }
+}
+
+/*
+ * Add, delete or test addresses described by `param`.
+ *
+ * TEST checks the single address param->range.min_addr and returns 0 for an
+ * address outside the set's range.  ADD/DEL act on the requested range (or
+ * CIDR block) restricted to the set's range; addresses outside are skipped.
+ * Returns the first non-zero result of the type's adt function, or EDPVS_OK.
+ */
+static int
+bitmap_ip_adt(int opcode, struct ipset *set, struct ipset_param *param)
+{
+    int ret;
+    elem_t e;
+    ext_t *ext;
+    uint32_t ip = 0, ip_to = 0;
+    struct bitmap_ip *map = set->data;
+    ipset_adtfn adtfn = set->type->adtfn[opcode];
+
+    memset(&e, 0, sizeof(e));
+    ip = ntohl(param->range.min_addr.in.s_addr);
+
+    if (opcode == IPSET_OP_TEST) {
+        /* an address outside the range can never be a member, and its
+         * wrapped index must not be used to address the bitmap */
+        if (!ip_in_range(map, ip))
+            return 0;
+        e.id = ip_to_id(map, ip);
+        return adtfn(set, &e, 0);
+    }
+
+    if (param->cidr) {
+        ip_set_mask_from_to(ip, ip_to, param->cidr);
+    } else {
+        ip_to = ntohl(param->range.max_addr.in.s_addr);
+    }
+
+    /* Restrict the request to the set's range up front instead of walking
+     * (and skipping) every address outside of it. */
+    if (ip < map->first_ip)
+        ip = map->first_ip;
+    if (ip_to > map->last_ip)
+        ip_to = map->last_ip;
+    if (ip > ip_to)
+        return EDPVS_OK;
+
+    /* Exit is tested before the increment so that ip_to == UINT32_MAX
+     * cannot wrap `ip` around and loop forever. */
+    for (;;) {
+        e.id = ip_to_id(map, ip);
+        ret = adtfn(set, &e, param->flag);
+
+        if (ret)
+            return ret;
+
+        if (set->comment && opcode == IPSET_OP_ADD) {
+            ext = get_elem(map->extensions, e.id, set->dsize);
+            rte_strlcpy(ext->comment, param->comment, IPSET_MAXCOMLEN);
+        }
+
+        if (ip == ip_to)
+            break;
+        ip++;
+    }
+    return EDPVS_OK;
+}
+
+/*
+ * Match the source or destination IPv4 address of a packet against the set.
+ * Returns 0 for non-IPv4 sets/packets, a missing L3 header, or an address
+ * outside the set's range; otherwise the result of the type's TEST function.
+ */
+static int
+bitmap_ip_test(struct ipset *set, struct rte_mbuf *mbuf, bool dst_match)
+{
+    elem_t e;
+    uint32_t addr;
+    struct rte_ipv4_hdr *ip4hdr;
+    struct bitmap_ip *map = set->data;
+
+    if (set->family != AF_INET || mbuf_address_family(mbuf) != AF_INET)
+        return 0;
+
+    ip4hdr = mbuf_header_l3(mbuf);
+    if (unlikely(!ip4hdr))
+        return 0;
+
+    addr = ntohl(dst_match ? ip4hdr->dst_addr : ip4hdr->src_addr);
+    if (!ip_in_range(map, addr))
+        return 0;
+
+    memset(&e, 0, sizeof(e));
+    e.id = ip_to_id(map, addr);
+
+    return set->type->adtfn[IPSET_OP_TEST](set, &e, 0);
+}
+
+struct ipset_type_variant bitmap_ip_variant = {
+    .adt = bitmap_ip_adt,
+    .test = bitmap_ip_test,
+    .bitmap.do_del = bitmap_ip_do_del,
+    .bitmap.do_test = bitmap_ip_do_test,
+    .bitmap.do_list = bitmap_ip_do_list
+};
+
+/*
+ * Create the private data of a bitmap:ip set from a CIDR block or an
+ * explicit address range.
+ * Returns EDPVS_OK, EDPVS_INVAL for an inverted range or one whose element
+ * count does not fit in 32 bits, or EDPVS_NOMEM when allocation fails.
+ */
+static int
+bitmap_ip_create(struct ipset *set, struct ipset_param *param)
+{
+    void *mem;
+    size_t size, map_size;
+    uint32_t elements;
+    struct bitmap_ip *map;
+    uint32_t first_ip = 0, last_ip = 0;
+    struct inet_addr_range *range = &param->range;
+
+    first_ip = ntohl(range->min_addr.in.s_addr);
+    if (param->cidr) {
+        ip_set_mask_from_to(first_ip, last_ip, param->cidr);
+    } else {
+        last_ip = ntohl(param->range.max_addr.in.s_addr);
+    }
+
+    if (last_ip < first_ip)
+        return EDPVS_INVAL;
+
+    elements = last_ip - first_ip + 1;
+    /* 0.0.0.0 - 255.255.255.255 wraps the 32-bit counter to zero */
+    if (elements == 0)
+        return EDPVS_INVAL;
+
+    set->comment = param->option.create.comment? true : false;
+    set->dsize = set->comment? sizeof(ext_t) : 0;
+    set->variant = &bitmap_ip_variant;
+
+    /* allocate memory */
+    size = sizeof(*map);
+    map_size = BITS_TO_LONGS(elements) * sizeof(unsigned long);
+    size += map_size;
+    size += elements * set->dsize;
+
+    mem = rte_zmalloc("ipset bitmap:ip", size, RTE_CACHE_LINE_SIZE);
+    if (unlikely(mem == NULL))
+        return EDPVS_NOMEM;
+    /* memory layout :
+       | map | members | extensions | */
+    map = mem;
+    map->size = size;
+    map->elements = elements;
+    map->members = mem + sizeof(*map);
+    map->extensions = mem + sizeof(*map) + map_size;
+
+    map->first_ip = first_ip;
+    map->last_ip = last_ip;
+    map->cidr = param->cidr;
+    set->data = mem;
+
+    return EDPVS_OK;
+}
+
+struct ipset_type bitmap_ip_type = {
+    .name = "bitmap:ip",
+    .create = bitmap_ip_create,
+    .destroy = bitmap_destroy,
+    .flush = bitmap_flush,
+    .list = bitmap_list,
+    .adtfn = bitmap_adtfn
+};
diff --git a/src/ipset/ipset_bitmap_ipmac.c b/src/ipset/ipset_bitmap_ipmac.c
new file mode 100644
index 000000000..554ec03ba
--- /dev/null
+++ b/src/ipset/ipset_bitmap_ipmac.c
@@ -0,0 +1,232 @@
+/*
+ * DPVS is a software load balancer (Virtual Server) based on DPDK.
+ *
+ * Copyright (C) 2021 iQIYI (www.iqiyi.com).
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+#include "ipset/bitops.h"
+#include "ipset/ipset.h"
+#include "ipset/pfxlen.h"
+#include "ipset/ipset_bitmap.h"
+
+/*
+ * Private data of a "bitmap:ip,mac" set: one bit per IPv4 address of the
+ * inclusive range [first_ip, last_ip], followed by one extension slot
+ * (MAC address, optional comment) per address.
+ */
+struct bitmap_ipmac {
+    size_t size;                /* total size */
+    uint32_t elements;          /* number of max elements in the set */
+    unsigned long *members;     /* the set members */
+    unsigned char *extensions;  /* the extensions for each member */
+
+    uint32_t first_ip;          /* host byte order, included in range */
+    uint32_t last_ip;           /* host byte order, included in range */
+    uint8_t cidr;               /* range cidr */
+};
+
+/* Lookup key: bit index plus a pointer to the MAC address to compare. */
+typedef struct bitmap_ipmac_elem {
+    uint32_t id;
+    uint8_t *mac;
+} elem_t;
+
+/* Per-member extension data; the comment part is only stored when the set
+ * was created with comments (see dsize in bitmap_ipmac_create). */
+typedef struct bitmap_ipmac_ext {
+    uint8_t mac[6];
+    char comment[IPSET_MAXCOMLEN];
+} ext_t;
+
+/*
+ * Return non-zero when all six bytes of the MAC address are zero.
+ * The bytes are read one by one: extension slots may be packed with a
+ * 6-byte stride, so a wider typed load could be misaligned.
+ */
+static inline int
+is_zero_mac_addr(const uint8_t *mac)
+{
+    return !(mac[0] | mac[1] | mac[2] | mac[3] | mac[4] | mac[5]);
+}
+
+/*
+ * Translate a host-order address into its bit index.
+ * The caller must make sure the address lies inside [first_ip, last_ip];
+ * otherwise the unsigned subtraction wraps and the result is not an index.
+ */
+static uint32_t
+ip_to_id(struct bitmap_ipmac *m, uint32_t ip)
+{
+    return ip - m->first_ip;
+}
+
+/* Tell whether a host-order address is covered by the set's range. */
+static inline bool
+ip_in_range(const struct bitmap_ipmac *m, uint32_t ip)
+{
+    return ip >= m->first_ip && ip <= m->last_ip;
+}
+
+/* Clear the member bit of e->id and hand back test_and_clear_bit()'s result. */
+static int
+bitmap_ipmac_do_del(struct bitmap_elem *e, struct bitmap_map *map)
+{
+    return test_and_clear_bit(e->id, map->members);
+}
+
+/*
+ * Return 1 when the address is a member and the MAC matches, 0 otherwise.
+ * An all-zero MAC on either side (stored or queried) acts as a wildcard.
+ */
+static int
+bitmap_ipmac_do_test(struct bitmap_elem *elem, struct bitmap_map *map, size_t dsize)
+{
+    ext_t *ext;
+    elem_t *e = (elem_t *)elem;
+
+    if (test_bit(e->id, map->members) == 0)
+        return 0;
+
+    ext = get_elem(map->extensions, e->id, dsize);
+
+    if (is_zero_mac_addr(ext->mac) || is_zero_mac_addr(e->mac))
+        return 1;
+    return !memcmp(ext->mac, e->mac, sizeof(ext->mac))? 1 : 0;
+}
+
+/*
+ * Fill the list header with the set's range (network byte order) and copy
+ * every present member with its MAC, plus its comment when enabled.
+ */
+static void
+bitmap_ipmac_do_list(struct ipset *set, struct ipset_bitmap_header *header,
+                     struct ipset_member *members)
+{
+    struct bitmap_ipmac *map = set->data;
+    struct ipset_member *member;
+    uint32_t id;    /* same type as map->elements: no signed/unsigned compare */
+    ext_t *ext;
+
+    header->range.min_addr.in.s_addr = htonl(map->first_ip);
+    if (map->cidr)
+        header->cidr = map->cidr;
+    else
+        header->range.max_addr.in.s_addr = htonl(map->last_ip);
+
+    member = members;
+    for (id = 0; id < map->elements; id++) {
+        if (test_bit(id, map->members)) {
+            member->addr.in.s_addr = htonl(map->first_ip + id);
+            ext = get_elem(map->extensions, id, set->dsize);
+            rte_memcpy(member->mac, ext->mac, sizeof(ext->mac));
+            if (set->comment)
+                rte_strlcpy(member->comment, ext->comment, IPSET_MAXCOMLEN);
+            member++;
+        }
+    }
+}
+
+/*
+ * Add, delete or test the single (address, MAC) pair described by `param`.
+ * Returns EDPVS_INVAL when the address is outside the set's range, the
+ * non-zero result of the type's adt function, or EDPVS_OK.
+ */
+static int
+bitmap_ipmac_adt(int opcode, struct ipset *set, struct ipset_param *param)
+{
+    int ret;
+    elem_t e;
+    ext_t *ext;
+    uint32_t ip = 0;
+    struct bitmap_ipmac *map = set->data;
+    ipset_adtfn adtfn = set->type->adtfn[opcode];
+
+    ip = ntohl(param->range.min_addr.in.s_addr);
+    if (!ip_in_range(map, ip))
+        return EDPVS_INVAL;
+
+    e.id = ip_to_id(map, ip);
+    e.mac = param->mac;
+
+    ret = adtfn(set, &e, param->flag);
+
+    if (ret)
+        return ret;
+
+    /* the generic add only sets the bit; the MAC/comment live in the slot */
+    if (opcode == IPSET_OP_ADD) {
+        ext = get_elem(map->extensions, e.id, set->dsize);
+        rte_memcpy(ext->mac, param->mac, sizeof(ext->mac));
+        if (set->comment)
+            rte_strlcpy(ext->comment, param->comment, IPSET_MAXCOMLEN);
+    }
+
+    return EDPVS_OK;
+}
+
+/*
+ * Match the (address, MAC) pair of a packet against the set, using either
+ * the destination or the source fields.  Returns 0 for non-IPv4
+ * sets/packets, missing headers, or an address outside the set's range.
+ */
+static int
+bitmap_ipmac_test(struct ipset *set, struct rte_mbuf *mbuf, bool dst_match)
+{
+    elem_t e;
+    uint32_t addr;
+    struct bitmap_ipmac *map = set->data;
+    struct rte_ether_hdr *ehdr;
+    struct rte_ipv4_hdr *ip4hdr;
+
+    if (set->family != AF_INET || mbuf_address_family(mbuf) != AF_INET)
+        return 0;
+    ip4hdr = mbuf_header_l3(mbuf);
+    if (unlikely(!ip4hdr))
+        return 0;
+    ehdr = mbuf_header_l2(mbuf);
+    if (unlikely(!ehdr))
+        return 0;
+
+    /* an out-of-range address would wrap into an invalid bit index */
+    addr = ntohl(dst_match ? ip4hdr->dst_addr : ip4hdr->src_addr);
+    if (!ip_in_range(map, addr))
+        return 0;
+
+    memset(&e, 0, sizeof(e));
+    e.id = ip_to_id(map, addr);
+    if (dst_match)
+        e.mac = &ehdr->d_addr.addr_bytes[0];
+    else
+        e.mac = &ehdr->s_addr.addr_bytes[0];
+
+    return set->type->adtfn[IPSET_OP_TEST](set, &e, 0);
+}
+
+struct ipset_type_variant bitmap_ipmac_variant = {
+    .adt = bitmap_ipmac_adt,
+    .test = bitmap_ipmac_test,
+    .bitmap.do_del = bitmap_ipmac_do_del,
+    .bitmap.do_test = bitmap_ipmac_do_test,
+    .bitmap.do_list = bitmap_ipmac_do_list
+};
+
+/*
+ * Create the private data of a bitmap:ip,mac set from a CIDR block or an
+ * explicit address range.
+ * Returns EDPVS_OK, EDPVS_INVAL for an inverted range or one whose element
+ * count does not fit in 32 bits, or EDPVS_NOMEM when allocation fails.
+ */
+static int
+bitmap_ipmac_create(struct ipset *set, struct ipset_param *param)
+{
+    void *mem;
+    size_t size, map_size;
+    uint32_t elements;
+    struct bitmap_ipmac *map;
+    uint32_t first_ip = 0, last_ip = 0;
+    struct inet_addr_range *range = &param->range;
+
+    first_ip = ntohl(range->min_addr.in.s_addr);
+    if (param->cidr) {
+        ip_set_mask_from_to(first_ip, last_ip, param->cidr);
+    } else {
+        last_ip = ntohl(param->range.max_addr.in.s_addr);
+    }
+
+    if (last_ip < first_ip)
+        return EDPVS_INVAL;
+
+    elements = last_ip - first_ip + 1;
+    /* 0.0.0.0 - 255.255.255.255 wraps the 32-bit counter to zero */
+    if (elements == 0)
+        return EDPVS_INVAL;
+
+    set->comment = param->option.create.comment? true : false;
+    /* without comments only the MAC part of the slot is stored */
+    set->dsize = set->comment? sizeof(ext_t) : offsetof(ext_t, comment);
+    set->variant = &bitmap_ipmac_variant;
+
+    /* allocate memory */
+    size = sizeof(*map);
+    map_size = BITS_TO_LONGS(elements) * sizeof(unsigned long);
+    size += map_size;
+    size += elements * set->dsize;
+
+    mem = rte_zmalloc("ipset bitmap:ip,mac", size, RTE_CACHE_LINE_SIZE);
+    if (unlikely(mem == NULL))
+        return EDPVS_NOMEM;
+    /* memory layout :
+       | map | members | extensions | */
+    map = mem;
+    map->size = size;
+    map->elements = elements;
+    map->members = mem + sizeof(*map);
+    map->extensions = mem + sizeof(*map) + map_size;
+
+    map->first_ip = first_ip;
+    map->last_ip = last_ip;
+    map->cidr = param->cidr;
+    set->data = mem;
+
+    return EDPVS_OK;
+}
+
+struct ipset_type bitmap_ipmac_type = {
+    .name = "bitmap:ip,mac",
+    .create = bitmap_ipmac_create,
+    .destroy = bitmap_destroy,
+    .flush = bitmap_flush,
+    .list = bitmap_list,
+    .adtfn = bitmap_adtfn
+};
diff --git a/src/ipset/ipset_bitmap_port.c b/src/ipset/ipset_bitmap_port.c
new file mode 100644
index 000000000..1c38b3f6e
--- /dev/null
+++ b/src/ipset/ipset_bitmap_port.c
@@ -0,0 +1,212 @@
+/*
+ * DPVS is a software load balancer (Virtual Server) based on DPDK.
+ *
+ * Copyright (C) 2021 iQIYI (www.iqiyi.com).
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+#include "ipset/bitops.h"
+#include "ipset/ipset.h"
+#include "ipset/pfxlen.h"
+#include "ipset/ipset_bitmap.h"
+
+/*
+ * Private data of a "bitmap:port" set: one bit per (protocol, port) for
+ * the inclusive port range [first_port, last_port], TCP first then UDP,
+ * followed by one extension slot per bit when comments are enabled.
+ */
+struct bitmap_port {
+    size_t size;                /* total size */
+    uint32_t elements;          /* number of max elements in the set */
+    unsigned long *members;     /* the set members */
+    unsigned char *extensions;  /* the extensions for each member */
+
+    uint16_t first_port;        /* host byte order, included in range */
+    uint16_t last_port;         /* host byte order, included in range */
+};
+
+typedef struct bitmap_elem elem_t;
+
+/* Per-member extension data: only the optional comment. */
+typedef struct bitmap_port_ext {
+    char comment[IPSET_MAXCOMLEN];
+} ext_t;
+
+/* port layout
+   | TCP 1st-last | UDP 1st-last |
+
+   Map a (port, proto) pair to its bit index.  Every protocol other than
+   TCP is placed in the second (UDP) half.  The caller must make sure the
+   port lies inside [first_port, last_port]: an out-of-range TCP port would
+   otherwise land in the UDP half or beyond the bitmap.
+ */
+static uint32_t
+port_to_id(struct bitmap_port *m, uint16_t port, uint8_t proto)
+{
+    if (proto == IPPROTO_TCP)
+        return port - m->first_port;
+    else
+        return port - m->first_port + m->elements/2;
+}
+
+/* Tell whether a host-order port is covered by the set's range. */
+static inline bool
+port_in_range(const struct bitmap_port *m, uint16_t port)
+{
+    return port >= m->first_port && port <= m->last_port;
+}
+
+/* Clear the member bit of e->id and hand back test_and_clear_bit()'s result. */
+static int
+bitmap_port_do_del(struct bitmap_elem *e, struct bitmap_map *map)
+{
+    return test_and_clear_bit(e->id, map->members);
+}
+
+/* Report whether the member bit of e->id is set; dsize is not needed here. */
+static int
+bitmap_port_do_test(struct bitmap_elem *e, struct bitmap_map *map, size_t dsize)
+{
+    return test_bit(e->id, map->members);
+}
+
+/*
+ * Fill the list header with the set's port range and copy every present
+ * member (port, protocol, comment when enabled) into `members`.
+ */
+static void
+bitmap_port_do_list(struct ipset *set, struct ipset_bitmap_header *header,
+                    struct ipset_member *members)
+{
+    struct bitmap_port *map = set->data;
+    struct ipset_member *member;
+    uint32_t id;    /* same type as map->elements: no signed/unsigned compare */
+    ext_t *ext;
+
+    header->range.min_port = map->first_port;
+    header->range.max_port = map->last_port;
+
+    member = members;
+    for (id = 0; id < map->elements; id++) {
+        if (test_bit(id, map->members)) {
+            /* second half of the bitmap holds the UDP ports */
+            if (id >= map->elements/2) {
+                member->port = map->first_port + id - map->elements/2;
+                member->proto = IPPROTO_UDP;
+            } else {
+                member->port = map->first_port + id;
+                member->proto = IPPROTO_TCP;
+            }
+            if (set->comment) {
+                ext = get_elem(map->extensions, id, set->dsize);
+                rte_strlcpy(member->comment, ext->comment, IPSET_MAXCOMLEN);
+            }
+            member++;
+        }
+    }
+}
+
+/*
+ * Add, delete or test ports described by `param`.
+ *
+ * TEST checks the single port param->range.min_port and returns 0 for a
+ * port outside the set's range.  ADD/DEL act on the requested port range
+ * restricted to the set's range; ports outside are skipped.
+ * Returns the first non-zero result of the type's adt function, or EDPVS_OK.
+ */
+static int
+bitmap_port_adt(int opcode, struct ipset *set, struct ipset_param *param)
+{
+    int ret;
+    elem_t e;
+    ext_t *ext;
+    uint16_t port, port_from, port_to;
+    struct bitmap_port *map = set->data;
+    ipset_adtfn adtfn = set->type->adtfn[opcode];
+
+    memset(&e, 0, sizeof(e));
+    port_from = param->range.min_port;
+    port_to = param->range.max_port;
+
+    if (opcode == IPSET_OP_TEST) {
+        /* an out-of-range port would alias another protocol's bit or
+         * address memory past the bitmap */
+        if (!port_in_range(map, port_from))
+            return 0;
+        e.id = port_to_id(map, port_from, param->proto);
+        return adtfn(set, &e, 0);
+    }
+
+    /* restrict the request to the set's range */
+    if (port_from < map->first_port)
+        port_from = map->first_port;
+    if (port_to > map->last_port)
+        port_to = map->last_port;
+    if (port_from > port_to)
+        return EDPVS_OK;
+
+    /* exit is tested before the increment so port_to == 65535 cannot wrap */
+    for (port = port_from; ; port++) {
+        e.id = port_to_id(map, port, param->proto);
+        ret = adtfn(set, &e, param->flag);
+
+        if (ret)
+            return ret;
+
+        if (set->comment && opcode == IPSET_OP_ADD) {
+            ext = get_elem(map->extensions, e.id, set->dsize);
+            rte_strlcpy(ext->comment, param->comment, IPSET_MAXCOMLEN);
+        }
+
+        if (port == port_to)
+            break;
+    }
+    return EDPVS_OK;
+}
+
+/*
+ * Match the source or destination L4 port of a TCP/UDP packet against the
+ * set.  Returns 0 for other protocols, a missing L4 header, or a port
+ * outside the set's range.
+ */
+static int
+bitmap_port_test(struct ipset *set, struct rte_mbuf *mbuf, bool dst_match)
+{
+    elem_t e;
+    uint16_t proto, port;
+    struct rte_udp_hdr *l4hdr;
+    struct bitmap_port *map = set->data;
+
+    proto = mbuf_protocol(mbuf);
+    if (proto != IPPROTO_TCP && proto != IPPROTO_UDP)
+        return 0;
+    /* only the two port fields are read through the UDP header layout */
+    l4hdr = mbuf_header_l4(mbuf);
+    if (!l4hdr)
+        return 0;
+
+    port = ntohs(dst_match ? l4hdr->dst_port : l4hdr->src_port);
+    if (!port_in_range(map, port))
+        return 0;
+
+    memset(&e, 0, sizeof(e));
+    e.id = port_to_id(map, port, proto);
+
+    return set->type->adtfn[IPSET_OP_TEST](set, &e, 0);
+}
+
+struct ipset_type_variant bitmap_port_variant = {
+    .adt = bitmap_port_adt,
+    .test = bitmap_port_test,
+    .bitmap.do_del = bitmap_port_do_del,
+    .bitmap.do_test = bitmap_port_do_test,
+    .bitmap.do_list = bitmap_port_do_list
+};
+
+/*
+ * Create the private data of a bitmap:port set for the requested port
+ * range.  Returns EDPVS_OK, EDPVS_INVAL for an inverted range, or
+ * EDPVS_NOMEM when allocation fails.
+ */
+static int
+bitmap_port_create(struct ipset *set, struct ipset_param *param)
+{
+    void *mem;
+    size_t size, map_size;
+    uint32_t elements;
+    struct bitmap_port *map;
+    uint16_t first_port = 0, last_port = 0;
+    struct inet_addr_range *range = &param->range;
+
+    first_port = range->min_port;
+    last_port = range->max_port;
+
+    /* an inverted range would turn the element count below into a huge
+     * unsigned value */
+    if (last_port < first_port)
+        return EDPVS_INVAL;
+
+    /* TCP and UDP both included */
+    elements = ((uint32_t)last_port - first_port + 1) * 2;
+    set->comment = param->option.create.comment? true : false;
+    set->dsize = set->comment? sizeof(ext_t) : 0;
+    set->variant = &bitmap_port_variant;
+
+    /* allocate memory */
+    size = sizeof(*map);
+    map_size = BITS_TO_LONGS(elements) * sizeof(unsigned long);
+    size += map_size;
+    size += elements * set->dsize;
+
+    mem = rte_zmalloc("ipset bitmap:port", size, RTE_CACHE_LINE_SIZE);
+    if (unlikely(mem == NULL))
+        return EDPVS_NOMEM;
+    /* memory layout :
+       | map | members | extensions | */
+    map = mem;
+    map->size = size;
+    map->elements = elements;
+    map->members = mem + sizeof(*map);
+    map->extensions = mem + sizeof(*map) + map_size;
+
+    map->first_port = first_port;
+    map->last_port = last_port;
+    set->data = mem;
+
+    return EDPVS_OK;
+}
+
+struct ipset_type bitmap_port_type = {
+    .name = "bitmap:port",
+    .create = bitmap_port_create,
+    .destroy = bitmap_destroy,
+    .flush = bitmap_flush,
+    .list = bitmap_list,
+    .adtfn = bitmap_adtfn
+};
diff --git a/src/ipset/ipset_core.c b/src/ipset/ipset_core.c
new file mode 100644
index 000000000..2d752be41
--- /dev/null
+++ b/src/ipset/ipset_core.c
@@ -0,0 +1,335 @@
+/*
+ * DPVS is a software load balancer (Virtual Server) based on DPDK.
+ *
+ * Copyright (C) 2021 iQIYI (www.iqiyi.com).
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+#include
+#include
+#include
+#include "ctrl.h"
+#include "ipv4.h"
+#include "ipv6.h"
+#include "ipset/ipset.h"
+#include "conf/common.h"
+
+#define this_ipsets_tbl (RTE_PER_LCORE(ip_sets))
+
+#define IPSETS_TBL_BITS 7
+#define IPSETS_TBL_SIZE (1 << IPSETS_TBL_BITS)
+#define IPSETS_TBL_MASK (IPSETS_TBL_SIZE - 1)
+
+/* Registered ipset types list */
+static struct list_head ipset_types;
+/* Ip sets hash table */
+static RTE_DEFINE_PER_LCORE(struct list_head *, ip_sets);
+
+/* Find a set by name in this lcore's table; NULL when it does not exist. */
+static struct ipset *
+ipset_lookup(const char *name)
+{
+    struct ipset *set;
+    uint32_t hash;
+
+    hash = rte_jhash(name, strlen(name), 0) & IPSETS_TBL_MASK;
+
+    list_for_each_entry(set, &this_ipsets_tbl[hash], list) {
+        if (!strcmp(set->name, name))
+            return set;
+    }
+    return NULL;
+}
+
+/*
+ * Look up a set by name and take a reference on it.
+ * A set with a non-zero reference count cannot be destroyed
+ * (see IPSET_OP_DESTROY in ipset_local_action).
+ */
+struct ipset *
+ipset_get(const char *name)
+{
+    struct ipset *set = ipset_lookup(name);
+
+    if (set == NULL)
+        return NULL;
+
+    set->references++;
+    return set;
+}
+
+/* Find a registered set type by name; NULL when the type is unknown. */
+static struct ipset_type *
+ipset_type_lookup(char *name)
+{
+    struct ipset_type *type;
+
+    list_for_each_entry(type, &ipset_types, l) {
+        if (!strcmp(type->name, name))
+            return type;
+    }
+    return NULL;
+}
+
+/*
+ * Create a set on the calling lcore.
+ * Returns EDPVS_NOTSUPP for an unknown type, EDPVS_EXIST for a duplicate
+ * name, EDPVS_NOMEM when the set cannot be allocated, the error of the
+ * type's create callback, or EDPVS_OK.
+ */
+static int
+ipset_local_create(struct ipset_param *param)
+{
+    struct ipset *set;
+    struct ipset_type *type;
+    uint32_t hash;
+    int ret = 0;
+    struct ipset_option *opt = &param->option;
+
+    if ((type = ipset_type_lookup(param->type)) == NULL) {
+        RTE_LOG(ERR, IPSET, "IP set type %s not supported.\n", param->type);
+        return EDPVS_NOTSUPP;
+    }
+
+    if ((set = ipset_lookup(param->name)) != NULL) {
+        RTE_LOG(ERR, IPSET, "IP set %s already exists.\n", param->name);
+        return EDPVS_EXIST;
+    }
+
+    set = rte_zmalloc("ip set", sizeof(struct ipset), RTE_CACHE_LINE_SIZE);
+    if (unlikely(set == NULL))
+        return EDPVS_NOMEM;
+
+    rte_strlcpy(set->name, param->name, IPSET_MAXNAMELEN);
+    set->type = type;
+
+    /* IPv4 is the default address family */
+    if (opt->family)
+        set->family = opt->family;
+    else
+        set->family = AF_INET;
+
+    if (opt->create.comment)
+        set->comment = true;
+
+    ret = set->type->create(set, param);
+    if (ret)
+        goto out;
+
+    hash = rte_jhash(set->name, strlen(set->name), 0) & IPSETS_TBL_MASK;
+    list_add_tail(&set->list, &this_ipsets_tbl[hash]);
+
+    return EDPVS_OK;
+
+ out:
+    rte_free(set);
+    return ret;
+}
+
+/*
+ * Execute one ipset operation (param->opcode) on the calling lcore.
+ * Returns EDPVS_NOTEXIST when the named set is missing (except for CREATE),
+ * EDPVS_BUSY when destroying a referenced set, EDPVS_NOTSUPP for an unknown
+ * opcode, otherwise the result of the operation.
+ */
+int ipset_local_action(struct ipset_param *param)
+{
+    struct ipset *set;
+    int opcode = param->opcode;
+
+    if (opcode == IPSET_OP_CREATE)
+        return ipset_local_create(param);
+
+    if ((set = ipset_lookup(param->name)) == NULL) {
+        return EDPVS_NOTEXIST;
+    }
+
+    switch (opcode) {
+        case IPSET_OP_ADD:
+        case IPSET_OP_DEL:
+        case IPSET_OP_TEST:
+            return set->variant->adt(opcode, set, param);
+        case IPSET_OP_FLUSH:
+            set->type->flush(set);
+            return EDPVS_OK;
+        case IPSET_OP_DESTROY:
+            if (set->references != 0)
+                return EDPVS_BUSY;
+            set->type->destroy(set);
+            list_del(&set->list);
+            rte_free(set);
+            return EDPVS_OK;
+        default:
+            return EDPVS_NOTSUPP;
+    }
+}
+
+/*
+ * Build the listing of one set (when param->name is non-empty) or of all
+ * sets of the calling lcore.  On success *out points to a buffer allocated
+ * with rte_zmalloc and *outsize holds its size.
+ * Returns EDPVS_INVAL for a NULL request, EDPVS_NOTEXIST when the named set
+ * is missing, EDPVS_NOMEM when allocation fails, or EDPVS_OK.
+ */
+int ipset_do_list(const void *conf, void **out, size_t *outsize)
+{
+    void *data, *ptr;
+    struct ipset *set;
+    struct ipset_param *param = (struct ipset_param *)conf;
+    struct ipset_info_array *array;
+    struct ipset_info *info;
+    int nipset = 0, nelem = 0, i = 0, j;
+
+    if (unlikely(param == NULL))
+        return EDPVS_INVAL;
+
+    /* list the specific set */
+    if (strlen(param->name) != 0) {
+        if ((set = ipset_lookup(param->name)) == NULL)
+            return EDPVS_NOTEXIST;
+
+        *outsize = sizeof(*array) + sizeof(struct ipset_info) +
+                   set->elements * sizeof(struct ipset_member);
+        data = rte_zmalloc(NULL, *outsize, RTE_CACHE_LINE_SIZE);
+        if (data == NULL)
+            return EDPVS_NOMEM;
+
+        array = (struct ipset_info_array *)data;
+        array->nipset = 1;
+        info = &array->infos[0];
+        /* the members follow the single info record */
+        info->members = info + 1;
+        info->references = set->references;
+
+        set->type->list(set, info);
+
+        *out = data;
+        return EDPVS_OK;
+    }
+
+    /* list all sets */
+    /* obtain the total size */
+    for (j = 0; j < IPSETS_TBL_SIZE; j++) {
+        list_for_each_entry(set, &this_ipsets_tbl[j], list) {
+            nipset++;
+            nelem += set->elements;
+        }
+    }
+
+    /* allocate memory */
+    *outsize = sizeof(*array) + nipset * sizeof(struct ipset_info) +
+               nelem * sizeof(struct ipset_member);
+    data = rte_zmalloc(NULL, *outsize, RTE_CACHE_LINE_SIZE);
+    if (data == NULL)
+        return EDPVS_NOMEM;
+
+    array = (struct ipset_info_array *)data;
+    array->nipset = nipset;
+    /* Let the set do the actual listing job
+       Memory layout :
+       | array | info[0] | info[1] | ... | members[0] | members[1] | ... |
+    */
+    ptr = data + sizeof(*array) + nipset * sizeof(*info);
+    for (j = 0; j < IPSETS_TBL_SIZE; j++) {
+        list_for_each_entry(set, &this_ipsets_tbl[j], list) {
+            info = &array->infos[i++];
+            info->members = ptr;
+            info->references = set->references;
+            ptr += set->elements * sizeof(struct ipset_member);
+            set->type->list(set, info);
+        }
+    }
+    *out = data;
+
+    return EDPVS_OK;
+}
+
+/*
+ * Per-lcore teardown: destroy and free every set of the calling lcore,
+ * then release the lcore's table.  Does nothing when the lcore has no
+ * table (ipset_lcore_init did not allocate one).
+ */
+static int
+ipset_flush_lcore(void *arg)
+{
+    int i;
+    struct ipset *set, *next;
+
+    if (!this_ipsets_tbl)
+        return EDPVS_OK;
+
+    for (i = 0; i < IPSETS_TBL_SIZE; i++) {
+        /* _safe variant: the current entry is unlinked and freed */
+        list_for_each_entry_safe(set, next, &this_ipsets_tbl[i], list) {
+            set->type->destroy(set);
+            list_del(&set->list);
+            rte_free(set);
+        }
+    }
+
+    rte_free(this_ipsets_tbl);
+    this_ipsets_tbl = NULL;
+
+    return EDPVS_OK;
+}
+
+/*
+ * Per-lcore setup: allocate and initialise the lcore's set table.
+ * Returns EDPVS_DISABLED on a lcore that is not enabled, EDPVS_NOMEM when
+ * allocation fails, or EDPVS_OK.
+ */
+static int
+ipset_lcore_init(void *arg)
+{
+    int i;
+
+    if (!rte_lcore_is_enabled(rte_lcore_id()))
+        return EDPVS_DISABLED;
+
+    this_ipsets_tbl = rte_zmalloc(NULL,
+                        sizeof(struct list_head) * IPSETS_TBL_SIZE,
+                        RTE_CACHE_LINE_SIZE);
+
+    if (!this_ipsets_tbl)
+        return EDPVS_NOMEM;
+
+    for (i = 0; i < IPSETS_TBL_SIZE; i++)
+        INIT_LIST_HEAD(&this_ipsets_tbl[i]);
+
+    return EDPVS_OK;
+}
+
+/* Append a set type to the list of registered types. */
+static void
+ipset_type_register(struct ipset_type *type)
+{
+    list_add_tail(&type->l, &ipset_types);
+}
+
+/* IPset types */
+extern struct ipset_type bitmap_ip_type, bitmap_ipmac_type, bitmap_port_type,
+       hash_ip_type, hash_net_type, hash_ipport_type, hash_netport_type,
+       hash_netportiface_type, hash_ipportip_type, hash_netportnet_type,
+       hash_ipportnet_type, hash_netportnetport_type;
+
+/*
+ * Module init: register all set types, the control plane and the hash
+ * memory pools, then create the per-lcore tables.  Failures of individual
+ * lcores are only logged.
+ */
+int ipset_init(void)
+{
+    int err;
+    lcoreid_t cid;
+
+    INIT_LIST_HEAD(&ipset_types);
+
+    ipset_type_register(&bitmap_ip_type);
+    ipset_type_register(&bitmap_ipmac_type);
+    ipset_type_register(&bitmap_port_type);
+    ipset_type_register(&hash_ip_type);
+    ipset_type_register(&hash_net_type);
+    ipset_type_register(&hash_ipport_type);
+    ipset_type_register(&hash_netport_type);
+    ipset_type_register(&hash_netportiface_type);
+    ipset_type_register(&hash_ipportip_type);
+    ipset_type_register(&hash_netportnet_type);
+    ipset_type_register(&hash_ipportnet_type);
+    ipset_type_register(&hash_netportnetport_type);
+
+    if ((err = ipset_ctrl_init()) < 0) {
+        RTE_LOG(ERR, IPSET, "ipset ctrl init: %s.\n", dpvs_strerror(err));
+        return err;
+    }
+
+    if ((err = ipset_hash_init()) < 0) {
+        RTE_LOG(ERR, IPSET, "ipset hash init: %s.\n", dpvs_strerror(err));
+        /* do not leave the sockopts/msg types registered on failure */
+        ipset_ctrl_term();
+        return err;
+    }
+
+    rte_eal_mp_remote_launch(ipset_lcore_init, NULL, CALL_MAIN);
+    RTE_LCORE_FOREACH_WORKER(cid) {
+        if ((err = rte_eal_wait_lcore(cid)) < 0) {
+            RTE_LOG(WARNING, IPSET, "%s: lcore %d: %s.\n",
+                    __func__, cid, dpvs_strerror(err));
+        }
+    }
+
+    return EDPVS_OK;
+}
+
+/*
+ * Module teardown: unregister the control plane and flush every lcore.
+ * Errors are logged; the function always returns EDPVS_OK.
+ */
+int ipset_term(void)
+{
+    int err;
+    lcoreid_t cid;
+
+    if ((err = ipset_ctrl_term()) < 0) {
+        RTE_LOG(ERR, IPSET, "ipset ctrl term: %s.\n", dpvs_strerror(err));
+    }
+
+    rte_eal_mp_remote_launch(ipset_flush_lcore, NULL, CALL_MAIN);
+    RTE_LCORE_FOREACH_WORKER(cid) {
+        if ((err = rte_eal_wait_lcore(cid)) < 0) {
+            RTE_LOG(WARNING, IPSET, "%s: lcore %d: %s.\n",
+                    __func__, cid, dpvs_strerror(err));
+        }
+    }
+
+    return EDPVS_OK;
+}
diff --git a/src/ipset/ipset_ctrl.c b/src/ipset/ipset_ctrl.c
new file mode 100644
index 000000000..6a9a5a24c
--- /dev/null
+++ b/src/ipset/ipset_ctrl.c
@@ -0,0 +1,170 @@
+/*
+ * DPVS is a software load balancer (Virtual Server) based on DPDK.
+ *
+ * Copyright (C) 2021 iQIYI (www.iqiyi.com).
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+#include "ctrl.h"
+#include "dpdk.h"
+#include "conf/sockopts.h"
+#include "ipset/ipset.h"
+
+/* Hand out increasing sequence numbers for the ipset multicast messages. */
+static uint32_t ipset_msg_seq(void)
+{
+    static uint32_t counter = 0;
+    return counter++;
+}
+
+/*
+ * Getter for SOCKOPT_GET_IPSET_TEST: run a TEST operation on the calling
+ * lcore and return its result as an int in a freshly allocated buffer.
+ * Returns EDPVS_INVAL for a malformed request or a non-TEST opcode,
+ * EDPVS_NOMEM when the result buffer cannot be allocated, or EDPVS_OK.
+ */
+static int ipset_sockopt_check(const void *conf, size_t size, void **out, size_t *outsize)
+{
+    int *result;
+    struct ipset_param *param = (struct ipset_param *)conf;
+
+    if (!conf || size < sizeof(struct ipset_param))
+        return EDPVS_INVAL;
+
+    if (unlikely(param->opcode != IPSET_OP_TEST))
+        return EDPVS_INVAL;
+
+    result = rte_zmalloc(NULL, sizeof(int), 0);
+    if (unlikely(result == NULL))
+        return EDPVS_NOMEM;
+
+    /* check on master lcore only */
+    *result = ipset_local_action(param);
+
+    *out = result;
+    *outsize = sizeof(*result);
+    return EDPVS_OK;
+}
+
+/*
+ * Setter for SOCKOPT_SET_IPSET: apply the operation on the calling lcore
+ * first and, when that succeeds, multicast it to the other lcores.
+ * Returns EDPVS_INVAL for a malformed request or a TEST opcode, the error
+ * of the local operation, EDPVS_NOMEM when the message cannot be built, or
+ * the result of multicast_msg_send().
+ */
+static int ipset_sockopt_set(sockoptid_t opt, const void *conf, size_t size)
+{
+    struct ipset_param *param = (struct ipset_param *)conf;
+    struct dpvs_msg *msg;
+    int err;
+
+    if (!conf || size < sizeof(struct ipset_param))
+        return EDPVS_INVAL;
+
+    if (unlikely(param->opcode == IPSET_OP_TEST))
+        return EDPVS_INVAL;
+
+    /* set master lcore */
+    err = ipset_local_action(param);
+    if (err != EDPVS_OK)
+        return err;
+
+    /* set slave lcores */
+    msg = msg_make(MSG_TYPE_IPSET_SET, ipset_msg_seq(), DPVS_MSG_MULTICAST,
+                   rte_lcore_id(), sizeof(struct ipset_param), param);
+    if (unlikely(!msg))
+        return EDPVS_NOMEM;
+
+    /* the message is released on both the success and the failure path */
+    err = multicast_msg_send(msg, DPVS_MSG_F_ASYNC, NULL);
+    msg_destroy(&msg);
+
+    return err;
+}
+
+/*
+ * Getter dispatcher: list sets or test membership.
+ * Returns EDPVS_INVAL for a malformed request and EDPVS_NOTSUPP for an
+ * unknown option id.
+ */
+static int ipset_sockopt_get(sockoptid_t opt, const void *conf, size_t size,
+                             void **out, size_t *outsize)
+{
+    switch(opt) {
+        case SOCKOPT_GET_IPSET_LIST:
+            /* ipset_do_list reads param->name, so the request must be a
+             * complete ipset_param just like for the TEST getter */
+            if (!conf || size < sizeof(struct ipset_param))
+                return EDPVS_INVAL;
+            return ipset_do_list(conf, out, outsize);
+        case SOCKOPT_GET_IPSET_TEST:
+            return ipset_sockopt_check(conf, size, out, outsize);
+        default:
+            return EDPVS_NOTSUPP;
+    }
+}
+
+static struct dpvs_sockopts ipset_sockopts = {
+    .version = SOCKOPT_VERSION,
+    .set_opt_min = SOCKOPT_SET_IPSET,
+    .set_opt_max = SOCKOPT_SET_IPSET,
+    .set = ipset_sockopt_set,
+    .get_opt_min = SOCKOPT_GET_IPSET_TEST,
+    .get_opt_max = SOCKOPT_GET_IPSET_LIST,
+    .get = ipset_sockopt_get,
+};
+
+/*
+ * Message callback: apply the ipset operation carried by the message on
+ * the receiving lcore.  Returns EDPVS_INVAL when the payload is not exactly
+ * one struct ipset_param.
+ */
+static int ipset_set_cb(struct dpvs_msg *msg)
+{
+    struct ipset_param *param;
+
+    if (msg->len != sizeof(struct ipset_param))
+        return EDPVS_INVAL;
+    param = (struct ipset_param *)msg->data;
+
+    return ipset_local_action(param);
+}
+
+struct dpvs_msg_type ipset_msg_types[] = {
+    {
+        .type = MSG_TYPE_IPSET_SET,
+        .prio = MSG_PRIO_NORM,
+        .mode = DPVS_MSG_MULTICAST,
+        .unicast_msg_cb = ipset_set_cb,
+    },
+};
+
+/*
+ * Register the ipset sockopts and message types.  When a message type
+ * fails to register, everything registered so far is rolled back and the
+ * error is returned.
+ */
+int ipset_ctrl_init(void)
+{
+    int i, err;
+
+    err = sockopt_register(&ipset_sockopts);
+    if (err != EDPVS_OK)
+        return err;
+
+    for (i = 0; i < NELEMS(ipset_msg_types); i++) {
+        err = msg_type_mc_register(&ipset_msg_types[i]);
+        if (err != EDPVS_OK)
+            break;
+    }
+    if (err != EDPVS_OK) {
+        /* undo the message types registered before the failing one */
+        for (--i; i >= 0; i--)
+            msg_type_mc_unregister(&ipset_msg_types[i]);
+        sockopt_unregister(&ipset_sockopts);
+        return err;
+    }
+
+    return EDPVS_OK;
+}
+
+/*
+ * Unregister the ipset message types and sockopts.  Message type failures
+ * are logged only; a sockopt failure is logged and returned.
+ */
+int ipset_ctrl_term(void)
+{
+    int i, err;
+
+    for (i = 0; i < NELEMS(ipset_msg_types); i++) {
+        err = msg_type_mc_unregister(&ipset_msg_types[i]);
+        if (err != EDPVS_OK)
+            RTE_LOG(ERR, IPSET, "%s: fail to unregister ipset_msg_types[%d]\n", __func__, i);
+    }
+
+    err = sockopt_unregister(&ipset_sockopts);
+    if (err != EDPVS_OK) {
+        RTE_LOG(ERR, IPSET, "%s: fail to unregister ipset_sockopts\n", __func__);
+        return err;
+    }
+
+    return EDPVS_OK;
+}
diff --git a/src/ipset/ipset_hash.c b/src/ipset/ipset_hash.c
new file mode 100644
index 000000000..e4b93e18f
--- /dev/null
+++ b/src/ipset/ipset_hash.c
@@ -0,0 +1,395 @@
+/*
+ * DPVS is a software load balancer (Virtual Server) based 
on DPDK.
+ *
+ * Copyright (C) 2021 iQIYI (www.iqiyi.com).
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+#include "conf/common.h"
+#include "ipset/ipset_hash.h"
+#include "ipset/pfxlen.h"
+#include "parser/parser.h"
+
+#define DEF_HASHSIZE    1024
+#define DEF_MAXELEM     65535
+
+/* Call the hash-variant callback do_<act> of the set in scope; expects a
+ * local variable named `set`.  (Not named `do`: that is a C keyword.) */
+#define hash_do(act, ...) set->variant->hash.do_##act(__VA_ARGS__)
+
+/* hash mempool */
+#define IPSET_HASH_POOL_SIZE_MIN    65536
+#define IPSET_HASH_POOL_SIZE_DEF    262143
+#define IPSET_HASH_CACHE_SIZE_DEF   256
+/* this should be larger than the element of all types */
+#define HASH_ELEM_SIZE_MAX          128
+#define this_hash_cache             (ipset_hash_cache[rte_socket_id()])
+
+static struct rte_mempool *ipset_hash_cache[DPVS_MAX_SOCKET];
+
+static int ipset_hash_pool_size = IPSET_HASH_POOL_SIZE_DEF;
+
+/* common hash element definition
+   (hash type that contains net element must follow the order) */
+typedef struct hash_elem {
+    union inet_addr ip1;
+    uint8_t cidr;
+    union inet_addr ip2;
+    uint8_t cidr2;
+} elem_t;
+
+/*
+ * Mask one IPv4 address of the element down to `cidr` bits and record the
+ * prefix length: ip2/cidr2 when `inner` is true, ip1/cidr otherwise.
+ */
+void
+hash_data_netmask4(void *elem, uint8_t cidr, bool inner)
+{
+    elem_t *e = (elem_t *)elem;
+
+    if (inner) {
+        e->ip2.in.s_addr &= ip_set_netmask(cidr);
+        e->cidr2 = cidr;
+    } else {
+        e->ip1.in.s_addr &= ip_set_netmask(cidr);
+        e->cidr = cidr;
+    }
+}
+
+/* IPv6 counterpart of hash_data_netmask4(). */
+void
+hash_data_netmask6(void *elem, uint8_t cidr, bool inner)
+{
+    elem_t *e = (elem_t *)elem;
+
+    if (inner) {
+        ip6_netmask(&e->ip2, cidr);
+        e->cidr2 = cidr;
+    } else {
+        ip6_netmask(&e->ip1, cidr);
+        e->cidr = cidr;
+    }
+}
+
+/* Hash `len` bytes of `data` with rte_jhash and reduce the result by `mask`. */
+uint32_t
+jhash_hashkey(void *data, int len, uint32_t mask)
+{
+    return rte_jhash(data, len, 0) & mask;
+}
+
+/*
+ * Add an element to a hash set.
+ *
+ * When an equal element already exists, its extension data is overwritten
+ * if IPSET_F_FORCE is set in `flag`; otherwise EDPVS_EXIST is returned.
+ * Returns EDPVS_NOMEM when the set is full or the pool is exhausted,
+ * EDPVS_INVAL when the element does not fit a pool object, or EDPVS_OK.
+ */
+static int
+hash_add(struct ipset *set, void *value, uint16_t flag)
+{
+    struct hash_type *htype = set->data;
+    struct hash_entry *hnode;
+    struct list_head *head;
+    void *obj = NULL, *elem;
+    uint32_t key;
+    elem_t *e;
+
+    key = hash_do(hash, value, set->hash_len, htype->mask);
+    head = &htype->htable[key];
+    list_for_each_entry(hnode, head, list) {
+        if (hash_do(compare, value, hnode->elem) != COMPARE_INEQUAL) {
+            /* `!flag & F` would negate first; test the FORCE bit itself */
+            if (!(flag & IPSET_F_FORCE))
+                return EDPVS_EXIST;
+            //overwrite extension
+            rte_memcpy(hnode->elem + set->hash_len,
+                    value + set->hash_len, set->dsize - set->hash_len);
+            return EDPVS_OK;
+        }
+    }
+
+    /* the limit only concerns new elements, so it is checked after the
+     * duplicate lookup: overwriting works on a full set too */
+    if (unlikely(set->elements >= htype->maxelem))
+        return EDPVS_NOMEM;
+
+    /* the element is stored inside a fixed-size pool object */
+    if (unlikely(set->dsize > HASH_ELEM_SIZE_MAX))
+        return EDPVS_INVAL;
+
+    /* obj memory layout
+       | hnode | elem | */
+    if (unlikely(rte_mempool_get(this_hash_cache, &obj) != 0 || !obj))
+        return EDPVS_NOMEM;
+
+    memset(obj, 0, sizeof(struct hash_entry) + HASH_ELEM_SIZE_MAX);
+
+    hnode = (struct hash_entry *)obj;
+    list_add_tail(&hnode->list, head);
+
+    elem = obj + sizeof(*hnode);
+    rte_memcpy(elem, value, set->dsize);
+    hnode->elem = elem;
+    set->elements++;
+
+    /* update cidr map */
+    e = (elem_t *)value;
+    if (set->net_count > 0)
+        htype->cidr_map[e->cidr][0]++;
+    if (set->net_count == 2)
+        htype->cidr_map[e->cidr2][1]++;
+
+    return EDPVS_OK;
+}
+
+/*
+ * Remove the element equal to `value` from a hash set.
+ * Returns EDPVS_OK, or EDPVS_NOTEXIST when no such element is stored.
+ */
+static int
+hash_del(struct ipset *set, void *value, uint16_t flag)
+{
+    struct hash_type *htype = set->data;
+    struct hash_entry *hnode, *next;
+    struct list_head *head;
+    uint32_t key;
+    elem_t *e;
+
+    key = hash_do(hash, value, set->hash_len, htype->mask);
+    head = &htype->htable[key];
+    list_for_each_entry_safe(hnode, next, head, list) {
+        if (hash_do(compare, value, hnode->elem) != COMPARE_INEQUAL) {
+            list_del(&hnode->list);
+            rte_mempool_put(this_hash_cache, hnode);
+            set->elements--;
+
+            /* update cidr map */
+            e = (elem_t *)value;
+            if (set->net_count > 0)
+                htype->cidr_map[e->cidr][0]--;
+            if (set->net_count == 2)
+                htype->cidr_map[e->cidr2][1]--;
+
+            return EDPVS_OK;
+        }
+    }
+    return EDPVS_NOTEXIST;
+}
+
+/*
+ * Look `elem` up in its bucket.  Returns the first COMPARE_EQUAL_ACCEPT or
+ * COMPARE_EQUAL_REJECT reported by the compare callback, else
+ * COMPARE_INEQUAL.
+ */
+static inline int
+do_test(struct ipset *set, struct hash_type *htype, void *elem)
+{
+    int res;
+    uint32_t key;
+    struct hash_entry *hnode;
+    struct list_head *head ;
+
+    key = hash_do(hash, elem, set->hash_len, htype->mask);
+    head = &htype->htable[key];
+    list_for_each_entry(hnode, head, list) {
+        res = hash_do(compare, elem, hnode->elem);
+        if (res == COMPARE_EQUAL_ACCEPT ||
+            res == COMPARE_EQUAL_REJECT)
+            return res;
+    }
+    return COMPARE_INEQUAL;
+}
+
+/*
+ * Test an address against every prefix length present in the set, from the
+ * longest to the shortest.  Returns 1 on the first accepting match and 0 on
+ * the first rejecting ("nomatch") one or when nothing matches.
+ * `value` is masked in place while searching.
+ */
+static int
+test_cidrs(struct ipset *set, struct hash_type *htype, void *value)
+{
+    int i, j, res;
+    uint8_t host_mask = set->family == AF_INET? 32 : 128;
+
+    if (set->net_count == 1) {
+        for (i = host_mask; i >= 0; i--) {
+            if (htype->cidr_map[i][0] <= 0)
+                continue;
+            hash_do(netmask, value, i, false);
+
+            res = do_test(set, htype, value);
+            if (res == COMPARE_EQUAL_ACCEPT)
+                return 1;
+            if (res == COMPARE_EQUAL_REJECT)    // nomatch
+                return 0;
+        }
+        return 0;
+    } else {
+        elem_t *e = (elem_t *)value;
+        union inet_addr ip2_save = e->ip2;
+        for (i = host_mask; i >= 0; i--) {
+            /* the inner loop masks ip2; start each round from the original */
+            e->ip2 = ip2_save;
+            if (htype->cidr_map[i][0] <= 0)
+                continue;
+            hash_do(netmask, value, i, false);
+            for (j = host_mask; j >= 0; j--) {
+                if (htype->cidr_map[j][1] <= 0)
+                    continue;
+                hash_do(netmask, value, j, true);
+
+                res = do_test(set, htype, value);
+                if (res == COMPARE_EQUAL_ACCEPT)
+                    return 1;
+                if (res == COMPARE_EQUAL_REJECT)    // nomatch
+                    return 0;
+            }
+        }
+        return 0;
+    }
+}
+
+/* Test membership of `value`; returns 1 when it is accepted, 0 otherwise. */
+static int
+hash_test(struct ipset *set, void *value, uint16_t flag)
+{
+    struct hash_type *htype = set->data;
+    elem_t *e = (elem_t *)value;
+    /* If we test an IP address and not a network cidr,
+     * try all possible network sizes
+     */
+    if ((set->net_count == 1 && !e->cidr) ||
+        (set->net_count == 2 && !(e->cidr || e->cidr2))) {
+
+        return test_cidrs(set, htype, value);
+    }
+
+    if (do_test(set, htype, value) == COMPARE_EQUAL_ACCEPT)
+        return 1;
+
+    return 0;
+}
+
+ipset_adtfn hash_adtfn[IPSET_ADT_MAX] = { hash_add, hash_del, hash_test };
+
+/* Remove every element of the set and reset its cidr map. */
+void
+hash_flush(struct ipset *set)
+{
+    int i;
+    struct hash_type *htype = set->data;
+    struct hash_entry *hnode, *next;
+
+    for (i = 0; i < htype->hashsize; i++) {
+        list_for_each_entry_safe(hnode, next, &htype->htable[i], list) {
+            list_del(&hnode->list);
+            rte_mempool_put(this_hash_cache, hnode);
+            set->elements--;
+        }
+    }
+
+    assert(set->elements == 0);
+    memset(htype->cidr_map, 0, sizeof(htype->cidr_map));
+}
+
+/* Flush the set and free its private data (not the set object itself). */
+void
+hash_destroy(struct ipset *set)
+{
+    hash_flush(set);
+    rte_free(set->data);
+}
+
+/*
+ * Fill `info` with the set's description and copy every element into
+ * info->members through the variant's list callback.
+ */
+void
+hash_list(struct ipset *set, struct ipset_info *info)
+{
+    int i;
+    struct hash_type *htype = set->data;
+    struct hash_entry *hnode;
+    struct ipset_member *member = info->members;
+
+    strcpy(info->name, set->name);
+    strcpy(info->type, set->type->name);
+    info->comment = set->comment? true : false;
+    info->af = set->family;
+    info->entries = set->elements;
+    info->size = htype->hashsize * sizeof(struct list_head)
+                + (sizeof(struct hash_entry) + HASH_ELEM_SIZE_MAX) * set->elements;
+
+    info->hash.hashsize = htype->hashsize;
+    info->hash.maxelem = htype->maxelem;
+
+    for (i = 0; i < htype->hashsize; i++) {
+        list_for_each_entry(hnode, &htype->htable[i], list) {
+            hash_do(list, member, hnode->elem, set->comment);
+            member++;
+        }
+    }
+}
+
+/* common create func for hash type
+ *
+ * Allocates the bucket table using the requested hashsize/maxelem or the
+ * defaults.  Returns EDPVS_OK, or EDPVS_NOMEM when allocation fails. */
+int
+hash_create(struct ipset *set, struct ipset_param *param)
+{
+    int i;
+    void *mem;
+    size_t size;
+    uint32_t hashsize;
+    struct hash_type *htype;
+    struct ipset_option *opt = &param->option;
+
+    if (opt->create.hashsize) {
+        is_power2(opt->create.hashsize, 0, &opt->create.hashsize);
+        hashsize = opt->create.hashsize;
+    } else {
+        hashsize = DEF_HASHSIZE;
+    }
+
+    /* allocate memory */
+    size = sizeof(*htype) + hashsize * sizeof(struct list_head);
+    mem = rte_zmalloc("ipset hashtype", size, RTE_CACHE_LINE_SIZE);
+    if (unlikely(mem == NULL))
+        return EDPVS_NOMEM;
+    /* memory layout:
+       | htype | htable | */
+    htype = mem;
+    htype->htable = mem + sizeof(*htype);
+
+    htype->hashsize = hashsize;
+    htype->mask = htype->hashsize - 1;
+
+    if (opt->create.maxelem)
+        htype->maxelem = opt->create.maxelem;
+    else
+        htype->maxelem = DEF_MAXELEM;
+
+    for (i = 0; i < htype->hashsize; i++)
+        INIT_LIST_HEAD(&htype->htable[i]);
+
+    set->data = mem;
+
+    return EDPVS_OK;
+}
+
+/*
+ * Create one element pool per NUMA node, sized by the configured
+ * ipset_hash_pool_size.  On failure the pools created so far are released
+ * and EDPVS_NOMEM is returned.
+ */
+int
+ipset_hash_init(void)
+{
+    int i;
+    char poolname[32];
+
+    for (i = 0; i < get_numa_nodes(); i++) {
+        snprintf(poolname, sizeof(poolname), "ipset_hash_pool_%d", i);
+        /* honour the "ipset_hash_pool_size" keyword, not just the default */
+        ipset_hash_cache[i] = rte_mempool_create(poolname,
+                ipset_hash_pool_size,
+                sizeof(struct hash_entry) + HASH_ELEM_SIZE_MAX,
+                IPSET_HASH_CACHE_SIZE_DEF,
+                0, NULL, NULL, NULL, NULL, i, 0);
+        if (!ipset_hash_cache[i]) {
+            while (--i >= 0) {
+                rte_mempool_free(ipset_hash_cache[i]);
+                ipset_hash_cache[i] = NULL;
+            }
+            return EDPVS_NOMEM;
+        }
+    }
+    return EDPVS_OK;
+}
+
+/*
+ * Config handler of "ipset_hash_pool_size": values below the minimum fall
+ * back to the default; other values are passed through is_power2() and
+ * stored minus one.
+ */
+static void
+ipset_hash_pool_size_handler(vector_t tokens)
+{
+    char *str = set_value(tokens);
+    int pool_size;
+
+    assert(str);
+
+    pool_size = atoi(str);
+
+    if (pool_size < IPSET_HASH_POOL_SIZE_MIN) {
+        RTE_LOG(WARNING, IPSET, "invalid ipset_hash_pool_size %s, using default %d\n",
+                str, IPSET_HASH_POOL_SIZE_DEF);
+        ipset_hash_pool_size = IPSET_HASH_POOL_SIZE_DEF;
+    } else {
+        is_power2(pool_size, 1, &pool_size);
+        ipset_hash_pool_size = pool_size - 1;
+        /* log the value that is actually used */
+        RTE_LOG(INFO, IPSET, "ipset_hash_pool_size = %d (round to 2^n-1)\n",
+                ipset_hash_pool_size);
+    }
+
+    FREE_PTR(str);
+}
+
+/* Register the "ipset_defs" configuration section and its keywords. */
+void
+install_ipset_hash_keywords(void)
+{
+    install_keyword_root("ipset_defs", NULL);
+    install_keyword("ipset_hash_pool_size", ipset_hash_pool_size_handler, KW_TYPE_INIT);
+}
diff --git a/src/ipset/ipset_hash_ip.c b/src/ipset/ipset_hash_ip.c
new file mode 100644
index 000000000..4e114cc5a
--- /dev/null
+++ b/src/ipset/ipset_hash_ip.c
@@ -0,0 +1,211 @@
+/*
+ * DPVS is a software load balancer (Virtual Server) based on DPDK.
+ *
+ * Copyright (C) 2021 iQIYI (www.iqiyi.com).
+ * All Rights Reserved.
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ +#include "ipset/ipset_hash.h" +#include "ipset/pfxlen.h" + +typedef struct hash_ip_elem4 { + uint32_t ip; + + char comment[IPSET_MAXCOMLEN]; +} elem4_t; + +static int +hash_ip_data_equal4(const void *e1, const void *e2) +{ + return ((elem4_t *)e1)->ip == ((elem4_t *)e2)->ip; +} + +static void +hash_ip_do_list4(struct ipset_member *member, void *elem, bool comment) +{ + elem4_t *e = (elem4_t *)elem; + + member->addr.in.s_addr = e->ip; + if (comment) + rte_strlcpy(member->comment, e->comment, IPSET_MAXCOMLEN); +} + +static uint32_t +hash_ip_hashkey4(void *data, int len, uint32_t mask) +{ + return (((elem4_t *)data)->ip * 31) & mask; +} + +static int +hash_ip_adt4(int opcode, struct ipset *set, struct ipset_param *param) +{ + elem4_t e; + int ret; + uint32_t ip, ip_to; + + ipset_adtfn adtfn = set->type->adtfn[opcode]; + + if (param->option.family != AF_INET) + return EDPVS_INVAL; + + if (opcode == IPSET_OP_TEST) { + e.ip = param->range.min_addr.in.s_addr; + + return adtfn(set, &e, 0); + } + + if (set->comment && opcode == IPSET_OP_ADD) + rte_strlcpy(e.comment, param->comment, IPSET_MAXCOMLEN); + + ip = ntohl(param->range.min_addr.in.s_addr); + if (param->cidr) { + ip_set_mask_from_to(ip, ip_to, param->cidr); + } else { + ip_to = ntohl(param->range.max_addr.in.s_addr); + } + for (; ip <= ip_to; ip++) { + e.ip = htonl(ip); + ret = adtfn(set, &e, param->flag); + if (ret) + return ret; + } + return EDPVS_OK; +} + +static int +hash_ip_test4(struct ipset *set, 
struct rte_mbuf *mbuf, bool dst_match) +{ + elem4_t e; + struct rte_ipv4_hdr *ip4hdr; + + if (set->family != AF_INET || mbuf_address_family(mbuf) != AF_INET) + return 0; + + ip4hdr = mbuf_header_l3(mbuf); + if (unlikely(!ip4hdr)) + return 0; + + memset(&e, 0, sizeof(e)); + + if (dst_match) + e.ip = ip4hdr->dst_addr; + else + e.ip = ip4hdr->src_addr; + + return set->type->adtfn[IPSET_OP_TEST](set, &e, 0); +} + +struct ipset_type_variant hash_ip_variant4 = { + .adt = hash_ip_adt4, + .test = hash_ip_test4, + .hash.do_compare = hash_ip_data_equal4, + .hash.do_list = hash_ip_do_list4, + .hash.do_hash = hash_ip_hashkey4 +}; + +typedef struct hash_ip_elem6 { + struct in6_addr ip; + + char comment[IPSET_MAXNAMELEN]; +} elem6_t; + +static int +hash_ip_data_equal6(const void *e1, const void *e2) +{ + return inet_addr_equal(AF_INET6, e1, e2); +} + +static void +hash_ip_do_list6(struct ipset_member *member, void *elem, bool comment) +{ + elem6_t *e = (elem6_t *)elem; + + member->addr.in6 = e->ip; + if (comment) + rte_strlcpy(member->comment, e->comment, IPSET_MAXCOMLEN); +} + +static int +hash_ip_adt6(int opcode, struct ipset *set, struct ipset_param *param) +{ + elem6_t e; + + ipset_adtfn adtfn = set->type->adtfn[opcode]; + + if (param->option.family != AF_INET6) + return EDPVS_INVAL; + + e.ip = param->range.min_addr.in6; + + if (set->comment && opcode == IPSET_OP_ADD) + rte_strlcpy(e.comment, param->comment, IPSET_MAXCOMLEN); + + return adtfn(set, &e, param->flag); +} + +static int +hash_ip_test6(struct ipset *set, struct rte_mbuf *mbuf, bool dst_match) +{ + elem6_t e; + struct rte_ipv6_hdr *ip6hdr; + + if (set->family != AF_INET6 || mbuf_address_family(mbuf) != AF_INET6) + return 0; + + ip6hdr = mbuf_header_l3(mbuf); + if (unlikely(!ip6hdr)) + return 0; + + memset(&e, 0, sizeof(e)); + + if (dst_match) + memcpy(&e.ip, ip6hdr->dst_addr, sizeof(e.ip)); + else + memcpy(&e.ip, ip6hdr->src_addr, sizeof(e.ip)); + + return set->type->adtfn[IPSET_OP_TEST](set, &e, 0); +} + +struct 
ipset_type_variant hash_ip_variant6 = { + .adt = hash_ip_adt6, + .test = hash_ip_test6, + .hash.do_compare = hash_ip_data_equal6, + .hash.do_list = hash_ip_do_list6, + .hash.do_hash = jhash_hashkey +}; + +static int +hash_ip_create(struct ipset *set, struct ipset_param *param) +{ + hash_create(set, param); + if (param->option.family == AF_INET) { + set->dsize = sizeof(elem4_t); + set->hash_len = offsetof(elem4_t, comment); + set->variant = &hash_ip_variant4; + } else { + set->dsize = sizeof(elem6_t); + set->hash_len = offsetof(elem6_t, comment); + set->variant = &hash_ip_variant6; + } + + return EDPVS_OK; +} + +struct ipset_type hash_ip_type = { + .name = "hash:ip", + .create = hash_ip_create, + .destroy = hash_destroy, + .flush = hash_flush, + .list = hash_list, + .adtfn = hash_adtfn, +}; diff --git a/src/ipset/ipset_hash_ipport.c b/src/ipset/ipset_hash_ipport.c new file mode 100644 index 000000000..f94ce34a5 --- /dev/null +++ b/src/ipset/ipset_hash_ipport.c @@ -0,0 +1,291 @@ +/* + * DPVS is a software load balancer (Virtual Server) based on DPDK. + * + * Copyright (C) 2021 iQIYI (www.iqiyi.com). + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ +#include "ipset/ipset_hash.h" +#include "ipset/pfxlen.h" + +typedef struct hash_ipport_elem4 { + uint32_t ip; + uint16_t port; + uint8_t proto; + + char comment[IPSET_MAXCOMLEN]; +} elem4_t; + +static int +hash_ipport_data_equal4(const void *elem1, const void *elem2) +{ + elem4_t *e1 = (elem4_t *)elem1; + elem4_t *e2 = (elem4_t *)elem2; + + return e1->ip == e2->ip && e1->port == e2->port && e1->proto == e2->proto; +} + +static void +hash_ipport_do_list4(struct ipset_member *member, void *elem, bool comment) +{ + elem4_t *e = (elem4_t *)elem; + + member->port = ntohs(e->port); + member->proto = e->proto; + member->addr.in.s_addr = e->ip; + if (comment) + rte_strlcpy(member->comment, e->comment, IPSET_MAXCOMLEN); +} + +static uint32_t +hash_ipport_hashkey4(void *data, int len, uint32_t mask) +{ + elem4_t *e = (elem4_t *)data; + + return (e->ip * 31 + e->port * 31 + e->proto) & mask; +} + +static int +hash_ipport_adt4(int opcode, struct ipset *set, struct ipset_param *param) +{ + elem4_t e; + int ret; + uint16_t port; + uint32_t ip, ip_to; + ipset_adtfn adtfn = set->type->adtfn[opcode]; + + if (set->family != param->option.family) + return EDPVS_INVAL; + + memset(&e, 0, sizeof(e)); + + if (opcode == IPSET_OP_TEST) { + e.ip = param->range.min_addr.in.s_addr; + e.proto = param->proto; + e.port = htons(param->range.min_port); + + return adtfn(set, &e, 0); + } + + if (set->comment && opcode == IPSET_OP_ADD) + rte_strlcpy(e.comment, param->comment, IPSET_MAXCOMLEN); + + ip = ntohl(param->range.min_addr.in.s_addr); + if (param->cidr) { + ip_set_mask_from_to(ip, ip_to, param->cidr); + } else { + ip_to = ntohl(param->range.max_addr.in.s_addr); + } + for (; ip <= ip_to; ip++) { + e.ip = htonl(ip); + e.proto = param->proto; + for (port = param->range.min_port; port >= param->range.min_port && + port <= param->range.max_port; port++) { + e.port = htons(port); + ret = adtfn(set, &e, param->flag); + if (ret) + return ret; + } + } + return EDPVS_OK; +} + +static int 
+hash_ipport_test4(struct ipset *set, struct rte_mbuf *mbuf, bool dst_match) +{ + elem4_t e; + uint16_t proto; + struct rte_ipv4_hdr *ip4hdr; + struct rte_udp_hdr *l4hdr = NULL; + + if (set->family != AF_INET || mbuf_address_family(mbuf) != AF_INET) + return 0; + + proto = mbuf_protocol(mbuf); + if (!hash_proto_support(proto)) + return 0; + + if (proto == IPPROTO_TCP || proto == IPPROTO_UDP) { + l4hdr = mbuf_header_l4(mbuf); + if (unlikely(!l4hdr)) + return 0; + } + + ip4hdr = mbuf_header_l3(mbuf); + if (unlikely(!ip4hdr)) + return 0; + + memset(&e, 0, sizeof(e)); + e.proto = proto; + + if (dst_match) { + e.ip = ip4hdr->dst_addr; + if (l4hdr) + e.port = l4hdr->dst_port; + } else { + e.ip = ip4hdr->src_addr; + if (l4hdr) + e.port = l4hdr->src_port; + } + + return set->type->adtfn[IPSET_OP_TEST](set, &e, 0); +} + +struct ipset_type_variant hash_ipport_variant4 = { + .adt = hash_ipport_adt4, + .test = hash_ipport_test4, + .hash.do_compare = hash_ipport_data_equal4, + .hash.do_list = hash_ipport_do_list4, + .hash.do_hash = hash_ipport_hashkey4 +}; + +typedef struct hash_ipport_elem6 { + struct in6_addr ip; + uint16_t port; + uint8_t proto; + + char comment[IPSET_MAXCOMLEN]; +} elem6_t; + +static int +hash_ipport_data_equal6(const void *elem1, const void *elem2) +{ + elem6_t *e1 = (elem6_t *)elem1; + elem6_t *e2 = (elem6_t *)elem2; + + return !memcmp(e1->ip.s6_addr, e2->ip.s6_addr, 16) && + e1->port == e2->port && e1->proto == e2->proto; +} + +static void +hash_ipport_do_list6(struct ipset_member *member, void *elem, bool comment) +{ + elem6_t *e = (elem6_t *)elem; + + member->port = ntohs(e->port); + member->proto = e->proto; + member->addr.in6 = e->ip; + if (comment) + rte_strlcpy(member->comment, e->comment, IPSET_MAXCOMLEN); +} + +static int +hash_ipport_adt6(int opcode, struct ipset *set, struct ipset_param *param) +{ + int ret; + uint16_t port; + elem6_t e; + ipset_adtfn adtfn = set->type->adtfn[opcode]; + + if (set->family != param->option.family) + return 
EDPVS_INVAL; + + memset(&e, 0, sizeof(e)); + + e.ip = param->range.min_addr.in6; + e.proto = param->proto; + + if (opcode == IPSET_OP_TEST) { + e.port = htons(param->range.min_port); + return adtfn(set, &e, 0); + } + + if (set->comment && opcode == IPSET_OP_ADD) + rte_strlcpy(e.comment, param->comment, IPSET_MAXCOMLEN); + + for (port = param->range.min_port; port >= param->range.min_port && + port <= param->range.max_port; port++) { + e.port = htons(port); + ret = adtfn(set, &e, param->flag); + if (ret) + return ret; + } + + return EDPVS_OK; +} + +static int +hash_ipport_test6(struct ipset *set, struct rte_mbuf *mbuf, bool dst_match) +{ + elem6_t e; + uint16_t proto; + struct rte_ipv6_hdr *ip6hdr; + struct rte_udp_hdr *l4hdr = NULL; + + if (set->family != AF_INET6 || mbuf_address_family(mbuf) != AF_INET6) + return 0; + + proto = mbuf_protocol(mbuf); + if (!hash_proto_support(proto)) + return 0; + + if (proto == IPPROTO_TCP || proto == IPPROTO_UDP) { + l4hdr = mbuf_header_l4(mbuf); + if (unlikely(!l4hdr)) + return 0; + } + + ip6hdr = mbuf_header_l3(mbuf); + if (unlikely(!ip6hdr)) + return 0; + + memset(&e, 0, sizeof(e)); + e.proto = proto; + + if (dst_match) { + memcpy(&e.ip, ip6hdr->dst_addr, sizeof(e.ip)); + if (l4hdr) + e.port = l4hdr->dst_port; + } else { + memcpy(&e.ip, ip6hdr->src_addr, sizeof(e.ip)); + if (l4hdr) + e.port = l4hdr->src_port; + } + + return set->type->adtfn[IPSET_OP_TEST](set, &e, 0); +} + +struct ipset_type_variant hash_ipport_variant6 = { + .adt = hash_ipport_adt6, + .test = hash_ipport_test6, + .hash.do_compare = hash_ipport_data_equal6, + .hash.do_list = hash_ipport_do_list6, + .hash.do_hash = jhash_hashkey +}; + +static int +hash_ipport_create(struct ipset *set, struct ipset_param *param) +{ + hash_create(set, param); + + if (param->option.family == AF_INET) { + set->dsize = sizeof(elem4_t); + set->hash_len = offsetof(elem4_t, comment); + set->variant = &hash_ipport_variant4; + } else { + set->dsize = sizeof(elem6_t); + set->hash_len = 
offsetof(elem6_t, comment); + set->variant = &hash_ipport_variant6; + } + + return EDPVS_OK; +} + +struct ipset_type hash_ipport_type = { + .name = "hash:ip,port", + .create = hash_ipport_create, + .destroy = hash_destroy, + .flush = hash_flush, + .list = hash_list, + .adtfn = hash_adtfn, +}; diff --git a/src/ipset/ipset_hash_ipportip.c b/src/ipset/ipset_hash_ipportip.c new file mode 100644 index 000000000..87234e4e7 --- /dev/null +++ b/src/ipset/ipset_hash_ipportip.c @@ -0,0 +1,307 @@ +/* + * DPVS is a software load balancer (Virtual Server) based on DPDK. + * + * Copyright (C) 2021 iQIYI (www.iqiyi.com). + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ +#include "ipset/ipset_hash.h" +#include "ipset/pfxlen.h" + +typedef struct hash_ipportip_elem4 { + uint32_t ip1; + uint32_t ip2; + uint16_t port; + uint8_t proto; + + char comment[IPSET_MAXCOMLEN]; +} elem4_t; + +static int +hash_ipportip_data_equal4(const void *elem1, const void *elem2) +{ + elem4_t *e1 = (elem4_t *)elem1; + elem4_t *e2 = (elem4_t *)elem2; + + return e1->ip1 == e2->ip1 && e1->ip2 == e2->ip2 && + e1->port == e2->port && e1->proto == e2->proto; +} + +static void +hash_ipportip_do_list4(struct ipset_member *member, void *elem, bool comment) +{ + elem4_t *e = (elem4_t *)elem; + + member->port = ntohs(e->port); + member->proto = e->proto; + member->addr.in.s_addr = e->ip1; + member->addr2.in.s_addr = e->ip2; + if (comment) + rte_strlcpy(member->comment, e->comment, IPSET_MAXCOMLEN); +} + +static uint32_t +hash_ipportip_hashkey4(void *data, int len, uint32_t mask) +{ + elem4_t *e = (elem4_t *)data; + + return (e->ip1 * 31 + e->ip2 * 31 + e->port * 31 + e->proto) & mask; +} + +static int +hash_ipportip_adt4(int opcode, struct ipset *set, struct ipset_param *param) +{ + elem4_t e; + int ret; + uint16_t port; + uint32_t ip, ip_to, ip2, ip2_to; + ipset_adtfn adtfn = set->type->adtfn[opcode]; + + if (set->family != param->option.family) + return EDPVS_INVAL; + + memset(&e, 0, sizeof(e)); + + if (opcode == IPSET_OP_TEST) { + e.ip1 = param->range.min_addr.in.s_addr; + e.ip2 = param->range2.min_addr.in.s_addr; + e.proto = param->proto; + e.port = htons(param->range.min_port); + + return adtfn(set, &e, 0); + } + + e.proto = param->proto; + if (set->comment && opcode == IPSET_OP_ADD) + rte_strlcpy(e.comment, param->comment, IPSET_MAXCOMLEN); + + ip = ntohl(param->range.min_addr.in.s_addr); + if (param->cidr) { + ip_set_mask_from_to(ip, ip_to, param->cidr); + } else { + ip_to = ntohl(param->range.max_addr.in.s_addr); + } + for (; ip <= ip_to; ip++) { + e.ip1 = htonl(ip); + + for (port = param->range.min_port; port >= param->range.min_port && + port <= 
param->range.max_port; port++) { + e.port = htons(port); + + ip2 = ntohl(param->range2.min_addr.in.s_addr); + if (param->cidr2) { + ip_set_mask_from_to(ip2, ip2_to, param->cidr2); + } else { + ip2_to = ntohl(param->range2.max_addr.in.s_addr); + } + for (; ip2 <= ip2_to; ip2++) { + e.ip2 = ntohl(ip2); + ret = adtfn(set, &e, param->flag); + if (ret) + return ret; + } + } + } + return EDPVS_OK; +} + +static int +hash_ipportip_test4(struct ipset *set, struct rte_mbuf *mbuf, bool dst_match) +{ + elem4_t e; + uint16_t proto; + struct rte_ipv4_hdr *ip4hdr; + struct rte_udp_hdr *l4hdr = NULL; + + if (set->family != AF_INET || mbuf_address_family(mbuf) != AF_INET) + return 0; + proto = mbuf_protocol(mbuf); + if (!hash_proto_support(proto)) + return 0; + + if (proto == IPPROTO_TCP || proto == IPPROTO_UDP) { + l4hdr = mbuf_header_l4(mbuf); + if (unlikely(!l4hdr)) + return 0; + } + + ip4hdr = mbuf_header_l3(mbuf); + if (unlikely(!ip4hdr)) + return 0; + + memset(&e, 0, sizeof(e)); + e.proto = proto; + e.ip1 = ip4hdr->src_addr; + e.ip2 = ip4hdr->dst_addr; + + if (l4hdr) { + if (dst_match) + e.port = l4hdr->dst_port; + else + e.port = l4hdr->src_port; + } + + return set->type->adtfn[IPSET_OP_TEST](set, &e, 0); +} + +struct ipset_type_variant hash_ipportip_variant4 = { + .adt = hash_ipportip_adt4, + .test = hash_ipportip_test4, + .hash.do_compare = hash_ipportip_data_equal4, + .hash.do_list = hash_ipportip_do_list4, + .hash.do_hash = hash_ipportip_hashkey4 +}; + +typedef struct hash_ipportip_elem6 { + struct in6_addr ip1; + struct in6_addr ip2; + uint16_t port; + uint8_t proto; + + char comment[IPSET_MAXCOMLEN]; +} elem6_t; + +static int +hash_ipportip_data_equal6(const void *elem1, const void *elem2) +{ + elem6_t *e1 = (elem6_t *)elem1; + elem6_t *e2 = (elem6_t *)elem2; + + return !memcmp(e1->ip1.s6_addr, e2->ip1.s6_addr, 16) && + !memcmp(e1->ip2.s6_addr, e2->ip2.s6_addr, 16) && + e1->port == e2->port && + e1->proto == e2->proto; +} + +static void +hash_ipportip_do_list6(struct 
ipset_member *member, void *elem, bool comment) +{ + elem6_t *e = (elem6_t *)elem; + + member->port = ntohs(e->port); + member->proto = e->proto; + member->addr.in6 = e->ip1; + member->addr2.in6 = e->ip2; + if (comment) + rte_strlcpy(member->comment, e->comment, IPSET_MAXCOMLEN); +} + +static int +hash_ipportip_adt6(int opcode, struct ipset *set, struct ipset_param *param) +{ + int ret; + uint16_t port; + elem6_t e; + ipset_adtfn adtfn = set->type->adtfn[opcode]; + + if (set->family != param->option.family) + return EDPVS_INVAL; + + memset(&e, 0, sizeof(e)); + + e.ip1 = param->range.min_addr.in6; + e.ip2 = param->range2.min_addr.in6; + e.proto = param->proto; + + if (opcode == IPSET_OP_TEST) { + e.port = htons(param->range.min_port); + return adtfn(set, &e, 0); + } + + if (set->comment && opcode == IPSET_OP_ADD) + rte_strlcpy(e.comment, param->comment, IPSET_MAXCOMLEN); + + for (port = param->range.min_port; port >= param->range.min_port && + port <= param->range.max_port; port++) { + e.port = htons(port); + ret = adtfn(set, &e, param->flag); + if (ret) + return ret; + } + + return EDPVS_OK; +} + +static int +hash_ipportip_test6(struct ipset *set, struct rte_mbuf *mbuf, bool dst_match) +{ + elem6_t e; + uint16_t proto; + struct rte_ipv6_hdr *ip6hdr; + struct rte_udp_hdr *l4hdr = NULL; + + if (set->family != AF_INET6 || mbuf_address_family(mbuf) != AF_INET6) + return 0; + proto = mbuf_protocol(mbuf); + if (!hash_proto_support(proto)) + return 0; + + if (proto == IPPROTO_TCP || proto == IPPROTO_UDP) { + l4hdr = mbuf_header_l4(mbuf); + if (unlikely(!l4hdr)) + return 0; + } + + ip6hdr = mbuf_header_l3(mbuf); + if (unlikely(!ip6hdr)) + return 0; + + memset(&e, 0, sizeof(e)); + e.proto = proto; + memcpy(&e.ip1, ip6hdr->src_addr, sizeof(e.ip1)); + memcpy(&e.ip2, ip6hdr->dst_addr, sizeof(e.ip2)); + + if (l4hdr) { + if (dst_match) + e.port = l4hdr->dst_port; + else + e.port = l4hdr->src_port; + } + + return set->type->adtfn[IPSET_OP_TEST](set, &e, 0); +} + +struct 
ipset_type_variant hash_ipportip_variant6 = { + .adt = hash_ipportip_adt6, + .test = hash_ipportip_test6, + .hash.do_compare = hash_ipportip_data_equal6, + .hash.do_list = hash_ipportip_do_list6, + .hash.do_hash = jhash_hashkey +}; + +static int +hash_ipportip_create(struct ipset *set, struct ipset_param *param) +{ + hash_create(set, param); + + if (param->option.family == AF_INET) { + set->dsize = sizeof(elem4_t); + set->hash_len = offsetof(elem4_t, comment); + set->variant = &hash_ipportip_variant4; + } else { + set->dsize = sizeof(elem6_t); + set->hash_len = offsetof(elem6_t, comment); + set->variant = &hash_ipportip_variant6; + } + + return EDPVS_OK; +} + +struct ipset_type hash_ipportip_type = { + .name = "hash:ip,port,ip", + .create = hash_ipportip_create, + .destroy = hash_destroy, + .flush = hash_flush, + .list = hash_list, + .adtfn = hash_adtfn, +}; diff --git a/src/ipset/ipset_hash_ipportnet.c b/src/ipset/ipset_hash_ipportnet.c new file mode 100644 index 000000000..0c040e7da --- /dev/null +++ b/src/ipset/ipset_hash_ipportnet.c @@ -0,0 +1,303 @@ +/* + * DPVS is a software load balancer (Virtual Server) based on DPDK. + * + * Copyright (C) 2021 iQIYI (www.iqiyi.com). + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ +#include +#include +#include "ipset/ipset_hash.h" +#include "ipset/pfxlen.h" + +typedef struct hash_ipportnet_elem { + union inet_addr ip; + uint8_t cidr; + union inet_addr ip2; + uint8_t proto; + uint16_t port; + + /* data not evolved in hash calculation */ + char comment[IPSET_MAXCOMLEN]; + bool nomatch; +} elem_t; + +static int +hash_ipportnet_data_equal(const void *elem1, const void *elem2) +{ + elem_t *e2 = (elem_t *)elem2; + + if (memcmp(elem1, elem2, offsetof(elem_t, comment))) + return COMPARE_INEQUAL; + if (e2->nomatch) + return COMPARE_EQUAL_REJECT; + return COMPARE_EQUAL_ACCEPT; +} + +static void +hash_ipportnet_do_list(struct ipset_member *member, void *elem, bool comment) +{ + elem_t *e = (elem_t *)elem; + + member->addr = e->ip; + member->cidr = e->cidr; + member->port = ntohs(e->port); + member->proto = e->proto; + member->nomatch = e->nomatch; + member->addr2 = e->ip2; + + if (comment) + rte_strlcpy(member->comment, e->comment, IPSET_MAXCOMLEN); +} + +static uint32_t +hash_ipportnet_hashkey4(void *data, int len, uint32_t mask) +{ + elem_t *e = (elem_t *)data; + + return (e->ip.in.s_addr * 31 + (((uint32_t)e->port << 16) | + (((uint32_t)e->proto) << 8) | (uint32_t)e->cidr)) & mask; +} + +static int +hash_ipportnet_adt4(int op, struct ipset *set, struct ipset_param *param) +{ + elem_t e; + int ret; + uint16_t port; + uint32_t ip, ip_to, ip2, ip2_to, ip2_from; + ipset_adtfn adtfn = set->type->adtfn[op]; + + if (set->family != param->option.family) + return EDPVS_INVAL; + + memset(&e, 0, sizeof(e)); + + e.cidr = param->cidr; + e.proto = param->proto; + + if (op == IPSET_OP_TEST) { + e.ip.in.s_addr = param->range.min_addr.in.s_addr; + e.ip2.in.s_addr = param->range2.min_addr.in.s_addr; + e.port = htons(param->range.min_port); + return adtfn(set, &e, 0); + } + + if (op == IPSET_OP_ADD) { + if (param->option.add.nomatch) + e.nomatch = true; + if (set->comment) + rte_strlcpy(e.comment, param->comment, IPSET_MAXCOMLEN); + } + + ip = 
ntohl(param->range.min_addr.in.s_addr); + if (e.cidr) { + ip_set_mask_from_to(ip, ip_to, e.cidr); + } else { + ip_to = ntohl(param->range.max_addr.in.s_addr); + } + + ip2_from = ntohl(param->range2.min_addr.in.s_addr);; + if (param->cidr2) { + ip_set_mask_from_to(ip2_from, ip2_to, param->cidr2); + } else { + ip2_to = ntohl(param->range2.max_addr.in.s_addr); + } + + do { + e.ip.in.s_addr = htonl(ip); + ip = ip_set_range_to_cidr(ip, ip_to, &e.cidr); + for (ip2 = ip2_from; ip2 <= ip2_to; ip2++) { + e.ip2.in.s_addr = htonl(ip2); + for (port = param->range.min_port; port >= param->range.min_port + && port <= param->range.max_port; port++) { + e.port = htons(port); + ret = adtfn(set, &e, param->flag); + if (ret) + return ret; + } + } + } while(ip++ < ip_to); + + return EDPVS_OK; +} + +static int +hash_ipportnet_test4(struct ipset *set, struct rte_mbuf *mbuf, bool dst_match) +{ + elem_t e; + uint16_t proto; + struct rte_ipv4_hdr *ip4hdr; + struct rte_udp_hdr *l4hdr = NULL; + + if (set->family != AF_INET || mbuf_address_family(mbuf) != AF_INET) + return 0; + proto = mbuf_protocol(mbuf); + if (!hash_proto_support(proto)) + return 0; + + if (proto == IPPROTO_TCP || proto == IPPROTO_UDP) { + l4hdr = mbuf_header_l4(mbuf); + if (unlikely(!l4hdr)) + return 0; + } + + ip4hdr = mbuf_header_l3(mbuf); + if (unlikely(!ip4hdr)) + return 0; + + memset(&e, 0, sizeof(e)); + e.proto = proto; + // Unlikely other set types, which match source address first and then dest address, + // ip,port,net always matches source address with its "net" part, dest address with its + // "ip" part respectively, and its "port" part match is determined by param dst_match. 
+ e.ip2.in.s_addr = ip4hdr->dst_addr; // dst_ip + e.ip.in.s_addr = ip4hdr->src_addr; // src_net + + if (l4hdr) { + if (dst_match) + e.port = l4hdr->dst_port; // dst_ip,dst_port,src_net + else + e.port = l4hdr->src_port; // dst_ip,src_port,src_net + } + + return set->type->adtfn[IPSET_OP_TEST](set, &e, 0); +} + +struct ipset_type_variant hash_ipportnet_variant4 = { + .adt = hash_ipportnet_adt4, + .test = hash_ipportnet_test4, + .hash.do_compare = hash_ipportnet_data_equal, + .hash.do_netmask = hash_data_netmask4, + .hash.do_list = hash_ipportnet_do_list, + .hash.do_hash = hash_ipportnet_hashkey4, +}; + +static int +hash_ipportnet_adt6(int op, struct ipset *set, struct ipset_param *param) +{ + int ret; + uint16_t port; + elem_t e; + ipset_adtfn adtfn = set->type->adtfn[op]; + + if (set->family != param->option.family) + return EDPVS_INVAL; + + memset(&e, 0, sizeof(e)); + + e.ip2 = param->range2.min_addr; + e.ip = param->range.min_addr; + e.cidr = param->cidr; + e.proto = param->proto; + + if (op == IPSET_OP_TEST) { + e.port = htons(param->range.min_port); + return adtfn(set, &e, 0); + } + + if (op == IPSET_OP_ADD) { + if (param->option.add.nomatch) + e.nomatch = true; + if (set->comment) + rte_strlcpy(e.comment, param->comment, IPSET_MAXCOMLEN); + } + + if (e.cidr) + ip6_netmask(&e.ip, e.cidr); + + for (port = param->range.min_port; port >= param->range.min_port && + port <= param->range.max_port; port++) { + e.port = htons(port); + ret = adtfn(set, &e, param->flag); + if (ret) + return ret; + } + + return EDPVS_OK; +} + +static int +hash_ipportnet_test6(struct ipset *set, struct rte_mbuf *mbuf, bool dst_match) +{ + elem_t e; + uint16_t proto; + struct rte_ipv6_hdr *ip6hdr; + struct rte_udp_hdr *l4hdr = NULL; + + if (set->family != AF_INET6 || mbuf_address_family(mbuf) != AF_INET6) + return 0; + proto = mbuf_protocol(mbuf); + if (!hash_proto_support(proto)) + return 0; + + if (proto == IPPROTO_TCP || proto == IPPROTO_UDP) { + l4hdr = mbuf_header_l4(mbuf); + if 
(unlikely(!l4hdr)) + return 0; + } + + ip6hdr = mbuf_header_l3(mbuf); + if (unlikely(!ip6hdr)) + return 0; + + memset(&e, 0, sizeof(e)); + e.proto = proto; + // Unlikely other set types, which match source address first and then dest address, + // ip,port,net always matches source address with its "net" part, dest address with its + // "ip" part respectively, and its "port" part match is determined by param dst_match. + memcpy(&e.ip2, ip6hdr->dst_addr, sizeof(e.ip2)); // dst_ip + memcpy(&e.ip, ip6hdr->src_addr, sizeof(e.ip)); // src_net + if (l4hdr) { + if (dst_match) + e.port = l4hdr->dst_port; // dst_ip,dst_port,src_net + else + e.port = l4hdr->src_port; // dst_ip,src_port,src_net + } + + return set->type->adtfn[IPSET_OP_TEST](set, &e, 0); +} + +struct ipset_type_variant hash_ipportnet_variant6 = { + .adt = hash_ipportnet_adt6, + .test = hash_ipportnet_test6, + .hash.do_compare = hash_ipportnet_data_equal, + .hash.do_netmask = hash_data_netmask6, + .hash.do_list = hash_ipportnet_do_list, + .hash.do_hash = jhash_hashkey +}; + +static int +hash_ipportnet_create(struct ipset *set, struct ipset_param *param) +{ + hash_create(set, param); + set->net_count = 1; + set->dsize = sizeof(elem_t); + set->hash_len = offsetof(elem_t, comment); + + if (param->option.family == AF_INET) + set->variant = &hash_ipportnet_variant4; + else + set->variant = &hash_ipportnet_variant6; + + return EDPVS_OK; +} + +struct ipset_type hash_ipportnet_type = { + .name = "hash:ip,port,net", + .create = hash_ipportnet_create, + .destroy = hash_destroy, + .flush = hash_flush, + .list = hash_list, + .adtfn = hash_adtfn, +}; diff --git a/src/ipset/ipset_hash_net.c b/src/ipset/ipset_hash_net.c new file mode 100644 index 000000000..7bbee349d --- /dev/null +++ b/src/ipset/ipset_hash_net.c @@ -0,0 +1,237 @@ +/* + * DPVS is a software load balancer (Virtual Server) based on DPDK. + * + * Copyright (C) 2021 iQIYI (www.iqiyi.com). + * All Rights Reserved. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ +#include +#include +#include "ipset/ipset_hash.h" +#include "ipset/pfxlen.h" + +typedef struct hash_net_elem { + union inet_addr ip; + uint8_t cidr; + + char comment[IPSET_MAXCOMLEN]; + bool nomatch; +} elem_t; + +static int +hash_net_data_equal4(const void *elem1, const void *elem2) +{ + elem_t *e1 = (elem_t *)elem1; + elem_t *e2 = (elem_t *)elem2; + + if (e1->ip.in.s_addr == e2->ip.in.s_addr && + e1->cidr == e2->cidr) { + if (e2->nomatch) + return COMPARE_EQUAL_REJECT; + else + return COMPARE_EQUAL_ACCEPT; + } + return COMPARE_INEQUAL; +} + +static void +hash_net_do_list(struct ipset_member *member, void *elem, bool comment) +{ + elem_t *e = (elem_t *)elem; + + member->addr = e->ip; + member->cidr = e->cidr; + member->nomatch = e->nomatch; + + if (comment) + rte_strlcpy(member->comment, e->comment, IPSET_MAXCOMLEN); +} + +static uint32_t +hash_net_hashkey4(void *data, int len, uint32_t mask) +{ + elem_t *e = (elem_t *)data; + + return (e->ip.in.s_addr * 31 + e->cidr * 31) & mask; +} + +static int +hash_net_adt4(int op, struct ipset *set, struct ipset_param *param) +{ + elem_t e; + int ret; + uint32_t ip, ip_to; + ipset_adtfn adtfn = set->type->adtfn[op]; + + if (set->family != param->option.family) + return EDPVS_INVAL; + + memset(&e, 0, sizeof(e)); + + e.cidr = param->cidr; + if (op == IPSET_OP_TEST) { + e.ip.in.s_addr = param->range.min_addr.in.s_addr; + + return adtfn(set, &e, 0); + } + + if (param->opcode == IPSET_OP_ADD) { + if (set->comment) + 
rte_strlcpy(e.comment, param->comment, IPSET_MAXCOMLEN); + if (param->option.add.nomatch) + e.nomatch = true; + } + + ip = ntohl(param->range.min_addr.in.s_addr); + if (e.cidr) { + ip_set_mask_from_to(ip, ip_to, e.cidr); + } else { + ip_to = ntohl(param->range.max_addr.in.s_addr); + } + + do { + e.ip.in.s_addr = htonl(ip); + ip = ip_set_range_to_cidr(ip, ip_to, &e.cidr); + ret = adtfn(set, &e, param->flag); + if (ret) + return ret; + } while(ip++ < ip_to); + + return EDPVS_OK; +} + +static int +hash_net_test4(struct ipset *set, struct rte_mbuf *mbuf, bool dst_match) +{ + elem_t e; + struct rte_ipv4_hdr *ip4hdr; + + if (set->family != AF_INET || mbuf_address_family(mbuf) != AF_INET) + return 0; + + ip4hdr = mbuf_header_l3(mbuf); + if (unlikely(!ip4hdr)) + return 0; + + memset(&e, 0, sizeof(e)); + + if (dst_match) + e.ip.in.s_addr = ip4hdr->dst_addr; + else + e.ip.in.s_addr = ip4hdr->src_addr; + + return set->type->adtfn[IPSET_OP_TEST](set, &e, 0); +} + +struct ipset_type_variant hash_net_variant4 = { + .adt = hash_net_adt4, + .test = hash_net_test4, + .hash.do_compare = hash_net_data_equal4, + .hash.do_netmask = hash_data_netmask4, + .hash.do_list = hash_net_do_list, + .hash.do_hash = hash_net_hashkey4, +}; + +static int +hash_net_data_equal6(const void *elem1, const void *elem2) +{ + if (!memcmp(elem1, elem2, offsetof(elem_t, comment))) { + if (((elem_t *)elem2)->nomatch) + return COMPARE_EQUAL_REJECT; + return COMPARE_EQUAL_ACCEPT; + } + return COMPARE_INEQUAL; +} + +static int +hash_net_adt6(int op, struct ipset *set, struct ipset_param *param) +{ + elem_t e; + ipset_adtfn adtfn = set->type->adtfn[op]; + + if (set->family != param->option.family) + return EDPVS_INVAL; + + memset(&e, 0, sizeof(e)); + + e.ip = param->range.min_addr; + e.cidr = param->cidr; + if (e.cidr) + ip6_netmask(&e.ip, e.cidr); + + if (param->opcode == IPSET_OP_ADD) { + if (set->comment) + rte_strlcpy(e.comment, param->comment, IPSET_MAXCOMLEN); + if (param->option.add.nomatch) + e.nomatch = 
true; + } + + return adtfn(set, &e, param->flag); +} + +static int +hash_net_test6(struct ipset *set, struct rte_mbuf *mbuf, bool dst_match) +{ + elem_t e; + struct rte_ipv6_hdr *ip6hdr; + + if (set->family != AF_INET6 || mbuf_address_family(mbuf) != AF_INET6) + return 0; + + ip6hdr = mbuf_header_l3(mbuf); + if (unlikely(!ip6hdr)) + return 0; + + memset(&e, 0, sizeof(e)); + + if (dst_match) + memcpy(&e.ip, ip6hdr->dst_addr, sizeof(e.ip)); + else + memcpy(&e.ip, ip6hdr->src_addr, sizeof(e.ip)); + + return set->type->adtfn[IPSET_OP_TEST](set, &e, 0); +} + +struct ipset_type_variant hash_net_variant6 = { + .adt = hash_net_adt6, + .test = hash_net_test6, + .hash.do_compare = hash_net_data_equal6, + .hash.do_netmask = hash_data_netmask6, + .hash.do_list = hash_net_do_list, + .hash.do_hash = jhash_hashkey +}; + +static int +hash_net_create(struct ipset *set, struct ipset_param *param) +{ + hash_create(set, param); + set->net_count = 1; + set->dsize = sizeof(elem_t); + set->hash_len = offsetof(elem_t, comment); + + if (param->option.family == AF_INET) + set->variant = &hash_net_variant4; + else + set->variant = &hash_net_variant6; + + return EDPVS_OK; +} + +struct ipset_type hash_net_type = { + .name = "hash:net", + .create = hash_net_create, + .destroy = hash_destroy, + .flush = hash_flush, + .list = hash_list, + .adtfn = hash_adtfn, +}; diff --git a/src/ipset/ipset_hash_netport.c b/src/ipset/ipset_hash_netport.c new file mode 100644 index 000000000..975eef330 --- /dev/null +++ b/src/ipset/ipset_hash_netport.c @@ -0,0 +1,287 @@ +/* + * DPVS is a software load balancer (Virtual Server) based on DPDK. + * + * Copyright (C) 2021 iQIYI (www.iqiyi.com). + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ +#include +#include +#include "ipset/ipset_hash.h" +#include "ipset/pfxlen.h" + +typedef struct hash_netport_elem { + union inet_addr ip; + uint8_t cidr; + uint8_t proto; + uint16_t port; + + /* data not involved in hash calculation or key comparison */ + char comment[IPSET_MAXCOMLEN]; + bool nomatch; +} elem_t; + +static int +hash_netport_data_equal(const void *elem1, const void *elem2) +{ + elem_t *e2 = (elem_t *)elem2; + + if (memcmp(elem1, elem2, offsetof(elem_t, comment))) + return COMPARE_INEQUAL; + if (e2->nomatch) + return COMPARE_EQUAL_REJECT; + return COMPARE_EQUAL_ACCEPT; +} + +static void +hash_netport_do_list(struct ipset_member *member, void *elem, bool comment) +{ + elem_t *e = (elem_t *)elem; + + member->addr = e->ip; + member->cidr = e->cidr; + member->port = ntohs(e->port); + member->proto = e->proto; + member->nomatch = e->nomatch; + + if (comment) + rte_strlcpy(member->comment, e->comment, IPSET_MAXCOMLEN); +} + +static uint32_t +hash_netport_hashkey4(void *data, int len, uint32_t mask) +{ + elem_t *e = (elem_t *)data; + + return (e->ip.in.s_addr * 31 + (((uint32_t)e->port << 16) | + (((uint32_t)e->proto) << 8) | (uint32_t)e->cidr)) & mask; +} + +static int +hash_netport_adt4(int op, struct ipset *set, struct ipset_param *param) +{ + elem_t e; + int ret; + uint16_t port; + uint32_t ip, ip_to; + ipset_adtfn adtfn = set->type->adtfn[op]; + + if (set->family != param->option.family) + return EDPVS_INVAL; + + memset(&e, 0, sizeof(e)); + + e.cidr = param->cidr; + e.proto = param->proto; + + if (op == IPSET_OP_TEST) { + e.ip.in.s_addr = param->range.min_addr.in.s_addr; + e.port = htons(param->range.min_port); + return adtfn(set, &e, 0); + } + + if (op == IPSET_OP_ADD) { + if (param->option.add.nomatch) +
e.nomatch = true; + if (set->comment) + rte_strlcpy(e.comment, param->comment, IPSET_MAXCOMLEN); + } + + ip = ntohl(param->range.min_addr.in.s_addr); + if (e.cidr) { + ip_set_mask_from_to(ip, ip_to, e.cidr); + } else { + ip_to = ntohl(param->range.max_addr.in.s_addr); + } + + do { + e.ip.in.s_addr = htonl(ip); + ip = ip_set_range_to_cidr(ip, ip_to, &e.cidr); + for (port = param->range.min_port; + port >= param->range.min_port && + port <= param->range.max_port; port++) { + e.port = htons(port); + ret = adtfn(set, &e, param->flag); + if (ret) + return ret; + } + } while(ip++ < ip_to); + + return EDPVS_OK; +} + +static int +hash_netport_test4(struct ipset *set, struct rte_mbuf *mbuf, bool dst_match) +{ + elem_t e; + uint16_t proto; + struct rte_ipv4_hdr *ip4hdr; + struct rte_udp_hdr *l4hdr = NULL; + + if (set->family != AF_INET || mbuf_address_family(mbuf) != AF_INET) + return 0; + + proto = mbuf_protocol(mbuf); + if (!hash_proto_support(proto)) + return 0; + + if (proto == IPPROTO_TCP || proto == IPPROTO_UDP) { + l4hdr = mbuf_header_l4(mbuf); + if (unlikely(!l4hdr)) + return 0; + } + + ip4hdr = mbuf_header_l3(mbuf); + if (unlikely(!ip4hdr)) + return 0; + + memset(&e, 0, sizeof(e)); + e.proto = proto; + if (dst_match) { + e.ip.in.s_addr = ip4hdr->dst_addr; + if (l4hdr) + e.port = l4hdr->dst_port; + } else { + e.ip.in.s_addr = ip4hdr->src_addr; + if (l4hdr) + e.port = l4hdr->src_port; + } + + return set->type->adtfn[IPSET_OP_TEST](set, &e, 0); +} + +struct ipset_type_variant hash_netport_variant4 = { + .adt = hash_netport_adt4, + .test = hash_netport_test4, + .hash.do_compare = hash_netport_data_equal, + .hash.do_netmask = hash_data_netmask4, + .hash.do_list = hash_netport_do_list, + .hash.do_hash = hash_netport_hashkey4, +}; + +static int +hash_netport_adt6(int op, struct ipset *set, struct ipset_param *param) +{ + int ret; + uint16_t port; + elem_t e; + ipset_adtfn adtfn = set->type->adtfn[op]; + + if (set->family != param->option.family) + return EDPVS_INVAL; + + 
memset(&e, 0, sizeof(e)); + + e.ip = param->range.min_addr; + e.cidr = param->cidr; + e.proto = param->proto; + + if (op == IPSET_OP_TEST) { + e.port = htons(param->range.min_port); + return adtfn(set, &e, 0); + } + + if (op == IPSET_OP_ADD) { + if (param->option.add.nomatch) + e.nomatch = true; + if (set->comment) + rte_strlcpy(e.comment, param->comment, IPSET_MAXCOMLEN); + } + + if (e.cidr) + ip6_netmask(&e.ip, e.cidr); + + for (port = param->range.min_port; port >= param->range.min_port && + port <= param->range.max_port; port++) { + e.port = htons(port); + ret = adtfn(set, &e, param->flag); + if (ret) + return ret; + } + + return EDPVS_OK; +} + +static int +hash_netport_test6(struct ipset *set, struct rte_mbuf *mbuf, bool dst_match) +{ + elem_t e; + uint16_t proto; + struct rte_ipv6_hdr *ip6hdr; + struct rte_udp_hdr *l4hdr = NULL; + + if (set->family != AF_INET6 || mbuf_address_family(mbuf) != AF_INET6) + return 0; + + proto = mbuf_protocol(mbuf); + if (!hash_proto_support(proto)) + return 0; + + if (proto == IPPROTO_TCP || proto == IPPROTO_UDP) { + l4hdr = mbuf_header_l4(mbuf); + if (unlikely(!l4hdr)) + return 0; + } + + ip6hdr = mbuf_header_l3(mbuf); + if (unlikely(!ip6hdr)) + return 0; + + memset(&e, 0, sizeof(e)); + e.proto = proto; + if (dst_match) { + memcpy(&e.ip, ip6hdr->dst_addr, sizeof(e.ip)); + if (l4hdr) + e.port = l4hdr->dst_port; + } else { + memcpy(&e.ip, ip6hdr->src_addr, sizeof(e.ip)); + if (l4hdr) + e.port = l4hdr->src_port; + } + + return set->type->adtfn[IPSET_OP_TEST](set, &e, 0); +} + +struct ipset_type_variant hash_netport_variant6 = { + .adt = hash_netport_adt6, + .test = hash_netport_test6, + .hash.do_compare = hash_netport_data_equal, + .hash.do_netmask = hash_data_netmask6, + .hash.do_list = hash_netport_do_list, + .hash.do_hash = jhash_hashkey +}; + +static int +hash_netport_create(struct ipset *set, struct ipset_param *param) +{ + hash_create(set, param); + set->net_count = 1; + set->dsize = sizeof(elem_t); + set->hash_len = 
offsetof(elem_t, comment); + + if (param->option.family == AF_INET) + set->variant = &hash_netport_variant4; + else + set->variant = &hash_netport_variant6; + + return EDPVS_OK; +} + +struct ipset_type hash_netport_type = { + .name = "hash:net,port", + .create = hash_netport_create, + .destroy = hash_destroy, + .flush = hash_flush, + .list = hash_list, + .adtfn = hash_adtfn, +}; diff --git a/src/ipset/ipset_hash_netportiface.c b/src/ipset/ipset_hash_netportiface.c new file mode 100644 index 000000000..7b15c4bb5 --- /dev/null +++ b/src/ipset/ipset_hash_netportiface.c @@ -0,0 +1,302 @@ +/* + * DPVS is a software load balancer (Virtual Server) based on DPDK. + * + * Copyright (C) 2021 iQIYI (www.iqiyi.com). + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ +#include +#include +#include "ipset/ipset_hash.h" +#include "ipset/pfxlen.h" + +typedef struct hash_netportiface_elem { + union inet_addr ip; + uint8_t cidr; + uint8_t proto; + uint16_t port; + uint16_t iface; + + /* data not involved in hash calculation or key comparison */ + struct netif_port *dev; + char comment[IPSET_MAXCOMLEN]; + bool nomatch; +} elem_t; + +static int +hash_netportiface_data_equal(const void *elem1, const void *elem2) +{ + elem_t *e2 = (elem_t *)elem2; + + if (memcmp(elem1, elem2, offsetof(elem_t, dev))) + return COMPARE_INEQUAL; + if (e2->nomatch) + return COMPARE_EQUAL_REJECT; + return COMPARE_EQUAL_ACCEPT; +} + +static void +hash_netportiface_do_list(struct ipset_member *member, void *elem, bool comment) +{ + elem_t *e = (elem_t *)elem; + + member->addr = e->ip; + member->cidr = e->cidr; + member->port = ntohs(e->port); + member->proto = e->proto; + member->nomatch = e->nomatch; + rte_strlcpy(member->iface, e->dev->name, IFNAMSIZ); + + if (comment) + rte_strlcpy(member->comment, e->comment, IPSET_MAXCOMLEN); +} + +static uint32_t +hash_netportiface_hashkey4(void *data, int len, uint32_t mask) +{ + elem_t *e = (elem_t *)data; + /* widen port before shifting: as uint16_t it promotes to int, and << 16 overflows for values >= 0x8000 */ + return (e->ip.in.s_addr * 31 + e->cidr * 31 + + (((uint32_t)e->port << 16) | (e->iface | e->proto))) & mask; +} + +static int +hash_netportiface_adt4(int op, struct ipset *set, struct ipset_param *param) +{ + elem_t e; + int ret; + uint16_t port; + uint32_t ip, ip_to; + ipset_adtfn adtfn = set->type->adtfn[op]; + + if (set->family != param->option.family) + return EDPVS_INVAL; + + memset(&e, 0, sizeof(e)); + + e.cidr = param->cidr; + e.proto = param->proto; + e.dev = netif_port_get_by_name(param->iface); + if (unlikely(e.dev == NULL)) + return EDPVS_INVAL; + e.iface = e.dev->id; + + if (op == IPSET_OP_TEST) { + e.ip.in.s_addr = param->range.min_addr.in.s_addr; + e.port = htons(param->range.min_port); + return adtfn(set, &e, 0); + } + + if (op == IPSET_OP_ADD) { + if (param->option.add.nomatch) + e.nomatch = true; + if (set->comment) +
rte_strlcpy(e.comment, param->comment, IPSET_MAXCOMLEN); + } + + ip = ntohl(param->range.min_addr.in.s_addr); + if (e.cidr) { + ip_set_mask_from_to(ip, ip_to, e.cidr); + } else { + ip_to = ntohl(param->range.max_addr.in.s_addr); + } + + do { + e.ip.in.s_addr = htonl(ip); + ip = ip_set_range_to_cidr(ip, ip_to, &e.cidr); + for (port = param->range.min_port; + port >= param->range.min_port && + port <= param->range.max_port; port++) { + e.port = htons(port); + ret = adtfn(set, &e, param->flag); + if (ret) + return ret; + } + } while(ip++ < ip_to); + + return EDPVS_OK; +} + +static int +hash_netportiface_test4(struct ipset *set, struct rte_mbuf *mbuf, bool dst_match) +{ + elem_t e; + uint16_t proto; + struct rte_ipv4_hdr *ip4hdr; + struct rte_udp_hdr *l4hdr = NULL; + + if (set->family != AF_INET || mbuf_address_family(mbuf) != AF_INET) + return 0; + + proto = mbuf_protocol(mbuf); + if (!hash_proto_support(proto)) + return 0; + + if (proto == IPPROTO_TCP || proto == IPPROTO_UDP) { + l4hdr = mbuf_header_l4(mbuf); + if (unlikely(!l4hdr)) + return 0; + } + + ip4hdr = mbuf_header_l3(mbuf); + if (unlikely(!ip4hdr)) + return 0; + + memset(&e, 0, sizeof(e)); + e.proto = proto; + e.iface = mbuf->port; + + if (dst_match) { + e.ip.in.s_addr = ip4hdr->dst_addr; + if (l4hdr) + e.port = l4hdr->dst_port; + } else { + e.ip.in.s_addr = ip4hdr->src_addr; + if (l4hdr) + e.port = l4hdr->src_port; + } + + return set->type->adtfn[IPSET_OP_TEST](set, &e, 0); +} + +struct ipset_type_variant hash_netportiface_variant4 = { + .adt = hash_netportiface_adt4, + .test = hash_netportiface_test4, + .hash.do_compare = hash_netportiface_data_equal, + .hash.do_netmask = hash_data_netmask4, + .hash.do_list = hash_netportiface_do_list, + .hash.do_hash = hash_netportiface_hashkey4, +}; + +static int +hash_netportiface_adt6(int op, struct ipset *set, struct ipset_param *param) +{ + int ret; + uint16_t port; + elem_t e; + ipset_adtfn adtfn = set->type->adtfn[op]; + + if (set->family != param->option.family) + 
return EDPVS_INVAL; + + memset(&e, 0, sizeof(e)); + + e.ip = param->range.min_addr; + e.cidr = param->cidr; + e.proto = param->proto; + e.dev = netif_port_get_by_name(param->iface); + if (unlikely(e.dev == NULL)) + return EDPVS_NOTEXIST; + e.iface = e.dev->id; + + if (op == IPSET_OP_TEST) { + e.port = htons(param->range.min_port); + return adtfn(set, &e, 0); + } + + if (op == IPSET_OP_ADD) { + if (param->option.add.nomatch) + e.nomatch = true; + if (set->comment) + rte_strlcpy(e.comment, param->comment, IPSET_MAXCOMLEN); + } + + if (e.cidr) + ip6_netmask(&e.ip, e.cidr); + + for (port = param->range.min_port; port >= param->range.min_port && + port <= param->range.max_port; port++) { + e.port = htons(port); + ret = adtfn(set, &e, param->flag); + if (ret) + return ret; + } + + return EDPVS_OK; +} + +static int +hash_netportiface_test6(struct ipset *set, struct rte_mbuf *mbuf, bool dst_match) +{ + elem_t e; + uint16_t proto; + struct rte_ipv6_hdr *ip6hdr; + struct rte_udp_hdr *l4hdr = NULL; + + if (set->family != AF_INET6 || mbuf_address_family(mbuf) != AF_INET6) + return 0; + + proto = mbuf_protocol(mbuf); + if (!hash_proto_support(proto)) + return 0; + + if (proto == IPPROTO_TCP || proto == IPPROTO_UDP) { + l4hdr = mbuf_header_l4(mbuf); + if (unlikely(!l4hdr)) + return 0; + } + + ip6hdr = mbuf_header_l3(mbuf); + if (unlikely(!ip6hdr)) + return 0; + + memset(&e, 0, sizeof(e)); + e.proto = proto; + e.iface = mbuf->port; + + if (dst_match) { + memcpy(&e.ip, ip6hdr->dst_addr, sizeof(e.ip)); + if (l4hdr) + e.port = l4hdr->dst_port; + } else { + memcpy(&e.ip, ip6hdr->src_addr, sizeof(e.ip)); + if (l4hdr) + e.port = l4hdr->src_port; + } + + return set->type->adtfn[IPSET_OP_TEST](set, &e, 0); +} + +struct ipset_type_variant hash_netportiface_variant6 = { + .adt = hash_netportiface_adt6, + .test = hash_netportiface_test6, + .hash.do_compare = hash_netportiface_data_equal, + .hash.do_netmask = hash_data_netmask6, + .hash.do_list = hash_netportiface_do_list, + .hash.do_hash = 
jhash_hashkey +}; + +static int +hash_netportiface_create(struct ipset *set, struct ipset_param *param) +{ + hash_create(set, param); + set->net_count = 1; + set->dsize = sizeof(elem_t); + set->hash_len = offsetof(elem_t, dev); + + if (param->option.family == AF_INET) + set->variant = &hash_netportiface_variant4; + else + set->variant = &hash_netportiface_variant6; + + return EDPVS_OK; +} + +struct ipset_type hash_netportiface_type = { + .name = "hash:net,port,iface", + .create = hash_netportiface_create, + .destroy = hash_destroy, + .flush = hash_flush, + .list = hash_list, + .adtfn = hash_adtfn, +}; diff --git a/src/ipset/ipset_hash_netportnet.c b/src/ipset/ipset_hash_netportnet.c new file mode 100644 index 000000000..29dedc4f6 --- /dev/null +++ b/src/ipset/ipset_hash_netportnet.c @@ -0,0 +1,305 @@ +/* + * DPVS is a software load balancer (Virtual Server) based on DPDK. + * + * Copyright (C) 2021 iQIYI (www.iqiyi.com). + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ +#include "ipset/ipset.h" +#include "ipset/pfxlen.h" +#include "ipset/ipset_hash.h" + +typedef struct hash_netportnet_elem { + union inet_addr ip1; + uint8_t cidr1; + union inet_addr ip2; + uint8_t cidr2; + uint8_t proto; + uint16_t port; + + char comment[IPSET_MAXCOMLEN]; + bool nomatch; +} elem_t; + +static int +hash_netportnet_data_equal(const void *elem1, const void *elem2) +{ + elem_t *e2 = (elem_t *)elem2; + + if (memcmp(elem1, elem2, offsetof(elem_t, comment))) + return COMPARE_INEQUAL; + + if (e2->nomatch) + return COMPARE_EQUAL_REJECT; + return COMPARE_EQUAL_ACCEPT; +} + +static void +hash_netportnet_do_list(struct ipset_member *member, void *elem, bool comment) +{ + elem_t *e = (elem_t *)elem; + + member->addr = e->ip1; + member->addr2 = e->ip2; + member->cidr = e->cidr1; + member->cidr2 = e->cidr2; + member->proto = e->proto; + member->port = ntohs(e->port); + member->nomatch = e->nomatch; + if (comment) + rte_strlcpy(member->comment, e->comment, IPSET_MAXCOMLEN); +} + +static uint32_t +hash_netportnet_hashkey4(void *data, int len, uint32_t mask) +{ + elem_t *e = (elem_t *)data; + + return (e->ip1.in.s_addr * 31 + e->ip2.in.s_addr * 31 + + (((uint32_t)e->port << 16) | + ((uint32_t)e->cidr1 << 8) | + ((uint32_t)e->cidr2))) & mask; +} + +static int +hash_netportnet_adt4(int op, struct ipset *set, struct ipset_param *param) +{ + elem_t e; + int ret; + uint16_t port; + uint32_t ip1, ip1_to, ip2, ip2_to, ip2_from; + ipset_adtfn adtfn = set->type->adtfn[op]; + + if (set->family != param->option.family) + return EDPVS_INVAL; + + memset(&e, 0, sizeof(e)); + + e.cidr1 = param->cidr; + e.cidr2 = param->cidr2; + e.proto = param->proto; + + if (op == IPSET_OP_TEST) { + e.ip1.in.s_addr = param->range.min_addr.in.s_addr; + e.ip2.in.s_addr = param->range2.min_addr.in.s_addr; + e.port = htons(param->range.min_port); + return adtfn(set, &e, 0); + } + + if (param->opcode == IPSET_OP_ADD) { + if (set->comment) + rte_strlcpy(e.comment, param->comment, 
IPSET_MAXCOMLEN); + if (param->option.add.nomatch) + e.nomatch = true; + } + + ip1 = ntohl(param->range.min_addr.in.s_addr); + ip2 = ntohl(param->range2.min_addr.in.s_addr); + + if (e.cidr1) { + ip_set_mask_from_to(ip1, ip1_to, e.cidr1); + } else { + ip1_to = ntohl(param->range.max_addr.in.s_addr); + } + + if (e.cidr2) { + ip_set_mask_from_to(ip2, ip2_to, e.cidr2); + } else { + ip2_to = ntohl(param->range2.max_addr.in.s_addr); + } + ip2_from = ip2; + + do { + e.ip1.in.s_addr = htonl(ip1); + ip1 = ip_set_range_to_cidr(ip1, ip1_to, &e.cidr1); + do { + e.ip2.in.s_addr = htonl(ip2); + ip2 = ip_set_range_to_cidr(ip2, ip2_to, &e.cidr2); + for (port = param->range.min_port; port >= param->range.min_port + && port <= param->range.max_port; port++) { + e.port = htons(port); + ret = adtfn(set, &e, param->flag); + if (ret) + return ret; + } + } while(ip2++ < ip2_to); + ip2 = ip2_from; + } while(ip1++ < ip1_to); + + return EDPVS_OK; +} + +static int +hash_netportnet_test4(struct ipset *set, struct rte_mbuf *mbuf, bool dst_match) +{ + elem_t e; + uint16_t proto; + struct rte_ipv4_hdr *ip4hdr; + struct rte_udp_hdr *l4hdr = NULL; + + if (set->family != AF_INET || mbuf_address_family(mbuf) != AF_INET) + return 0; + + proto = mbuf_protocol(mbuf); + if (!hash_proto_support(proto)) + return 0; + if (proto == IPPROTO_TCP || proto == IPPROTO_UDP) { + l4hdr = mbuf_header_l4(mbuf); + if (unlikely(!l4hdr)) + return 0; + } + + ip4hdr = mbuf_header_l3(mbuf); + if (unlikely(!ip4hdr)) + return 0; + + memset(&e, 0, sizeof(e)); + e.proto = proto; + e.ip1.in.s_addr = ip4hdr->src_addr; + e.ip2.in.s_addr = ip4hdr->dst_addr; + if (l4hdr) { + if (dst_match) + e.port = l4hdr->dst_port; + else + e.port = l4hdr->src_port; + } + + return set->type->adtfn[IPSET_OP_TEST](set, &e, 0); +} + +struct ipset_type_variant hash_netportnet_variant4 = { + .adt = hash_netportnet_adt4, + .test = hash_netportnet_test4, + .hash.do_compare = hash_netportnet_data_equal, + .hash.do_netmask = hash_data_netmask4, + 
.hash.do_list = hash_netportnet_do_list, + .hash.do_hash = hash_netportnet_hashkey4 +}; + +static int +hash_netportnet_adt6(int op, struct ipset *set, struct ipset_param *param) +{ + uint16_t port; + int ret; + elem_t e; + ipset_adtfn adtfn = set->type->adtfn[op]; + + if (set->family != param->option.family) + return EDPVS_INVAL; + + memset(&e, 0, sizeof(e)); + + e.ip1 = param->range.min_addr; + e.ip2 = param->range2.min_addr; + e.cidr1 = param->cidr; + e.cidr2 = param->cidr2; + e.proto = param->proto; + + if (param->opcode == IPSET_OP_TEST) { + e.port = htons(param->range.min_port); + return adtfn(set, &e, 0); + } + + if ( param->opcode == IPSET_OP_ADD) { + if (set->comment) + rte_strlcpy(e.comment, param->comment, IPSET_MAXCOMLEN); + if (param->option.add.nomatch) + e.nomatch = true; + } + + if (e.cidr1) + ip6_netmask(&e.ip1, e.cidr1); + if (e.cidr2) + ip6_netmask(&e.ip2, e.cidr2); + + for (port = param->range.min_port; port >= param->range.min_port && + port <= param->range.max_port; port++) { + e.port = htons(port); + ret = adtfn(set, &e, param->flag); + if (ret) + return ret; + } + return EDPVS_OK; +} + +static int +hash_netportnet_test6(struct ipset *set, struct rte_mbuf *mbuf, bool dst_match) +{ + elem_t e; + uint16_t proto; + struct rte_ipv6_hdr *ip6hdr; + struct rte_udp_hdr *l4hdr = NULL; + + if (set->family != AF_INET6 || mbuf_address_family(mbuf) != AF_INET6) + return 0; + + proto = mbuf_protocol(mbuf); + if (!hash_proto_support(proto)) + return 0; + if (proto == IPPROTO_TCP || proto == IPPROTO_UDP) { + l4hdr = mbuf_header_l4(mbuf); + if (unlikely(!l4hdr)) + return 0; + } + + ip6hdr = mbuf_header_l3(mbuf); + if (unlikely(!ip6hdr)) + return 0; + + memset(&e, 0, sizeof(e)); + e.proto = proto; + memcpy(&e.ip1, ip6hdr->src_addr, sizeof(e.ip1)); + memcpy(&e.ip2, ip6hdr->dst_addr, sizeof(e.ip2)); + if (l4hdr) { + if (dst_match) + e.port = l4hdr->dst_port; + else + e.port = l4hdr->src_port; + } + + return set->type->adtfn[IPSET_OP_TEST](set, &e, 0); +} + 
+struct ipset_type_variant hash_netportnet_variant6 = { + .adt = hash_netportnet_adt6, + .test = hash_netportnet_test6, + .hash.do_compare = hash_netportnet_data_equal, + .hash.do_netmask = hash_data_netmask6, + .hash.do_list = hash_netportnet_do_list, + .hash.do_hash = jhash_hashkey +}; + +static int +hash_netportnet_create(struct ipset *set, struct ipset_param *param) +{ + hash_create(set, param); + set->net_count = 2; + set->dsize = sizeof(elem_t); + set->hash_len = offsetof(elem_t, comment); + + if (param->option.family == AF_INET) + set->variant = &hash_netportnet_variant4; + else + set->variant = &hash_netportnet_variant6; + + return EDPVS_OK; +} + +struct ipset_type hash_netportnet_type = { + .name = "hash:net,port,net", + .create = hash_netportnet_create, + .destroy = hash_destroy, + .flush = hash_flush, + .list = hash_list, + .adtfn = hash_adtfn, +}; diff --git a/src/ipset/ipset_hash_netportnetport.c b/src/ipset/ipset_hash_netportnetport.c new file mode 100644 index 000000000..5849c3d46 --- /dev/null +++ b/src/ipset/ipset_hash_netportnetport.c @@ -0,0 +1,317 @@ +/* + * DPVS is a software load balancer (Virtual Server) based on DPDK. + * + * Copyright (C) 2021 iQIYI (www.iqiyi.com). + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ +#include "ipset/ipset.h" +#include "ipset/pfxlen.h" +#include "ipset/ipset_hash.h" + +typedef struct hash_netportnetport_elem { + union inet_addr ip1; + uint8_t cidr1; + union inet_addr ip2; + uint8_t cidr2; + uint8_t proto; + uint16_t port1; + uint16_t port2; + + char comment[IPSET_MAXCOMLEN]; + bool nomatch; +} elem_t; + +static int +hash_netportnetport_data_equal(const void *elem1, const void *elem2) +{ + elem_t *e2 = (elem_t *)elem2; + + if (memcmp(elem1, elem2, offsetof(elem_t, comment))) + return COMPARE_INEQUAL; + + if (e2->nomatch) + return COMPARE_EQUAL_REJECT; + return COMPARE_EQUAL_ACCEPT; +} + +static void +hash_netportnetport_do_list(struct ipset_member *member, void *elem, bool comment) +{ + elem_t *e = (elem_t *)elem; + + member->addr = e->ip1; + member->addr2 = e->ip2; + member->cidr = e->cidr1; + member->cidr2 = e->cidr2; + member->proto = e->proto; + member->port = ntohs(e->port1); + member->port2 = ntohs(e->port2); + member->nomatch = e->nomatch; + if (comment) + rte_strlcpy(member->comment, e->comment, IPSET_MAXCOMLEN); +} + +static uint32_t +hash_netportnetport_hashkey4(void *data, int len, uint32_t mask) +{ + elem_t *e = (elem_t *)data; + + return (e->ip1.in.s_addr * 31 + e->ip2.in.s_addr * 31 + + e->cidr1 * 31 + e->cidr2 * 31 + + (((uint32_t)e->port1 << 16) | e->port2)) & mask; +} + +static int +hash_netportnetport_adt4(int op, struct ipset *set, struct ipset_param *param) +{ + elem_t e; + int ret; + uint16_t port1, port2; + uint32_t ip1, ip1_to, ip2, ip2_to, ip2_from; + ipset_adtfn adtfn = set->type->adtfn[op]; + + if (set->family != param->option.family) + return EDPVS_INVAL; + + memset(&e, 0, sizeof(e)); + + e.cidr1 = param->cidr; + e.cidr2 = param->cidr2; + e.proto = param->proto; + + if (op == IPSET_OP_TEST) { + e.ip1.in.s_addr = param->range.min_addr.in.s_addr; + e.ip2.in.s_addr = param->range2.min_addr.in.s_addr; + e.port1 = htons(param->range.min_port); + e.port2 = htons(param->range2.min_port); + + return adtfn(set, &e, 0); 
+ } + + if (param->opcode == IPSET_OP_ADD) { + if (set->comment) + rte_strlcpy(e.comment, param->comment, IPSET_MAXCOMLEN); + if (param->option.add.nomatch) + e.nomatch = true; + } + + ip1 = ntohl(param->range.min_addr.in.s_addr); + ip2 = ntohl(param->range2.min_addr.in.s_addr); + + if (e.cidr1) { + ip_set_mask_from_to(ip1, ip1_to, e.cidr1); + } else { + ip1_to = ntohl(param->range.max_addr.in.s_addr); + } + + if (e.cidr2) { + ip_set_mask_from_to(ip2, ip2_to, e.cidr2); + } else { + ip2_to = ntohl(param->range2.max_addr.in.s_addr); + } + ip2_from = ip2; + + do { + e.ip1.in.s_addr = htonl(ip1); + ip1 = ip_set_range_to_cidr(ip1, ip1_to, &e.cidr1); + do { + e.ip2.in.s_addr = htonl(ip2); + ip2 = ip_set_range_to_cidr(ip2, ip2_to, &e.cidr2); + for (port1 = param->range.min_port; + port1 >= param->range.min_port && + port1 <= param->range.max_port; port1++) { + for (port2 = param->range2.min_port; + port2 >= param->range2.min_port && + port2 <= param->range2.max_port; port2++) { + e.port1 = htons(port1); + e.port2 = htons(port2); + ret = adtfn(set, &e, param->flag); + if (ret) + return ret; + } + } + } while(ip2++ < ip2_to); + ip2 = ip2_from; + } while(ip1++ < ip1_to); + + return EDPVS_OK; +} + +static int +hash_netportnetport_test4(struct ipset *set, struct rte_mbuf *mbuf, bool dst_match) +{ + elem_t e; + uint16_t proto; + struct rte_ipv4_hdr *ip4hdr; + struct rte_udp_hdr *l4hdr = NULL; + + if (set->family != AF_INET || mbuf_address_family(mbuf) != AF_INET) + return 0; + + proto = mbuf_protocol(mbuf); + if (!hash_proto_support(proto)) + return 0; + if (proto == IPPROTO_TCP || proto == IPPROTO_UDP) { + l4hdr = mbuf_header_l4(mbuf); + if (unlikely(!l4hdr)) + return 0; + } + + ip4hdr = mbuf_header_l3(mbuf); + if (unlikely(!ip4hdr)) + return 0; + + memset(&e, 0, sizeof(e)); + e.proto = proto; + + e.ip1.in.s_addr = ip4hdr->src_addr; + e.ip2.in.s_addr = ip4hdr->dst_addr; + if (l4hdr) { + e.port1 = l4hdr->src_port; + e.port2 = l4hdr->dst_port; + } + + return 
set->type->adtfn[IPSET_OP_TEST](set, &e, 0); +} + +struct ipset_type_variant hash_netportnetport_variant4 = { + .adt = hash_netportnetport_adt4, + .test = hash_netportnetport_test4, + .hash.do_compare = hash_netportnetport_data_equal, + .hash.do_netmask = hash_data_netmask4, + .hash.do_list = hash_netportnetport_do_list, + .hash.do_hash = hash_netportnetport_hashkey4 +}; + +static int +hash_netportnetport_adt6(int op, struct ipset *set, struct ipset_param *param) +{ + uint16_t port1, port2; + int ret; + elem_t e; + ipset_adtfn adtfn = set->type->adtfn[op]; + + if (set->family != param->option.family) + return EDPVS_INVAL; + + memset(&e, 0, sizeof(e)); + + e.ip1 = param->range.min_addr; + e.ip2 = param->range2.min_addr; + e.cidr1 = param->cidr; + e.cidr2 = param->cidr2; + e.proto = param->proto; + + if (param->opcode == IPSET_OP_TEST) { + e.port1 = htons(param->range.min_port); + e.port2 = htons(param->range2.min_port); + return adtfn(set, &e, 0); + } + + if ( param->opcode == IPSET_OP_ADD) { + if (set->comment) + rte_strlcpy(e.comment, param->comment, IPSET_MAXCOMLEN); + if (param->option.add.nomatch) + e.nomatch = true; + } + + if (e.cidr1) + ip6_netmask(&e.ip1, e.cidr1); + if (e.cidr2) + ip6_netmask(&e.ip2, e.cidr2); + + for (port1 = param->range.min_port; port1 >= param->range.min_port && + port1 <= param->range.max_port; port1++) { + for (port2 = param->range2.min_port; port2 >= param->range2.min_port && + port2 <= param->range2.max_port; port2++) { + e.port1 = htons(port1); + e.port2 = htons(port2); + ret = adtfn(set, &e, param->flag); + if (ret) + return ret; + } + } + return EDPVS_OK; +} + +static int +hash_netportnetport_test6(struct ipset *set, struct rte_mbuf *mbuf, bool dst_match) +{ + elem_t e; + uint16_t proto; + struct rte_ipv6_hdr *ip6hdr; + struct rte_udp_hdr *l4hdr = NULL; + + if (set->family != AF_INET6 || mbuf_address_family(mbuf) != AF_INET6) + return 0; + + proto = mbuf_protocol(mbuf); + if (!hash_proto_support(proto)) + return 0; + if (proto 
== IPPROTO_TCP || proto == IPPROTO_UDP) { + l4hdr = mbuf_header_l4(mbuf); + if (unlikely(!l4hdr)) + return 0; + } + + ip6hdr = mbuf_header_l3(mbuf); + if (unlikely(!ip6hdr)) + return 0; + + memset(&e, 0, sizeof(e)); + e.proto = proto; + + memcpy(&e.ip1, ip6hdr->src_addr, sizeof(e.ip1)); + memcpy(&e.ip2, ip6hdr->dst_addr, sizeof(e.ip2)); + if (l4hdr) { + e.port1 = l4hdr->src_port; + e.port2 = l4hdr->dst_port; + } + + return set->type->adtfn[IPSET_OP_TEST](set, &e, 0); +} + +struct ipset_type_variant hash_netportnetport_variant6 = { + .adt = hash_netportnetport_adt6, + .test = hash_netportnetport_test6, + .hash.do_compare = hash_netportnetport_data_equal, + .hash.do_netmask = hash_data_netmask6, + .hash.do_list = hash_netportnetport_do_list, + .hash.do_hash = jhash_hashkey +}; + +static int +hash_netportnetport_create(struct ipset *set, struct ipset_param *param) +{ + hash_create(set, param); + set->net_count = 2; + set->dsize = sizeof(elem_t); + set->hash_len = offsetof(elem_t, comment); + + if (param->option.family == AF_INET) + set->variant = &hash_netportnetport_variant4; + else + set->variant = &hash_netportnetport_variant6; + + return EDPVS_OK; +} + +struct ipset_type hash_netportnetport_type = { + .name = "hash:net,port,net,port", + .create = hash_netportnetport_create, + .destroy = hash_destroy, + .flush = hash_flush, + .list = hash_list, + .adtfn = hash_adtfn, +}; diff --git a/src/ipset/pfxlen.c b/src/ipset/pfxlen.c new file mode 100644 index 000000000..368b5754d --- /dev/null +++ b/src/ipset/pfxlen.c @@ -0,0 +1,219 @@ +/* + * DPVS is a software load balancer (Virtual Server) based on DPDK. + * + * Copyright (C) 2021 iQIYI (www.iqiyi.com). + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ +#include +#include +#include +#include "ipset/pfxlen.h" +/* Prefixlen maps for fast conversions, by Jan Engelhardt. */ + +#ifdef E +#undef E +#endif + +#define PREFIXES_MAP \ + E(0x00000000, 0x00000000, 0x00000000, 0x00000000), \ + E(0x80000000, 0x00000000, 0x00000000, 0x00000000), \ + E(0xC0000000, 0x00000000, 0x00000000, 0x00000000), \ + E(0xE0000000, 0x00000000, 0x00000000, 0x00000000), \ + E(0xF0000000, 0x00000000, 0x00000000, 0x00000000), \ + E(0xF8000000, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFC000000, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFE000000, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFF000000, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFF800000, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFC00000, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFE00000, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFF00000, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFF80000, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFFC0000, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFFE0000, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFFF0000, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFFF8000, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFFFC000, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFFFE000, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFFFF000, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFFFF800, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFFFFC00, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFFFFE00, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFFFFF00, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFFFFF80, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFFFFFC0, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFFFFFE0, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFFFFFF0, 
0x00000000, 0x00000000, 0x00000000), \ + E(0xFFFFFFF8, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFC, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFE, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0x80000000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xC0000000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xE0000000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xF0000000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xF8000000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFC000000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFE000000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFF000000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFF800000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFC00000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFE00000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFF00000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFF80000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFC0000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFE0000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFF0000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFF8000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFC000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFE000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFF000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFF800, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFC00, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFE00, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFF00, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFF80, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFC0, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFE0, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFF0, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFF8, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFC, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFE, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 
0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0x80000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xC0000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xE0000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xF0000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xF8000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFC000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFE000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFF000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFF800000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFC00000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFE00000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFF00000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFF80000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFC0000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFE0000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF0000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF8000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFC000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFE000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF800, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFC00, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFE00, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF00, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF80, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFC0, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFE0, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF0, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF8, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFC, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x80000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xC0000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xE0000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 
0xF0000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xF8000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFC000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFE000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFF000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFF800000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFC00000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFE00000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFF00000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFF80000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFC0000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFE0000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF0000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF8000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFC000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFE000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF800), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFC00), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFE00), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF00), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF80), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFC0), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFE0), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF0), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF8), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFC), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF), + +#undef htonl +#if __BYTE_ORDER == __LITTLE_ENDIAN +#define htonl(x) \ + ((((x) & 0xff000000) >> 24) | (((x) & 0x00ff0000) >> 8) | \ + (((x) & 0x0000ff00) << 8) | (((x) & 0x000000ff) << 24)) +#else +#define htonl(x) (x) +#endif + +#define E(a, b, c, d) \ + {.ip6 = { \ + htonl(a), htonl(b), \ + htonl(c), htonl(d), \ + } } + +/* This table works for both IPv4 and IPv6; + * just use prefixlen_netmask_map[prefixlength].ip. 
+ */ +const union nf_inet_addr ip_set_netmask_map[] = { + PREFIXES_MAP +}; + +#undef htonl +#undef E + +#define E(a, b, c, d) \ + {.ip6 = { (__be32)a, (__be32)b, \ + (__be32)c, (__be32)d, \ + } } + +/* This table works for both IPv4 and IPv6; + * just use prefixlen_hostmask_map[prefixlength].ip. + */ +const union nf_inet_addr ip_set_hostmask_map[] = { + PREFIXES_MAP +}; + +static inline bool after(__u32 seq1, __u32 seq2) +{ + return (__s32)(seq1-seq2) > 0; +} + +/* Find the largest network which matches the range from left, in host order. */ +__u32 +ip_set_range_to_cidr(__u32 from, __u32 to, __u8 *cidr) +{ + __u32 last; + __u8 i; + + for (i = 0; i < 32; i++) { + if ((from & ip_set_hostmask(i)) != from) + continue; + last = from | ~ip_set_hostmask(i); + if (!after(last, to)) { + *cidr = i; + return last; + } + } + *cidr = 32; + return from; +} diff --git a/src/ipv4_frag.c b/src/ipv4_frag.c index ce9f06078..c1c1d7019 100644 --- a/src/ipv4_frag.c +++ b/src/ipv4_frag.c @@ -301,7 +301,7 @@ int ipv4_fragment(struct rte_mbuf *mbuf, unsigned int mtu, /* copy metadata from orig pkt */ route4_get(rt); /* no need to hold before consume mbuf */ - MBUF_USERDATA(mbuf, struct route_entry *, MBUF_FIELD_ROUTE) = rt; + MBUF_USERDATA(frag, struct route_entry *, MBUF_FIELD_ROUTE) = rt; frag->port = mbuf->port; frag->ol_flags = 0; /* do not offload csum for frag */ frag->l2_len = mbuf->l2_len; diff --git a/src/ipv6/icmp6.c b/src/ipv6/icmp6.c index 20e4a2c4d..963e12979 100644 --- a/src/ipv6/icmp6.c +++ b/src/ipv6/icmp6.c @@ -33,7 +33,7 @@ static void icmp6_dump_hdr(const struct rte_mbuf *mbuf) lcoreid_t lcore = rte_lcore_id(); fprintf(stderr, "lcore %d port %d icmp type %u code %u\n", - lcore, mbuf->port, ich->icmp_type, ich->icmp_code); + lcore, mbuf->port, ich->icmp6_type, ich->icmp6_code); return; } @@ -247,7 +247,7 @@ void icmp6_send(struct rte_mbuf *imbuf, int type, int code, uint32_t info) mbuf_copy_bits(imbuf, 0, ich + 1, room); - shdr.ip6_plen = room + sizeof(struct icmp6_hdr); 
+ shdr.ip6_plen = htons(room + sizeof(struct icmp6_hdr)); icmp6_send_csum(&shdr, ich); if ((err = ipv6_xmit(mbuf, &fl6)) != EDPVS_OK) { diff --git a/src/ipvs/ip_vs_conhash.c b/src/ipvs/ip_vs_conhash.c index 5ab1537d7..858c4b503 100644 --- a/src/ipvs/ip_vs_conhash.c +++ b/src/ipvs/ip_vs_conhash.c @@ -150,6 +150,39 @@ static void node_fini(struct node_s *node) rte_free(p_conhash_node); } +static int conhash_update_node_replicas(struct conhash_node *p_conhash_node, struct conhash_sched_data *p_sched_data, + struct dp_vs_dest *dest, int weight_gcd) +{ + int16_t weight; + struct node_s *p_node; + int ret; + char iden[64]; + char addr[INET6_ADDRSTRLEN]; + + // del from conhash + p_node = &(p_conhash_node->node); + ret = conhash_del_node(p_sched_data->conhash, p_node); + if (ret < 0) { + RTE_LOG(ERR, SERVICE, "%s: conhash_del_node failed\n", __func__); + return EDPVS_INVAL; + } + + // adjust weight + weight = rte_atomic16_read(&dest->weight); + inet_ntop(dest->af, &dest->addr, addr, sizeof(addr)); + snprintf(iden, sizeof(iden), "%s%d", addr, dest->port); + conhash_set_node(p_node, iden, weight / weight_gcd * REPLICA); + + // add to conhash again + ret = conhash_add_node(p_sched_data->conhash, p_node); + if (ret < 0) { + RTE_LOG(ERR, SERVICE, "%s: conhash_set_node failed\n", __func__); + return EDPVS_INVAL; + } + + return EDPVS_OK; +} + static int dp_vs_conhash_add_dest(struct dp_vs_service *svc, struct dp_vs_dest *dest) { @@ -161,6 +194,7 @@ static int dp_vs_conhash_add_dest(struct dp_vs_service *svc, struct conhash_node *p_conhash_node; struct conhash_sched_data *p_sched_data; int weight_gcd; + struct dp_vs_dest *p_dest; p_sched_data = (struct conhash_sched_data *)(svc->sched_data); @@ -201,6 +235,16 @@ static int dp_vs_conhash_add_dest(struct dp_vs_service *svc, // add conhash node to list list_add(&(p_conhash_node->list), &(p_sched_data->nodes)); + list_for_each_entry(p_conhash_node, &(p_sched_data->nodes), list) { + p_dest = (struct dp_vs_dest 
*)p_conhash_node->node.data; + weight = rte_atomic16_read(&p_dest->weight); + if (p_conhash_node->node.replicas == weight / weight_gcd * REPLICA) + continue; + if (EDPVS_OK != conhash_update_node_replicas(p_conhash_node, p_sched_data, p_dest, weight_gcd)) { + return EDPVS_INVAL; + } + } + return EDPVS_OK; } @@ -209,13 +253,18 @@ static int dp_vs_conhash_del_dest(struct dp_vs_service *svc, { int ret; struct node_s *p_node; - struct conhash_node *p_conhash_node; + struct conhash_node *p_conhash_node, *next; struct conhash_sched_data *p_sched_data; + int weight_gcd; + struct dp_vs_dest *p_dest; + int16_t weight; p_sched_data = (struct conhash_sched_data *)(svc->sched_data); + weight_gcd = dp_vs_gcd_weight(svc); - list_for_each_entry(p_conhash_node, &(p_sched_data->nodes), list) { - if (p_conhash_node->node.data == dest) { + list_for_each_entry_safe(p_conhash_node, next, &(p_sched_data->nodes), list) { + p_dest = (struct dp_vs_dest *)p_conhash_node->node.data; + if (p_dest == dest) { p_node = &(p_conhash_node->node); ret = conhash_del_node(p_sched_data->conhash, p_node); if (ret < 0) { @@ -223,60 +272,42 @@ static int dp_vs_conhash_del_dest(struct dp_vs_service *svc, return EDPVS_INVAL; } node_fini(p_node); - return EDPVS_OK; + } else { + weight = rte_atomic16_read(&p_dest->weight); + if (p_conhash_node->node.replicas == weight / weight_gcd * REPLICA) + continue; + if (EDPVS_OK != conhash_update_node_replicas(p_conhash_node, p_sched_data, p_dest, weight_gcd)) { + return EDPVS_INVAL; + } } } - return EDPVS_NOTEXIST; + return EDPVS_OK; } static int dp_vs_conhash_edit_dest(struct dp_vs_service *svc, - struct dp_vs_dest *dest) + __rte_unused struct dp_vs_dest *dest) { - int ret; - char iden[64]; - char addr[INET6_ADDRSTRLEN]; int16_t weight; - struct node_s *p_node; struct conhash_node *p_conhash_node; struct conhash_sched_data *p_sched_data; int weight_gcd; + struct dp_vs_dest *p_dest; - weight = rte_atomic16_read(&dest->weight); weight_gcd = dp_vs_gcd_weight(svc); 
p_sched_data = (struct conhash_sched_data *)(svc->sched_data); - // find node by addr and port list_for_each_entry(p_conhash_node, &(p_sched_data->nodes), list) { - if (p_conhash_node->node.data == dest) { - if (p_conhash_node->node.replicas == weight / weight_gcd * REPLICA) - return EDPVS_OK; - - // del from conhash - p_node = &(p_conhash_node->node); - ret = conhash_del_node(p_sched_data->conhash, p_node); - if (ret < 0) { - RTE_LOG(ERR, SERVICE, "%s: conhash_del_node failed\n", __func__); - return EDPVS_INVAL; - } - - // adjust weight - inet_ntop(dest->af, &dest->addr, addr, sizeof(addr)); - snprintf(iden, sizeof(iden), "%s%d", addr, dest->port); - conhash_set_node(p_node, iden, weight / weight_gcd * REPLICA); - - // add to conhash again - ret = conhash_add_node(p_sched_data->conhash, p_node); - if (ret < 0) { - RTE_LOG(ERR, SERVICE, "%s: conhash_set_node failed\n", __func__); - return EDPVS_INVAL; - } - - return EDPVS_OK; + p_dest = (struct dp_vs_dest *)p_conhash_node->node.data; + weight = rte_atomic16_read(&p_dest->weight); + if (p_conhash_node->node.replicas == weight / weight_gcd * REPLICA) + continue; + if (EDPVS_OK != conhash_update_node_replicas(p_conhash_node, p_sched_data, p_dest, weight_gcd)) { + return EDPVS_INVAL; } } - return EDPVS_NOTEXIST; + return EDPVS_OK; } /* diff --git a/src/ipvs/ip_vs_core.c b/src/ipvs/ip_vs_core.c index 63b09ab9a..fbc53efa8 100644 --- a/src/ipvs/ip_vs_core.c +++ b/src/ipvs/ip_vs_core.c @@ -86,6 +86,8 @@ static struct dp_vs_conn *dp_vs_sched_persist(struct dp_vs_service *svc, assert(svc && iph && mbuf); conn_flags = (is_synproxy_on ? DPVS_CONN_F_SYNPROXY : 0); + conn_flags |= (svc->flags & DPVS_CONN_F_EXPIRE_QUIESCENT); + if (svc->af == AF_INET6) { /* FIXME: Is OK to use svc->netmask as IPv6 prefix length ? 
*/ ipv6_addr_prefix_copy(&snet.in6, &iph->saddr.in6, svc->netmask); @@ -339,12 +341,12 @@ struct dp_vs_conn *dp_vs_schedule(struct dp_vs_service *svc, ports[0], ports[1], 0, ¶m); } - if (is_synproxy_on) { + if (is_synproxy_on) flags |= DPVS_CONN_F_SYNPROXY; - } - if (svc->flags & DP_VS_SVC_F_ONEPACKET && iph->proto == IPPROTO_UDP) { + if (svc->flags & DP_VS_SVC_F_ONEPACKET && iph->proto == IPPROTO_UDP) flags |= DPVS_CONN_F_ONE_PACKET; - } + flags |= (svc->flags & DPVS_CONN_F_EXPIRE_QUIESCENT); + conn = dp_vs_conn_new(mbuf, iph, ¶m, dest, flags); if (!conn) return NULL; @@ -502,7 +504,7 @@ static int __xmit_outbound_icmp6(struct rte_mbuf *mbuf, if (mbuf->pkt_len > rt6->rt6_mtu) { route6_put(rt6); - icmp6_send(mbuf, ICMP6_PACKET_TOO_BIG, 0, htonl(rt6->rt6_mtu)); + icmp6_send(mbuf, ICMP6_PACKET_TOO_BIG, 0, rt6->rt6_mtu); rte_pktmbuf_free(mbuf); return EDPVS_FRAG; } @@ -616,7 +618,7 @@ static int __xmit_inbound_icmp6(struct rte_mbuf *mbuf, if (mbuf->pkt_len > rt6->rt6_mtu) { route6_put(rt6); - icmp6_send(mbuf, ICMP6_PACKET_TOO_BIG, 0, htonl(rt6->rt6_mtu)); + icmp6_send(mbuf, ICMP6_PACKET_TOO_BIG, 0, rt6->rt6_mtu); rte_pktmbuf_free(mbuf); return EDPVS_FRAG; } @@ -1013,17 +1015,15 @@ static int __dp_vs_in(void *priv, struct rte_mbuf *mbuf, else dir = DPVS_CONN_DIR_INBOUND; } else { - /* assert(conn->dest->svc != NULL); */ - if (conn->dest && conn->dest->svc && - prot->conn_expire_quiescent && - (conn->dest->svc->flags & DPVS_CONN_F_EXPIRE_QUIESCENT)) { - if (rte_atomic16_read(&conn->dest->weight) == 0) { - RTE_LOG(INFO, IPVS, "%s: the conn is quiescent, expire it right now," - " and drop the packet!\n", __func__); - prot->conn_expire_quiescent(conn); - dp_vs_conn_put(conn); - return INET_DROP; - } + /* assert(conn->dest != NULL); */ + if (prot->conn_expire_quiescent && (conn->flags & DPVS_CONN_F_EXPIRE_QUIESCENT) && + conn->dest && (!dp_vs_dest_is_avail(conn->dest) || + rte_atomic16_read(&conn->dest->weight) == 0)) { + RTE_LOG(INFO, IPVS, "%s: the conn is quiescent, 
expire it right now," + " and drop the packet!\n", __func__); + prot->conn_expire_quiescent(conn); + dp_vs_conn_put(conn); + return INET_DROP; } } diff --git a/src/ipvs/ip_vs_laddr.c b/src/ipvs/ip_vs_laddr.c index 9d4cad725..a2a330854 100644 --- a/src/ipvs/ip_vs_laddr.c +++ b/src/ipvs/ip_vs_laddr.c @@ -160,7 +160,7 @@ static inline void put_laddr(struct dp_vs_laddr *laddr) if (rte_atomic32_dec_and_test(&laddr->refcnt)) { rte_free(laddr); #ifdef CONFIG_DPVS_IPVS_DEBUG - RTE_LOG(DEBUG, IPVS, "%s: [%02d] delete laddr.\n", rte_lcore_id(), __func__); + RTE_LOG(DEBUG, IPVS, "%s: [%02d] delete laddr.\n", __func__, rte_lcore_id()); #endif } } diff --git a/src/ipvs/ip_vs_mh.c b/src/ipvs/ip_vs_mh.c index aa160519e..b14cc05af 100644 --- a/src/ipvs/ip_vs_mh.c +++ b/src/ipvs/ip_vs_mh.c @@ -77,13 +77,6 @@ static inline void generate_hash_secret(hsiphash_key_t *hash1, hash2->key[1] = 2654446892UL; } -/* Helper function to determine if server is unavailable */ -static inline bool is_unavailable(struct dp_vs_dest *dest) -{ - return rte_atomic16_read(&dest->weight) <= 0 || - dest->flags & DPVS_DEST_F_OVERLOAD; -} - /* Returns hash value for IPVS MH entry */ static inline unsigned int dp_vs_mh_hashkey(int af, const union inet_addr *addr, __be16 port, hsiphash_key_t *key, unsigned int offset) @@ -231,7 +224,7 @@ static inline struct dp_vs_dest *dp_vs_mh_get(struct dp_vs_service *svc, % DP_VS_MH_TAB_SIZE; struct dp_vs_dest *dest = s->lookup[hash].dest; - return (!dest || is_unavailable(dest)) ? NULL : dest; + return dp_vs_dest_is_valid(dest) ? 
dest:NULL; } /* As ip_vs_mh_get, but with fallback if selected server is unavailable */ @@ -251,7 +244,7 @@ static inline struct dp_vs_dest *dp_vs_mh_get_fallback(struct dp_vs_service *svc dest = s->lookup[ihash].dest; if (!dest) return NULL; - if (!is_unavailable(dest)) + if (dp_vs_dest_is_valid(dest)) return dest; #ifdef CONFIG_DPVS_IPVS_DEBUG @@ -271,7 +264,7 @@ static inline struct dp_vs_dest *dp_vs_mh_get_fallback(struct dp_vs_service *svc dest = s->lookup[hash].dest; if (!dest) break; - if (!is_unavailable(dest)) + if (dp_vs_dest_is_valid(dest)) return dest; #ifdef CONFIG_DPVS_IPVS_DEBUG @@ -376,10 +369,8 @@ static int dp_vs_mh_shift_weight(struct dp_vs_service *svc, int gcd) static void dp_vs_mh_state_free(struct dp_vs_mh_state *s) { - if (s) { rte_free(s->lookup); rte_free(s); - } } static int dp_vs_mh_init_svc(struct dp_vs_service *svc) @@ -426,12 +417,13 @@ static int dp_vs_mh_done_svc(struct dp_vs_service *svc) { struct dp_vs_mh_state *s = svc->sched_data; + if(s){ /* Got to clean up lookup entry here */ - dp_vs_mh_reset(s); - - dp_vs_mh_state_free(s); - RTE_LOG(DEBUG, SERVICE, "MH lookup table (memory=%zdbytes) released\n", - sizeof(struct dp_vs_mh_lookup) * DP_VS_MH_TAB_SIZE); + dp_vs_mh_reset(s); + dp_vs_mh_state_free(s); + RTE_LOG(DEBUG, SERVICE, "MH lookup table (memory=%zdbytes) released\n", + sizeof(struct dp_vs_mh_lookup) * DP_VS_MH_TAB_SIZE); + } return EDPVS_OK; } diff --git a/src/ipvs/ip_vs_proto_tcp.c b/src/ipvs/ip_vs_proto_tcp.c index 863746c67..d5778e02e 100644 --- a/src/ipvs/ip_vs_proto_tcp.c +++ b/src/ipvs/ip_vs_proto_tcp.c @@ -158,20 +158,22 @@ inline void tcp6_send_csum(struct rte_ipv6_hdr *iph, struct tcphdr *th) { } static inline int tcp_send_csum(int af, int iphdrlen, struct tcphdr *th, - const struct dp_vs_conn *conn, struct rte_mbuf *mbuf) + const struct dp_vs_conn *conn, struct rte_mbuf *mbuf, struct netif_port *dev) { /* leverage HW TX TCP csum offload if possible */ - - struct netif_port *dev = NULL; + struct netif_port 
*select_dev = NULL; if (AF_INET6 == af) { struct route6 *rt6 = MBUF_USERDATA(mbuf, struct route6 *, MBUF_FIELD_ROUTE); struct ip6_hdr *ip6h = ip6_hdr(mbuf); if (rt6 && rt6->rt6_dev) - dev = rt6->rt6_dev; + select_dev = rt6->rt6_dev; + else if (dev) + select_dev = dev; else if (conn->out_dev) - dev = conn->out_dev; - if (likely(dev && (dev->flag & NETIF_PORT_FLAG_TX_TCP_CSUM_OFFLOAD))) { + select_dev = conn->out_dev; + + if (likely(select_dev && (select_dev->flag & NETIF_PORT_FLAG_TX_TCP_CSUM_OFFLOAD))) { mbuf->l3_len = iphdrlen; mbuf->l4_len = (th->doff << 2); mbuf->ol_flags |= (PKT_TX_TCP_CKSUM | PKT_TX_IPV6); @@ -185,10 +187,12 @@ static inline int tcp_send_csum(int af, int iphdrlen, struct tcphdr *th, struct route_entry *rt = MBUF_USERDATA(mbuf, struct route_entry *, MBUF_FIELD_ROUTE); struct rte_ipv4_hdr *iph = ip4_hdr(mbuf); if (rt && rt->port) - dev = rt->port; + select_dev = rt->port; + else if (dev) + select_dev = dev; else if (conn->out_dev) - dev = conn->out_dev; - if (likely(dev && (dev->flag & NETIF_PORT_FLAG_TX_TCP_CSUM_OFFLOAD))) { + select_dev = conn->out_dev; + if (likely(select_dev && (select_dev->flag & NETIF_PORT_FLAG_TX_TCP_CSUM_OFFLOAD))) { mbuf->l3_len = iphdrlen; mbuf->l4_len = (th->doff << 2); mbuf->ol_flags |= (PKT_TX_TCP_CKSUM | PKT_TX_IP_CKSUM | PKT_TX_IPV4); @@ -731,7 +735,7 @@ static int tcp_fnat_in_handler(struct dp_vs_proto *proto, th->dest = conn->dport; - return tcp_send_csum(af, iphdrlen, th, conn, mbuf); + return tcp_send_csum(af, iphdrlen, th, conn, mbuf, conn->in_dev); } static int tcp_fnat_out_handler(struct dp_vs_proto *proto, @@ -769,7 +773,7 @@ static int tcp_fnat_out_handler(struct dp_vs_proto *proto, if (th->syn && th->ack) tcp_out_init_seq(conn, th); - return tcp_send_csum(af, iphdrlen, th, conn, mbuf); + return tcp_send_csum(af, iphdrlen, th, conn, mbuf, conn->out_dev); } static int tcp_snat_in_handler(struct dp_vs_proto *proto, @@ -793,7 +797,7 @@ static int tcp_snat_in_handler(struct dp_vs_proto *proto, th->dest = 
conn->dport; /* L4 re-checksum */ - return tcp_send_csum(af, iphdrlen, th, conn, mbuf); + return tcp_send_csum(af, iphdrlen, th, conn, mbuf, conn->in_dev); } static int tcp_snat_out_handler(struct dp_vs_proto *proto, @@ -817,7 +821,7 @@ static int tcp_snat_out_handler(struct dp_vs_proto *proto, th->source = conn->vport; /* L4 re-checksum */ - return tcp_send_csum(af, iphdrlen, th, conn, mbuf); + return tcp_send_csum(af, iphdrlen, th, conn, mbuf, conn->out_dev); } static inline int tcp_state_idx(struct tcphdr *th) @@ -1141,8 +1145,9 @@ static int tcp_conn_expire(struct dp_vs_proto *proto, int err; assert(proto && conn && conn->dest); - if (conn->dest->fwdmode == DPVS_FWD_MODE_NAT - || conn->dest->fwdmode == DPVS_FWD_MODE_FNAT) { + if (conn->dest->fwdmode == DPVS_FWD_MODE_FNAT + || conn->dest->fwdmode == DPVS_FWD_MODE_SNAT + || conn->dest->fwdmode == DPVS_FWD_MODE_NAT) { /* send RST to RS and client */ err = tcp_send_rst(proto, conn, DPVS_CONN_DIR_INBOUND); if (err != EDPVS_OK) diff --git a/src/ipvs/ip_vs_proto_udp.c b/src/ipvs/ip_vs_proto_udp.c index b868d47e9..fb8313828 100644 --- a/src/ipvs/ip_vs_proto_udp.c +++ b/src/ipvs/ip_vs_proto_udp.c @@ -78,11 +78,10 @@ inline void udp6_send_csum(struct rte_ipv6_hdr *iph, struct rte_udp_hdr *uh) static inline int udp_send_csum(int af, int iphdrlen, struct rte_udp_hdr *uh, const struct dp_vs_conn *conn, - struct rte_mbuf *mbuf, const struct opphdr *opp) + struct rte_mbuf *mbuf, const struct opphdr *opp, struct netif_port *dev) { /* leverage HW TX UDP csum offload if possible */ - - struct netif_port *dev = NULL; + struct netif_port *select_dev = NULL; if (AF_INET6 == af) { /* UDP checksum is mandatory for IPv6.[RFC 2460] */ @@ -92,10 +91,12 @@ static inline int udp_send_csum(int af, int iphdrlen, struct rte_udp_hdr *uh, } else { struct route6 *rt6 = MBUF_USERDATA(mbuf, struct route6 *, MBUF_FIELD_ROUTE); if (rt6 && rt6->rt6_dev) - dev = rt6->rt6_dev; + select_dev = rt6->rt6_dev; + else if (dev) + select_dev = dev; else if 
(conn->out_dev) - dev = conn->out_dev; - if (likely(dev && (dev->flag & NETIF_PORT_FLAG_TX_UDP_CSUM_OFFLOAD))) { + select_dev = conn->out_dev; + if (likely(select_dev && (select_dev->flag & NETIF_PORT_FLAG_TX_UDP_CSUM_OFFLOAD))) { mbuf->l3_len = iphdrlen; mbuf->l4_len = sizeof(struct rte_udp_hdr); mbuf->ol_flags |= (PKT_TX_UDP_CKSUM | PKT_TX_IPV6); @@ -125,10 +126,12 @@ static inline int udp_send_csum(int af, int iphdrlen, struct rte_udp_hdr *uh, } else { struct route_entry *rt = MBUF_USERDATA(mbuf, struct route_entry *, MBUF_FIELD_ROUTE); if (rt && rt->port) - dev = rt->port; + select_dev = rt->port; + else if (dev) + select_dev = dev; else if (conn->out_dev) - dev = conn->out_dev; - if (likely(dev && (dev->flag & NETIF_PORT_FLAG_TX_UDP_CSUM_OFFLOAD))) { + select_dev = conn->out_dev; + if (likely(select_dev && (select_dev->flag & NETIF_PORT_FLAG_TX_UDP_CSUM_OFFLOAD))) { mbuf->l3_len = iphdrlen; mbuf->l4_len = sizeof(struct rte_udp_hdr); mbuf->ol_flags |= (PKT_TX_UDP_CKSUM | PKT_TX_IP_CKSUM | PKT_TX_IPV4); @@ -629,10 +632,14 @@ static int udp_insert_uoa(struct dp_vs_conn *conn, struct rte_mbuf *mbuf, if (AF_INET6 == tuplehash_out(conn).af) { mtu = ((struct route6*)rt)->rt6_mtu; + } else { + mtu = ((struct route_entry*) rt)->mtu; + } + + if (AF_INET6 == conn->af) { iph = ip6_hdr(mbuf); iphdrlen = ip6_hdrlen(mbuf); } else { - mtu = ((struct route_entry*) rt)->mtu; iph = (struct iphdr *)ip4_hdr(mbuf); iphdrlen = ip4_hdrlen(mbuf); } @@ -718,7 +725,7 @@ static int udp_fnat_in_handler(struct dp_vs_proto *proto, uh->src_port = conn->lport; uh->dst_port = conn->dport; - return udp_send_csum(af, iphdrlen, uh, conn, mbuf, opp); + return udp_send_csum(af, iphdrlen, uh, conn, mbuf, opp, conn->in_dev); } static int udp_fnat_out_handler(struct dp_vs_proto *proto, @@ -740,7 +747,7 @@ static int udp_fnat_out_handler(struct dp_vs_proto *proto, uh->src_port = conn->vport; uh->dst_port = conn->cport; - return udp_send_csum(af, iphdrlen, uh, conn, mbuf, NULL); + return 
udp_send_csum(af, iphdrlen, uh, conn, mbuf, NULL, conn->out_dev); } static int udp_fnat_in_pre_handler(struct dp_vs_proto *proto, @@ -772,7 +779,7 @@ static int udp_snat_in_handler(struct dp_vs_proto *proto, uh->dst_port = conn->dport; - return udp_send_csum(af, iphdrlen, uh, conn, mbuf, NULL); + return udp_send_csum(af, iphdrlen, uh, conn, mbuf, NULL, conn->in_dev); } static int udp_snat_out_handler(struct dp_vs_proto *proto, @@ -792,7 +799,7 @@ static int udp_snat_out_handler(struct dp_vs_proto *proto, uh->src_port = conn->vport; - return udp_send_csum(af, iphdrlen, uh, conn, mbuf, NULL); + return udp_send_csum(af, iphdrlen, uh, conn, mbuf, NULL, conn->out_dev); } struct dp_vs_proto dp_vs_proto_udp = { diff --git a/src/ipvs/ip_vs_service.c b/src/ipvs/ip_vs_service.c index 424f0eab5..2ca7f4e7f 100644 --- a/src/ipvs/ip_vs_service.c +++ b/src/ipvs/ip_vs_service.c @@ -36,7 +36,6 @@ #include "netif.h" #include "assert.h" #include "neigh.h" -#include "ipset.h" static rte_atomic16_t dp_vs_num_services[DPVS_MAX_LCORE]; @@ -218,11 +217,7 @@ __dp_vs_service_match_get4(const struct rte_mbuf *mbuf, bool *outwall, lcoreid_t if ((rt->flag & RTF_KNI) || (rt->flag & RTF_LOCALIN)) return NULL; oif = rt->port->id; - } else if (outwall != NULL && (NULL != ipset_addr_lookup(AF_INET, &daddr)) - && (rt = route_gfw_net_lookup(&daddr.in))) { - char dst[64]; - RTE_LOG(DEBUG, IPSET, "%s: IP %s is in the gfwip set, found route in the outwall table.\n", __func__, - inet_ntop(AF_INET, &daddr, dst, sizeof(dst))? 
dst: ""); + } else if (outwall != NULL && (rt = route_gfw_net_lookup(&daddr.in))) { oif = rt->port->id; route4_put(rt); *outwall = true; diff --git a/src/ipvs/ip_vs_synproxy.c b/src/ipvs/ip_vs_synproxy.c index 19e3003f5..561fe2c3a 100644 --- a/src/ipvs/ip_vs_synproxy.c +++ b/src/ipvs/ip_vs_synproxy.c @@ -39,6 +39,7 @@ #define DP_VS_SYNPROXY_SACK_DEFAULT 1 #define DP_VS_SYNPROXY_WSCALE_DEFAULT 0 #define DP_VS_SYNPROXY_TIMESTAMP_DEFAULT 0 +#define DP_VS_SYNPROXY_CLWND_DEFAULT 1 #define DP_VS_SYNPROXY_DEFER_DEFAULT 0 #define DP_VS_SYNPROXY_DUP_ACK_DEFAULT 10 #define DP_VS_SYNPROXY_MAX_ACK_SAVED_DEFAULT 3 @@ -54,6 +55,7 @@ int dp_vs_synproxy_ctrl_sack = DP_VS_SYNPROXY_SACK_DEFAULT; int dp_vs_synproxy_ctrl_wscale = DP_VS_SYNPROXY_WSCALE_DEFAULT; int dp_vs_synproxy_ctrl_timestamp = DP_VS_SYNPROXY_TIMESTAMP_DEFAULT; int dp_vs_synproxy_ctrl_synack_ttl = DP_VS_SYNPROXY_TTL_DEFAULT; +int dp_synproxy_ctrl_clwnd = DP_VS_SYNPROXY_CLWND_DEFAULT; int dp_vs_synproxy_ctrl_defer = DP_VS_SYNPROXY_DEFER_DEFAULT; int dp_vs_synproxy_ctrl_conn_reuse = DP_VS_SYNPROXY_CONN_REUSE_DEFAULT; int dp_vs_synproxy_ctrl_conn_reuse_cl = DP_VS_SYNPROXY_CONN_REUSE_CL_DEFAULT; @@ -610,8 +612,9 @@ static void syn_proxy_reuse_mbuf(int af, struct rte_mbuf *mbuf, tmpport = th->dest; th->dest = th->source; th->source = tmpport; - /* set window size to zero */ - th->window = 0; + /* set window size to zero if enabled */ + if (dp_synproxy_ctrl_clwnd && !dp_vs_synproxy_ctrl_defer) + th->window = 0; /* set seq(cookie) and ack_seq */ th->ack_seq = htonl(ntohl(th->seq) + 1); th->seq = htonl(isn); @@ -773,8 +776,8 @@ int dp_vs_synproxy_syn_rcv(int af, struct rte_mbuf *mbuf, static inline int syn_proxy_ack_has_data(struct rte_mbuf *mbuf, const struct dp_vs_iphdr *iph, struct tcphdr *th) { - RTE_LOG(DEBUG, IPVS, "tot_len = %u, iph_len = %u, tcph_len = %u\n", - mbuf->pkt_len, iph->len, th->doff * 4); + RTE_LOG(DEBUG, IPVS, "%s: tot_len = %u, iph_len = %u, tcph_len = %u\n", + __func__, mbuf->pkt_len, iph->len, 
th->doff * 4); return (mbuf->pkt_len - iph->len - th->doff * 4) != 0; } @@ -1455,7 +1458,7 @@ int dp_vs_synproxy_synack_rcv(struct rte_mbuf *mbuf, struct dp_vs_conn *cp, * The probe will be forward to RS and RS will respond a window update. * So DPVS has no need to send a window update. */ - if (cp->ack_num == 1) + if (dp_synproxy_ctrl_clwnd && !dp_vs_synproxy_ctrl_defer && cp->ack_num <= 1) syn_proxy_send_window_update(tuplehash_out(cp).af, mbuf, cp, pp, th); list_for_each_entry_safe(tmbuf, tmbuf2, &cp->ack_mbuf, list) { @@ -1798,6 +1801,12 @@ static void synack_timestamp_handler(vector_t tokens) dp_vs_synproxy_ctrl_timestamp = 1; } +static void close_client_window_handler(vector_t tokens) +{ + RTE_LOG(INFO, IPVS, "close_client_window ON\n"); + dp_synproxy_ctrl_clwnd = 1; +} + static void defer_rs_syn_handler(vector_t tokens) { RTE_LOG(INFO, IPVS, "synproxy_defer_rs_syn ON\n"); @@ -1907,6 +1916,7 @@ void synproxy_keyword_value_init(void) dp_vs_synproxy_ctrl_wscale = 0; dp_vs_synproxy_ctrl_timestamp = 0; dp_vs_synproxy_ctrl_synack_ttl = DP_VS_SYNPROXY_TTL_DEFAULT; + dp_synproxy_ctrl_clwnd = 0; dp_vs_synproxy_ctrl_defer = 0; dp_vs_synproxy_ctrl_conn_reuse = 0; dp_vs_synproxy_ctrl_conn_reuse_cl = 0; @@ -1934,6 +1944,7 @@ void install_synproxy_keywords(void) install_keyword("timestamp", synack_timestamp_handler, KW_TYPE_NORMAL); install_sublevel_end(); + install_keyword("close_client_window", close_client_window_handler, KW_TYPE_NORMAL); install_keyword("defer_rs_syn", defer_rs_syn_handler, KW_TYPE_NORMAL); install_keyword("rs_syn_max_retry", rs_syn_max_retry_handler, KW_TYPE_NORMAL); install_keyword("ack_storm_thresh", ack_storm_thresh_handler, KW_TYPE_NORMAL); diff --git a/src/ipvs/ip_vs_whtlst.c b/src/ipvs/ip_vs_whtlst.c index b017af0f3..116b6c0cf 100644 --- a/src/ipvs/ip_vs_whtlst.c +++ b/src/ipvs/ip_vs_whtlst.c @@ -213,7 +213,7 @@ static int dp_vs_whtlst_del(int af, uint8_t proto, const union inet_addr *vaddr, } /*del whtlst ip on all slave lcores*/ - msg = 
msg_make(MSG_TYPE_WHTLST_DEL, 0, DPVS_MSG_MULTICAST, + msg = msg_make(MSG_TYPE_WHTLST_DEL, whtlst_msg_seq(), DPVS_MSG_MULTICAST, cid, sizeof(struct dp_vs_whtlst_conf), &cf); if (!msg) return EDPVS_NOMEM; diff --git a/src/ipvs/ip_vs_xmit.c b/src/ipvs/ip_vs_xmit.c index 3208eeb1e..6e557493c 100644 --- a/src/ipvs/ip_vs_xmit.c +++ b/src/ipvs/ip_vs_xmit.c @@ -858,7 +858,7 @@ static int __dp_vs_out_xmit_fnat6(struct dp_vs_proto *proto, mtu = rt6->rt6_mtu; if (mbuf->pkt_len > mtu) { RTE_LOG(DEBUG, IPVS, "%s: frag needed.\n", __func__); - icmp6_send(mbuf, ICMP6_PACKET_TOO_BIG, 0, htonl(mtu)); + icmp6_send(mbuf, ICMP6_PACKET_TOO_BIG, 0, mtu); err = EDPVS_FRAG; goto errout; } @@ -1304,7 +1304,7 @@ static int __dp_vs_xmit_dr6(struct dp_vs_proto *proto, mtu = rt6->rt6_mtu; if (mbuf->pkt_len > mtu) { RTE_LOG(DEBUG, IPVS, "%s: frag needed.\n", __func__); - icmp6_send(mbuf, ICMP6_PACKET_TOO_BIG, 0, htonl(mtu)); + icmp6_send(mbuf, ICMP6_PACKET_TOO_BIG, 0, mtu); err = EDPVS_FRAG; goto errout; } @@ -1456,7 +1456,7 @@ static int __dp_vs_xmit_snat6(struct dp_vs_proto *proto, mtu = rt6->rt6_mtu; if (mbuf->pkt_len > mtu) { RTE_LOG(DEBUG, IPVS, "%s: frag needed.\n", __func__); - icmp6_send(mbuf, ICMP6_PACKET_TOO_BIG, 0, htonl(mtu)); + icmp6_send(mbuf, ICMP6_PACKET_TOO_BIG, 0, mtu); err = EDPVS_FRAG; goto errout; } @@ -1703,7 +1703,7 @@ static int __dp_vs_out_xmit_snat6(struct dp_vs_proto *proto, if (mbuf->pkt_len > rt6->rt6_mtu) { RTE_LOG(DEBUG, IPVS, "%s: frag needed.\n", __func__); - icmp6_send(mbuf, ICMP6_PACKET_TOO_BIG, 0, htonl(rt6->rt6_mtu)); + icmp6_send(mbuf, ICMP6_PACKET_TOO_BIG, 0, rt6->rt6_mtu); err = EDPVS_FRAG; goto errout; } @@ -1872,7 +1872,7 @@ static int __dp_vs_xmit_nat6(struct dp_vs_proto *proto, mtu = rt6->rt6_mtu; if (mbuf->pkt_len > mtu) { RTE_LOG(DEBUG, IPVS, "%s: frag needed.\n", __func__); - icmp6_send(mbuf, ICMP6_PACKET_TOO_BIG, 0, htonl(mtu)); + icmp6_send(mbuf, ICMP6_PACKET_TOO_BIG, 0, mtu); err = EDPVS_FRAG; goto errout; } @@ -2044,7 +2044,7 @@ static int 
__dp_vs_out_xmit_nat6(struct dp_vs_proto *proto, mtu = rt6->rt6_mtu; if (mbuf->pkt_len > mtu) { RTE_LOG(DEBUG, IPVS, "%s: frag needed.\n", __func__); - icmp6_send(mbuf, ICMP6_PACKET_TOO_BIG, 0, htonl(mtu)); + icmp6_send(mbuf, ICMP6_PACKET_TOO_BIG, 0, mtu); err = EDPVS_FRAG; goto errout; } @@ -2108,6 +2108,7 @@ static int __dp_vs_xmit_tunnel4(struct dp_vs_proto *proto, uint8_t tos = old_iph->type_of_service; uint16_t df = old_iph->fragment_offset & htons(RTE_IPV4_HDR_DF_FLAG); int err, mtu; + uint32_t ip4h_len = sizeof(struct rte_ipv4_hdr); /* * drop old route. just for safe, because @@ -2133,7 +2134,15 @@ static int __dp_vs_xmit_tunnel4(struct dp_vs_proto *proto, mtu = rt->mtu; MBUF_USERDATA(mbuf, struct route_entry *, MBUF_FIELD_ROUTE) = rt; - new_iph = (struct rte_ipv4_hdr*)rte_pktmbuf_prepend(mbuf, sizeof(struct rte_ipv4_hdr)); + if (mbuf->pkt_len + ip4h_len > mtu && df) { + RTE_LOG(DEBUG, IPVS, "%s: frag needed.\n", __func__); + icmp_send(mbuf, ICMP_DEST_UNREACH, ICMP_UNREACH_NEEDFRAG, + htonl(mtu - ip4h_len)); + err = EDPVS_FRAG; + goto errout; + } + + new_iph = (struct rte_ipv4_hdr*)rte_pktmbuf_prepend(mbuf, ip4h_len); if (!new_iph) { RTE_LOG(WARNING, IPVS, "%s: mbuf has not enough headroom" " space for ipvs tunnel\n", __func__); @@ -2141,15 +2150,7 @@ static int __dp_vs_xmit_tunnel4(struct dp_vs_proto *proto, goto errout; } - if (mbuf->pkt_len > mtu && df) { - RTE_LOG(DEBUG, IPVS, "%s: frag needed.\n", __func__); - icmp_send(mbuf, ICMP_DEST_UNREACH, ICMP_UNREACH_NEEDFRAG, - htonl(rt->mtu)); - err = EDPVS_FRAG; - goto errout; - } - - memset(new_iph, 0, sizeof(struct rte_ipv4_hdr)); + memset(new_iph, 0, ip4h_len); new_iph->version_ihl = 0x45; new_iph->type_of_service = tos; new_iph->total_length = htons(mbuf->pkt_len); @@ -2189,6 +2190,7 @@ static int __dp_vs_xmit_tunnel6(struct dp_vs_proto *proto, struct ip6_hdr *new_ip6h, *old_ip6h = ip6_hdr(mbuf); struct route6 *rt6; int err, mtu; + uint32_t ip6h_len = sizeof(struct ip6_hdr); /* * drop old route. 
just for safe, because @@ -2213,7 +2215,14 @@ static int __dp_vs_xmit_tunnel6(struct dp_vs_proto *proto, mtu = rt6->rt6_mtu; MBUF_USERDATA(mbuf, struct route6 *, MBUF_FIELD_ROUTE) = rt6; - new_ip6h = (struct ip6_hdr*)rte_pktmbuf_prepend(mbuf, sizeof(struct ip6_hdr)); + if (mbuf->pkt_len + ip6h_len > mtu) { + RTE_LOG(DEBUG, IPVS, "%s: frag needed.\n", __func__); + icmp6_send(mbuf, ICMP6_PACKET_TOO_BIG, 0, mtu - ip6h_len); + err = EDPVS_FRAG; + goto errout; + } + + new_ip6h = (struct ip6_hdr*)rte_pktmbuf_prepend(mbuf, ip6h_len); if (!new_ip6h) { RTE_LOG(WARNING, IPVS, "%s: mbuf has not enough headroom" " space for ipvs tunnel\n", __func__); @@ -2221,16 +2230,9 @@ static int __dp_vs_xmit_tunnel6(struct dp_vs_proto *proto, goto errout; } - if (mbuf->pkt_len > mtu) { - RTE_LOG(DEBUG, IPVS, "%s: frag needed.\n", __func__); - icmp6_send(mbuf, ICMP6_PACKET_TOO_BIG, 0, htonl(mtu)); - err = EDPVS_FRAG; - goto errout; - } - - memset(new_ip6h, 0, sizeof(struct ip6_hdr)); + memset(new_ip6h, 0, ip6h_len); new_ip6h->ip6_flow = old_ip6h->ip6_flow; - new_ip6h->ip6_plen = htons(mbuf->pkt_len - sizeof(struct ip6_hdr)); + new_ip6h->ip6_plen = htons(mbuf->pkt_len - ip6h_len); new_ip6h->ip6_nxt = IPPROTO_IPV6; new_ip6h->ip6_hops = old_ip6h->ip6_hops; @@ -2264,6 +2266,7 @@ static int __dp_vs_xmit_tunnel_6o4(struct dp_vs_proto *proto, struct route_entry *rt; struct rte_ipv4_hdr *new_iph; struct ip6_hdr *old_ip6h = ip6_hdr(mbuf); + uint32_t ip4h_len = sizeof(struct rte_ipv4_hdr); /* * drop old route. 
just for safe, because @@ -2289,7 +2292,14 @@ static int __dp_vs_xmit_tunnel_6o4(struct dp_vs_proto *proto, mtu = rt->mtu; MBUF_USERDATA(mbuf, struct route_entry *, MBUF_FIELD_ROUTE) = rt; - new_iph = (struct rte_ipv4_hdr*)rte_pktmbuf_prepend(mbuf, sizeof(struct rte_ipv4_hdr)); + if (mbuf->pkt_len + ip4h_len > mtu) { + RTE_LOG(DEBUG, IPVS, "%s: frag needed.\n", __func__); + icmp6_send(mbuf, ICMP6_PACKET_TOO_BIG, 0, mtu - ip4h_len); + err = EDPVS_FRAG; + goto errout; + } + + new_iph = (struct rte_ipv4_hdr*)rte_pktmbuf_prepend(mbuf, ip4h_len); if (!new_iph) { RTE_LOG(WARNING, IPVS, "%s: mbuf has not enough headroom" " space for ipvs tunnel\n", __func__); @@ -2297,14 +2307,7 @@ static int __dp_vs_xmit_tunnel_6o4(struct dp_vs_proto *proto, goto errout; } - if (mbuf->pkt_len > mtu) { - RTE_LOG(DEBUG, IPVS, "%s: frag needed.\n", __func__); - icmp6_send(mbuf, ICMP6_PACKET_TOO_BIG, 0, htonl(mtu)); - err = EDPVS_FRAG; - goto errout; - } - - memset(new_iph, 0, sizeof(struct rte_ipv4_hdr)); + memset(new_iph, 0, ip4h_len); new_iph->version_ihl = 0x45; new_iph->type_of_service = 0; new_iph->total_length = htons(mbuf->pkt_len); diff --git a/src/kni.c b/src/kni.c index 475f2fdcf..c0dcb7726 100644 --- a/src/kni.c +++ b/src/kni.c @@ -35,7 +35,11 @@ #include "dpdk.h" #include "netif.h" #include "netif_addr.h" +#include "ctrl.h" #include "kni.h" +#include "vlan.h" +#include "conf/kni.h" +#include "conf/sockopts.h" #define Kni /* KNI is defined */ #define RTE_LOGTYPE_Kni RTE_LOGTYPE_USER1 @@ -383,6 +387,7 @@ int kni_add_dev(struct netif_port *dev, const char *kniname) dev->kni.addr = dev->addr; dev->kni.kni = kni; dev->kni.rx_ring = rb; + INIT_LIST_HEAD(&dev->kni.kni_flows); return EDPVS_OK; } @@ -398,6 +403,287 @@ int kni_del_dev(struct netif_port *dev) return EDPVS_OK; } +/////////////// KNI FLOW ////////////// + +/* + * Kni Address Flow: + * The idea is to specify kni interface with an ip address, and isolate all traffic + * target at the address to a dedicated nic rx-queue, which 
may avoid disturbances + * of dataplane when overloaded. + * Note that not all nics can support this flow type under the premise of sapool. + * See `check_kni_addr_flow_support` for the supported nics known so far. It's + * encouraged to add more nic types that satisfy the flow type. + */ + +#define NETDEV_IXGBE_DRIVER_NAME "ixgbe" +#define NETDEV_I40E_DRIVER_NAME "i40e" +#define NETDEV_MLNX_DRIVER_NAME "mlx5" + +static bool check_kni_addr_flow_support(const struct netif_port *dev) +{ + if (dev->type == PORT_TYPE_BOND_MASTER) { + int i; + for (i = 0; i < dev->bond->master.slave_nb; i++) { + if (!check_kni_addr_flow_support(dev->bond->master.slaves[i])) + return false; + } + return true; + } else if (dev->type == PORT_TYPE_VLAN) { + const struct vlan_dev_priv *vlan = netif_priv_const(dev); + assert(vlan && vlan->real_dev); + return check_kni_addr_flow_support(vlan->real_dev); + } + + // PMD drivers support kni address flow + // - mlx5 + // - ixgbe + // - ... + // PMD drivers do NOT support kni address flow + // - ... 
+ if (strstr(dev->dev_info.driver_name, NETDEV_MLNX_DRIVER_NAME)) + return true; + if (strstr(dev->dev_info.driver_name, NETDEV_IXGBE_DRIVER_NAME)) + return true; + + // TODO:check and then add more supported types + + return false; +} + +static inline int kni_addr_flow_allowed(const struct netif_port *dev) +{ + if (!g_kni_lcore_id) + return EDPVS_DISABLED; + + if (dev->type != PORT_TYPE_GENERAL + && dev->type != PORT_TYPE_VLAN + && dev->type != PORT_TYPE_BOND_MASTER) { + RTE_LOG(WARNING, KNI, "%s: kni addr flow only supports physical (exclusive" + " of bonding slaves), vlan, and bonding master devices\n", __func__); + return EDPVS_NOTSUPP; + } + + if (!check_kni_addr_flow_support(dev)) { + RTE_LOG(WARNING, KNI, "%s: %s (driver: %s) doesn't support kni address flow, steer kni " + "traffic onto slave workers\n", __func__, dev->name, dev->dev_info.driver_name); + return EDPVS_NOTSUPP; + } + + return EDPVS_OK; +} + +static struct kni_addr_flow* kni_addr_flow_lookup(const struct netif_port *dev, + const struct kni_addr_flow_entry *param) { + struct kni_addr_flow *flow; + if (unlikely(!param || !dev)) + return NULL; + + list_for_each_entry(flow, &dev->kni.kni_flows, node) { + if (flow->af == param->af && + inet_addr_equal(flow->af, &flow->addr, &param->addr)) + return flow; + } + return NULL; +} + +static int kni_addr_flow_add(struct netif_port *dev, const struct kni_addr_flow_entry *param) +{ + int err; + struct kni_addr_flow *flow; + struct netif_flow_handler_param flow_handlers; + + if ((err = kni_addr_flow_allowed(dev)) != EDPVS_OK) + return err; + + if (kni_addr_flow_lookup(dev, param)) + return EDPVS_EXIST; + + flow = rte_malloc("kni_addr_flow", sizeof(struct kni_addr_flow), RTE_CACHE_LINE_SIZE); + if (unlikely(flow == NULL)) + return EDPVS_NOMEM; + flow->af = param->af; + flow->addr = param->addr; + flow->dev = dev; + flow->kni_worker = g_kni_lcore_id; + + flow_handlers.size = NELEMS(flow->flows), + flow_handlers.flow_num = 0, + flow_handlers.handlers = 
&flow->flows[0], + err = netif_kni_flow_add(dev, flow->kni_worker, flow->af, &flow->addr, &flow_handlers); + if (err != EDPVS_OK) { + rte_free(flow); + return err; + } + flow->nflows = flow_handlers.flow_num; + + list_add(&flow->node, &dev->kni.kni_flows); + + return EDPVS_OK; +} + +static int kni_addr_flow_del(struct netif_port *dev, const struct kni_addr_flow_entry *param) +{ + int err; + struct kni_addr_flow *flow; + struct netif_flow_handler_param flow_handlers; + + if ((err = kni_addr_flow_allowed(dev)) != EDPVS_OK) + return err; + + flow = kni_addr_flow_lookup(dev, param); + if (!flow) + return EDPVS_NOTEXIST; + + list_del(&flow->node); + + flow_handlers.size = NELEMS(flow->flows); + flow_handlers.flow_num = flow->nflows; + flow_handlers.handlers = &flow->flows[0]; + err = netif_kni_flow_del(dev, flow->kni_worker, flow->af, &flow->addr, &flow_handlers); + if (err != EDPVS_OK) { + list_add(&flow->node, &dev->kni.kni_flows); + return err; + } + + rte_free(flow); + return EDPVS_OK; +} + +static int kni_addr_flow_flush(struct netif_port *dev) +{ + int err, retval = EDPVS_OK; + struct kni_addr_flow *flow, *next; + struct netif_flow_handler_param flow_handlers; + + if ((err = kni_addr_flow_allowed(dev)) != EDPVS_OK) + return err; + + list_for_each_entry_safe(flow, next, &dev->kni.kni_flows, node) { + list_del(&flow->node); + flow_handlers.size = NELEMS(flow->flows); + flow_handlers.flow_num = flow->nflows; + flow_handlers.handlers = &flow->flows[0]; + err = netif_kni_flow_del(dev, flow->kni_worker, flow->af, &flow->addr, &flow_handlers); + if (err != EDPVS_OK) { + retval = err; + list_add(&flow->node, &dev->kni.kni_flows); + } else { + rte_free(flow); + } + } + + return retval; +} + +static void inline kni_addr_flow_fill_entry(const struct kni_addr_flow *flow, + struct kni_conf_param *entry) { + snprintf(entry->ifname, sizeof(entry->ifname), "%s", flow->dev->name); + entry->type = KNI_DTYPE_ADDR_FLOW; + entry->data.flow.af = flow->af; + entry->data.flow.addr = 
flow->addr; +} + +static int kni_addr_flow_get(struct netif_port *dev, const struct kni_addr_flow_entry *param, + struct kni_info **pentries, int *plen) +{ + int i, n, err; + size_t memlen; + struct kni_addr_flow *flow; + struct kni_info *info; + + if ((err = kni_addr_flow_allowed(dev)) != EDPVS_OK) + return err; + + i = 0; + n = list_elems(&dev->kni.kni_flows); + memlen = sizeof(struct kni_info) + n * sizeof(struct kni_conf_param); + info = rte_calloc("kni_addr_flow_get", 1, memlen, RTE_CACHE_LINE_SIZE); + if (unlikely(!info)) + return EDPVS_NOMEM; + + list_for_each_entry(flow, &dev->kni.kni_flows, node) { + assert(i < n); + kni_addr_flow_fill_entry(flow, &info->entries[i++]); + } + assert(i == n); + info->len = n; + + *plen = memlen; + *pentries = info; + return EDPVS_OK; +} + +/////////////// KNI FLOW END ////////////// + +static int kni_sockopt_set(sockoptid_t opt, const void *conf, size_t size) +{ + const struct kni_conf_param *param = conf; + struct netif_port *dev; + + if (!conf || size < sizeof(struct kni_conf_param)) + return EDPVS_INVAL; + + if (param->type != KNI_DTYPE_ADDR_FLOW) + return EDPVS_NOTSUPP; + + dev = netif_port_get_by_name(param->ifname); + if (!dev) + return EDPVS_NOTEXIST; + + switch (opt) { + case SOCKOPT_SET_KNI_ADD: + return kni_addr_flow_add(dev, &param->data.flow); + case SOCKOPT_SET_KNI_DEL: + return kni_addr_flow_del(dev, &param->data.flow); + case SOCKOPT_SET_KNI_FLUSH: + return kni_addr_flow_flush(dev); + default: + return EDPVS_NOTSUPP; + } + + return EDPVS_OK; +} + +static int kni_sockopt_get(sockoptid_t opt, const void *conf, size_t size, + void **out, size_t *outsize) +{ + int err, len = 0; + struct netif_port *dev; + struct kni_info *info = NULL; + const struct kni_conf_param *param = conf; + + if (!conf || size < sizeof(struct kni_conf_param) || !out || !outsize) + return EDPVS_INVAL; + + if (opt != SOCKOPT_GET_KNI_LIST) + return EDPVS_NOTSUPP; + + if (param->type != KNI_DTYPE_ADDR_FLOW) + return EDPVS_NOTSUPP; + + dev = 
netif_port_get_by_name(param->ifname); + if (!dev) + return EDPVS_NOTEXIST; + + err = kni_addr_flow_get(dev, &param->data.flow, &info, &len); + if (err != EDPVS_OK) + return err; + + *out = info; + *outsize = len; + return EDPVS_OK; +} + +static struct dpvs_sockopts kni_sockopts = { + .version = SOCKOPT_VERSION, + .set_opt_min = SOCKOPT_SET_KNI_ADD, + .set_opt_max = SOCKOPT_SET_KNI_FLUSH, + .set = kni_sockopt_set, + .get_opt_min = SOCKOPT_GET_KNI_LIST, + .get_opt_max = SOCKOPT_GET_KNI_LIST, + .get = kni_sockopt_get, +}; + int kni_init(void) { int i; @@ -415,3 +701,31 @@ int kni_init(void) return EDPVS_OK; } + +int kni_ctrl_init(void) +{ + int err; + + err = sockopt_register(&kni_sockopts); + if (err != EDPVS_OK) { + RTE_LOG(ERR, KNI, "%s: fail to register kni_sockopts -- %s\n", + __func__, dpvs_strerror(err)); + return err; + } + + return EDPVS_OK; +} + +int kni_ctrl_term(void) +{ + int err; + + err = sockopt_unregister(&kni_sockopts); + if (err != EDPVS_OK) { + RTE_LOG(ERR, KNI, "%s: fail to unregister kni_sockopts -- %s\n", + __func__, dpvs_strerror(err)); + return err; + } + + return EDPVS_OK; +} diff --git a/src/log.c b/src/log.c index e0b9dd2d7..6dc6f83cf 100644 --- a/src/log.c +++ b/src/log.c @@ -26,18 +26,39 @@ #include "dpdk.h" #include "global_data.h" -int g_dpvs_log_thread_ready = 0; -int g_dpvs_log_time_off = 0; -int log_internal = LOG_INTERNAL_TIME; -log_buf_t w_buf; -lcoreid_t g_dpvs_log_core = 0; -log_stats_t log_stats_info[DPVS_MAX_LCORE]; -struct rte_ring *log_ring; -bool g_dpvs_log_async_mode = 0; +#define DPVS_LOG_RING_SIZE 4096 + +#define LOG_INTERNAL_TIME 5 +#define LOG_SLOW_INTERNAL_TIME (60*10) + +#define TIMEZONE 0 +#define DAY (60*60*24) +#define YEARFIRST 2001 +#define YEARSTART (365*(YEARFIRST-1970) + 8) +#define YEAR400 (365*4*100 + (4*(100/4 - 1) + 1)) +#define YEAR100 (365*100 + (100/4 - 1)) +#define YEAR004 (365*4 + 1) +#define YEAR001 365 + +bool g_dpvs_log_async_mode = false; +lcoreid_t g_dpvs_log_core = DPVS_MAX_LCORE; +uint8_t 
g_dpvs_log_tslen = 0; // timestamp length + +static bool dpvs_log_thread_ready = false; +static int log_internal = LOG_INTERNAL_TIME; +static log_buf_t w_buf; +static log_stats_t log_stats_info[DPVS_MAX_LCORE]; +static struct rte_ring *log_ring; static struct rte_mempool *dp_vs_log_pool; static int log_pool_size = DPVS_LOG_POOL_SIZE_DEF; static int log_pool_cache = DPVS_LOG_CACHE_SIZE_DEF; +void dpvs_set_log_pool_size(int size) +{ + assert(size > DPVS_LOG_POOL_SIZE_MIN); + log_pool_size = size; +} + static int log_send(struct dpvs_log *msg) { int res; @@ -59,13 +80,16 @@ static inline void dpvs_log_thread_lcore_set(lcoreid_t core_num) } static struct dpvs_log *dpvs_log_msg_make(int level, int type, lcoreid_t cid, - uint32_t len, const void *data) + size_t len, const void *data) { struct dpvs_log *log_msg; - if (unlikely(rte_mempool_get(dp_vs_log_pool, (void **)&log_msg) != 0)) { + if (unlikely(rte_mempool_get(dp_vs_log_pool, (void **)&log_msg) != 0)) return NULL; - } + + if (unlikely(len + sizeof(struct dpvs_log) > DPVS_LOG_MAX_LINE_LEN)) + len = DPVS_LOG_MAX_LINE_LEN - sizeof(struct dpvs_log); // truncated + log_msg->log_level = level; log_msg->log_type = type; log_msg->cid = cid; @@ -88,8 +112,7 @@ static unsigned int log_BKDRHash(char *str, int len) unsigned int seed = 131; // 31 131 1313 13131 131313 etc.. 
unsigned int hash = 0; - while (len--) - { + while (len--) { hash = hash * seed + (*str++); } @@ -107,32 +130,28 @@ static uint64_t log_get_time(char *time, int time_len) int i; tm = sys_current_time(); - sec = tm + (60*60)*TIMEZONE; - ad = sec/DAY; + sec = tm + (60 * 60) * TIMEZONE; + ad = sec / DAY; ad = ad - YEARSTART; - y400 = ad/YEAR400; - y100 = (ad - y400*YEAR400)/YEAR100; - y004 = (ad - y400*YEAR400 - y100*YEAR100)/YEAR004; - y001 = (ad - y400*YEAR400 - y100*YEAR100 - y004*YEAR004)/YEAR001; - yy = y400*4*100 + y100*100 + y004*4 + y001*1 + YEARFIRST; - dd = (ad - y400*YEAR400 - y100*YEAR100 - y004*YEAR004)%YEAR001 + 1; - - if(0 == yy%1000) - { - if(0 == (yy/1000)%4) - { + y400 = ad / YEAR400; + y100 = (ad - y400 * YEAR400) / YEAR100; + y004 = (ad - y400 * YEAR400 - y100 * YEAR100) / YEAR004; + y001 = (ad - y400 * YEAR400 - y100 * YEAR100 - y004 * YEAR004) / YEAR001; + yy = y400 * 4 * 100 + y100 * 100 + y004 * 4 + y001 * 1 + YEARFIRST; + dd = (ad - y400 * YEAR400 - y100 * YEAR100 - y004 * YEAR004) % YEAR001 + 1; + + if (0 == yy % 1000) { + if (0 == (yy / 1000) % 4 ) { m[1] = 29; } } else { - if(0 == yy%4) - { + if (0 == yy % 4) { m[1] = 29; } } - for(i = 0; i < 12; i++) - { - if(dd - m[i] <= 0) - { + + for (i = 0; i < 12; i++) { + if(dd - m[i] <= 0) { break; } else { dd = dd -m[i]; @@ -140,66 +159,65 @@ static uint64_t log_get_time(char *time, int time_len) } mm = i + 1; - hh = sec/(60*60)%24; - mi = sec/60 - sec/(60*60)*60; - ss = sec - sec/60*60; - snprintf(time, time_len, "%d-%02d-%02d %02d:%02d:%02d\n", yy, mm, dd, hh, mi, ss); + hh = sec / (60 * 60) % 24; + mi = sec / 60 - sec / (60 * 60) * 60; + ss = sec - sec / 60 * 60; + snprintf(time, time_len, "%04d-%02d-%02d %02d:%02d:%02d", yy, mm, dd, hh, mi, ss); return tm; } -static int dpvs_async_log(uint32_t level, uint32_t logtype, lcoreid_t cid, char *log, int len, int off) +static int dpvs_async_log(uint32_t level, uint32_t logtype, + uint64_t now, lcoreid_t cid, char *log, size_t len, size_t tslen) { - 
struct dpvs_log *msg = NULL; - int log_hash_new; - int err; - - log_hash_new = log_BKDRHash(log+off, len); - if (log_hash_new == log_stats_info[cid].log_hash - && (rte_get_timer_cycles() - log_stats_info[cid].log_begin) - < log_internal * g_cycles_per_sec) { + struct dpvs_log *msg; + unsigned int log_hash_new; + + log_hash_new = log_BKDRHash(log + tslen, len); + if ((log_hash_new == log_stats_info[cid].log_hash) && + (now - log_stats_info[cid].log_begin < log_internal * g_cycles_per_sec)) { log_stats_info[cid].missed++; - return -1; + return EDPVS_EXIST; } + /* add time info and send out to log ring */ - if (off) { + if (tslen) { log_get_time(log, LOG_SYS_TIME_LEN); - log[LOG_SYS_TIME_LEN-1] = ' '; + log[LOG_SYS_TIME_LEN - 1] = ' '; len += LOG_SYS_TIME_LEN; } log_stats_info[cid].log_hash = log_hash_new; - log_stats_info[cid].log_begin = rte_get_timer_cycles(); + log_stats_info[cid].log_begin = now; msg = dpvs_log_msg_make(level, logtype, cid, len, log); - if (msg == NULL) - return -1; + if (unlikely(!msg)) + return EDPVS_NOMEM; - err = log_send(msg); - if (err != EDPVS_OK) { + if (log_send(msg) != EDPVS_OK) { dpvs_log_free(msg); /* log ring is full, need to set limit rate */ - fprintf(stderr, "log ring is full !\n"); + // fprintf(stderr, "log ring is full !\n"); log_stats_info[cid].slow = 1; - log_stats_info[cid].slow_begin = rte_get_timer_cycles(); - return -1; + log_stats_info[cid].slow_begin = now; + return EDPVS_OVERLOAD; } - return 0; + return EDPVS_OK; } int dpvs_log(uint32_t level, uint32_t logtype, const char *func, int line, const char *format, ...) 
{ va_list ap; lcoreid_t cid; + uint64_t now; + size_t capa, len = 0, tslen = g_dpvs_log_tslen; char log_buf[DPVS_LOG_MAX_LINE_LEN]; - int len = 0; - int off = g_dpvs_log_time_off; if (level > rte_log_get_global_level()) - return -1; + return EDPVS_INVAL; va_start(ap, format); do { - if (!g_dpvs_log_async_mode || !g_dpvs_log_core || !g_dpvs_log_thread_ready) { + if (!g_dpvs_log_async_mode || !g_dpvs_log_core || !dpvs_log_thread_ready) { rte_vlog(level, logtype, format, ap); break; } @@ -208,30 +226,37 @@ int dpvs_log(uint32_t level, uint32_t logtype, const char *func, int line, const rte_vlog(level, logtype, format, ap); break; } + + now = rte_get_timer_cycles(); cid = rte_lcore_id(); + capa = sizeof(log_buf) - tslen; if (log_stats_info[cid].slow) { /* set log limit rate to 5 sec and keep for 10 mins */ - if (rte_get_timer_cycles() - log_stats_info[cid].slow_begin > LOG_SLOW_INTERNAL_TIME * g_cycles_per_sec) { + if (now - log_stats_info[cid].slow_begin > LOG_SLOW_INTERNAL_TIME * g_cycles_per_sec) { log_stats_info[cid].slow = 0; } - if ((rte_get_timer_cycles() - log_stats_info[cid].log_begin) < log_internal * g_cycles_per_sec) { + if ((now - log_stats_info[cid].log_begin) < log_internal * g_cycles_per_sec) { log_stats_info[cid].missed++; break; } /* just output func and line if log is too fast */ - len = snprintf(log_buf+off, sizeof(log_buf)-off, "%s:%d\n", func, line); - dpvs_async_log(level, logtype, cid, log_buf, len, off); + len = snprintf(log_buf + tslen, capa, "%s:%d\n", func, line); + if (unlikely(len > capa)) + len = capa; // truncated + dpvs_async_log(level, logtype, now, cid, log_buf, len, tslen); break; } - len = vsnprintf(log_buf+off, sizeof(log_buf)-off, format, ap); - dpvs_async_log(level, logtype, cid, log_buf, len, off); - }while(0); + len = vsnprintf(log_buf + tslen, sizeof(log_buf) - tslen, format, ap); + if (unlikely(len > capa)) + len = capa; // truncated + dpvs_async_log(level, logtype, now, cid, log_buf, len, tslen); + } while (0); va_end(ap); - 
return 0; + return EDPVS_OK; } -static int log_buf_flush(FILE *f) +static void log_buf_flush(FILE *f) { if (f == NULL) { w_buf.buf[w_buf.pos] = '\0'; @@ -241,25 +266,20 @@ static int log_buf_flush(FILE *f) fflush(f); } w_buf.pos = 0; - return 0; } -static int log_buf_timeout_flush(FILE *f, int timeout) +static inline void log_buf_timeout_flush(FILE *f, int timeout, uint64_t now) { - uint64_t now; - - now = rte_get_timer_cycles(); - - if (w_buf.pos && ((now - w_buf.time) >= timeout * g_cycles_per_sec)) { + if (unlikely(w_buf.pos && (now - w_buf.time + >= timeout * g_cycles_per_sec))) { log_buf_flush(f); } - return 0; } -static int log_slave_process(void) +static void log_slave_process(void) { struct dpvs_log *msg_log; - int ret = EDPVS_OK; + uint64_t now = rte_get_timer_cycles(); FILE *f = rte_log_get_stream(); /* dequeue LOG from ring, no lock for ring and w_buf */ @@ -269,22 +289,21 @@ static int log_slave_process(void) } if (!w_buf.pos) { w_buf.level = msg_log->log_level - 1; - w_buf.time = rte_get_timer_cycles(); + w_buf.time = now; } - strncpy(w_buf.buf+w_buf.pos, msg_log->data, msg_log->log_len); + strncpy(w_buf.buf + w_buf.pos, msg_log->data, sizeof(w_buf.buf) - w_buf.pos); w_buf.pos += msg_log->log_len; - log_buf_timeout_flush(f, 5); + log_buf_timeout_flush(f, log_internal, now); dpvs_log_free(msg_log); } - log_buf_timeout_flush(f, 5); - - return ret; + log_buf_timeout_flush(f, log_internal, now); } static void log_slave_loop_func(void) { - g_dpvs_log_thread_ready = 1; - while(1){ + dpvs_log_thread_ready = true; + + while (1) { log_slave_process(); } } @@ -292,8 +311,7 @@ static void log_slave_loop_func(void) static void log_signal_handler(int signum) { if (signum == SIGABRT || signum == SIGSEGV) { - printf("\nSignal %d received, preparing to exit...\n", - signum); + fprintf(stderr, "\nSignal %d received, preparing to exit...\n", signum); } log_slave_process(); log_buf_flush(rte_log_get_stream()); @@ -301,16 +319,14 @@ static void log_signal_handler(int 
signum) kill(getpid(), signum); } -static int __log_slave_init(void) +int log_slave_init(void) { char ring_name[16]; int lcore_id; - FILE *f = rte_log_get_stream(); char log_pool_name[32]; - if (f != NULL) { - g_dpvs_log_time_off = LOG_SYS_TIME_LEN; - } + if (!g_dpvs_log_async_mode) + return EDPVS_DISABLED; RTE_LCORE_FOREACH_WORKER(lcore_id) { if (rte_eal_get_lcore_state(lcore_id) == FINISHED) { @@ -319,18 +335,23 @@ static int __log_slave_init(void) break; } } - snprintf(ring_name, sizeof(ring_name), "log_ring_%d", g_dpvs_log_core); - log_ring = rte_ring_create(ring_name, DPVS_LOG_RING_SIZE_DEF, - rte_socket_id(), 0/*RING_F_SC_DEQ*/); + if (!rte_lcore_is_enabled(lcore_id)) + return EDPVS_NONEALCORE; + if (lcore_id >= DPVS_MAX_LCORE || g_lcore_role[lcore_id] != LCORE_ROLE_IDLE) + return EDPVS_BUSY; + + snprintf(ring_name, sizeof(ring_name), "log_ring_w%d", g_dpvs_log_core); + log_ring = rte_ring_create(ring_name, DPVS_LOG_RING_SIZE, + SOCKET_ID_ANY , RING_F_SC_DEQ); if (unlikely(NULL == log_ring)) { - fprintf(stderr, "Fail to init log slave core\n"); + fprintf(stderr, "fail to init log slave core\n"); return EDPVS_DPDKAPIFAIL; } /* use memory pool for log msg */ snprintf(log_pool_name, sizeof(log_pool_name), "log_msg_pool"); dp_vs_log_pool = rte_mempool_create(log_pool_name, log_pool_size, - sizeof(struct dpvs_log) + DPVS_LOG_MAX_LINE_LEN, + DPVS_LOG_MAX_LINE_LEN, log_pool_cache, 0, NULL, NULL, NULL, NULL, 0, 0); @@ -345,12 +366,3 @@ static int __log_slave_init(void) return EDPVS_OK; } - -int log_slave_init(void) -{ - if (g_dpvs_log_async_mode) - return __log_slave_init(); - - return EDPVS_OK; -} - diff --git a/src/main.c b/src/main.c index 0efc4728e..2da1a2551 100644 --- a/src/main.c +++ b/src/main.c @@ -38,6 +38,7 @@ #include "ipv4.h" #include "neigh.h" #include "sa_pool.h" +#include "ipset/ipset.h" #include "ipvs/ipvs.h" #include "cfgfile.h" #include "ip_tunnel.h" @@ -101,14 +102,16 @@ static void inline dpdk_version_check(void) sa_pool_init, sa_pool_term), \ 
DPVS_MODULE(MODULE_IP_TUNNEL, "tunnel", \ ip_tunnel_init, ip_tunnel_term), \ + DPVS_MODULE(MODULE_IPSET, "ipset", \ + ipset_init, ipset_term), \ DPVS_MODULE(MODULE_VS, "ipvs", \ dp_vs_init, dp_vs_term), \ DPVS_MODULE(MODULE_NETIF_CTRL, "netif ctrl", \ netif_ctrl_init, netif_ctrl_term), \ DPVS_MODULE(MODULE_IFTRAF, "iftraf", \ iftraf_init, iftraf_term), \ - DPVS_MODULE(MODULE_LAST, "iftraf", \ - eal_mem_init, eal_mem_term) \ + DPVS_MODULE(MODULE_LAST, "last", \ + NULL, NULL) \ } #define DPVS_MODULE(a, b, c, d) a @@ -325,14 +328,15 @@ int main(int argc, char *argv[]) /* print port-queue-lcore relation */ netif_print_lcore_conf(pql_conf_buf, &pql_conf_buf_len, true, 0); - RTE_LOG(INFO, DPVS, "\nport-queue-lcore relation array: \n%s\n", + RTE_LOG(INFO, DPVS, "port-queue-lcore relation array: \n%s\n", pql_conf_buf); - log_slave_init(); - /* start slave worker threads */ dpvs_lcore_start(0); + /* start async logging worker thread */ + log_slave_init(); + /* write pid file */ if (!pidfile_write(DPVS_PIDFILE, getpid())) goto end; diff --git a/src/mbuf.c b/src/mbuf.c index 444082e2d..7d10d54d3 100644 --- a/src/mbuf.c +++ b/src/mbuf.c @@ -22,6 +22,7 @@ */ #include #include +#include #include "mbuf.h" #include "inet.h" #include "ipv4.h" @@ -49,6 +50,7 @@ void *mbuf_userdata_const(const struct rte_mbuf *mbuf, mbuf_usedata_field_t fiel * * it expands heading mbuf, moving it's tail forward and copying necessary * data from segments part. 
+ * */ int mbuf_may_pull(struct rte_mbuf *mbuf, unsigned int len) { @@ -127,6 +129,7 @@ void mbuf_copy_metadata(struct rte_mbuf *mi, struct rte_mbuf *m) __rte_mbuf_sanity_check(m, 0); } +/* deprecated: replace it with rte_pktmbuf_copy */ struct rte_mbuf *mbuf_copy(struct rte_mbuf *md, struct rte_mempool *mp) { struct rte_mbuf *mc, *mi, **prev; @@ -221,3 +224,119 @@ int mbuf_init(void) return EDPVS_OK; } + +uint16_t mbuf_ether_type(struct rte_mbuf *mbuf) +{ + // FIXME: The ether-type should be retrieved from mbuf->packet_type + // in consideration of performance. But the packet_type field of mbuf + // is overwritten by DPVS, which is expected to be fixed in dpvs v1.9. + + uint16_t ethtype; + struct rte_ether_hdr *ehdr; + + ehdr = mbuf_header_l2(mbuf); + if (unlikely(!ehdr)) + return 0; + ethtype = ntohs(ehdr->ether_type); + if (unlikely(ethtype == RTE_ETHER_TYPE_VLAN)) + ethtype = ntohs(*((uint16_t *)(((void *)&ehdr->ether_type) + 4))); + return ethtype; +} + +int mbuf_address_family(struct rte_mbuf *mbuf) +{ + uint16_t etype = mbuf_ether_type(mbuf); + + if (etype == RTE_ETHER_TYPE_IPV4) + return AF_INET; + if (etype == RTE_ETHER_TYPE_IPV6) + return AF_INET6; + return 0; // AF_UNSPEC +} + +uint8_t mbuf_protocol(struct rte_mbuf *mbuf) +{ + // FIXME: The L4 protocol should be retrieved from mbuf->packet_type + // in consideration of performance. But the packet_type field of mbuf + // is overwritten by DPVS, which is expected to be fixed in dpvs v1.9. 
+ + uint32_t ptype; + struct rte_net_hdr_lens hdrlens = { 0 }; + + ptype = rte_net_get_ptype(mbuf, &hdrlens, RTE_PTYPE_L2_MASK + | RTE_PTYPE_L3_MASK | RTE_PTYPE_L4_MASK); + if (!ptype) + return 0; + + switch (ptype & RTE_PTYPE_L4_MASK) { + case RTE_PTYPE_L4_TCP: + return IPPROTO_TCP; + case RTE_PTYPE_L4_UDP: + return IPPROTO_UDP; + case RTE_PTYPE_L4_SCTP: + return IPPROTO_SCTP; + case RTE_PTYPE_L4_ICMP: + return IPPROTO_ICMP; +#if RTE_VERSION >= RTE_VERSION_NUM(18, 11, 0, 0) + case RTE_PTYPE_L4_IGMP: + return IPPROTO_IGMP; +#endif + default: + return 0; + } + + return 0; +} + +void *mbuf_header_l2(struct rte_mbuf *mbuf) +{ + uint32_t ptype; + struct rte_net_hdr_lens hdrlens = { 0 }; + + if (unlikely(!mbuf->l2_len)) { + ptype = rte_net_get_ptype(mbuf, &hdrlens, RTE_PTYPE_L2_MASK); + if (!ptype) + return NULL; + mbuf->l2_len = hdrlens.l2_len; + } + + return rte_pktmbuf_mtod(mbuf, void *); +} + +void *mbuf_header_l3(struct rte_mbuf *mbuf) +{ + uint32_t ptype; + struct rte_net_hdr_lens hdrlens = { 0 }; + + if (unlikely(!mbuf->l3_len || !mbuf->l2_len)) { + ptype = rte_net_get_ptype(mbuf, &hdrlens, + RTE_PTYPE_L2_MASK | RTE_PTYPE_L3_MASK); + if (!ptype) + return NULL; + assert(!mbuf->l2_len || mbuf->l2_len == hdrlens.l2_len); + mbuf->l2_len = hdrlens.l2_len; + mbuf->l3_len = hdrlens.l3_len; + } + + return rte_pktmbuf_mtod_offset(mbuf, void *, mbuf->l2_len); +} + +void *mbuf_header_l4(struct rte_mbuf *mbuf) +{ + uint32_t ptype; + struct rte_net_hdr_lens hdrlens = { 0 }; + + if (unlikely(!mbuf->l4_len || !mbuf->l3_len || !mbuf->l2_len)) { + ptype = rte_net_get_ptype(mbuf, &hdrlens, RTE_PTYPE_L2_MASK + | RTE_PTYPE_L3_MASK | RTE_PTYPE_L4_MASK); + if (!ptype) + return NULL; + assert((!mbuf->l2_len || mbuf->l2_len == hdrlens.l2_len) + && (!mbuf->l3_len || mbuf->l3_len == hdrlens.l3_len)); + mbuf->l2_len = hdrlens.l2_len; + mbuf->l3_len = hdrlens.l3_len; + mbuf->l4_len = hdrlens.l4_len; + } + + return rte_pktmbuf_mtod_offset(mbuf, void *, mbuf->l2_len + mbuf->l3_len); +} diff 
--git a/src/neigh.c b/src/neigh.c index 80341b763..c26e9e2ca 100644 --- a/src/neigh.c +++ b/src/neigh.c @@ -85,7 +85,7 @@ static struct nud_state nud_states[] = { /* params from config file */ static int arp_unres_qlen = NEIGH_ENTRY_BUFF_SIZE_DEF; -static struct rte_ring *neigh_ring[DPVS_MAX_LCORE]; +static struct rte_ring *neigh_ring[DPVS_MAX_LCORE] = {NULL, }; static void unres_qlen_handler(vector_t tokens) { @@ -751,6 +751,8 @@ static int neigh_ring_init(void) uint8_t cid; socket_id = rte_socket_id(); for (cid = 0; cid < DPVS_MAX_LCORE; cid++) { + if (false == netif_lcore_is_fwd_worker(cid)) + continue; snprintf(name_buf, RTE_RING_NAMESIZE, "neigh_ring_c%d", cid); neigh_ring[cid] = rte_ring_create(name_buf, MAC_RING_SIZE, socket_id, RING_F_SC_DEQ); @@ -812,6 +814,8 @@ static void neigh_process_ring(void *arg) struct raw_neigh *param; lcoreid_t cid = rte_lcore_id(); + if (unlikely(NULL == neigh_ring[cid])) + return; nb_rb = rte_ring_dequeue_burst(neigh_ring[cid], (void **)params, NETIF_MAX_PKT_BURST, NULL); if (nb_rb > 0) { @@ -1014,7 +1018,7 @@ int neigh_sync_core(const void *param, bool add_del, enum param_kind kind) cid = rte_lcore_id(); for (i = 0; i < DPVS_MAX_LCORE; i++) { - if ((i == cid) || (!is_lcore_id_valid(i)) || (i == master_cid)) + if ((i == cid) || (!is_lcore_id_valid(i)) || (i == master_cid) || (NULL == neigh_ring[i])) continue; switch (kind) { case NEIGH_ENTRY: diff --git a/src/netif.c b/src/netif.c index 80d9babdd..44aba9c8c 100644 --- a/src/netif.c +++ b/src/netif.c @@ -71,6 +71,8 @@ int netif_pktpool_mbuf_cache = NETIF_PKTPOOL_MBUF_CACHE_DEF; #define ARP_RING_SIZE 2048 +#define RETA_CONF_SIZE (ETH_RSS_RETA_SIZE_512 / RTE_RETA_GROUP_SIZE) + /* physical nic id = phy_pid_base + index */ static portid_t phy_pid_base = 0; static portid_t phy_pid_end = -1; // not inclusive @@ -990,7 +992,7 @@ static inline int is_ipv4_pkt_valid(struct rte_ipv4_hdr *iph, uint32_t link_len) return EDPVS_OK; } -static void parse_ipv4_hdr(struct rte_mbuf *mbuf, uint16_t 
port, uint16_t queue) +__rte_unused static void parse_ipv4_hdr(struct rte_mbuf *mbuf, uint16_t port, uint16_t queue) { char saddr[16], daddr[16]; uint16_t lcore; @@ -1208,11 +1210,11 @@ static void config_lcores(struct list_head *worker_list) port->id, queue->rx_queues[ii], queue->isol_rxq_ring_sz, &lcore_conf[id].pqs[tk].rxqs[ii]) < 0) { - RTE_LOG(ERR, NETIF, "[%s]: isol_rxq add failed for cpu%d:%s:" + RTE_LOG(ERR, NETIF, "%s: isol_rxq add failed for cpu%d:%s:" "rx%d, recieving locally instead.\n", __func__, worker_min->cpu_id, port->name, queue->rx_queues[ii]); } else { - RTE_LOG(INFO, NETIF, "[%s]: isol_rxq on cpu%d with ring size %d is " + RTE_LOG(INFO, NETIF, "%s: isol_rxq on cpu%d with ring size %d is " "added for cpu%d:%s:rx%d\n", __func__, queue->isol_rxq_lcore_ids[ii], queue->isol_rxq_ring_sz, worker_min->cpu_id, port->name, queue->rx_queues[ii]); @@ -1239,25 +1241,30 @@ static void config_lcores(struct list_head *worker_list) lcoreid_t lcore2index[DPVS_MAX_LCORE+1]; portid_t port2index[DPVS_MAX_LCORE][NETIF_MAX_PORTS]; -static void lcore_index_init(void) +static int lcore_index_init(void) { - lcoreid_t ii; - int tk = 0; - for (ii = 0; ii <= DPVS_MAX_LCORE; ii++) { - if (rte_lcore_is_enabled(ii)) { - if (likely(tk)) - lcore2index[ii] = tk - 1; - else - lcore2index[ii] = DPVS_MAX_LCORE; - tk++; - } else - lcore2index[ii] = DPVS_MAX_LCORE; + lcoreid_t cid; + int i; + + for (i = 0; i <= DPVS_MAX_LCORE; i++) + lcore2index[i] = DPVS_MAX_LCORE; + + for (i = 0; lcore_conf[i].nports > 0; i++) { + cid = lcore_conf[i].id; + if (!rte_lcore_is_enabled(cid)) + return EDPVS_NONEALCORE; + lcore2index[cid] = i; } + #ifdef CONFIG_DPVS_NETIF_DEBUG printf("lcore fast searching table: \n"); - for (ii = 0; ii <= DPVS_MAX_LCORE; ii++) - printf("lcore2index[%d] = %d\n", ii, lcore2index[ii]); + for (i = 0; i <= DPVS_MAX_LCORE; i++) { + if (lcore2index[i] != DPVS_MAX_LCORE) + printf("\tcid: %2d --> %2d\n", i, lcore2index[i]); + } #endif + + return EDPVS_OK; } static void 
port_index_init(void) @@ -1280,9 +1287,10 @@ static void port_index_init(void) #ifdef CONFIG_DPVS_NETIF_DEBUG printf("port fast searching table(port2index[cid][pid]): \n"); for (ii = 0; ii < DPVS_MAX_LCORE; ii++) { - for (jj = 0; jj < NETIF_MAX_PORTS; jj++) - printf("%d-%d:%d ", ii, jj, port2index[ii][jj]); - printf("\n"); + for (jj = 0; jj < NETIF_MAX_PORTS; jj++) { + if (port2index[ii][jj] != NETIF_PORT_ID_INVALID) + printf("\tcid: %2d, pid: %2d --> index: %2d\n", ii, jj, port2index[ii][jj]); + } } #endif } @@ -1345,6 +1353,32 @@ static void build_lcore_index(void) g_lcore_num = idx; } +static inline void dump_lcore_role(void) +{ + dpvs_lcore_role_t role; + lcoreid_t cid; + char bufs[LCORE_ROLE_MAX+1][1024]; + char results[sizeof bufs]; + + for (role = 0; role < LCORE_ROLE_MAX; role++) + snprintf(bufs[role], sizeof(bufs[role]), "\t%s: ", + dpvs_lcore_role_str(role)); + + for (cid = 0; cid < DPVS_MAX_LCORE; cid++) { + role = g_lcore_role[cid]; + snprintf(&bufs[role][strlen(bufs[role])], sizeof(bufs[role]) + - strlen(bufs[role]), "%-4d", cid); + } + + snprintf(results, sizeof(results), "%s", bufs[0]); + for (role = 1; role < LCORE_ROLE_MAX; role++) { + strncat(results, "\n", sizeof(results) - strlen(results) - 1); + strncat(results, bufs[role], sizeof(results) - strlen(results) - 1); + } + + RTE_LOG(INFO, NETIF, "LCORE ROLES:\n%s\n", results); +} + static void lcore_role_init(void) { int i, cid; @@ -1375,10 +1409,7 @@ static void lcore_role_init(void) } build_lcore_index(); - - for (cid = 0; cid < DPVS_MAX_LCORE; cid++) - RTE_LOG(INFO, NETIF, "[%02d]: %s\n", - cid, dpvs_lcore_role_str(g_lcore_role[cid])); + dump_lcore_role(); } static inline void netif_copy_lcore_stats(struct netif_lcore_stats *stats) @@ -1419,6 +1450,37 @@ static int port_tx_queues_get(portid_t pid) return tx_ports; } +/* + * params: + * @pid: [in] port id + * @qids: [out] queue id array containing rss queues when return + * @n_queues: [in,out], `qids` array length when input, rss queue number 
when return + */ +static int get_configured_rss_queues(portid_t pid, queueid_t *qids, int *n_queues) +{ + int i, j, k, tk = 0; + if (!qids || !n_queues || *n_queues < NETIF_MAX_QUEUES) + return EDPVS_INVAL; + + for (i = 0; lcore_conf[i].nports > 0; i++) { + if (lcore_conf[i].type != LCORE_ROLE_FWD_WORKER) + continue; + for (j = 0; j < lcore_conf[i].nports; j++) { + if (lcore_conf[i].pqs[j].id == pid) + break; + } + if (lcore_conf[i].pqs[j].id != pid) + return EDPVS_INVAL; + for (k = 0; k < lcore_conf[i].pqs[j].nrxq; k++) { + if (tk >= *n_queues) /* check capacity before the store, not after */ + return EDPVS_NOMEM; + qids[tk++] = lcore_conf[i].pqs[j].rxqs[k].id; /* RSS RETA entries are rx queue ids */ + } + } + *n_queues = tk; + return tk > 0 ? EDPVS_OK : EDPVS_NOTEXIST; /* an empty set must not be used as a modulus by the caller */ +} + static uint8_t get_configured_port_nb(int lcores, const struct netif_lcore_conf *lcore_conf) { int i = 0, j, k; @@ -2264,8 +2326,6 @@ int netif_rcv(struct netif_port *dev, __be16 eth_type, struct rte_mbuf *mbuf) if (!pt) return EDPVS_KNICONTINUE; - mbuf->l2_len = 0; /* make sense ? */ - return pt->func(mbuf, dev); } @@ -2449,6 +2509,8 @@ void lcore_process_packets(struct rte_mbuf **mbufs, lcoreid_t cid, uint16_t coun mbuf->port = dev->id; } + mbuf->tx_offload = 0; /* reset l2_len, l3_len, l4_len, ...
*/ + if (t < count) { rte_prefetch0(rte_pktmbuf_mtod(mbufs[t], void *)); t++; @@ -2585,29 +2647,34 @@ static struct dpvs_lcore_job_array netif_jobs[NETIF_JOB_MAX] = { static void netif_lcore_init(void) { - int i, res; + int i, err; lcoreid_t cid; + char buf1[1024], buf2[1024]; timer_sched_interval_us = dpvs_timer_sched_interval_get(); + buf1[0] = buf2[0] = '\0'; for (cid = 0; cid < DPVS_MAX_LCORE; cid++) { if (rte_lcore_is_enabled(cid)) - RTE_LOG(INFO, NETIF, "%s: lcore%d is enabled\n", __func__, cid); + snprintf(&buf1[strlen(buf1)], sizeof(buf1)-strlen(buf1), "%4d", cid); else - RTE_LOG(INFO, NETIF, "%s: lcore%d is disabled\n", __func__, cid); + snprintf(&buf2[strlen(buf2)], sizeof(buf2)-strlen(buf2), "%4d", cid); } - - /* build lcore fast searching table */ - lcore_index_init(); + RTE_LOG(INFO, NETIF, "LCORE STATUS\n\tenabled: %s\n\tdisabled: %s\n", buf1, buf2); /* init isolate rxqueue table */ isol_rxq_init(); /* check and set lcore config */ config_lcores(&worker_list); - if ((res = check_lcore_conf(rte_lcore_count(), lcore_conf)) != EDPVS_OK) - rte_exit(EXIT_FAILURE, "[%s] bad lcore configuration (err=%d)," - " exit ...\n", __func__, res); + if ((err = check_lcore_conf(rte_lcore_count(), lcore_conf)) != EDPVS_OK) + rte_exit(EXIT_FAILURE, "%s: bad lcore configuration (error code: %d)," + " exit ...\n", __func__, err); + + /* build lcore fast searching table */ + if ((err = lcore_index_init()) != EDPVS_OK) + rte_exit(EXIT_FAILURE, "%s: lcore_index_init failed (cause: %s), exit ...\n", + __func__, dpvs_strerror(err)); /* build port fast searching table */ port_index_init(); @@ -2627,9 +2694,9 @@ static void netif_lcore_init(void) } for (i = 0; i < NELEMS(netif_jobs); i++) { - res = dpvs_lcore_job_register(&netif_jobs[i].job, netif_jobs[i].role); - if (res < 0) { - rte_exit(EXIT_FAILURE, "%s: fail to register lcore job '%s', exiting ...\n", + err = dpvs_lcore_job_register(&netif_jobs[i].job, netif_jobs[i].role); + if (err < 0) { + rte_exit(EXIT_FAILURE, "%s: fail 
to register lcore job '%s', exit ...\n", __func__, netif_jobs[i].job.name); break; } @@ -2742,7 +2809,7 @@ static void kni_ingress_process(void) { struct rte_mbuf *mbufs[NETIF_MAX_PKT_BURST]; struct netif_port *dev; - uint16_t i, nb_rb, pkt_num; + uint16_t i, pkt_total, pkt_sent; portid_t id; lcoreid_t cid = rte_lcore_id(); @@ -2751,25 +2818,152 @@ static void kni_ingress_process(void) if (!dev || !kni_dev_exist(dev)) continue; - nb_rb = rte_ring_dequeue_burst(dev->kni.rx_ring, (void**)mbufs, + pkt_total = rte_ring_dequeue_burst(dev->kni.rx_ring, (void**)mbufs, NETIF_MAX_PKT_BURST, NULL); - if (nb_rb == 0) + if (pkt_total == 0) continue; - lcore_stats[cid].ipackets += nb_rb; - for (i = 0; i < nb_rb; i++) + lcore_stats[cid].ipackets += pkt_total; + for (i = 0; i < pkt_total; i++) lcore_stats[cid].ibytes += mbufs[i]->pkt_len; - pkt_num = rte_kni_tx_burst(dev->kni.kni, mbufs, nb_rb); + pkt_sent = rte_kni_tx_burst(dev->kni.kni, mbufs, pkt_total); - if (unlikely(pkt_num < nb_rb)) { + if (unlikely(pkt_sent < pkt_total)) { #ifdef CONFIG_DPVS_NETIF_DEBUG - RTE_LOG(INFO, NETIF, - "%s: fail to send to kni inteface %d pkts from kni rx_ring\n", - __func__, nb_rb - pkt_num); + RTE_LOG(INFO, NETIF, "%s: sent %d packets to kni %s, loss %.2f%%\n", + __func__, pkt_total, dev->kni.name, + (pkt_total-pkt_sent)*100.0/pkt_total); #endif - free_mbufs(&(mbufs[pkt_num]), nb_rb - pkt_num); - lcore_stats[cid].dropped += (nb_rb - pkt_num); + free_mbufs(&(mbufs[pkt_sent]), pkt_total - pkt_sent); + lcore_stats[cid].dropped += (pkt_total - pkt_sent); + } + pkt_total = 0; + } +} + +/* + * note: kni_ingress_flow_xmit_vlan_trunk supports both vlan trunk and vlan access + */ +static inline void kni_ingress_flow_xmit_vlan_trunk(struct netif_port *dev, + lcoreid_t cid, struct netif_queue_conf *qconf) +{ + int i, left, right, len; + unsigned pkt_total, pkt_sent; + struct rte_ether_hdr *eh; + struct rte_mbuf *mbuf; + struct netif_port *rdev; + + for (i = 0; i < qconf->len; i++) { + // the received 
packets may from multiple vlans, + // we have to process them one by one + mbuf = qconf->mbufs[i]; + eh = rte_pktmbuf_mtod(mbuf, struct rte_ether_hdr *); + if (eh->ether_type == htons(ETH_P_8021Q) || + mbuf->ol_flags & PKT_RX_VLAN_STRIPPED) { + vlan_rcv(mbuf, dev); + } else if (dev->type == PORT_TYPE_BOND_MASTER) { + mbuf->port = dev->id; + } + /* + * The performance is astonishingly poor (as observed only about 3K pps, + * dozens multiple less than normal) if xmit packets to kni one at a time! + * + if (rte_kni_tx_burst(dev->kni.kni, qconf->mbufs, 1) != 1) { + rte_pktmbuf_free(qconf->mbufs[i]); + lcore_stats[cid].dropped++; + } + */ + } + + // reorder mbufs and xmit to kni in batch for each vlan + len = qconf->len; + while (len > 0) { + left = 0; + right = len - 1; + mbuf = qconf->mbufs[right]; + while (left < right) { + while (left < right && qconf->mbufs[left]->port != mbuf->port) + left++; + qconf->mbufs[right] = qconf->mbufs[left]; + while (left < right && qconf->mbufs[right]->port == mbuf->port) + right--; + qconf->mbufs[left] = qconf->mbufs[right]; + } + qconf->mbufs[right] = mbuf; + + rdev = netif_port_get(mbuf->port); + if (unlikely(!rdev || !kni_dev_exist(rdev))) + rdev = dev; + pkt_total = len - right; + //pkt_sent = rte_kni_tx_burst(rdev->kni.kni, &qconf->mbufs[right], pkt_total); + pkt_sent = rte_ring_enqueue_bulk(rdev->kni.rx_ring, + (void *const *)&qconf->mbufs[right], pkt_total, NULL); + if (unlikely(pkt_sent < pkt_total)) { +#ifdef CONFIG_DPVS_NETIF_DEBUG + RTE_LOG(INFO, NETIF, "%s: enqueue %d packets to kni %s, loss %.2f%%\n", + __func__, pkt_total, rdev->kni.name, + (pkt_total-pkt_sent)*100.0/pkt_total); +#endif + free_mbufs(&(qconf->mbufs[right+pkt_sent]), pkt_total - pkt_sent); + lcore_stats[cid].dropped += pkt_total - pkt_sent; + } + len = right; + } + + qconf->len = 0; +} + +static inline void kni_ingress_flow_xmit_vlan_access(struct netif_port *dev, + lcoreid_t cid, struct netif_queue_conf *qconf) +{ + unsigned pkt_sent; + + // pkt_sent = 
rte_kni_tx_burst(dev->kni.kni, qconf->mbufs, qconf->len); + pkt_sent = rte_ring_enqueue_bulk(dev->kni.rx_ring, (void *const *)qconf->mbufs, qconf->len, NULL); + + if (unlikely(pkt_sent < qconf->len)) { +#ifdef CONFIG_DPVS_NETIF_DEBUG + RTE_LOG(INFO, NETIF, "%s: enqueue %d packets to rx_ring of kni %s, loss %.2f%%\n", + __func__, qconf->len, dev->kni.name, + (qconf->len-pkt_sent)*100.0/qconf->len); +#endif + free_mbufs(&(qconf->mbufs[pkt_sent]), qconf->len - pkt_sent); + lcore_stats[cid].dropped += qconf->len - pkt_sent; + } + + qconf->len = 0; +} + +/* + * Receive packets matched kni ip addresses with rte_flow from KNI worker + */ +static void kni_ingress_flow_process(void) +{ + int i, j; + portid_t pid; + lcoreid_t cid; + struct netif_port *dev; + struct netif_queue_conf *qconf; + + cid = rte_lcore_id(); // kni worker + assert(LCORE_ID_ANY != cid); + + for (i = 0; i < lcore_conf[lcore2index[cid]].nports; i++) { + pid = lcore_conf[lcore2index[cid]].pqs[i].id; + assert(pid <= bond_pid_end); + dev = netif_port_get(pid); + if (!dev || !kni_dev_exist(dev)) + continue; + for (j = 0; j < lcore_conf[lcore2index[cid]].pqs[i].nrxq; j++) { + qconf = &lcore_conf[lcore2index[cid]].pqs[i].rxqs[j]; + qconf->len = netif_rx_burst(pid, qconf); + lcore_stats_burst(&lcore_stats[cid], qconf->len); + if (!qconf->len) + continue; + if (dev->vlan_info) + kni_ingress_flow_xmit_vlan_trunk(dev, cid, qconf); + else + kni_ingress_flow_xmit_vlan_access(dev, cid, qconf); } - nb_rb = 0; } } @@ -2778,16 +2972,128 @@ static void kni_ingress_process(void) */ void kni_lcore_loop(void *dummy) { - kni_ingress_process(); - kni_egress_process(); - /* This is a lazy solution. - * `lcore_job_xmit` can be scheduled with an independent job on kni worker instead. */ - if (g_kni_lcore_id != 0) + * It's better to schedule the tasks with an independent job on kni worker instead. 
*/ + if (g_kni_lcore_id != 0) { + kni_ingress_flow_process(); lcore_job_xmit(NULL); + } + + kni_ingress_process(); + kni_egress_process(); } /********************************************* port *************************************************/ + +static void netif_dump_rss_reta(struct netif_port *port) +{ + int i, len, pos; + uint32_t reta_id, reta_pos; + char buf[ETH_RSS_RETA_SIZE_512 * 8]; + struct rte_eth_rss_reta_entry64 reta_info[RETA_CONF_SIZE]; + + if (port->type != PORT_TYPE_GENERAL && port->type != PORT_TYPE_BOND_SLAVE) + return; + + if (unlikely(port->dev_info.reta_size == 0)) + if (unlikely(rte_eth_dev_info_get(port->id, &port->dev_info))) + return; + + memset(reta_info, 0, sizeof(reta_info)); + for (i = 0; i < port->dev_info.reta_size; i++) + reta_info[i / RTE_RETA_GROUP_SIZE].mask = UINT64_MAX; + + if (unlikely(rte_eth_dev_rss_reta_query(port->id, reta_info, + port->dev_info.reta_size))) + return; + + buf[0] = '\0'; + len = pos = 0; + for (i = 0; i < port->dev_info.reta_size; i++) { + reta_id = i / RTE_RETA_GROUP_SIZE; + reta_pos = i % RTE_RETA_GROUP_SIZE; + if (i % 8 == 0) { + len = snprintf(&buf[pos], sizeof(buf) - pos, "\n%4d: ", i); + if (len >= sizeof(buf) - pos) { + snprintf(&buf[sizeof(buf)-16], 16, "%s", "(truncated)"); + break; + } + pos += len; + } + len = snprintf(&buf[pos], sizeof(buf)-pos, "%-4d", reta_info[reta_id].reta[reta_pos]); + if (len >= sizeof(buf) - pos) { + snprintf(&buf[sizeof(buf)-16], 16, "%s", "(truncated)"); + break; + } + pos += len; + } + + RTE_LOG(INFO, NETIF, "RSS RETA(%s):%s\n", port->name, buf); +} + +static int __netif_update_rss_reta(struct netif_port *port) +{ + int i, err; + int nrssq = NETIF_MAX_QUEUES; + queueid_t rssq[NETIF_MAX_QUEUES]; + uint32_t reta_id, reta_pos; + struct rte_eth_rss_reta_entry64 reta_conf[RETA_CONF_SIZE]; + + if (port->type != PORT_TYPE_GENERAL && port->type != PORT_TYPE_BOND_SLAVE) + return EDPVS_NOTSUPP; + + if (port->type == PORT_TYPE_BOND_SLAVE) + err = 
get_configured_rss_queues(port->bond->slave.master->id, rssq, &nrssq); + else + err = get_configured_rss_queues(port->id, rssq, &nrssq); + if (err != EDPVS_OK) + return err; +#ifdef CONFIG_DPVS_NETIF_DEBUG + printf("RSS QUEUES(%s): ", port->name); + for (i = 0; i < nrssq; i++) { + printf("%-4d", rssq[i]); + } + printf("\n"); +#endif + + memset(reta_conf, 0, sizeof(reta_conf)); + for (i = 0; i < port->dev_info.reta_size; i++) { + reta_id = i / RTE_RETA_GROUP_SIZE; + reta_pos = i % RTE_RETA_GROUP_SIZE; + reta_conf[reta_id].mask = UINT64_MAX; + reta_conf[reta_id].reta[reta_pos] = (uint16_t)(rssq[i % nrssq]); + } + + if (rte_eth_dev_rss_reta_update(port->id, reta_conf, port->dev_info.reta_size)) + return EDPVS_DPDKAPIFAIL; + + netif_dump_rss_reta(port); + return EDPVS_OK; +} + +static int netif_update_rss_reta(struct netif_port *port) +{ + switch (port->type) { + case PORT_TYPE_GENERAL: + return __netif_update_rss_reta(port); + case PORT_TYPE_BOND_MASTER: + { + // notes: + // rss reta of bonding slaves must be configured after bonding devices bootup, + // or it would be reset when bonding device bootup. 
+ int i, err; + for (i = 0; i < port->bond->master.slave_nb; i++) { + err = __netif_update_rss_reta(port->bond->master.slaves[i]); + if (err != EDPVS_OK) + return err; + } + return EDPVS_OK; + } + default: + return EDPVS_OK; + } +} + static inline int port_tab_hashkey(portid_t id) { return id & NETIF_PORT_TABLE_MASK; @@ -3376,14 +3682,14 @@ static void fill_port_config(struct netif_port *port, char *promisc_on) port->mtu = cfg_stream->mtu; if (cfg_stream->rx_queue_nb > 0 && port->nrxq > cfg_stream->rx_queue_nb) { - RTE_LOG(WARNING, NETIF, "%s: rx-queues(%d) configured in workers != " - "rx-queues(%d) configured in device, setup %d rx-queues for %s\n", + RTE_LOG(WARNING, NETIF, "%s: rx-queues configured in workers (%d) != " + "rx-queues configured in device (%d), setup %d rx-queues for %s\n", port->name, port->nrxq, cfg_stream->rx_queue_nb, port->nrxq, port->name); } if (cfg_stream->tx_queue_nb > 0 && port->ntxq > cfg_stream->tx_queue_nb) { - RTE_LOG(WARNING, NETIF, "%s: tx-queues(%d) configured in workers != " - "tx-queues(%d) configured in device, setup %d tx-queues for %s\n", + RTE_LOG(WARNING, NETIF, "%s: tx-queues configured in workers (%d) != " + "tx-queues configured in device (%d), setup %d tx-queues for %s\n", port->name, port->ntxq, cfg_stream->tx_queue_nb, port->ntxq, port->name); } @@ -3643,6 +3949,11 @@ int netif_port_start(struct netif_port *port) } } + /* update rss reta */ + if ((ret = netif_update_rss_reta(port)) != EDPVS_OK) + RTE_LOG(WARNING, NETIF, "%s: %s update rss reta failed (cause: %s)\n", + __func__, port->name, dpvs_strerror(ret)); + return EDPVS_OK; } @@ -3904,7 +4215,7 @@ static char *find_conf_kni_name(portid_t id) } /* Allocate and register all DPDK ports available */ -inline static void netif_port_init(void) +static void netif_port_init(void) { int nports, nports_cfg; portid_t pid; @@ -3982,7 +4293,7 @@ static int obtain_dpdk_bond_name(char *dst, const char *ori, size_t size) * DPDK need bonding device name start with "net_bonding" * 
to match the driver. */ - snprintf(dst, size, "net_bonding%u\n", num); + snprintf(dst, size, "net_bonding%u", num); return EDPVS_OK; } @@ -4095,8 +4406,8 @@ int netif_init(void) int netif_term(void) { - netif_cfgfile_term(); netif_lcore_cleanup(); + netif_cfgfile_term(); return EDPVS_OK; } @@ -5030,10 +5341,13 @@ int netif_ctrl_init(void) { int err; + if ((err = lcore_stats_msg_init()) != EDPVS_OK) + return err; + if ((err = sockopt_register(&netif_sockopt)) != EDPVS_OK) return err; - if ((err = lcore_stats_msg_init()) != EDPVS_OK) + if ((err = kni_ctrl_init()) != EDPVS_OK) return err; return EDPVS_OK; @@ -5043,6 +5357,9 @@ int netif_ctrl_term(void) { int err; + if ((err = kni_ctrl_term()) != EDPVS_OK) + return err; + if ((err = sockopt_unregister(&netif_sockopt)) != EDPVS_OK) return err; diff --git a/src/netif_flow.c b/src/netif_flow.c index ed4da0874..0d00da98c 100644 --- a/src/netif_flow.c +++ b/src/netif_flow.c @@ -29,6 +29,10 @@ #define SAPOOL_PATTERN_NUM 4 /* sapool action stack: QUEUE | END */ #define SAPOOL_ACTION_NUM 2 +/* kni flow pattern stack: ETH | IP | END */ +#define KNI_PATTERN_NUM 3 +/* kni flow action stack: QUEUE | END */ +#define KNI_ACTION_NUM 2 /* dpvs use only one flow group */ #define NETIF_FLOW_GROUP 0 @@ -37,6 +41,7 @@ * The enum value matters. Lower value denotes higher priority. */ typedef enum { NETIF_FLOW_PRIO_SAPOOL = 1, // sapool flow rules + NETIF_FLOW_PRIO_KNI = 2, // kni ip address flow rules NETIF_FLOW_PRIO_TUNNEL, // TODO, gre tunnel flow rules // more ... 
} netif_flow_type_prio_t; @@ -200,7 +205,7 @@ static inline int __netif_flow_flush(struct netif_port *dev) return EDPVS_INVAL; if (rte_flow_flush(dev->id, &flow_error)) { - RTE_LOG(WARNING, FLOW, "rte_flow_flush on %s failed -- %d, %s, %s\n", + RTE_LOG(WARNING, FLOW, "rte_flow_flush on %s failed -- %d, %p, %s\n", dev->name, flow_error.type, flow_error.cause, flow_error.message); return EDPVS_DPDKAPIFAIL; } @@ -407,3 +412,118 @@ int netif_sapool_flow_del(struct netif_port *dev, lcoreid_t cid, return ret; } + +/* + * Set kni flow rules. + * + * Ether | IPv4/IPv6 | END + */ +int netif_kni_flow_add(struct netif_port *dev, lcoreid_t cid, + int af, const union inet_addr *addr, + netif_flow_handler_param_t *flows) +{ + int err; + char ipbuf[64]; + struct rte_flow_attr attr = { + .group = NETIF_FLOW_GROUP, + .priority = NETIF_FLOW_PRIO_KNI, + .ingress = 1, + .egress = 0, + //.transfer = 0, + }; + struct rte_flow_item pattern[KNI_PATTERN_NUM]; + struct rte_flow_action action[KNI_ACTION_NUM]; + netif_flow_handler_param_t resp; + + struct rte_flow_item_ipv4 ip_spec, ip_mask; + struct rte_flow_item_ipv6 ip6_spec, ip6_mask; + + queueid_t queue_id; + struct rte_flow_action_queue queue; + + if (unlikely(!dev || !addr || !flows)) + return EDPVS_INVAL; + if (unlikely(flows->size < 2 || !flows->handlers)) + return EDPVS_INVAL; + + memset(pattern, 0, sizeof(pattern)); + memset(action, 0, sizeof(action)); + + /* create action stack */ + err = netif_get_queue(dev, cid, &queue_id); + if (unlikely(err != EDPVS_OK)) + return err; + queue.index = queue_id; + action[0].type = RTE_FLOW_ACTION_TYPE_QUEUE; + action[0].conf = &queue; + action[1].type = RTE_FLOW_ACTION_TYPE_END; + + /* create pattern stack */ + pattern[0].type = RTE_FLOW_ITEM_TYPE_ETH; + + if (af == AF_INET) { + memset(&ip_spec, 0, sizeof(struct rte_flow_item_ipv4)); + memset(&ip_mask, 0, sizeof(struct rte_flow_item_ipv4)); + ip_spec.hdr.dst_addr = addr->in.s_addr; + ip_mask.hdr.dst_addr = htonl(0xffffffff); + pattern[1].type = 
RTE_FLOW_ITEM_TYPE_IPV4; + pattern[1].spec = &ip_spec; + pattern[1].mask = &ip_mask; + } else if (af == AF_INET6) { + memset(&ip6_spec, 0, sizeof(struct rte_flow_item_ipv6)); + memset(&ip6_mask, 0, sizeof(struct rte_flow_item_ipv6)); + memcpy(&ip6_spec.hdr.dst_addr, &addr->in6, sizeof(ip6_spec.hdr.dst_addr)); + memset(&ip6_mask.hdr.dst_addr, 0xff, sizeof(ip6_mask.hdr.dst_addr)); + pattern[1].type = RTE_FLOW_ITEM_TYPE_IPV6; + pattern[1].spec = &ip6_spec; + pattern[1].mask = &ip6_mask; + } else { + return EDPVS_INVAL; + } + pattern[2].type = RTE_FLOW_ITEM_TYPE_END; + + /* set kni flow */ + resp.size = flows->size; + resp.flow_num = 0; + resp.handlers = &flows->handlers[0]; + err = netif_flow_create(dev, &attr, pattern, action, &resp); + if (err) { + RTE_LOG(ERR, FLOW, "%s: adding kni flow failed: %s ip %s queue %d lcore %2d" + " (cause: %s)\n", __func__, dev->name, inet_ntop(af, addr, ipbuf, + sizeof(ipbuf)) ? : "::", queue_id, cid, dpvs_strerror(err)); + return EDPVS_RESOURCE; + } + + flows->flow_num = resp.flow_num; + RTE_LOG(INFO, FLOW, "%s: adding kni flow succeed: %s ip %s queue %d lcore %2d\n", + __func__, dev->name, inet_ntop(af, addr, ipbuf, sizeof(ipbuf)) ? : "::", + queue_id, cid); + + return EDPVS_OK; +} + +/* + * Delete kni flow rules. + * + * Ether | IPv4/IPv6 | END + */ +int netif_kni_flow_del(struct netif_port *dev, lcoreid_t cid, + int af, const union inet_addr *addr, + netif_flow_handler_param_t *flows) +{ + int err; + char ipbuf[64]; + + err = netif_flow_destroy(flows); + if (err != EDPVS_OK) { + RTE_LOG(ERR, FLOW, "%s: deleting kni flow failed: %s ip %s (cause: %s)\n", + __func__, dev->name, inet_ntop(af, addr, ipbuf, sizeof(ipbuf)) + ? : "::", dpvs_strerror(err)); + return EDPVS_RESOURCE; + } + + flows->flow_num = 0; + RTE_LOG(INFO, FLOW, "%s: deleting kni flow succeed: %s ip %s\n", __func__, + dev->name, inet_ntop(af, addr, ipbuf, sizeof(ipbuf)) ? 
: "::"); + return EDPVS_OK; +} diff --git a/src/tc/cls_ipset.c b/src/tc/cls_ipset.c new file mode 100644 index 000000000..7f49d0669 --- /dev/null +++ b/src/tc/cls_ipset.c @@ -0,0 +1,119 @@ +/* + * DPVS is a software load balancer (Virtual Server) based on DPDK. + * + * Copyright (C) 2021 iQIYI (www.iqiyi.com). + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +/* + * "ipset" classifier for traffic control module. + */ + +#include "conf/tc.h" +#include "tc/cls.h" +#include "ipset/ipset.h" + +struct ipset_cls_priv { + struct tc_cls *cls; + struct ipset *set; + bool dst_match; + + struct tc_cls_result result; +}; + +static inline int pkttype2family(uint16_t pkt_type) +{ + switch (pkt_type) { + case ETH_P_IP: + return AF_INET; + case ETH_P_IPV6: + return AF_INET6; + } + return AF_UNSPEC; +} + +static int cls_ipset_classify(struct tc_cls *cls, + struct rte_mbuf *mbuf, struct tc_cls_result *result) +{ + struct ipset_cls_priv *priv = tc_cls_priv(cls); + + if (pkttype2family(ntohs(cls->pkt_type)) != priv->set->family) + return TC_ACT_RECLASSIFY; + + if (elem_in_set(priv->set, mbuf, priv->dst_match)) { + // matched + *result = priv->result; + return TC_ACT_OK; + } + + // missed + return TC_ACT_RECLASSIFY; +} + +static int cls_ipset_init(struct tc_cls *cls, const void *arg) +{ + struct ipset_cls_priv *priv = tc_cls_priv(cls); + const struct tc_cls_ipset_copt *copt = arg; + + if (!arg) + return EDPVS_INVAL; + + priv->cls = cls; + priv->dst_match = copt->dst_match; + priv->set = 
ipset_get(copt->setname); + if (unlikely(!priv->set)) + return EDPVS_NOTEXIST; + + if (copt->result.drop) { + priv->result.drop = copt->result.drop; + } else { + /* 0: (TC_H_UNSPEC) is not valid target */ + if (copt->result.sch_id != TC_H_UNSPEC) { + priv->result.sch_id = copt->result.sch_id; + priv->result.drop = false; /* exclusive with sch_id */ + } + } + + return EDPVS_OK; +} + +static void cls_ipset_destroy(struct tc_cls *cls) +{ + struct ipset_cls_priv *priv = tc_cls_priv(cls); + + if (likely(priv->set != NULL)) + ipset_put(priv->set); +} + +static int cls_ipset_dump(struct tc_cls *cls, void *arg) +{ + struct ipset_cls_priv *priv = tc_cls_priv(cls); + struct tc_cls_ipset_copt *copt = arg; + + strncpy(copt->setname, priv->set->name, sizeof(copt->setname)); + copt->dst_match = priv->dst_match; + copt->result = priv->result; + + return EDPVS_OK; +} + +struct tc_cls_ops ipset_cls_ops = { + .name = "ipset", + .priv_size = sizeof(struct ipset_cls_priv), + .classify = cls_ipset_classify, + .init = cls_ipset_init, + .change = cls_ipset_init, + .destroy = cls_ipset_destroy, + .dump = cls_ipset_dump, +}; diff --git a/src/tc/cls_match.c b/src/tc/cls_match.c index 267f063f9..ab7ec357d 100644 --- a/src/tc/cls_match.c +++ b/src/tc/cls_match.c @@ -231,7 +231,7 @@ static int match_init(struct tc_cls *cls, const void *arg) const struct tc_cls_match_copt *copt = arg; if (!arg) - return EDPVS_OK; + return EDPVS_INVAL; if (copt->proto) priv->proto = copt->proto; diff --git a/src/tc/sch_tbf.c b/src/tc/sch_tbf.c index 4209790ea..ba6ea8f97 100644 --- a/src/tc/sch_tbf.c +++ b/src/tc/sch_tbf.c @@ -268,7 +268,7 @@ static int tbf_dump(struct Qsch *sch, void *arg) priv = qsch_priv(sch); - memset(qopt, 0, sizeof(&qopt)); + memset(qopt, 0, sizeof(*qopt)); qopt->rate.rate = priv->rate.rate_bytes_ps * 8; qopt->peakrate.rate = priv->peak.rate_bytes_ps * 8; qopt->limit = priv->limit; diff --git a/src/tc/tc.c b/src/tc/tc.c index 3fd61e0bb..8310e33ee 100644 --- a/src/tc/tc.c +++ b/src/tc/tc.c 
@@ -36,6 +36,7 @@ extern struct Qsch_ops bfifo_sch_ops; extern struct Qsch_ops pfifo_fast_ops; extern struct Qsch_ops tbf_sch_ops; extern struct tc_cls_ops match_cls_ops; +extern struct tc_cls_ops ipset_cls_ops; static struct list_head qsch_ops_base; static struct list_head cls_ops_base; @@ -333,6 +334,7 @@ int tc_init(void) /* classifier */ INIT_LIST_HEAD(&cls_ops_base); tc_register_cls(&match_cls_ops); + tc_register_cls(&ipset_cls_ops); /* per-NUMA socket mempools for queued tc_mbuf{} */ for (s = 0; s < get_numa_nodes(); s++) { @@ -363,6 +365,7 @@ int tc_term(void) tc_unregister_qsch(&tbf_sch_ops); tc_unregister_cls(&match_cls_ops); + tc_unregister_cls(&ipset_cls_ops); for (s = 0; s < get_numa_nodes(); s++) { if (tc_mbuf_pools[s]) { diff --git a/src/timer.c b/src/timer.c index 26d744f93..067c7d129 100644 --- a/src/timer.c +++ b/src/timer.c @@ -283,7 +283,8 @@ static inline void deviation_measure(void) static void rte_timer_tick_cb(struct rte_timer *tim, void *arg) { struct timer_scheduler *sched = arg; - struct dpvs_timer *timer, *next; + struct dpvs_timer *timer; + struct list_head *head; uint64_t left, hash, off, remainder; int level, lower; uint32_t *cursor; @@ -309,8 +310,9 @@ static void rte_timer_tick_cb(struct rte_timer *tim, void *arg) carry = true; } - list_for_each_entry_safe(timer, next, - &sched->hashs[level][*cursor], list) { + head = &sched->hashs[level][*cursor]; + while (!list_empty(head)) { + timer = list_first_entry(head, struct dpvs_timer, list); /* is all lower levels ticks empty ? 
*/ left = timer->delay % get_level_ticks(level); if (!left) { diff --git a/src/vlan.c b/src/vlan.c index 46b44679c..65c27d884 100644 --- a/src/vlan.c +++ b/src/vlan.c @@ -371,6 +371,7 @@ static inline int vlan_untag_mbuf(struct rte_mbuf *mbuf) /* strip the vlan header */ memmove((void *)vehdr + VLAN_HLEN, vehdr, 2 * ETH_ALEN); rte_pktmbuf_adj(mbuf, VLAN_HLEN); + mbuf->l2_len = sizeof(*vehdr) - VLAN_HLEN; return EDPVS_OK; } diff --git a/test/flameGraph/README.md b/test/flameGraph/README.md new file mode 100644 index 000000000..6b2d7aab5 --- /dev/null +++ b/test/flameGraph/README.md @@ -0,0 +1,4 @@ +Notes: +1. Use the Linux `perf` tool to generate input data for `run.sh`, for example: `perf record -e cpu-clock -g -C 1`. +2. The flame scripts are from the open source project: [FlameGraph](https://github.com/brendangregg/FlameGraph). + diff --git a/test/flameGraph/flamegraph.pl b/test/flameGraph/flamegraph.pl new file mode 100755 index 000000000..9b9898e52 --- /dev/null +++ b/test/flameGraph/flamegraph.pl @@ -0,0 +1,1243 @@ +#!/usr/bin/perl -w +# +# flamegraph.pl flame stack grapher. +# +# This takes stack samples and renders a call graph, allowing hot functions +# and codepaths to be quickly identified. Stack samples can be generated using +# tools such as DTrace, perf, SystemTap, and Instruments. +# +# USAGE: ./flamegraph.pl [options] input.txt > graph.svg +# +# grep funcA input.txt | ./flamegraph.pl [options] > graph.svg +# +# Then open the resulting .svg in a web browser, for interactivity: mouse-over +# frames for info, click to zoom, and ctrl-F to search. +# +# Options are listed in the usage message (--help). +# +# The input is stack frames and sample counts formatted as single lines. Each +# frame in the stack is semicolon separated, with a space and count at the end +# of the line. These can be generated for Linux perf script output using +# stackcollapse-perf.pl, for DTrace using stackcollapse.pl, and for other tools +# using the other stackcollapse programs. 
Example input: +# +# swapper;start_kernel;rest_init;cpu_idle;default_idle;native_safe_halt 1 +# +# An optional extra column of counts can be provided to generate a differential +# flame graph of the counts, colored red for more, and blue for less. This +# can be useful when using flame graphs for non-regression testing. +# See the header comment in the difffolded.pl program for instructions. +# +# The input functions can optionally have annotations at the end of each +# function name, following a precedent by some tools (Linux perf's _[k]): +# _[k] for kernel +# _[i] for inlined +# _[j] for jit +# _[w] for waker +# Some of the stackcollapse programs support adding these annotations, eg, +# stackcollapse-perf.pl --kernel --jit. They are used merely for colors by +# some palettes, eg, flamegraph.pl --color=java. +# +# The output flame graph shows relative presence of functions in stack samples. +# The ordering on the x-axis has no meaning; since the data is samples, time +# order of events is not known. The order used sorts function names +# alphabetically. +# +# While intended to process stack samples, this can also process stack traces. +# For example, tracing stacks for memory allocation, or resource usage. You +# can use --title to set the title to reflect the content, and --countname +# to change "samples" to "bytes" etc. +# +# There are a few different palettes, selectable using --color. By default, +# the colors are selected at random (except for differentials). Functions +# called "-" will be printed gray, which can be used for stack separators (eg, +# between user and kernel stacks). +# +# HISTORY +# +# This was inspired by Neelakanth Nadgir's excellent function_call_graph.rb +# program, which visualized function entry and return trace events. As Neel +# wrote: "The output displayed is inspired by Roch's CallStackAnalyzer which +# was in turn inspired by the work on vftrace by Jan Boerhout". 
See: +# https://blogs.oracle.com/realneel/entry/visualizing_callstacks_via_dtrace_and +# +# Copyright 2016 Netflix, Inc. +# Copyright 2011 Joyent, Inc. All rights reserved. +# Copyright 2011 Brendan Gregg. All rights reserved. +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at docs/cddl1.txt or +# http://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at docs/cddl1.txt. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# 11-Oct-2014 Adrien Mahieux Added zoom. +# 21-Nov-2013 Shawn Sterling Added consistent palette file option +# 17-Mar-2013 Tim Bunce Added options and more tunables. +# 15-Dec-2011 Dave Pacheco Support for frames with whitespace. +# 10-Sep-2011 Brendan Gregg Created this. + +use strict; + +use Getopt::Long; + +use open qw(:std :utf8); + +# tunables +my $encoding; +my $fonttype = "Verdana"; +my $imagewidth = 1200; # max width, pixels +my $frameheight = 16; # max height is dynamic +my $fontsize = 12; # base text size +my $fontwidth = 0.59; # avg width relative to fontsize +my $minwidth = 0.1; # min function width, pixels +my $nametype = "Function:"; # what are the names in the data? +my $countname = "samples"; # what are the counts in the data? 
+my $colors = "hot"; # color theme +my $bgcolors = ""; # background color theme +my $nameattrfile; # file holding function attributes +my $timemax; # (override the) sum of the counts +my $factor = 1; # factor to scale counts by +my $hash = 0; # color by function name +my $palette = 0; # if we use consistent palettes (default off) +my %palette_map; # palette map hash +my $pal_file = "palette.map"; # palette map file name +my $stackreverse = 0; # reverse stack order, switching merge end +my $inverted = 0; # icicle graph +my $flamechart = 0; # produce a flame chart (sort by time, do not merge stacks) +my $negate = 0; # switch differential hues +my $titletext = ""; # centered heading +my $titledefault = "Flame Graph"; # overwritten by --title +my $titleinverted = "Icicle Graph"; # " " +my $searchcolor = "rgb(230,0,230)"; # color for search highlighting +my $notestext = ""; # embedded notes in SVG +my $subtitletext = ""; # second level title (optional) +my $help = 0; + +sub usage { + die < outfile.svg\n + --title TEXT # change title text + --subtitle TEXT # second level title (optional) + --width NUM # width of image (default 1200) + --height NUM # height of each frame (default 16) + --minwidth NUM # omit smaller functions (default 0.1 pixels) + --fonttype FONT # font type (default "Verdana") + --fontsize NUM # font size (default 12) + --countname TEXT # count type label (default "samples") + --nametype TEXT # name type label (default "Function:") + --colors PALETTE # set color palette. choices are: hot (default), mem, + # io, wakeup, chain, java, js, perl, red, green, blue, + # aqua, yellow, purple, orange + --bgcolors COLOR # set background colors. 
gradient choices are yellow + # (default), blue, green, grey; flat colors use "#rrggbb" + --hash # colors are keyed by function name hash + --cp # use consistent palette (palette.map) + --reverse # generate stack-reversed flame graph + --inverted # icicle graph + --flamechart # produce a flame chart (sort by time, do not merge stacks) + --negate # switch differential hues (blue<->red) + --notes TEXT # add notes comment in SVG (for debugging) + --help # this message + + eg, + $0 --title="Flame Graph: malloc()" trace.txt > graph.svg +USAGE_END +} + +GetOptions( + 'fonttype=s' => \$fonttype, + 'width=i' => \$imagewidth, + 'height=i' => \$frameheight, + 'encoding=s' => \$encoding, + 'fontsize=f' => \$fontsize, + 'fontwidth=f' => \$fontwidth, + 'minwidth=f' => \$minwidth, + 'title=s' => \$titletext, + 'subtitle=s' => \$subtitletext, + 'nametype=s' => \$nametype, + 'countname=s' => \$countname, + 'nameattr=s' => \$nameattrfile, + 'total=s' => \$timemax, + 'factor=f' => \$factor, + 'colors=s' => \$colors, + 'bgcolors=s' => \$bgcolors, + 'hash' => \$hash, + 'cp' => \$palette, + 'reverse' => \$stackreverse, + 'inverted' => \$inverted, + 'flamechart' => \$flamechart, + 'negate' => \$negate, + 'notes=s' => \$notestext, + 'help' => \$help, +) or usage(); +$help && usage(); + +# internals +my $ypad1 = $fontsize * 3; # pad top, include title +my $ypad2 = $fontsize * 2 + 10; # pad bottom, include labels +my $ypad3 = $fontsize * 2; # pad top, include subtitle (optional) +my $xpad = 10; # pad left and right +my $framepad = 1; # vertical padding for frames +my $depthmax = 0; +my %Events; +my %nameattr; + +if ($flamechart && $titletext eq "") { + $titletext = "Flame Chart"; +} + +if ($titletext eq "") { + unless ($inverted) { + $titletext = $titledefault; + } else { + $titletext = $titleinverted; + } +} + +if ($nameattrfile) { + # The name-attribute file format is a function name followed by a tab then + # a sequence of tab separated name=value pairs. 
+ open my $attrfh, $nameattrfile or die "Can't read $nameattrfile: $!\n"; + while (<$attrfh>) { + chomp; + my ($funcname, $attrstr) = split /\t/, $_, 2; + die "Invalid format in $nameattrfile" unless defined $attrstr; + $nameattr{$funcname} = { map { split /=/, $_, 2 } split /\t/, $attrstr }; + } +} + +if ($notestext =~ /[<>]/) { + die "Notes string can't contain < or >" +} + +# background colors: +# - yellow gradient: default (hot, java, js, perl) +# - green gradient: mem +# - blue gradient: io, wakeup, chain +# - gray gradient: flat colors (red, green, blue, ...) +if ($bgcolors eq "") { + # choose a default + if ($colors eq "mem") { + $bgcolors = "green"; + } elsif ($colors =~ /^(io|wakeup|chain)$/) { + $bgcolors = "blue"; + } elsif ($colors =~ /^(red|green|blue|aqua|yellow|purple|orange)$/) { + $bgcolors = "grey"; + } else { + $bgcolors = "yellow"; + } +} +my ($bgcolor1, $bgcolor2); +if ($bgcolors eq "yellow") { + $bgcolor1 = "#eeeeee"; # background color gradient start + $bgcolor2 = "#eeeeb0"; # background color gradient stop +} elsif ($bgcolors eq "blue") { + $bgcolor1 = "#eeeeee"; $bgcolor2 = "#e0e0ff"; +} elsif ($bgcolors eq "green") { + $bgcolor1 = "#eef2ee"; $bgcolor2 = "#e0ffe0"; +} elsif ($bgcolors eq "grey") { + $bgcolor1 = "#f8f8f8"; $bgcolor2 = "#e8e8e8"; +} elsif ($bgcolors =~ /^#......$/) { + $bgcolor1 = $bgcolor2 = $bgcolors; +} else { + die "Unrecognized bgcolor option \"$bgcolors\"" +} + +# SVG functions +{ package SVG; + sub new { + my $class = shift; + my $self = {}; + bless ($self, $class); + return $self; + } + + sub header { + my ($self, $w, $h) = @_; + my $enc_attr = ''; + if (defined $encoding) { + $enc_attr = qq{ encoding="$encoding"}; + } + $self->{svg} .= < + + + + +SVG + } + + sub include { + my ($self, $content) = @_; + $self->{svg} .= $content; + } + + sub colorAllocate { + my ($self, $r, $g, $b) = @_; + return "rgb($r,$g,$b)"; + } + + sub group_start { + my ($self, $attr) = @_; + + my @g_attr = map { + exists $attr->{$_} ? 
sprintf(qq/$_="%s"/, $attr->{$_}) : () + } qw(id class); + push @g_attr, $attr->{g_extra} if $attr->{g_extra}; + if ($attr->{href}) { + my @a_attr; + push @a_attr, sprintf qq/xlink:href="%s"/, $attr->{href} if $attr->{href}; + # default target=_top else links will open within SVG + push @a_attr, sprintf qq/target="%s"/, $attr->{target} || "_top"; + push @a_attr, $attr->{a_extra} if $attr->{a_extra}; + $self->{svg} .= sprintf qq/\n/, join(' ', (@a_attr, @g_attr)); + } else { + $self->{svg} .= sprintf qq/\n/, join(' ', @g_attr); + } + + $self->{svg} .= sprintf qq/%s<\/title>/, $attr->{title} + if $attr->{title}; # should be first element within g container + } + + sub group_end { + my ($self, $attr) = @_; + $self->{svg} .= $attr->{href} ? qq/<\/a>\n/ : qq/<\/g>\n/; + } + + sub filledRectangle { + my ($self, $x1, $y1, $x2, $y2, $fill, $extra) = @_; + $x1 = sprintf "%0.1f", $x1; + $x2 = sprintf "%0.1f", $x2; + my $w = sprintf "%0.1f", $x2 - $x1; + my $h = sprintf "%0.1f", $y2 - $y1; + $extra = defined $extra ? $extra : ""; + $self->{svg} .= qq/\n/; + } + + sub stringTTF { + my ($self, $id, $x, $y, $str, $extra) = @_; + $x = sprintf "%0.2f", $x; + $id = defined $id ? qq/id="$id"/ : ""; + $extra ||= ""; + $self->{svg} .= qq/$str<\/text>\n/; + } + + sub svg { + my $self = shift; + return "$self->{svg}\n"; + } + 1; +} + +sub namehash { + # Generate a vector hash for the name string, weighting early over + # later characters. We want to pick the same colors for function + # names across different flame graphs. 
+ my $name = shift; + my $vector = 0; + my $weight = 1; + my $max = 1; + my $mod = 10; + # if module name present, trunc to 1st char + $name =~ s/.(.*?)`//; + foreach my $c (split //, $name) { + my $i = (ord $c) % $mod; + $vector += ($i / ($mod++ - 1)) * $weight; + $max += 1 * $weight; + $weight *= 0.70; + last if $mod > 12; + } + return (1 - $vector / $max) +} + +sub color { + my ($type, $hash, $name) = @_; + my ($v1, $v2, $v3); + + if ($hash) { + $v1 = namehash($name); + $v2 = $v3 = namehash(scalar reverse $name); + } else { + $v1 = rand(1); + $v2 = rand(1); + $v3 = rand(1); + } + + # theme palettes + if (defined $type and $type eq "hot") { + my $r = 205 + int(50 * $v3); + my $g = 0 + int(230 * $v1); + my $b = 0 + int(55 * $v2); + return "rgb($r,$g,$b)"; + } + if (defined $type and $type eq "mem") { + my $r = 0; + my $g = 190 + int(50 * $v2); + my $b = 0 + int(210 * $v1); + return "rgb($r,$g,$b)"; + } + if (defined $type and $type eq "io") { + my $r = 80 + int(60 * $v1); + my $g = $r; + my $b = 190 + int(55 * $v2); + return "rgb($r,$g,$b)"; + } + + # multi palettes + if (defined $type and $type eq "java") { + # Handle both annotations (_[j], _[i], ...; which are + # accurate), as well as input that lacks any annotations, as + # best as possible. Without annotations, we get a little hacky + # and match on java|org|com, etc. 
+ if ($name =~ m:_\[j\]$:) { # jit annotation + $type = "green"; + } elsif ($name =~ m:_\[i\]$:) { # inline annotation + $type = "aqua"; + } elsif ($name =~ m:^L?(java|javax|jdk|net|org|com|io|sun)/:) { # Java + $type = "green"; + } elsif ($name =~ /:::/) { # Java, typical perf-map-agent method separator + $type = "green"; + } elsif ($name =~ /::/) { # C++ + $type = "yellow"; + } elsif ($name =~ m:_\[k\]$:) { # kernel annotation + $type = "orange"; + } elsif ($name =~ /::/) { # C++ + $type = "yellow"; + } else { # system + $type = "red"; + } + # fall-through to color palettes + } + if (defined $type and $type eq "perl") { + if ($name =~ /::/) { # C++ + $type = "yellow"; + } elsif ($name =~ m:Perl: or $name =~ m:\.pl:) { # Perl + $type = "green"; + } elsif ($name =~ m:_\[k\]$:) { # kernel + $type = "orange"; + } else { # system + $type = "red"; + } + # fall-through to color palettes + } + if (defined $type and $type eq "js") { + # Handle both annotations (_[j], _[i], ...; which are + # accurate), as well as input that lacks any annotations, as + # best as possible. Without annotations, we get a little hacky, + # and match on a "/" with a ".js", etc. 
+ if ($name =~ m:_\[j\]$:) { # jit annotation + if ($name =~ m:/:) { + $type = "green"; # source + } else { + $type = "aqua"; # builtin + } + } elsif ($name =~ /::/) { # C++ + $type = "yellow"; + } elsif ($name =~ m:/.*\.js:) { # JavaScript (match "/" in path) + $type = "green"; + } elsif ($name =~ m/:/) { # JavaScript (match ":" in builtin) + $type = "aqua"; + } elsif ($name =~ m/^ $/) { # Missing symbol + $type = "green"; + } elsif ($name =~ m:_\[k\]:) { # kernel + $type = "orange"; + } else { # system + $type = "red"; + } + # fall-through to color palettes + } + if (defined $type and $type eq "wakeup") { + $type = "aqua"; + # fall-through to color palettes + } + if (defined $type and $type eq "chain") { + if ($name =~ m:_\[w\]:) { # waker + $type = "aqua" + } else { # off-CPU + $type = "blue"; + } + # fall-through to color palettes + } + + # color palettes + if (defined $type and $type eq "red") { + my $r = 200 + int(55 * $v1); + my $x = 50 + int(80 * $v1); + return "rgb($r,$x,$x)"; + } + if (defined $type and $type eq "green") { + my $g = 200 + int(55 * $v1); + my $x = 50 + int(60 * $v1); + return "rgb($x,$g,$x)"; + } + if (defined $type and $type eq "blue") { + my $b = 205 + int(50 * $v1); + my $x = 80 + int(60 * $v1); + return "rgb($x,$x,$b)"; + } + if (defined $type and $type eq "yellow") { + my $x = 175 + int(55 * $v1); + my $b = 50 + int(20 * $v1); + return "rgb($x,$x,$b)"; + } + if (defined $type and $type eq "purple") { + my $x = 190 + int(65 * $v1); + my $g = 80 + int(60 * $v1); + return "rgb($x,$g,$x)"; + } + if (defined $type and $type eq "aqua") { + my $r = 50 + int(60 * $v1); + my $g = 165 + int(55 * $v1); + my $b = 165 + int(55 * $v1); + return "rgb($r,$g,$b)"; + } + if (defined $type and $type eq "orange") { + my $r = 190 + int(65 * $v1); + my $g = 90 + int(65 * $v1); + return "rgb($r,$g,0)"; + } + + return "rgb(0,0,0)"; +} + +sub color_scale { + my ($value, $max) = @_; + my ($r, $g, $b) = (255, 255, 255); + $value = -$value if $negate; + if 
($value > 0) { + $g = $b = int(210 * ($max - $value) / $max); + } elsif ($value < 0) { + $r = $g = int(210 * ($max + $value) / $max); + } + return "rgb($r,$g,$b)"; +} + +sub color_map { + my ($colors, $func) = @_; + if (exists $palette_map{$func}) { + return $palette_map{$func}; + } else { + $palette_map{$func} = color($colors, $hash, $func); + return $palette_map{$func}; + } +} + +sub write_palette { + open(FILE, ">$pal_file"); + foreach my $key (sort keys %palette_map) { + print FILE $key."->".$palette_map{$key}."\n"; + } + close(FILE); +} + +sub read_palette { + if (-e $pal_file) { + open(FILE, $pal_file) or die "can't open file $pal_file: $!"; + while ( my $line = ) { + chomp($line); + (my $key, my $value) = split("->",$line); + $palette_map{$key}=$value; + } + close(FILE) + } +} + +my %Node; # Hash of merged frame data +my %Tmp; + +# flow() merges two stacks, storing the merged frames and value data in %Node. +sub flow { + my ($last, $this, $v, $d) = @_; + + my $len_a = @$last - 1; + my $len_b = @$this - 1; + + my $i = 0; + my $len_same; + for (; $i <= $len_a; $i++) { + last if $i > $len_b; + last if $last->[$i] ne $this->[$i]; + } + $len_same = $i; + + for ($i = $len_a; $i >= $len_same; $i--) { + my $k = "$last->[$i];$i"; + # a unique ID is constructed from "func;depth;etime"; + # func-depth isn't unique, it may be repeated later. + $Node{"$k;$v"}->{stime} = delete $Tmp{$k}->{stime}; + if (defined $Tmp{$k}->{delta}) { + $Node{"$k;$v"}->{delta} = delete $Tmp{$k}->{delta}; + } + delete $Tmp{$k}; + } + + for ($i = $len_same; $i <= $len_b; $i++) { + my $k = "$this->[$i];$i"; + $Tmp{$k}->{stime} = $v; + if (defined $d) { + $Tmp{$k}->{delta} += $i == $len_b ? 
$d : 0; + } + } + + return $this; +} + +# parse input +my @Data; +my @SortedData; +my $last = []; +my $time = 0; +my $delta = undef; +my $ignored = 0; +my $line; +my $maxdelta = 1; + +# reverse if needed +foreach (<>) { + chomp; + $line = $_; + if ($stackreverse) { + # there may be an extra samples column for differentials + # XXX todo: redo these REs as one. It's repeated below. + my($stack, $samples) = (/^(.*)\s+?(\d+(?:\.\d*)?)$/); + my $samples2 = undef; + if ($stack =~ /^(.*)\s+?(\d+(?:\.\d*)?)$/) { + $samples2 = $samples; + ($stack, $samples) = $stack =~ (/^(.*)\s+?(\d+(?:\.\d*)?)$/); + unshift @Data, join(";", reverse split(";", $stack)) . " $samples $samples2"; + } else { + unshift @Data, join(";", reverse split(";", $stack)) . " $samples"; + } + } else { + unshift @Data, $line; + } +} + +if ($flamechart) { + # In flame chart mode, just reverse the data so time moves from left to right. + @SortedData = reverse @Data; +} else { + @SortedData = sort @Data; +} + +# process and merge frames +foreach (@SortedData) { + chomp; + # process: folded_stack count + # eg: func_a;func_b;func_c 31 + my ($stack, $samples) = (/^(.*)\s+?(\d+(?:\.\d*)?)$/); + unless (defined $samples and defined $stack) { + ++$ignored; + next; + } + + # there may be an extra samples column for differentials: + my $samples2 = undef; + if ($stack =~ /^(.*)\s+?(\d+(?:\.\d*)?)$/) { + $samples2 = $samples; + ($stack, $samples) = $stack =~ (/^(.*)\s+?(\d+(?:\.\d*)?)$/); + } + $delta = undef; + if (defined $samples2) { + $delta = $samples2 - $samples; + $maxdelta = abs($delta) if abs($delta) > $maxdelta; + } + + # for chain graphs, annotate waker frames with "_[w]", for later + # coloring. This is a hack, but has a precedent ("_[k]" from perf). 
+ if ($colors eq "chain") { + my @parts = split ";--;", $stack; + my @newparts = (); + $stack = shift @parts; + $stack .= ";--;"; + foreach my $part (@parts) { + $part =~ s/;/_[w];/g; + $part .= "_[w]"; + push @newparts, $part; + } + $stack .= join ";--;", @parts; + } + + # merge frames and populate %Node: + $last = flow($last, [ '', split ";", $stack ], $time, $delta); + + if (defined $samples2) { + $time += $samples2; + } else { + $time += $samples; + } +} +flow($last, [], $time, $delta); + +warn "Ignored $ignored lines with invalid format\n" if $ignored; +unless ($time) { + warn "ERROR: No stack counts found\n"; + my $im = SVG->new(); + # emit an error message SVG, for tools automating flamegraph use + my $imageheight = $fontsize * 5; + $im->header($imagewidth, $imageheight); + $im->stringTTF(undef, int($imagewidth / 2), $fontsize * 2, + "ERROR: No valid input provided to flamegraph.pl."); + print $im->svg; + exit 2; +} +if ($timemax and $timemax < $time) { + warn "Specified --total $timemax is less than actual total $time, so ignored\n" + if $timemax/$time > 0.02; # only warn is significant (e.g., not rounding etc) + undef $timemax; +} +$timemax ||= $time; + +my $widthpertime = ($imagewidth - 2 * $xpad) / $timemax; +my $minwidth_time = $minwidth / $widthpertime; + +# prune blocks that are too narrow and determine max depth +while (my ($id, $node) = each %Node) { + my ($func, $depth, $etime) = split ";", $id; + my $stime = $node->{stime}; + die "missing start for $id" if not defined $stime; + + if (($etime-$stime) < $minwidth_time) { + delete $Node{$id}; + next; + } + $depthmax = $depth if $depth > $depthmax; +} + +# draw canvas, and embed interactive JavaScript program +my $imageheight = (($depthmax + 1) * $frameheight) + $ypad1 + $ypad2; +$imageheight += $ypad3 if $subtitletext ne ""; +my $titlesize = $fontsize + 5; +my $im = SVG->new(); +my ($black, $vdgrey, $dgrey) = ( + $im->colorAllocate(0, 0, 0), + $im->colorAllocate(160, 160, 160), + 
$im->colorAllocate(200, 200, 200), + ); +$im->header($imagewidth, $imageheight); +my $inc = < + + + + + + + +INC +$im->include($inc); +$im->filledRectangle(0, 0, $imagewidth, $imageheight, 'url(#background)'); +$im->stringTTF("title", int($imagewidth / 2), $fontsize * 2, $titletext); +$im->stringTTF("subtitle", int($imagewidth / 2), $fontsize * 4, $subtitletext) if $subtitletext ne ""; +$im->stringTTF("details", $xpad, $imageheight - ($ypad2 / 2), " "); +$im->stringTTF("unzoom", $xpad, $fontsize * 2, "Reset Zoom", 'class="hide"'); +$im->stringTTF("search", $imagewidth - $xpad - 100, $fontsize * 2, "Search"); +$im->stringTTF("ignorecase", $imagewidth - $xpad - 16, $fontsize * 2, "ic"); +$im->stringTTF("matched", $imagewidth - $xpad - 100, $imageheight - ($ypad2 / 2), " "); + +if ($palette) { + read_palette(); +} + +# draw frames +$im->group_start({id => "frames"}); +while (my ($id, $node) = each %Node) { + my ($func, $depth, $etime) = split ";", $id; + my $stime = $node->{stime}; + my $delta = $node->{delta}; + + $etime = $timemax if $func eq "" and $depth == 0; + + my $x1 = $xpad + $stime * $widthpertime; + my $x2 = $xpad + $etime * $widthpertime; + my ($y1, $y2); + unless ($inverted) { + $y1 = $imageheight - $ypad2 - ($depth + 1) * $frameheight + $framepad; + $y2 = $imageheight - $ypad2 - $depth * $frameheight; + } else { + $y1 = $ypad1 + $depth * $frameheight; + $y2 = $ypad1 + ($depth + 1) * $frameheight - $framepad; + } + + my $samples = sprintf "%.0f", ($etime - $stime) * $factor; + (my $samples_txt = $samples) # add commas per perlfaq5 + =~ s/(^[-+]?\d+?(?=(?>(?:\d{3})+)(?!\d))|\G\d{3}(?=\d))/$1,/g; + + my $info; + if ($func eq "" and $depth == 0) { + $info = "all ($samples_txt $countname, 100%)"; + } else { + my $pct = sprintf "%.2f", ((100 * $samples) / ($timemax * $factor)); + my $escaped_func = $func; + # clean up SVG breaking characters: + $escaped_func =~ s/&/&/g; + $escaped_func =~ s//>/g; + $escaped_func =~ s/"/"/g; + $escaped_func =~ s/_\[[kwij]\]$//; 
# strip any annotation + unless (defined $delta) { + $info = "$escaped_func ($samples_txt $countname, $pct%)"; + } else { + my $d = $negate ? -$delta : $delta; + my $deltapct = sprintf "%.2f", ((100 * $d) / ($timemax * $factor)); + $deltapct = $d > 0 ? "+$deltapct" : $deltapct; + $info = "$escaped_func ($samples_txt $countname, $pct%; $deltapct%)"; + } + } + + my $nameattr = { %{ $nameattr{$func}||{} } }; # shallow clone + $nameattr->{title} ||= $info; + $im->group_start($nameattr); + + my $color; + if ($func eq "--") { + $color = $vdgrey; + } elsif ($func eq "-") { + $color = $dgrey; + } elsif (defined $delta) { + $color = color_scale($delta, $maxdelta); + } elsif ($palette) { + $color = color_map($colors, $func); + } else { + $color = color($colors, $hash, $func); + } + $im->filledRectangle($x1, $y1, $x2, $y2, $color, 'rx="2" ry="2"'); + + my $chars = int( ($x2 - $x1) / ($fontsize * $fontwidth)); + my $text = ""; + if ($chars >= 3) { # room for one char plus two dots + $func =~ s/_\[[kwij]\]$//; # strip any annotation + $text = substr $func, 0, $chars; + substr($text, -2, 2) = ".." if $chars < length $func; + $text =~ s/&/&/g; + $text =~ s//>/g; + } + $im->stringTTF(undef, $x1 + 3, 3 + ($y1 + $y2) / 2, $text); + + $im->group_end($nameattr); +} +$im->group_end(); + +print $im->svg; + +if ($palette) { + write_palette(); +} + +# vim: ts=8 sts=8 sw=8 noexpandtab diff --git a/test/flameGraph/run.sh b/test/flameGraph/run.sh new file mode 100755 index 000000000..961a3fb24 --- /dev/null +++ b/test/flameGraph/run.sh @@ -0,0 +1,17 @@ +#!/bin/bash + +#perf record -e cpu-clock -g -C 1 + +[ $# -lt 2 ] && echo -e "[usage]: $0 {perf.data.file} {out.file}" && exit 1 + +infile=$1 +outfile=$2 +[ ! 
-f $infile ] && echo -e "can't find input perf.data file $inflie, please use `perf record` to generate it" +[ _$outfile = _ ] && echo -e "invalid out.file name" && exit 1 + +perf script -i $infile > perf.unfold +./stackcollapse-perf.pl perf.unfold &> perf.folded +./flamegraph.pl perf.folded > $outfile.svg +rm -f perf.unfold +rm -f perf.folded +echo "succeed to generate $outfile.svg" diff --git a/test/flameGraph/stackcollapse-perf.pl b/test/flameGraph/stackcollapse-perf.pl new file mode 100755 index 000000000..fd3c78e28 --- /dev/null +++ b/test/flameGraph/stackcollapse-perf.pl @@ -0,0 +1,430 @@ +#!/usr/bin/perl -w +# +# stackcollapse-perf.pl collapse perf samples into single lines. +# +# Parses a list of multiline stacks generated by "perf script", and +# outputs a semicolon separated stack followed by a space and a count. +# If memory addresses (+0xd) are present, they are stripped, and resulting +# identical stacks are coalesced with their counts summed. +# +# USAGE: ./stackcollapse-perf.pl [options] infile > outfile +# +# Run "./stackcollapse-perf.pl -h" to list options. +# +# Example input: +# +# swapper 0 [000] 158665.570607: cpu-clock: +# ffffffff8103ce3b native_safe_halt ([kernel.kallsyms]) +# ffffffff8101c6a3 default_idle ([kernel.kallsyms]) +# ffffffff81013236 cpu_idle ([kernel.kallsyms]) +# ffffffff815bf03e rest_init ([kernel.kallsyms]) +# ffffffff81aebbfe start_kernel ([kernel.kallsyms].init.text) +# [...] +# +# Example output: +# +# swapper;start_kernel;rest_init;cpu_idle;default_idle;native_safe_halt 1 +# +# Input may be created and processed using: +# +# perf record -a -g -F 997 sleep 60 +# perf script | ./stackcollapse-perf.pl > out.stacks-folded +# +# The output of "perf script" should include stack traces. If these are missing +# for you, try manually selecting the perf script output; eg: +# +# perf script -f comm,pid,tid,cpu,time,event,ip,sym,dso,trace | ... 
+# +# This is also required for the --pid or --tid options, so that the output has +# both the PID and TID. +# +# Copyright 2012 Joyent, Inc. All rights reserved. +# Copyright 2012 Brendan Gregg. All rights reserved. +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at docs/cddl1.txt or +# http://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at docs/cddl1.txt. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# 02-Mar-2012 Brendan Gregg Created this. +# 02-Jul-2014 " " Added process name to stacks. 
+ +use strict; +use Getopt::Long; + +my %collapsed; + +sub remember_stack { + my ($stack, $count) = @_; + $collapsed{$stack} += $count; +} +my $annotate_kernel = 0; # put an annotation on kernel function +my $annotate_jit = 0; # put an annotation on jit symbols +my $annotate_all = 0; # enable all annotations +my $include_pname = 1; # include process names in stacks +my $include_pid = 0; # include process ID with process name +my $include_tid = 0; # include process & thread ID with process name +my $include_addrs = 0; # include raw address where a symbol can't be found +my $tidy_java = 1; # condense Java signatures +my $tidy_generic = 1; # clean up function names a little +my $target_pname; # target process name from perf invocation +my $event_filter = ""; # event type filter, defaults to first encountered event +my $event_defaulted = 0; # whether we defaulted to an event (none provided) +my $event_warning = 0; # if we printed a warning for the event + +my $show_inline = 0; +my $show_context = 0; + +my $srcline_in_input = 0; # if there are extra lines with source location (perf script -F+srcline) +GetOptions('inline' => \$show_inline, + 'context' => \$show_context, + 'srcline' => \$srcline_in_input, + 'pid' => \$include_pid, + 'kernel' => \$annotate_kernel, + 'jit' => \$annotate_jit, + 'all' => \$annotate_all, + 'tid' => \$include_tid, + 'addrs' => \$include_addrs, + 'event-filter=s' => \$event_filter) +or die < outfile\n + --pid # include PID with process names [1] + --tid # include TID and PID with process names [1] + --inline # un-inline using addr2line + --all # all annotations (--kernel --jit) + --kernel # annotate kernel functions with a _[k] + --jit # annotate jit functions with a _[j] + --context # adds source context to --inline + --srcline # parses output of 'perf script -F+srcline' and adds source context + --addrs # include raw addresses where symbols can't be found + --event-filter=EVENT # event name filter\n +[1] perf script must emit both PID and TIDs 
for these to work; eg, Linux < 4.1: + perf script -f comm,pid,tid,cpu,time,event,ip,sym,dso,trace + for Linux >= 4.1: + perf script -F comm,pid,tid,cpu,time,event,ip,sym,dso,trace + If you save this output add --header on Linux >= 3.14 to include perf info. +USAGE_END + +if ($annotate_all) { + $annotate_kernel = $annotate_jit = 1; +} + +my %inlineCache; + +my %nmCache; + +sub inlineCacheAdd { + my ($pc, $mod, $result) = @_; + if (defined($inlineCache{$pc})) { + $inlineCache{$pc}{$mod} = $result; + } else { + $inlineCache{$pc} = {$mod => $result}; + } +} + +# for the --inline option +sub inline { + my ($pc, $rawfunc, $mod) = @_; + + return $inlineCache{$pc}{$mod} if defined($inlineCache{$pc}{$mod}); + + # capture addr2line output + my $a2l_output = `addr2line -a $pc -e $mod -i -f -s -C`; + + # remove first line + $a2l_output =~ s/^(.*\n){1}//; + + if ($a2l_output =~ /\?\?\n\?\?:0/) { + # if addr2line fails and rawfunc is func+offset, then fall back to it + if ($rawfunc =~ /^(.+)\+0x([0-9a-f]+)$/) { + my $func = $1; + my $addr = hex $2; + + $nmCache{$mod}=`nm $mod` unless defined $nmCache{$mod}; + + if ($nmCache{$mod} =~ /^([0-9a-f]+) . \Q$func\E$/m) { + my $base = hex $1; + my $newPc = sprintf "0x%x", $base+$addr; + my $result = inline($newPc, '', $mod); + inlineCacheAdd($pc, $mod, $result); + return $result; + } + } + } + + my @fullfunc; + my $one_item = ""; + for (split /^/, $a2l_output) { + chomp $_; + + # remove discriminator info if exists + $_ =~ s/ \(discriminator \S+\)//; + + if ($one_item eq "") { + $one_item = $_; + } else { + if ($show_context == 1) { + unshift @fullfunc, $one_item . 
":$_"; + } else { + unshift @fullfunc, $one_item; + } + $one_item = ""; + } + } + + my $result = join ";" , @fullfunc; + + inlineCacheAdd($pc, $mod, $result); + + return $result; +} + +my @stack; +my $pname; +my $m_pid; +my $m_tid; + +# +# Main loop +# +while (defined($_ = <>)) { + + # find the name of the process launched by perf, by stepping backwards + # over the args to find the first non-option (no dash): + if (/^# cmdline/) { + my @args = split ' ', $_; + foreach my $arg (reverse @args) { + if ($arg !~ /^-/) { + $target_pname = $arg; + $target_pname =~ s:.*/::; # strip pathname + last; + } + } + } + + # skip remaining comments + next if m/^#/; + chomp; + + # end of stack. save cached data. + if (m/^$/) { + # ignore filtered samples + next if not $pname; + + if ($include_pname) { + if (defined $pname) { + unshift @stack, $pname; + } else { + unshift @stack, ""; + } + } + remember_stack(join(";", @stack), 1) if @stack; + undef @stack; + undef $pname; + next; + } + + # + # event record start + # + if (/^(\S.+?)\s+(\d+)\/*(\d+)*\s+/) { + # default "perf script" output has TID but not PID + # eg, "java 25607 4794564.109216: cycles:" + # eg, "java 12688 [002] 6544038.708352: cpu-clock:" + # eg, "V8 WorkerThread 25607 4794564.109216: cycles:" + # eg, "java 24636/25607 [000] 4794564.109216: cycles:" + # eg, "java 12688/12764 6544038.708352: cpu-clock:" + # eg, "V8 WorkerThread 24636/25607 [000] 94564.109216: cycles:" + # other combinations possible + my ($comm, $pid, $tid) = ($1, $2, $3); + if (not $tid) { + $tid = $pid; + $pid = "?"; + } + + if (/(\S+):\s*$/) { + my $event = $1; + + if ($event_filter eq "") { + # By default only show events of the first encountered + # event type. Merging together different types, such as + # instructions and cycles, produces misleading results. 
+ $event_filter = $event; + $event_defaulted = 1; + } elsif ($event ne $event_filter) { + if ($event_defaulted and $event_warning == 0) { + # only print this warning if necessary: + # when we defaulted and there was + # multiple event types. + print STDERR "Filtering for events of type: $event\n"; + $event_warning = 1; + } + next; + } + } + + ($m_pid, $m_tid) = ($pid, $tid); + + if ($include_tid) { + $pname = "$comm-$m_pid/$m_tid"; + } elsif ($include_pid) { + $pname = "$comm-$m_pid"; + } else { + $pname = "$comm"; + } + $pname =~ tr/ /_/; + + # + # stack line + # + } elsif (/^\s*(\w+)\s*(.+) \((\S*)\)/) { + # ignore filtered samples + next if not $pname; + + my ($pc, $rawfunc, $mod) = ($1, $2, $3); + + if ($show_inline == 1 && $mod !~ m/(perf-\d+.map|kernel\.|\[[^\]]+\])/) { + my $inlineRes = inline($pc, $rawfunc, $mod); + # - empty result this happens e.g., when $mod does not exist or is a path to a compressed kernel module + # if this happens, the user will see error message from addr2line written to stderr + # - if addr2line results in "??" , then it's much more sane to fall back than produce a '??' in graph + if($inlineRes ne "" and $inlineRes ne "??" 
and $inlineRes ne "??:??:0" ) { + unshift @stack, $inlineRes; + next; + } + } + + # Linux 4.8 included symbol offsets in perf script output by default, eg: + # 7fffb84c9afc cpu_startup_entry+0x800047c022ec ([kernel.kallsyms]) + # strip these off: + $rawfunc =~ s/\+0x[\da-f]+$//; + + next if $rawfunc =~ /^\(/; # skip process names + + my $is_unknown=0; + my @inline; + for (split /\->/, $rawfunc) { + my $func = $_; + + if ($func eq "[unknown]") { + if ($mod ne "[unknown]") { # use module name instead, if known + $func = $mod; + $func =~ s/.*\///; + } else { + $func = "unknown"; + $is_unknown=1; + } + + if ($include_addrs) { + $func = "\[$func \<$pc\>\]"; + } else { + $func = "\[$func\]"; + } + } + + if ($tidy_generic) { + $func =~ s/;/:/g; + if ($func !~ m/\.\(.*\)\./) { + # This doesn't look like a Go method name (such as + # "net/http.(*Client).Do"), so everything after the first open + # paren (that is not part of an "(anonymous namespace)") is + # just noise. + $func =~ s/\((?!anonymous namespace\)).*//; + } + # now tidy this horrible thing: + # 13a80b608e0a RegExp:[&<>\"\'] (/tmp/perf-7539.map) + $func =~ tr/"\'//d; + # fall through to $tidy_java + } + + if ($tidy_java and $pname eq "java") { + # along with $tidy_generic, converts the following: + # Lorg/mozilla/javascript/ContextFactory;.call(Lorg/mozilla/javascript/ContextAction;)Ljava/lang/Object; + # Lorg/mozilla/javascript/ContextFactory;.call(Lorg/mozilla/javascript/C + # Lorg/mozilla/javascript/MemberBox;.(Ljava/lang/reflect/Method;)V + # into: + # org/mozilla/javascript/ContextFactory:.call + # org/mozilla/javascript/ContextFactory:.call + # org/mozilla/javascript/MemberBox:.init + $func =~ s/^L// if $func =~ m:/:; + } + + # + # Annotations + # + # detect inlined from the @inline array + # detect kernel from the module name; eg, frames to parse include: + # ffffffff8103ce3b native_safe_halt ([kernel.kallsyms]) + # 8c3453 tcp_sendmsg (/lib/modules/4.3.0-rc1-virtual/build/vmlinux) + # 7d8 
ipv4_conntrack_local+0x7f8f80b8 ([nf_conntrack_ipv4]) + # detect jit from the module name; eg: + # 7f722d142778 Ljava/io/PrintStream;::print (/tmp/perf-19982.map) + if (scalar(@inline) > 0) { + $func .= "_[i]"; # inlined + } elsif ($annotate_kernel == 1 && $mod =~ m/(^\[|vmlinux$)/ && $mod !~ /unknown/) { + $func .= "_[k]"; # kernel + } elsif ($annotate_jit == 1 && $mod =~ m:/tmp/perf-\d+\.map:) { + $func .= "_[j]"; # jitted + } + + # + # Source lines + # + # + # Sample outputs: + # | a.out 35081 252436.005167: 667783 cycles: + # | 408ebb some_method_name+0x8b (/full/path/to/a.out) + # | uniform_int_dist.h:300 + # | 4069f5 main+0x935 (/full/path/to/a.out) + # | file.cpp:137 + # | 7f6d2148eb25 __libc_start_main+0xd5 (/lib64/libc-2.33.so) + # | libc-2.33.so[27b25] + # + # | a.out 35081 252435.738165: 306459 cycles: + # | 7f6d213c2750 [unknown] (/usr/lib64/libkmod.so.2.3.6) + # | libkmod.so.2.3.6[6750] + # + # | a.out 35081 252435.738373: 315813 cycles: + # | 7f6d215ca51b __strlen_avx2+0x4b (/lib64/libc-2.33.so) + # | libc-2.33.so[16351b] + # | 7ffc71ee9580 [unknown] ([unknown]) + # | + # + # | a.out 35081 252435.718940: 247984 cycles: + # | ffffffff814f9302 up_write+0x32 ([kernel.kallsyms]) + # | [kernel.kallsyms][ffffffff814f9302] + if($srcline_in_input and not $is_unknown){ + $_ = <>; + chomp; + s/\[.*?\]//g; + s/^\s*//g; + s/\s*$//g; + $func.=':'.$_ unless $_ eq ""; + } + + push @inline, $func; + } + + unshift @stack, @inline; + } else { + warn "Unrecognized line: $_"; + } +} + +foreach my $k (sort { $a cmp $b } keys %collapsed) { + print "$k $collapsed{$k}\n"; +} diff --git a/test/ipset/dpip.log b/test/ipset/dpip.log new file mode 100644 index 000000000..3340cc033 --- /dev/null +++ b/test/ipset/dpip.log @@ -0,0 +1,270 @@ +global +[ OK ] ./bin/dpip ipset list +[ OK ] ./bin/dpip ipset show +bitmap:ip +[ OK ] ./bin/dpip ipset create foo bitmap:ip range 192.168.0.0/16 +[ OK ] ./bin/dpip ipset add foo 192.168.1.0/26 +[ OK ] ./bin/dpip ipset test foo 192.168.1.32 
EXPECT true +[ OK ] ./bin/dpip ipset test foo 192.168.2.1 EXPECT false +[ OK ] ./bin/dpip ipset add foo 192.168.2.1 +[ OK ] ./bin/dpip ipset test foo 192.168.2.1 EXPECT true +[ OK ] ./bin/dpip ipset add foo 10.100.100.100 +[ OK ] ./bin/dpip ipset test foo 10.100.100.100 EXPECT false +[ OK ] ./bin/dpip ipset destroy foo +bitmap:port +[ OK ] ./bin/dpip ipset create foo bitmap:port range 0-65535 +[ OK ] ./bin/dpip ipset add foo tcp:80 +[ OK ] ./bin/dpip ipset add foo tcp:8080 +[ OK ] ./bin/dpip ipset test foo tcp:80 EXPECT true +[ OK ] ./bin/dpip ipset test foo tcp:8080 EXPECT true +[ OK ] ./bin/dpip ipset test foo udp:80 EXPECT false +[ OK ] ./bin/dpip ipset test foo tcp:41235 EXPECT false +[ OK ] ./bin/dpip ipset add foo udp:80 +[ OK ] ./bin/dpip ipset test foo udp:80 EXPECT true +[ OK ] ./bin/dpip ipset del foo tcp:8080 +[ OK ] ./bin/dpip ipset test foo tcp:8080 EXPECT false +[ OK ] ./bin/dpip ipset flush foo +[ OK ] ./bin/dpip ipset destroy foo +bitmap:ip,mac +[ OK ] ./bin/dpip ipset create foo bitmap:ip,mac range 192.168.0.0/16 +[ OK ] ./bin/dpip ipset add foo 192.168.1.1,12:34:56:78:9A:BC +[ OK ] ./bin/dpip ipset add foo 192.168.2.2 +[ OK ] ./bin/dpip ipset test foo 192.168.1.1,12:34:56:78:9A:BC EXPECT true +[ OK ] ./bin/dpip ipset test foo 192.168.1.1,12:34:56:78:A9:BC EXPECT false +[ OK ] ./bin/dpip ipset test foo 192.168.1.1,0:0:0:0:0:0 EXPECT true +[ OK ] ./bin/dpip ipset test foo 192.168.1.1 EXPECT true +[ OK ] ./bin/dpip ipset test foo 192.168.2.2 EXPECT true +[ OK ] ./bin/dpip ipset test foo 192.168.2.2,1:2:3:4:5:6 EXPECT true +[ OK ] ./bin/dpip ipset test foo 192.168.2.1 EXPECT false +[ OK ] ./bin/dpip ipset destroy foo +hash:ip +[ OK ] ./bin/dpip ipset create foo hash:ip comment +[ OK ] ./bin/dpip ipset add foo 10.100.100.100 comment a-single-address +[ OK ] ./bin/dpip ipset add foo 192.168.1.0/24 +[ OK ] ./bin/dpip ipset list foo +[ OK ] ./bin/dpip ipset test foo 10.100.100.100 EXPECT true +[ OK ] ./bin/dpip ipset test foo 192.168.1.12 EXPECT true +[ 
OK ] ./bin/dpip ipset test foo 192.168.2.0 EXPECT false +[ OK ] ./bin/dpip ipset test foo 0.0.0.0 EXPECT false +[ OK ] ./bin/dpip ipset destroy foo +[ OK ] ./bin/dpip ipset -6 create bar hash:ip hashsize 128 maxelem 4096 +[ OK ] ./bin/dpip ipset add bar 2001::1 +[ OK ] ./bin/dpip ipset add bar 2001::2:1 +[ OK ] ./bin/dpip ipset add bar :: +[ OK ] ./bin/dpip ipset test bar 2001::2:1 EXPECT true +[ OK ] ./bin/dpip ipset test bar ::1 EXPECT false +[ OK ] ./bin/dpip ipset test bar :: EXPECT true +[ OK ] ./bin/dpip ipset destroy bar +hash:ip,port +[ OK ] ./bin/dpip ipset create foo hash:ip,port +[ OK ] ./bin/dpip ipset add foo 192.168.1.0/30,tcp:80-82 +[ OK ] ./bin/dpip ipset add foo 192.168.1.0/30,udp:80-82 +[ OK ] ./bin/dpip ipset -v test foo 192.168.1.1,tcp:81 EXPECT 192.168.1.1,tcp:81 is in set foo +[ OK ] ./bin/dpip ipset test foo 192.168.1.0,upd:80 EXPECT false +[ OK ] ./bin/dpip ipset add foo 172.27.1.3-172.27.1.5 +[ OK ] ./bin/dpip ipset test foo 172.27.1.5 EXPECT true +[ OK ] ./bin/dpip ipset test foo 172.27.1.4,0 EXPECT true +[ OK ] ./bin/dpip ipset test foo 172.27.1.4,tcp:0 EXPECT false +[ OK ] ./bin/dpip ipset add foo 172.27.20.20-172.27.20.21,80-82 +[ OK ] ./bin/dpip ipset test foo 172.27.20.20,81 EXPECT true +[ OK ] ./bin/dpip ipset test foo 172.27.20.20,tcp:81 EXPECT false +[ OK ] ./bin/dpip ipset flush foo +[ OK ] ./bin/dpip ipset destroy foo +[ OK ] ./bin/dpip ipset -6 create bar hash:ip,port +[ OK ] ./bin/dpip ipset add bar 2001::1,tcp:8080-8082 +[ OK ] ./bin/dpip ipset add bar 2001::1,udp:80 +[ OK ] ./bin/dpip ipset add bar 2001::2,0 +[ OK ] ./bin/dpip ipset test bar 2001::1,tcp:8081 EXPECT true +[ OK ] ./bin/dpip ipset test bar 2001::1,udp:8081 EXPECT false +[ OK ] ./bin/dpip ipset test bar 2001::1,udp:80 EXPECT true +[ OK ] ./bin/dpip ipset test bar 2001::2 EXPECT true +[ OK ] ./bin/dpip ipset destroy bar +hash:net +[ OK ] ./bin/dpip ipset create foo hash:net +[ OK ] ./bin/dpip ipset add foo 192.168.0.0/24 +[ OK ] ./bin/dpip ipset add foo 
10.1.0.0/16 +[ OK ] ./bin/dpip ipset add foo 192.168.0.100/30 nomatch +[ OK ] ./bin/dpip ipset test foo 10.1.100.100 EXPECT true +[ OK ] ./bin/dpip ipset test foo 192.168.0.104 EXPECT true +[ OK ] ./bin/dpip ipset test foo 192.168.0.102 EXPECT false +[ OK ] ./bin/dpip ipset add foo 10.1.1.1 nomatch +[ OK ] ./bin/dpip ipset test foo 10.1.1.1 EXPECT false +[ OK ] ./bin/dpip ipset del foo 10.1.1.1 +[ OK ] ./bin/dpip ipset test foo 10.1.1.1 EXPECT true +[ OK ] ./bin/dpip ipset destroy foo +[ OK ] ./bin/dpip ipset -6 create bar hash:net +[ OK ] ./bin/dpip ipset add bar 2001::/64 +[ OK ] ./bin/dpip ipset test bar 2001::4:3:2:1 EXPECT true +[ OK ] ./bin/dpip ipset test bar 2001:1::4:3:2:1 EXPECT false +[ OK ] ./bin/dpip ipset test bar 2001::1 EXPECT true +[ OK ] ./bin/dpip ipset add bar 2001::/120 nomatch +[ OK ] ./bin/dpip ipset test bar 2001::1 EXPECT false +[ OK ] ./bin/dpip ipset destroy bar +hash:ip,port,ip +[ OK ] ./bin/dpip ipset create foo hash:ip,port,ip comment +[ OK ] ./bin/dpip ipset add foo 192.168.1.16/30,tcp:8080-8082,192.168.2.100-192.168.2.105 comment a-test-range +[ OK ] ./bin/dpip ipset test foo 192.168.1.18,tcp:8081,192.168.2.101 EXPECT true +[ OK ] ./bin/dpip ipset test foo 192.168.1.16,tcp:8080,192.168.2.105 EXPECT true +[ OK ] ./bin/dpip ipset test foo 192.168.1.20,tcp:8081,192.168.2.101 EXPECT false +[ OK ] ./bin/dpip ipset test foo 192.168.1.18,tcp:8081,192.168.2.106 EXPECT false +[ OK ] ./bin/dpip ipset test foo 192.168.1.18,udp:8081,192.168.2.101 EXPECT false +[ OK ] ./bin/dpip ipset test foo 192.168.1.19,8081,192.168.2.101 EXPECT false +[ OK ] ./bin/dpip ipset del foo 192.168.1.18/31,tcp:8081,192.168.2.101 +[ OK ] ./bin/dpip ipset test foo 192.168.1.18,tcp:8081,192.168.2.101 EXPECT false +[ OK ] ./bin/dpip ipset destroy foo +[ OK ] ./bin/dpip ipset -6 create bar hash:ip,port,ip +[ OK ] ./bin/dpip ipset add bar 2001::1,udp:80-82,2002::2 +[ OK ] ./bin/dpip ipset add bar 2001::1,tcp:80-82,2002::2 +[ OK ] ./bin/dpip ipset add bar 
2001::1,80-82,2002::2 +[ OK ] ./bin/dpip ipset test bar 2001::1,udp:81,2002::2 EXPECT true +[ OK ] ./bin/dpip ipset test bar 2001::1,tcp:80,2002::2 EXPECT true +[ OK ] ./bin/dpip ipset test bar 2001::1,82,2002::2 EXPECT true +[ OK ] ./bin/dpip ipset test bar 2001::2,81,2002::2 EXPECT false +[ OK ] ./bin/dpip ipset test bar 2001::1,tcp:8080,2002::2 EXPECT false +[ OK ] ./bin/dpip ipset test bar 2001::1,udp:80,2002::1 EXPECT false +[ OK ] ./bin/dpip ipset del bar 2001::1,80-82,2002::2 +[ OK ] ./bin/dpip ipset test bar 2001::1,82,2002::2 EXPECT false +[ OK ] ./bin/dpip ipset destroy bar +hash:net,port,net,port +[ OK ] ./bin/dpip ipset create foo hash:net,port,net,port +[ OK ] ./bin/dpip ipset add foo 192.168.10.0/24,0,192.168.20.0/24,0 +[ OK ] ./bin/dpip ipset test foo 192.168.10.123,0,192.168.20.123,0 EXPECT true +[ OK ] ./bin/dpip ipset test foo 192.168.10.123,tcp:0,192.168.20.123,tcp:0 EXPECT false +[ OK ] ./bin/dpip ipset add foo 192.168.10.123,tcp:0,192.168.20.123,tcp:0 +[ OK ] ./bin/dpip ipset test foo 192.168.10.123,tcp:0,192.168.20.123,tcp:0 EXPECT true +[ OK ] ./bin/dpip ipset add foo 192.168.10.64/26,0,192.168.20.64/26,0 nomatch +[ OK ] ./bin/dpip ipset test foo 192.168.10.123,0,192.168.20.123,0 EXPECT false +[ OK ] ./bin/dpip ipset add foo 192.168.10.123,0,192.168.20.123,0 +[ OK ] ./bin/dpip ipset test foo 192.168.10.123,0,192.168.20.123,0 EXPECT true +[ OK ] ./bin/dpip ipset flush foo +[ OK ] ./bin/dpip ipset add foo 10.64.68.0-10.64.68.100,tcp:80-82,10.128.0.0/16,tcp:8080 +[ OK ] ./bin/dpip ipset list -v foo +[ OK ] ./bin/dpip ipset test foo 10.64.68.66,tcp:81,10.128.11.22,tcp:8080 EXPECT true +[ OK ] ./bin/dpip ipset add foo 10.64.68.64/29,tcp:81,10.128.11.0/24,tcp:8080 nomatch +[ OK ] ./bin/dpip ipset test foo 10.64.68.66,tcp:81,10.128.11.22,tcp:8080 EXPECT false +[ OK ] ./bin/dpip ipset destroy foo +[ OK ] ./bin/dpip ipset -6 create bar hash:net,port,net,port comment +[ OK ] ./bin/dpip ipset add bar 
2001::a:b:c:d/64,udp:8080-8081,2002::/64,udp:6000-6001 +[ OK ] ./bin/dpip ipset test bar 2001::1:2:3:4,udp:8080,2002::1:2:3:4,udp:6001 EXPECT true +[ OK ] ./bin/dpip ipset test bar 2001::1:2:3:4:5,udp:8080,2002::1:2:3:4,udp:6001 EXPECT false +[ OK ] ./bin/dpip ipset test bar 2001::1:2:3:4,udp:8080,2002::1:2:3:4:5,udp:6001 EXPECT false +[ OK ] ./bin/dpip ipset test bar 2001::1:2:3:4,udp:8082,2002::1:2:3:4,udp:6001 EXPECT false +[ OK ] ./bin/dpip ipset add bar 2001::/64,udp:8080,2002::1:2:0:0/96,udp:6000-6001 nomatch comment bad-guys +[ OK ] ./bin/dpip ipset test bar 2001::1:2:3:4,udp:8080,2002::1:2:3:4,udp:6001 EXPECT false +[ OK ] ./bin/dpip ipset test bar 2001::1:2:3:4,udp:8081,2002::1:2:3:4,udp:6001 EXPECT true +[ OK ] ./bin/dpip ipset test bar 2001::1:2:3:4,udp:8080,2002::2:3:4:5,udp:6001 EXPECT true +[ OK ] ./bin/dpip ipset destroy bar +hash:net,port,iface +[ OK ] ./bin/dpip ipset create foo hash:net,port,iface comment +[ OK ] ./bin/dpip ipset add foo 10.64.13.131/16,tcp:80-82,dpdk0 +[ OK ] ./bin/dpip ipset test foo 10.64.111.222,tcp:81,dpdk0 EXPECT true +[ OK ] ./bin/dpip ipset test foo 10.64.111.222,81,dpdk0 EXPECT false +[ OK ] ./bin/dpip ipset add foo 10.64.88.100-10.64.88.200,tcp:82,dpdk0 nomatch comment bad-guys +[ OK ] ./bin/dpip ipset list foo -v +[ OK ] ./bin/dpip ipset test foo 10.64.88.111,tcp:81,dpdk0 EXPECT true +[ OK ] ./bin/dpip ipset test foo 10.64.88.111,tcp:82,dpdk0 EXPECT false +[ OK ] ./bin/dpip ipset add foo 10.64.88.111,tcp:82,dpdk0 comment you-are-an-exception +[ OK ] ./bin/dpip ipset test foo 10.64.88.111,tcp:82,dpdk0 EXPECT true +[ OK ] ./bin/dpip ipset flush foo +[ OK ] ./bin/dpip ipset destroy foo +[ OK ] ./bin/dpip ipset -6 create bar hash:net,port,iface hashsize 300 maxelem 1000 +[ OK ] ./bin/dpip ipset add bar 2001:beef::/64,udp:100-102,dpdk0 +[ OK ] ./bin/dpip ipset test bar 2001:beef::abcd,udp:100,dpdk0 EXPECT true +[ OK ] ./bin/dpip ipset add bar 2001:beef::abcd/100,udp:100,dpdk0 nomatch +[ OK ] ./bin/dpip ipset test bar 
2001:beef::abcd,udp:100,dpdk0 EXPECT false +[ OK ] ./bin/dpip ipset del bar 2001:beef::abcd/100,udp:100,dpdk0 +[ OK ] ./bin/dpip ipset test bar 2001:beef::abcd,udp:100,dpdk0 EXPECT true +[ OK ] ./bin/dpip ipset destroy bar +hash:net,port +[ OK ] ./bin/dpip ipset create foo hash:net,port comment +[ OK ] ./bin/dpip ipset add foo 192.168.100.0-192.168.102.30,tcp:10240 +[ OK ] ./bin/dpip ipset test foo 192.168.100.111,tcp:10240 EXPECT true +[ OK ] ./bin/dpip ipset test foo 192.168.101.111,tcp:10240 EXPECT true +[ OK ] ./bin/dpip ipset test foo 192.168.101.111,tcp:10241 EXPECT false +[ OK ] ./bin/dpip ipset test foo 192.168.102.111,tcp:10241 EXPECT false +[ OK ] ./bin/dpip ipset test foo 192.168.102.30,tcp:10240 EXPECT true +[ OK ] ./bin/dpip ipset test foo 192.168.102.30,10240 EXPECT false +[ OK ] ./bin/dpip ipset test foo 192.168.102.31,tcp:10240 EXPECT false +[ OK ] ./bin/dpip ipset add foo 192.168.100.101/25,tcp:10240 nomatch comment bad-guys +[ OK ] ./bin/dpip ipset test foo 192.168.100.111,tcp:10240 EXPECT false +[ OK ] ./bin/dpip ipset flush foo +[ OK ] ./bin/dpip ipset add foo 10.128.34.211-10.128.37.189,3000-3001 +[ OK ] ./bin/dpip ipset test foo 10.128.35.141,3001 EXPECT true +[ OK ] ./bin/dpip ipset add foo 10.128.35.100-10.128.35.150,3000-3001 nomatch +[ OK ] ./bin/dpip ipset test foo 10.128.34.210,3000 EXPECT false +[ OK ] ./bin/dpip ipset test foo 10.128.34.211,3000 EXPECT true +[ OK ] ./bin/dpip ipset test foo 10.128.37.185,3001 EXPECT true +[ OK ] ./bin/dpip ipset test foo 10.128.37.190,3001 EXPECT false +[ OK ] ./bin/dpip ipset test foo 10.128.35.141,3001 EXPECT false +[ OK ] ./bin/dpip ipset destroy foo +[ OK ] ./bin/dpip ipset -6 create bar hash:net,port maxelem 1024 +[ OK ] ./bin/dpip ipset add bar 2001::/64,tcp:80-88 +[ OK ] ./bin/dpip ipset test bar 2001::1:2:3:4,tcp:85 EXPECT true +[ OK ] ./bin/dpip ipset test bar 2001::1111:2222:3333:4444,tcp:88 EXPECT true +[ OK ] ./bin/dpip ipset test bar 2001::1:2:3:4,tcp:89 EXPECT false +[ OK ] ./bin/dpip 
ipset test bar 2001::1:2:3:4:5,tcp:85 EXPECT false +[ OK ] ./bin/dpip ipset test bar 2001::eeee:aaaa:1:6,tcp:85 EXPECT true +[ OK ] ./bin/dpip ipset add bar 2001::eeee:aaaa:1243:6789/96,tcp:84-86 nomatch +[ OK ] ./bin/dpip ipset test bar 2001::eeee:aaaa:1:6,tcp:85 EXPECT false +[ OK ] ./bin/dpip ipset destroy bar +hash:net,port,net +[ OK ] ./bin/dpip ipset create foo hash:net,port,net +[ OK ] ./bin/dpip ipset add foo 192.168.188.20-192.168.190.36,2021-2022,192.168.33.223-192.168.34.123 +[ OK ] ./bin/dpip ipset flush foo +[ OK ] ./bin/dpip ipset add foo 10.60.0.0/16,tcp:10240-10242,10.130.0.0/16 +[ OK ] ./bin/dpip ipset test foo 10.60.12.34,tcp:10241,10.130.56.78 EXPECT true +[ OK ] ./bin/dpip ipset test foo 10.60.0.0,tcp:10242,10.130.255.255 EXPECT true +[ OK ] ./bin/dpip ipset test foo 10.61.0.0,tcp:10240,10.130.255.255 EXPECT false +[ OK ] ./bin/dpip ipset test foo 10.60.0.0,udp:10240,10.130.255.255 EXPECT false +[ OK ] ./bin/dpip ipset test foo 10.60.100.168,tcp:10242,10.130.100.192 EXPECT true +[ OK ] ./bin/dpip ipset add foo 10.60.100.100-10.60.100.200,tcp:10242,10.130.100.100-10.130.100.200 nomatch +[ OK ] ./bin/dpip ipset test foo 10.60.100.168,tcp:10242,10.130.100.192 EXPECT false +[ OK ] ./bin/dpip ipset test foo 10.60.100.168,tcp:10241,10.130.100.192 EXPECT true +[ OK ] ./bin/dpip ipset test foo 10.60.100.201,tcp:10242,10.130.100.192 EXPECT true +[ OK ] ./bin/dpip ipset destroy foo +[ OK ] ./bin/dpip ipset -6 create bar hash:net,port,net hashsize 1024 maxelem 4096 comment +[ OK ] ./bin/dpip ipset add bar 210e:36a9::aa:bbbb/96,udp:8080-8082,2408:a91e::cc:dddd/96 comment test-entries +[ OK ] ./bin/dpip ipset test bar 210e:36a9::12:3456,udp:8080,2408:a91e::78:9abc EXPECT true +[ OK ] ./bin/dpip ipset test bar 210e:36a9::ff:ffff,udp:8080,2408:a91e:: EXPECT true +[ OK ] ./bin/dpip ipset test bar 210e:36a9::,udp:8082,2408:a91e::ff:ffff EXPECT true +[ OK ] ./bin/dpip ipset add bar 210e:36a9::12:3456/102,udp:8080,2408:a91e::78:9abc/102 nomatch +[ OK ] ./bin/dpip 
ipset test bar 210e:36a9::12:3456,udp:8080,2408:a91e::78:9abc EXPECT false +[ OK ] ./bin/dpip ipset del bar 210e:36a9::12:3456/102,udp:8080,2408:a91e::78:9abc/102 +[ OK ] ./bin/dpip ipset test bar 210e:36a9::ff:ffff,udp:8080,2408:a91e:: EXPECT true +[ OK ] ./bin/dpip ipset test bar 210e:36a9::12:3456,udp:8080,2408:a91e::78:9abc EXPECT true +[ OK ] ./bin/dpip ipset destroy bar +hash:ip,port,net +[ OK ] ./bin/dpip ipset create foo hash:ip,port,net comment +[ OK ] ./bin/dpip ipset add foo 192.168.12.1/24,tcp:8080,192.168.100.0/24 +[ OK ] ./bin/dpip ipset test foo 192.168.12.211,tcp:8080,192.168.100.211 EXPECT true +[ OK ] ./bin/dpip ipset test foo 192.168.12.0,tcp:8080,192.168.100.255 EXPECT true +[ OK ] ./bin/dpip ipset test foo 192.168.12.211,tcp:8080,192.168.101.0 EXPECT false +[ OK ] ./bin/dpip ipset test foo 192.168.13.0,tcp:8080,192.168.100.211 EXPECT false +[ OK ] ./bin/dpip ipset test foo 192.168.12.211,8080,192.168.100.211 EXPECT false +[ OK ] ./bin/dpip ipset add foo 192.168.12.200-192.168.12.255,tcp:8080,192.168.100.200-192.168.100.255 nomatch +[ OK ] ./bin/dpip ipset list -v +[ OK ] ./bin/dpip ipset test foo 192.168.12.211,tcp:8080,192.168.100.111 EXPECT true +[ OK ] ./bin/dpip ipset test foo 192.168.12.211,tcp:8080,192.168.100.211 EXPECT false +[ OK ] ./bin/dpip ipset add foo 192.168.12.211,tcp:8080,192.168.100.211/32 comment I'm-innocent +[ OK ] ./bin/dpip ipset test foo 192.168.12.211,tcp:8080,192.168.100.211 EXPECT true +[ OK ] ./bin/dpip ipset del foo 192.168.12.200-192.168.12.255,tcp:8080,192.168.100.200-192.168.100.255 +[ OK ] ./bin/dpip ipset flush foo +[ OK ] ./bin/dpip ipset add foo 10.61.240.1-10.61.240.9,udp:10240-10242,10.110.123.102/21 +[ OK ] ./bin/dpip ipset test foo 10.61.240.3,udp:10240,10.110.123.123 EXPECT true +[ OK ] ./bin/dpip ipset test foo 10.61.240.6,udp:10241,10.110.120.1 EXPECT true +[ OK ] ./bin/dpip ipset test foo 10.61.240.9,udp:10242,10.110.127.255 EXPECT true +[ OK ] ./bin/dpip ipset test foo 
10.61.240.3,udp:10243,10.110.123.123 EXPECT false +[ OK ] ./bin/dpip ipset destroy foo +[ OK ] ./bin/dpip ipset -6 create bar hash:ip,port,net +[ OK ] ./bin/dpip ipset add bar 2001::1,8080-8082,2002::/64 +[ OK ] ./bin/dpip ipset add bar 2001::2,8080-8082,2002::/64 +[ OK ] ./bin/dpip ipset add bar 2001::3,8080-8082,2002::/64 +[ OK ] ./bin/dpip ipset add bar 2001::1,8080-8082,2002::aaaa:bbbb:ccc1:2222/108 nomatch +[ OK ] ./bin/dpip ipset test bar 2001::1,8081,2002::1:2:3:4 EXPECT true +[ OK ] ./bin/dpip ipset test bar 2001::1,8081,2002::1:2:3:4:5 EXPECT false +[ OK ] ./bin/dpip ipset test bar 2001::2,8080,2002:: EXPECT true +[ OK ] ./bin/dpip ipset test bar 2001::1,8081,2002::aaaa:bbbb:ccc1:2345 EXPECT false +[ OK ] ./bin/dpip ipset test bar 2001::1,8081,2002::aaaa:bbbb:cc11:2345 EXPECT true +[ OK ] ./bin/dpip ipset destroy bar + +IPSET TEST RESULT: PASS diff --git a/test/ipset/dpip.sh b/test/ipset/dpip.sh new file mode 100755 index 000000000..326dadd0a --- /dev/null +++ b/test/ipset/dpip.sh @@ -0,0 +1,390 @@ +#!/bin/env bash + +verbose=0 # decide if to print command errors, 0 for "no" and 1 for "yes" +result="" # final result, "Fail" or "OK" +cmdout="" # command output + +while getopts "v:" opt; +do + case $opt in + v) + if [ _$OPTARG = _v ]; then + verbose=2 + else + verbose=1 + fi + #echo "set verbose: $verbose" + shift + ;; + ?) + echo -e "invalid option: $opt" + exit 1 + ;; + esac +done + +[ $# -ne 1 ] && echo -e "Usage: $0 [-v|-vv] dpip-executable-file" && exit 1 + +[ ! -x $1 ] && echo -e "invalid dpip executable file" && exit 1 +dpip=$1 + +alias ipset="run $1 ipset" +shopt -s expand_aliases + +# print testing result when exit +trap print_result SIGINT SIGTERM EXIT +function print_result() +{ + local retval=1 + + [ _$result = _ ] && result="PASS" && retval=0 + echo -e "\nIPSET TEST RESULT: $result" + + exit $retval +} + +function run() +{ + local cmd=$* + local expect="" + local retval; + echo "$cmd" | grep " EXPECT " > /dev/null + if [ $? 
-eq 0 ]; then + expect=${cmd##*EXPECT } + cmd=${cmd% EXPECT*} + #echo -e "expect: $expect\ncommand: $cmd" + fi + + if [ $verbose -eq 1 ]; then + cmdout=$($cmd) # output errors only + else + cmdout=$($cmd 2>/dev/null) # output nothing + fi + retval=$? + + if [ $verbose -gt 1 -a "_$cmdout" != "_" ]; then + echo -e "$cmdout" # output all + fi + + if [ $retval -ne 0 ]; + then + echo -e "[ Fail ] $*" + result="Fail" + return 1 + fi + + if [ "_$expect" != "_" ]; then + if [ "_$cmdout" != "_$expect" ]; then + echo -e "[ Fail ] $*" + result="Fail" + return 1 + fi + fi + + echo -e "[ OK ] $*" + return 0 +} + + +# global +echo -e "global" +ipset list +ipset show + +# bitmap:ip +echo -e "bitmap:ip" +ipset create foo bitmap:ip range 192.168.0.0/16 +ipset add foo 192.168.1.0/26 +ipset test foo 192.168.1.32 EXPECT true +ipset test foo 192.168.2.1 EXPECT false +ipset add foo 192.168.2.1 +ipset test foo 192.168.2.1 EXPECT true +ipset add foo 10.100.100.100 +ipset test foo 10.100.100.100 EXPECT false +ipset destroy foo + +# bitmap:port +echo -e "bitmap:port" +ipset create foo bitmap:port range 0-65535 +ipset add foo tcp:80 +ipset add foo tcp:8080 +ipset test foo tcp:80 EXPECT true +ipset test foo tcp:8080 EXPECT true +ipset test foo udp:80 EXPECT false +ipset test foo tcp:41235 EXPECT false +ipset add foo udp:80 +ipset test foo udp:80 EXPECT true +ipset del foo tcp:8080 +ipset test foo tcp:8080 EXPECT false +ipset flush foo +ipset destroy foo + +# bitmap:ip,mac +echo -e "bitmap:ip,mac" +ipset create foo bitmap:ip,mac range 192.168.0.0/16 +ipset add foo 192.168.1.1,12:34:56:78:9A:BC +ipset add foo 192.168.2.2 +ipset test foo 192.168.1.1,12:34:56:78:9A:BC EXPECT true +ipset test foo 192.168.1.1,12:34:56:78:A9:BC EXPECT false +ipset test foo 192.168.1.1,0:0:0:0:0:0 EXPECT true +ipset test foo 192.168.1.1 EXPECT true +ipset test foo 192.168.2.2 EXPECT true +ipset test foo 192.168.2.2,1:2:3:4:5:6 EXPECT true +ipset test foo 192.168.2.1 EXPECT false +ipset destroy foo + +# hash:ip +echo 
-e "hash:ip" +ipset create foo hash:ip comment +ipset add foo 10.100.100.100 comment a-single-address +ipset add foo 192.168.1.0/24 +ipset list foo +ipset test foo 10.100.100.100 EXPECT true +ipset test foo 192.168.1.12 EXPECT true +ipset test foo 192.168.2.0 EXPECT false +ipset test foo 0.0.0.0 EXPECT false +ipset destroy foo +ipset -6 create bar hash:ip hashsize 128 maxelem 4096 +ipset add bar 2001::1 +ipset add bar 2001::2:1 +ipset add bar :: +ipset test bar 2001::2:1 EXPECT true +ipset test bar ::1 EXPECT false +ipset test bar :: EXPECT true +ipset destroy bar + +# hash:ip,port +echo -e "hash:ip,port" +ipset create foo hash:ip,port +ipset add foo 192.168.1.0/30,tcp:80-82 +ipset add foo 192.168.1.0/30,udp:80-82 +ipset -v test foo 192.168.1.1,tcp:81 EXPECT "192.168.1.1,tcp:81 is in set foo" +ipset test foo 192.168.1.0,upd:80 EXPECT false +ipset add foo 172.27.1.3-172.27.1.5 # match ip only +ipset test foo 172.27.1.5 EXPECT true +ipset test foo 172.27.1.4,0 EXPECT true +ipset test foo 172.27.1.4,tcp:0 EXPECT false +ipset add foo 172.27.20.20-172.27.20.21,80-82 # zero proto match +ipset test foo 172.27.20.20,81 EXPECT true +ipset test foo 172.27.20.20,tcp:81 EXPECT false +ipset flush foo +ipset destroy foo +ipset -6 create bar hash:ip,port +ipset add bar 2001::1,tcp:8080-8082 +ipset add bar 2001::1,udp:80 +ipset add bar 2001::2,0 # match ip only +ipset test bar 2001::1,tcp:8081 EXPECT true +ipset test bar 2001::1,udp:8081 EXPECT false +ipset test bar 2001::1,udp:80 EXPECT true +ipset test bar 2001::2 EXPECT true +ipset destroy bar + +# hash:net +echo -e "hash:net" +ipset create foo hash:net +ipset add foo 192.168.0.0/24 +ipset add foo 10.1.0.0/16 +ipset add foo 192.168.0.100/30 nomatch +ipset test foo 10.1.100.100 EXPECT true +ipset test foo 192.168.0.104 EXPECT true +ipset test foo 192.168.0.102 EXPECT false +ipset add foo 10.1.1.1 nomatch +ipset test foo 10.1.1.1 EXPECT false +ipset del foo 10.1.1.1 +ipset test foo 10.1.1.1 EXPECT true +ipset destroy foo +ipset 
-6 create bar hash:net +ipset add bar 2001::/64 +ipset test bar 2001::4:3:2:1 EXPECT true +ipset test bar 2001:1::4:3:2:1 EXPECT false +ipset test bar 2001::1 EXPECT true +ipset add bar 2001::/120 nomatch +ipset test bar 2001::1 EXPECT false +ipset destroy bar + +# hash:ip,port,ip +echo -e "hash:ip,port,ip" +ipset create foo hash:ip,port,ip comment +ipset add foo 192.168.1.16/30,tcp:8080-8082,192.168.2.100-192.168.2.105 comment "a-test-range" +ipset test foo 192.168.1.18,tcp:8081,192.168.2.101 EXPECT true +ipset test foo 192.168.1.16,tcp:8080,192.168.2.105 EXPECT true +ipset test foo 192.168.1.20,tcp:8081,192.168.2.101 EXPECT false +ipset test foo 192.168.1.18,tcp:8081,192.168.2.106 EXPECT false +ipset test foo 192.168.1.18,udp:8081,192.168.2.101 EXPECT false +ipset test foo 192.168.1.19,8081,192.168.2.101 EXPECT false +ipset del foo 192.168.1.18/31,tcp:8081,192.168.2.101 +ipset test foo 192.168.1.18,tcp:8081,192.168.2.101 EXPECT false +ipset destroy foo +ipset -6 create bar hash:ip,port,ip +ipset add bar 2001::1,udp:80-82,2002::2 +ipset add bar 2001::1,tcp:80-82,2002::2 +ipset add bar 2001::1,80-82,2002::2 +ipset test bar 2001::1,udp:81,2002::2 EXPECT true +ipset test bar 2001::1,tcp:80,2002::2 EXPECT true +ipset test bar 2001::1,82,2002::2 EXPECT true +ipset test bar 2001::2,81,2002::2 EXPECT false +ipset test bar 2001::1,tcp:8080,2002::2 EXPECT false +ipset test bar 2001::1,udp:80,2002::1 EXPECT false +ipset del bar 2001::1,80-82,2002::2 +ipset test bar 2001::1,82,2002::2 EXPECT false +ipset destroy bar + +# hash:net,port,net,port +echo -e "hash:net,port,net,port" +ipset create foo hash:net,port,net,port +ipset add foo 192.168.10.0/24,0,192.168.20.0/24,0 +ipset test foo 192.168.10.123,0,192.168.20.123,0 EXPECT true +ipset test foo 192.168.10.123,tcp:0,192.168.20.123,tcp:0 EXPECT false +ipset add foo 192.168.10.123,tcp:0,192.168.20.123,tcp:0 +ipset test foo 192.168.10.123,tcp:0,192.168.20.123,tcp:0 EXPECT true +ipset add foo 192.168.10.64/26,0,192.168.20.64/26,0 
nomatch +ipset test foo 192.168.10.123,0,192.168.20.123,0 EXPECT false +ipset add foo 192.168.10.123,0,192.168.20.123,0 +ipset test foo 192.168.10.123,0,192.168.20.123,0 EXPECT true +ipset flush foo +ipset add foo 10.64.68.0-10.64.68.100,tcp:80-82,10.128.0.0/16,tcp:8080 +ipset list -v foo +ipset test foo 10.64.68.66,tcp:81,10.128.11.22,tcp:8080 EXPECT true +ipset add foo 10.64.68.64/29,tcp:81,10.128.11.0/24,tcp:8080 nomatch +ipset test foo 10.64.68.66,tcp:81,10.128.11.22,tcp:8080 EXPECT false +ipset destroy foo +ipset -6 create bar hash:net,port,net,port comment +ipset add bar 2001::a:b:c:d/64,udp:8080-8081,2002::/64,udp:6000-6001 +ipset test bar 2001::1:2:3:4,udp:8080,2002::1:2:3:4,udp:6001 EXPECT true +ipset test bar 2001::1:2:3:4:5,udp:8080,2002::1:2:3:4,udp:6001 EXPECT false +ipset test bar 2001::1:2:3:4,udp:8080,2002::1:2:3:4:5,udp:6001 EXPECT false +ipset test bar 2001::1:2:3:4,udp:8082,2002::1:2:3:4,udp:6001 EXPECT false +ipset add bar 2001::/64,udp:8080,2002::1:2:0:0/96,udp:6000-6001 nomatch comment "bad-guys" +ipset test bar 2001::1:2:3:4,udp:8080,2002::1:2:3:4,udp:6001 EXPECT false +ipset test bar 2001::1:2:3:4,udp:8081,2002::1:2:3:4,udp:6001 EXPECT true +ipset test bar 2001::1:2:3:4,udp:8080,2002::2:3:4:5,udp:6001 EXPECT true +ipset destroy bar + +# hash:net,port,iface +echo -e "hash:net,port,iface" +$dpip link show dpdk0 >/dev/null 2>&1 +if [ $? -eq 0 ]; then + ipset create foo hash:net,port,iface comment + ipset add foo 10.64.13.131/16,tcp:80-82,dpdk0 + ipset test foo 10.64.111.222,tcp:81,dpdk0 EXPECT true + ipset test foo 10.64.111.222,81,dpdk0 EXPECT false + $dpip link show dpdk1 >/dev/null 2>&1 + if [ $? 
-eq 0 ]; then + ipset test foo 10.64.111.222,tcp:81,dpdk1 EXPECT false + fi + ipset add foo 10.64.88.100-10.64.88.200,tcp:82,dpdk0 nomatch comment "bad-guys" + ipset list foo -v + ipset test foo 10.64.88.111,tcp:81,dpdk0 EXPECT true + ipset test foo 10.64.88.111,tcp:82,dpdk0 EXPECT false + ipset add foo 10.64.88.111,tcp:82,dpdk0 comment "you-are-an-exception" + ipset test foo 10.64.88.111,tcp:82,dpdk0 EXPECT true + ipset flush foo + ipset destroy foo + ipset -6 create bar hash:net,port,iface hashsize 300 maxelem 1000 + ipset add bar 2001:beef::/64,udp:100-102,dpdk0 + ipset test bar 2001:beef::abcd,udp:100,dpdk0 EXPECT true + ipset add bar 2001:beef::abcd/100,udp:100,dpdk0 nomatch + ipset test bar 2001:beef::abcd,udp:100,dpdk0 EXPECT false + ipset del bar 2001:beef::abcd/100,udp:100,dpdk0 + ipset test bar 2001:beef::abcd,udp:100,dpdk0 EXPECT true + ipset destroy bar +else + echo -e "port dpdk0 not found, skipping hash:net,port,iface test" +fi + +# hash:net,port +echo -e "hash:net,port" +ipset create foo hash:net,port comment +ipset add foo 192.168.100.0-192.168.102.30,tcp:10240 +ipset test foo 192.168.100.111,tcp:10240 EXPECT true +ipset test foo 192.168.101.111,tcp:10240 EXPECT true +ipset test foo 192.168.101.111,tcp:10241 EXPECT false +ipset test foo 192.168.102.111,tcp:10241 EXPECT false +ipset test foo 192.168.102.30,tcp:10240 EXPECT true +ipset test foo 192.168.102.30,10240 EXPECT false +ipset test foo 192.168.102.31,tcp:10240 EXPECT false +ipset add foo 192.168.100.101/25,tcp:10240 nomatch comment "bad-guys" +ipset test foo 192.168.100.111,tcp:10240 EXPECT false +ipset flush foo +ipset add foo 10.128.34.211-10.128.37.189,3000-3001 +ipset test foo 10.128.35.141,3001 EXPECT true +ipset add foo 10.128.35.100-10.128.35.150,3000-3001 nomatch +ipset test foo 10.128.34.210,3000 EXPECT false +ipset test foo 10.128.34.211,3000 EXPECT true +ipset test foo 10.128.37.185,3001 EXPECT true +ipset test foo 10.128.37.190,3001 EXPECT false +ipset test foo 10.128.35.141,3001 
EXPECT false +ipset destroy foo +ipset -6 create bar hash:net,port maxelem 1024 +ipset add bar 2001::/64,tcp:80-88 +ipset test bar 2001::1:2:3:4,tcp:85 EXPECT true +ipset test bar 2001::1111:2222:3333:4444,tcp:88 EXPECT true +ipset test bar 2001::1:2:3:4,tcp:89 EXPECT false +ipset test bar 2001::1:2:3:4:5,tcp:85 EXPECT false +ipset test bar 2001::eeee:aaaa:1:6,tcp:85 EXPECT true +ipset add bar 2001::eeee:aaaa:1243:6789/96,tcp:84-86 nomatch +ipset test bar 2001::eeee:aaaa:1:6,tcp:85 EXPECT false +ipset destroy bar + +# hash:net,port,net +echo -e "hash:net,port,net" +ipset create foo hash:net,port,net +ipset add foo 192.168.188.20-192.168.190.36,2021-2022,192.168.33.223-192.168.34.123 +ipset flush foo +ipset add foo 10.60.0.0/16,tcp:10240-10242,10.130.0.0/16 +ipset test foo 10.60.12.34,tcp:10241,10.130.56.78 EXPECT true +ipset test foo 10.60.0.0,tcp:10242,10.130.255.255 EXPECT true +ipset test foo 10.61.0.0,tcp:10240,10.130.255.255 EXPECT false +ipset test foo 10.60.0.0,udp:10240,10.130.255.255 EXPECT false +ipset test foo 10.60.100.168,tcp:10242,10.130.100.192 EXPECT true +ipset add foo 10.60.100.100-10.60.100.200,tcp:10242,10.130.100.100-10.130.100.200 nomatch +ipset test foo 10.60.100.168,tcp:10242,10.130.100.192 EXPECT false +ipset test foo 10.60.100.168,tcp:10241,10.130.100.192 EXPECT true +ipset test foo 10.60.100.201,tcp:10242,10.130.100.192 EXPECT true +ipset destroy foo +ipset -6 create bar hash:net,port,net hashsize 1024 maxelem 4096 comment +ipset add bar 210e:36a9::aa:bbbb/96,udp:8080-8082,2408:a91e::cc:dddd/96 comment "test-entries" +ipset test bar 210e:36a9::12:3456,udp:8080,2408:a91e::78:9abc EXPECT true +ipset test bar 210e:36a9::ff:ffff,udp:8080,2408:a91e:: EXPECT true +ipset test bar 210e:36a9::,udp:8082,2408:a91e::ff:ffff EXPECT true +ipset add bar 210e:36a9::12:3456/102,udp:8080,2408:a91e::78:9abc/102 nomatch +ipset test bar 210e:36a9::12:3456,udp:8080,2408:a91e::78:9abc EXPECT false +ipset del bar 
210e:36a9::12:3456/102,udp:8080,2408:a91e::78:9abc/102 +ipset test bar 210e:36a9::ff:ffff,udp:8080,2408:a91e:: EXPECT true +ipset test bar 210e:36a9::12:3456,udp:8080,2408:a91e::78:9abc EXPECT true +ipset destroy bar + +# hash:ip,port,net +echo -e "hash:ip,port,net" +ipset create foo hash:ip,port,net comment +ipset add foo 192.168.12.1/24,tcp:8080,192.168.100.0/24 +ipset test foo 192.168.12.211,tcp:8080,192.168.100.211 EXPECT true +ipset test foo 192.168.12.0,tcp:8080,192.168.100.255 EXPECT true +ipset test foo 192.168.12.211,tcp:8080,192.168.101.0 EXPECT false +ipset test foo 192.168.13.0,tcp:8080,192.168.100.211 EXPECT false +ipset test foo 192.168.12.211,8080,192.168.100.211 EXPECT false +ipset add foo 192.168.12.200-192.168.12.255,tcp:8080,192.168.100.200-192.168.100.255 nomatch +ipset list -v +ipset test foo 192.168.12.211,tcp:8080,192.168.100.111 EXPECT true +ipset test foo 192.168.12.211,tcp:8080,192.168.100.211 EXPECT false +ipset add foo 192.168.12.211,tcp:8080,192.168.100.211/32 comment "I'm-innocent" +ipset test foo 192.168.12.211,tcp:8080,192.168.100.211 EXPECT true +ipset del foo 192.168.12.200-192.168.12.255,tcp:8080,192.168.100.200-192.168.100.255 +ipset flush foo +ipset add foo 10.61.240.1-10.61.240.9,udp:10240-10242,10.110.123.102/21 +ipset test foo 10.61.240.3,udp:10240,10.110.123.123 EXPECT true +ipset test foo 10.61.240.6,udp:10241,10.110.120.1 EXPECT true +ipset test foo 10.61.240.9,udp:10242,10.110.127.255 EXPECT true +ipset test foo 10.61.240.3,udp:10243,10.110.123.123 EXPECT false +ipset destroy foo +ipset -6 create bar hash:ip,port,net +ipset add bar 2001::1,8080-8082,2002::/64 +ipset add bar 2001::2,8080-8082,2002::/64 +ipset add bar 2001::3,8080-8082,2002::/64 +ipset add bar 2001::1,8080-8082,2002::aaaa:bbbb:ccc1:2222/108 nomatch +ipset test bar 2001::1,8081,2002::1:2:3:4 EXPECT true +ipset test bar 2001::1,8081,2002::1:2:3:4:5 EXPECT false +ipset test bar 2001::2,8080,2002:: EXPECT true +ipset test bar 
2001::1,8081,2002::aaaa:bbbb:ccc1:2345 EXPECT false +ipset test bar 2001::1,8081,2002::aaaa:bbbb:cc11:2345 EXPECT true +ipset destroy bar diff --git a/test/release/v1.9.2/performance.data b/test/release/v1.9.2/performance.data new file mode 100644 index 000000000..743cb2ff0 --- /dev/null +++ b/test/release/v1.9.2/performance.data @@ -0,0 +1,22 @@ +* TCP CPS/CC Tests +workers,cps;ipackets/pps,opackets/pps,ibytes/Bps,obytes/Bps;connections;pktRx,pktTx,bitsRx,bitsTx,dropTx +1,200000;1211533,1211847,99143458,102396220;1472000;600020,599988,393618488,382378808,0 +2,360000;2166961,2166955,177320954,183100299;2701000;1072119,1076034,703360424,685830112,0 +4,660000;3960726,3960788,324114391,334680450;4941000;1980045,1980054,1298916032,1261958232,0 +8,1060000;6360626,6360628,520511025,537472046;7949000;3180092,3180068,2086137680,2026768232,0 +10,1240000;7440784,7440727,608903706,628741279;9299000;3718514,3719316,2439334056,2370499504,0 +16,1070000;6420639,6420548,525422150,542537169;8019000;3210000,3209989,2105751088,2045839664,0 (cross-numa-node) + +* UDP PPS Tests +workers,connections;ipackets/pps,opackets/pps,ibytes/Bps,obytes/Bps;pktRx,pktTx,bitsRx,bitsTx,dropTx +1,2900;2900244,2900221,174014668,174013684;1449993,1450000,695996816,498800000,0 +2,5000;5000418,5000370,300024968,300022497;2499954,2500000,1199978096,860000000,0 +4,9200;9201066,9201048,552063906,552062986;4486101,4600001,2153329128,1582400344,0 +8,9450;9451027,9451004,567061568,567060365;4723923,4724932,2267483216,1625376608,0 + +* Throughput Tests +workers,connections;ipackets/pps,opackets/pps,ibytes/Bps,obytes/Bps;pktRx,pktTx,bitsRx,bitsTx,dropTx +1,1000;1424608,1424599,1215824068,1215816616;712263,712285,4866168760,4860632840,0 +2,1000;1424748,1424738,1215947746,1215939706;712247,712263,4866065328,4860482712,0 +4,1000;1424876,1424870,1216052235,1216047912;712258,712238,4866134600,4860312112,0 +8,1000;1424788,1424787,1215971428,1215970249;712261,712260,4866160976,4860462240,0 diff --git 
a/test/release/v1.9.2/performance.md b/test/release/v1.9.2/performance.md new file mode 100644 index 000000000..9ed7094e3 --- /dev/null +++ b/test/release/v1.9.2/performance.md @@ -0,0 +1,257 @@ +DPVS v1.9.2 Performance Tests +=== + +* [Test Platform](#platform) +* [TCP CPS/CC Tests](#cps/cc) +* [UDP PPS Tests](#pps) +* [Throughput Tests](#throughput) + + + + +# Test Platform + +The performance of DPVS v1.9.2 is examined on two physical servers: one serves as the DPVS server, and the other as both backend server (RS) and client (Client). RS and Client take advantage of [dperf](https://github.com/baidu/dperf), a high performance benchmark tool based on DPDK developed by baidu. The dperf server process and dperf client process use isolated NIC interfaces, CPU cores, and hugepage memory in order to run both processes on a single node. + +### DPVS Server + ++ CPU: Intel(R) Xeon(R) CPU E5-2650 v4 @ 2.20GHz, 2 Sockets, 12 Cores per Socket, 2 Threads per Core ++ Memory: 188G Bytes ++ NIC: Intel Corporation Ethernet Controller 10-Gigabit X540-AT2 ++ OS: Centos 7.6 ++ DPVS: v1.9.2 + +### Dperf Server/Client + ++ CPU: Intel(R) Xeon(R) CPU E5-2650 v3 @ 2.30GHz, 2 Sockets, 10 Cores per Socket, 2 Threads per Core ++ Memory: 62G Bytes ++ NIC: Intel Corporation Ethernet Controller 10-Gigabit X540-AT2 ++ OS: Centos 7.6 ++ Dperf: v1.2.0 + + + +# TCP CPS/CC Tests + +CPS (Connections per Second) and CC (Concurrent Connections) tests are performed by using extremely small packets (payload_size=1) and variable `cps` of dperf clients. We gradually increase the `cps` of dperf clients until packet loss is seen in DPVS, and then the corresponding CPS and CC are the performance data that we need.
+ +### Dperf Client + +``` +mode client +cpu 8-15 +slow_start 60 +tx_burst 128 +launch_num 10 +payload_size 1 +duration 90s +protocol tcp +cps [refer to performance data] +port 0000:04:00.0 192.168.0.30 192.168.7.254 +client 192.168.3.0 50 +server 192.168.5.1 8 +listen 80 1 +``` + +### Dperf Server + +``` +mode server +cpu 0-7 +tx_burst 128 +payload_size 1 +duration 100d +port 0000:04:00.1 192.168.1.30 192.168.7.254 +client 192.168.0.28 1 +client 192.168.1.28 1 +client 192.168.1.30 1 +client 192.168.3.0 200 +server 192.168.6.100 8 +listen 80 1 +``` + +### DPVS + ++ Service: 192.168.5.[1-8]:80, TCP, FullNAT, rr, syn-proxy off ++ Local IP: 192.168.3.[100-149] + +``` +TCP 192.168.5.1:80 rr + -> 192.168.6.100:80 FullNat 100 0 4 + -> 192.168.6.101:80 FullNat 100 0 4 + -> 192.168.6.102:80 FullNat 100 0 2 + -> 192.168.6.103:80 FullNat 100 0 1 + -> 192.168.6.104:80 FullNat 100 0 0 + -> 192.168.6.105:80 FullNat 100 0 0 + -> 192.168.6.106:80 FullNat 100 0 1 + -> 192.168.6.107:80 FullNat 100 0 2 +TCP 192.168.5.2:80 rr + -> 192.168.6.100:80 FullNat 100 0 1 + -> 192.168.6.101:80 FullNat 100 0 2 + ... +... 
+``` + +### Performance Data + +| workers | cps | ipackets/pps | opackets/pps | ibytes/Bps | obytes/Bps | connections | dperf:pktRx | dperf:pktTx | dperf:bitsRx | dperf:bitsTx | dperf:dropTx | +| ------- | --------- | ------------ | ------------ | ----------- | ----------- | ----------- | ----------- | ----------- | ------------- | ------------- | ------------ | +| 1 | 200,000 | 1,211,533 | 1,211,847 | 99,143,458 | 102,396,220 | 1,472,000 | 600,020 | 599,988 | 393,618,488 | 382,378,808 | 0 | +| 2 | 360,000 | 2,166,961 | 2,166,955 | 177,320,954 | 183,100,299 | 2,701,000 | 1,072,119 | 1,076,034 | 703,360,424 | 685,830,112 | 0 | +| 4 | 660,000 | 3,960,726 | 3,960,788 | 324,114,391 | 334,680,450 | 4,941,000 | 1,980,045 | 1,980,054 | 1,298,916,032 | 1,261,958,232 | 0 | +| 8 | 1,060,000 | 6,360,626 | 6,360,628 | 520,511,025 | 537,472,046 | 7,949,000 | 3,180,092 | 3,180,068 | 2,086,137,680 | 2,026,768,232 | 0 | +| 10 | 1,240,000 | 7,440,784 | 7,440,727 | 608,903,706 | 628,741,279 | 9,299,000 | 3,718,514 | 3,719,316 | 2,439,334,056 | 2,370,499,504 | 0 | +| 16 | 1,070,000 | 6,420,639 | 6,420,548 | 525,422,150 | 542,537,169 | 8,019,000 | 3,210,000 | 3,209,989 | 2,105,751,088 | 2,045,839,664 | 0 | + + +![CPS/CC](./pics/tcp_cps.png) + +In case of 8-workers, DPVS v1.9.2 can establish **1,000,000 new connections per second**, and hold **8,000,000 concurrent connections** at the same time. The performance scales approximately linearly when the worker number is below 10. But an obvious performance loss is seen in 16-workers. One reason is that DPVS doesn't eliminate all race conditions in datapath, and the problem gets worse with the increase of worker number. Besides, some DPVS workers are assigned to the CPU cores on a NUMA socket different from that of the NIC when running with 16-workers. Our DPVS server only has 12 CPU cores available per socket. + +Let's take a closer look at the `cpu-clock` events of DPVS with the Linux performance analysis tool `perf`.
We build DPVS with debug info and then run CPS/CC tests with 1-worker and 8-workers, with dperf `cps` configured to be 100,000 and 600,000 respectively. The performance flame graphs are shown below. + +![perf-flame-worker-1](./pics/worker1.svg) + +![perf-flame-worker-8](./pics/worker8.svg) + + + +# UDP PPS Tests + +In PPS tests, dperf clients keep a fixed `cps` of 3k and `keepalive` of 2ms, and adjust concurrent connections `cc` to generate different `pps` traffic. As in the CPS/CC tests, an extremely small payload of 1 byte is used. We use UDP protocol for the tests. Besides, `tx_burst` in dperf client is set to 1 to reduce traffic surge. + +### Dperf Client + +``` +mode client +cpu 8-15 +slow_start 60 +tx_burst 128 +launch_num 1 +payload_size 1 +duration 90s +protocol udp +cps 3k +cc [refer to performance data] +keepalive 2ms +port 0000:04:00.0 192.168.0.30 192.168.7.254 +client 192.168.3.0 50 +server 192.168.5.1 8 +listen 80 1 +``` +### Dperf Server + +``` +mode server +cpu 0-7 +tx_burst 128 +payload_size 1 +duration 100d +protocol udp +keepalive 10s +port 0000:04:00.1 192.168.1.30 192.168.7.254 +client 192.168.0.28 1 +client 192.168.1.28 1 +client 192.168.1.30 1 +client 192.168.3.0 200 +server 192.168.6.100 8 +listen 80 1 +``` + +### DPVS + ++ Service: 192.168.5.[1-8]:80, UDP, FullNAT, rr, uoa off ++ Local IP: 192.168.3.[100-149] + +``` +UDP 192.168.5.1:80 rr + -> 192.168.6.100:80 FullNat 100 0 0 + -> 192.168.6.101:80 FullNat 100 0 0 + -> 192.168.6.102:80 FullNat 100 0 0 + -> 192.168.6.103:80 FullNat 100 0 0 + -> 192.168.6.104:80 FullNat 100 0 0 + -> 192.168.6.105:80 FullNat 100 0 0 + -> 192.168.6.106:80 FullNat 100 0 0 + -> 192.168.6.107:80 FullNat 100 0 0 +UDP 192.168.5.2:80 rr + -> 192.168.6.100:80 FullNat 100 0 0 + -> 192.168.6.101:80 FullNat 100 0 0 + ... +...
+``` + +### Performance Data + +| workers | connections | ipackets/pps | opackets/pps | ibytes/Bps | obytes/Bps | dperf:pktRx | dperf:pktTx | dperf:bitsRx | dperf:bitsTx | dperf:dropTx | +| ------- | ----------- | ------------ | ------------ | ----------- | ----------- | ----------- | ----------- | ------------- | ------------- | ------------ | +| 1 | 2,900 | 2,900,244 | 2,900,221 | 174,014,668 | 174,013,684 | 1,449,993 | 1,450,000 | 695,996,816 | 498,800,000 | 0 | +| 2 | 5,000 | 5,000,418 | 5,000,370 | 300,024,968 | 300,022,497 | 2,499,954 | 2,500,000 | 1,199,978,096 | 860,000,000 | 0 | +| 4 | 9,200 | 9,201,066 | 9,201,048 | 552,063,906 | 552,062,986 | 4,486,101 | 4,600,001 | 2,153,329,128 | 1,582,400,344 | 0 | +| 8 | 9,450 | 9,451,027 | 9,451,004 | 567,061,568 | 567,060,365 | 4,723,923 | 4,724,932 | 2,267,483,216 | 1,625,376,608 | 0 | + +![PPS](./pics/udp_pps.png) + +As shown above, DPVS v1.9.2 can reach the peak of PPS (i.e., about 9,000,000 PPS) with 4-workers in the tests. We may need a 25G/100G NIC for a higher PPS test. + + + +# Throughput Tests + +In throughput tests, dperf clients keep a fixed `cps` of 400 and `keepalive` of 1ms, and adjust concurrent connections `cc` to generate different `pps` traffic. The `payload_size` of both dperf server and dperf client is set to 800 bytes, and TCP protocol is used.
+ +### Dperf Client + +``` +mode client +cpu 8-15 +slow_start 60 +tx_burst 128 +launch_num 10 +payload_size 800 +duration 90s +protocol tcp +cps 400 +cc [refer to performance data] +keepalive 1ms +port 0000:04:00.0 192.168.0.30 192.168.7.254 +client 192.168.3.0 50 +server 192.168.5.1 8 +listen 80 1 +``` + +### Dperf Server + +``` +mode server +cpu 0-7 +tx_burst 128 +payload_size 800 +duration 100d +protocol tcp +keepalive 10s +port 0000:04:00.1 192.168.1.30 192.168.7.254 +client 192.168.0.28 1 +client 192.168.1.28 1 +client 192.168.1.30 1 +client 192.168.3.0 200 +server 192.168.6.100 8 +listen 80 1 +``` + +### DPVS + +DPVS configurations are the same as in the `TCP CPS/CC Tests`. + + +### Performance Data + +| workers | connections | ipackets/pps | opackets/pps | ibytes/Bps | obytes/Bps | dperf:pktRx | dperf:pktTx | dperf:bitsRx | dperf:bitsTx | dperf:dropTx | +| ------- | ----------- | ------------ | ------------ | ------------- | ------------- | ----------- | ----------- | ------------- | ------------- | ------------ | +| 1 | 1,000 | 1,424,608 | 1,424,599 | 1,215,824,068 | 1,215,816,616 | 712,263 | 712,285 | 4,866,168,760 | 4,860,632,840 | 0 | +| 2 | 1,000 | 1,424,748 | 1,424,738 | 1,215,947,746 | 1,215,939,706 | 712,247 | 712,263 | 4,866,065,328 | 4,860,482,712 | 0 | +| 4 | 1,000 | 1,424,876 | 1,424,870 | 1,216,052,235 | 1,216,047,912 | 712,258 | 712,238 | 4,866,134,600 | 4,860,312,112 | 0 | +| 8 | 1,000 | 1,424,788 | 1,424,787 | 1,215,971,428 | 1,215,970,249 | 712,261 | 712,260 | 4,866,160,976 | 4,860,462,240 | 0 | + +![Throughput](./pics/tcp_throughput.png) + +As shown above, DPVS v1.9.2 fills with ease the full bandwidth of a 10G NIC using only one worker.
diff --git a/test/release/v1.9.2/pics/tcp_cps.png b/test/release/v1.9.2/pics/tcp_cps.png new file mode 100644 index 000000000..738d5bcd1 Binary files /dev/null and b/test/release/v1.9.2/pics/tcp_cps.png differ diff --git a/test/release/v1.9.2/pics/tcp_throughput.png b/test/release/v1.9.2/pics/tcp_throughput.png new file mode 100644 index 000000000..b658d0dfd Binary files /dev/null and b/test/release/v1.9.2/pics/tcp_throughput.png differ diff --git a/test/release/v1.9.2/pics/udp_pps.png b/test/release/v1.9.2/pics/udp_pps.png new file mode 100644 index 000000000..78406056a Binary files /dev/null and b/test/release/v1.9.2/pics/udp_pps.png differ diff --git a/test/release/v1.9.2/pics/worker1.svg b/test/release/v1.9.2/pics/worker1.svg new file mode 100644 index 000000000..2de70817f --- /dev/null +++ b/test/release/v1.9.2/pics/worker1.svg @@ -0,0 +1,3440 @@ + + + + + + + + + + + + + + +Flame Graph + +Reset Zoom +Search +ic + + + +dp_vs_conn_hashkey (41 samples, 0.01%) + + + +dp_vs_conn_put (182 samples, 0.05%) + + + +rte_eth_rx_burst (7,395 samples, 1.85%) +r.. 
+ + +tcp_fnat_out_handler (71 samples, 0.02%) + + + +tcp_in_adjust_seq (38 samples, 0.01%) + + + +validate_xmit_mbuf (55 samples, 0.01%) + + + +rte_pktmbuf_prepend (160 samples, 0.04%) + + + +rte_atomic32_inc (377 samples, 0.09%) + + + +__rte_jhash_3words (85 samples, 0.02%) + + + +rte_pktmbuf_trim (611 samples, 0.15%) + + + +timer_pending (58 samples, 0.01%) + + + +rte_atomic32_dec (351 samples, 0.09%) + + + +list_empty (39 samples, 0.01%) + + + +lcore-worker-1 (400,419 samples, 100.00%) +lcore-worker-1 + + +list_empty (1,540 samples, 0.38%) + + + +htons@plt (61 samples, 0.02%) + + + +ip4_hdr (52 samples, 0.01%) + + + +htons (90 samples, 0.02%) + + + +__dp_vs_xmit_fnat4 (92 samples, 0.02%) + + + +rte_arch_bswap32 (221 samples, 0.06%) + + + +rte_ether_addr_copy (36 samples, 0.01%) + + + +dpvs_timer_update (97 samples, 0.02%) + + + +port_tab_hashkey (416 samples, 0.10%) + + + +dp_vs_conn_is_template (39 samples, 0.01%) + + + +slave_lcore_loop_func (3,973 samples, 0.99%) + + + +rte_get_tsc_cycles (67 samples, 0.02%) + + + +dp_vs_in (244,276 samples, 61.01%) +dp_vs_in + + +netif_xmit (66 samples, 0.02%) + + + +ip4_hdrlen (69 samples, 0.02%) + + + +timer_pending (211 samples, 0.05%) + + + +__vdso_clock_gettime (44 samples, 0.01%) + + + +ip4_is_frag (733 samples, 0.18%) + + + +list_empty (196 samples, 0.05%) + + + +iftraf_pkt_in (251 samples, 0.06%) + + + +netif_hard_xmit (2,770 samples, 0.69%) + + + +rte_pktmbuf_tailroom (134 samples, 0.03%) + + + +rte_is_zero_ether_addr (809 samples, 0.20%) + + + +dp_vs_rr_schedule (1,052 samples, 0.26%) + + + +rte_atomic32_inc (639 samples, 0.16%) + + + +rte_lcore_id (126 samples, 0.03%) + + + +__list_add (96 samples, 0.02%) + + + +timeval_to_ticks (1,062 samples, 0.27%) + + + +tcp_state_idx (628 samples, 0.16%) + + + +dp_vs_redirect_alloc (121 samples, 0.03%) + + + +rte_pktmbuf_headroom (43 samples, 0.01%) + + + +dpvs_time_rand_delay (39 samples, 0.01%) + + + +__get_laddr (2,628 samples, 0.66%) + + + +__dp_vs_service_get (1,425 
samples, 0.36%) + + + +rte_arch_bswap16 (779 samples, 0.19%) + + + +dpvs_time_rand_delay (620 samples, 0.15%) + + + +rte_is_zero_ether_addr (449 samples, 0.11%) + + + +netif_hard_xmit (50 samples, 0.01%) + + + +mbuf_header_pointer (552 samples, 0.14%) + + + +rte_atomic32_dec (335 samples, 0.08%) + + + +tcp_in_remove_ts (836 samples, 0.21%) + + + +seq_scale (98 samples, 0.02%) + + + +dp_vs_conn_hashkey (67 samples, 0.02%) + + + +rte_lcore_id (54 samples, 0.01%) + + + +dp_vs_redirect_unhash (34 samples, 0.01%) + + + +dp_vs_conn_detach_timer (39 samples, 0.01%) + + + +tcp_in_init_seq (2,672 samples, 0.67%) + + + +__dp_vs_out_xmit_fnat4 (66 samples, 0.02%) + + + +rte_arch_bswap32 (208 samples, 0.05%) + + + +tcp_send_csum (2,225 samples, 0.56%) + + + +SHA1_Final (215 samples, 0.05%) + + + +[unknown] (864 samples, 0.22%) + + + +rte_atomic32_inc (81 samples, 0.02%) + + + +timer_sched_lock (41 samples, 0.01%) + + + +mbuf_userdata (201 samples, 0.05%) + + + +ip4_hdr (457 samples, 0.11%) + + + +mbuf_userdata (70 samples, 0.02%) + + + +whtlst_hashkey (1,910 samples, 0.48%) + + + +OPENSSL_cleanse (396 samples, 0.10%) + + + +rte_atomic32_inc (605 samples, 0.15%) + + + +lcore_process_packets (123 samples, 0.03%) + + + +rte_prefetch0 (196 samples, 0.05%) + + + +rte_ether_addr_copy (263 samples, 0.07%) + + + +mbuf_may_pull (873 samples, 0.22%) + + + +__random_r (90 samples, 0.02%) + + + +netif_hard_xmit (1,451 samples, 0.36%) + + + +ip4_hdr (334 samples, 0.08%) + + + +ipv4_output_fin (184 samples, 0.05%) + + + +dp_vs_conn_resend_packets (38 samples, 0.01%) + + + +dp_vs_fill_iphdr (2,871 samples, 0.72%) + + + +dp_vs_conn_new (48,190 samples, 12.03%) +dp_vs_conn_new + + +htonl (82 samples, 0.02%) + + + +depth_to_mask (107 samples, 0.03%) + + + +rte_atomic32_inc (48 samples, 0.01%) + + + +rte_atomic32_dec (355 samples, 0.09%) + + + +dpvs_time_rand_delay (425 samples, 0.11%) + + + +rte_arch_bswap32 (192 samples, 0.05%) + + + +__dp_vs_in (82 samples, 0.02%) + + + +inet_addr_equal (81 
samples, 0.02%) + + + +get_level_ticks (571 samples, 0.14%) + + + +rte_lcore_id (47 samples, 0.01%) + + + +dp_vs_service_lookup (1,603 samples, 0.40%) + + + +neigh_confirm (124 samples, 0.03%) + + + +dp_vs_redirect_hash (49 samples, 0.01%) + + + +dp_vs_save_outxmit_info (65 samples, 0.02%) + + + +get_level_ticks (38 samples, 0.01%) + + + +netif_xmit (5,143 samples, 1.28%) + + + +timer_sched_unlock (209 samples, 0.05%) + + + +lcore_job_recv_fwd (76 samples, 0.02%) + + + +rte_lcore_id (49 samples, 0.01%) + + + +this_lcore_sched (55 samples, 0.01%) + + + +ipv4_output (7,763 samples, 1.94%) +i.. + + +dp_vs_conn_sa_release (43 samples, 0.01%) + + + +rte_atomic32_inc (81 samples, 0.02%) + + + +lcore_job_timer_manage (58,882 samples, 14.71%) +lcore_job_timer_manage + + +dp_vs_synproxy_snat_handler (508 samples, 0.13%) + + + +rte_pktmbuf_headroom (49 samples, 0.01%) + + + +rte_get_main_lcore (75 samples, 0.02%) + + + +dp_vs_fast_outxmit_fnat (44 samples, 0.01%) + + + +rte_atomic32_clear (68 samples, 0.02%) + + + +ip4_hdrlen (1,311 samples, 0.33%) + + + +__list_add (502 samples, 0.13%) + + + +mbuf_userdata_reset (229 samples, 0.06%) + + + +get_level_ticks (593 samples, 0.15%) + + + +ipv4_rcv (78 samples, 0.02%) + + + +rte_jhash_3words (119 samples, 0.03%) + + + +rte_lcore_id (85 samples, 0.02%) + + + +netif_hard_xmit (4,208 samples, 1.05%) + + + +rte_arch_bswap32 (44 samples, 0.01%) + + + +all (400,419 samples, 100%) + + + +rte_lcore_id (328 samples, 0.08%) + + + +rte_eth_rx_burst (141 samples, 0.04%) + + + +rte_lcore_id (73 samples, 0.02%) + + + +dp_vs_conn_clear_in_timer (34 samples, 0.01%) + + + +ip4_hdr (99 samples, 0.02%) + + + +rte_atomic32_inc (434 samples, 0.11%) + + + +__list_add (158 samples, 0.04%) + + + +timer_sched_lock (172 samples, 0.04%) + + + +dp_vs_conn_put (13,350 samples, 3.33%) +dp_.. 
+ + +dp_vs_conn_is_template (112 samples, 0.03%) + + + +eal_thread_loop (394,869 samples, 98.61%) +eal_thread_loop + + +__rte_raw_cksum (1,312 samples, 0.33%) + + + +qsch_sched_all (172 samples, 0.04%) + + + +msg_slave_process (3,738 samples, 0.93%) + + + +netif_xmit (3,466 samples, 0.87%) + + + +dp_vs_laddr_bind (402 samples, 0.10%) + + + +rte_constant_bswap16 (80 samples, 0.02%) + + + +tcp_in_add_toa (10,139 samples, 2.53%) +tc.. + + +dp_vs_blklst_lookup (162 samples, 0.04%) + + + +SHA1_Update (58 samples, 0.01%) + + + +rte_lcore_id (509 samples, 0.13%) + + + +rte_ether_addr_copy (512 samples, 0.13%) + + + +dp_vs_pre_routing (239 samples, 0.06%) + + + +INET_HOOK (260,530 samples, 65.06%) +INET_HOOK + + +ip4_hdrlen (228 samples, 0.06%) + + + +timer_pending (227 samples, 0.06%) + + + +dp_vs_conn_set_timeout (816 samples, 0.20%) + + + +neigh_key_cmp (889 samples, 0.22%) + + + +dp_vs_fast_xmit_fnat (25,839 samples, 6.45%) +dp_vs_fa.. + + +mbuf_may_pull (667 samples, 0.17%) + + + +netif_xmit (1,766 samples, 0.44%) + + + +dp_vs_service_hashkey (526 samples, 0.13%) + + + +rte_lcore_id (510 samples, 0.13%) + + + +rte_arch_bswap16 (39 samples, 0.01%) + + + +dp_vs_conn_expire (51,252 samples, 12.80%) +dp_vs_conn_expire + + +tcp_conn_sched (53,670 samples, 13.40%) +tcp_conn_sched + + +list_move_tail (663 samples, 0.17%) + + + +rte_pktmbuf_headroom (67 samples, 0.02%) + + + +eth_addr_equal (1,040 samples, 0.26%) + + + +__clock_gettime (892 samples, 0.22%) + + + +rte_is_zero_ether_addr (97 samples, 0.02%) + + + +mbuf_userdata (48 samples, 0.01%) + + + +rte_get_tsc_cycles (2,056 samples, 0.51%) + + + +mbuf_may_pull (227 samples, 0.06%) + + + +dp_vs_out_xmit_fnat (142 samples, 0.04%) + + + +rte_arch_bswap16 (742 samples, 0.19%) + + + +dp_vs_service_lookup (1,375 samples, 0.34%) + + + +dp_vs_in (113 samples, 0.03%) + + + +mbuf_may_pull (218 samples, 0.05%) + + + +ip4_hdr (37 samples, 0.01%) + + + +__rte_jhash_3words (1,800 samples, 0.45%) + + + +__laddr_step (184 samples, 0.05%) 
+ + + +__list_del (52 samples, 0.01%) + + + +ip4_hdr (99 samples, 0.02%) + + + +__rte_raw_cksum_reduce (551 samples, 0.14%) + + + +dp_vs_conn_unhash (9,659 samples, 2.41%) +dp.. + + +__vdso_clock_gettime (700 samples, 0.17%) + + + +__list_del (299 samples, 0.07%) + + + +INET_HOOK (8,074 samples, 2.02%) +I.. + + +get_level_ticks (40 samples, 0.01%) + + + +seq_before (101 samples, 0.03%) + + + +__list_add (134 samples, 0.03%) + + + +ip4_hdr (52 samples, 0.01%) + + + +af_inet_hooks (419 samples, 0.10%) + + + +neigh_process_ring (101 samples, 0.03%) + + + +rte_lcore_id (73 samples, 0.02%) + + + +dp_vs_stats_out (1,747 samples, 0.44%) + + + +inet_addr_equal (1,029 samples, 0.26%) + + + +dp_vs_conn_is_template (93 samples, 0.02%) + + + +rte_lcore_id (82 samples, 0.02%) + + + +ixgbe_recv_pkts_bulk_alloc (69 samples, 0.02%) + + + +dp_vs_synproxy_snat_handler (83 samples, 0.02%) + + + +dp_vs_conn_is_template (179 samples, 0.04%) + + + +dp_vs_dest_is_avail (399 samples, 0.10%) + + + +dp_vs_proto_lookup (588 samples, 0.15%) + + + +ip4_hdr (204 samples, 0.05%) + + + +timeval_to_ticks (45 samples, 0.01%) + + + +__list_del (57 samples, 0.01%) + + + +list_add_tail (45 samples, 0.01%) + + + +ip4_is_frag (421 samples, 0.11%) + + + +mbuf_may_pull (496 samples, 0.12%) + + + +iftraf_pkt_in (98 samples, 0.02%) + + + +ipv4_rcv (269,601 samples, 67.33%) +ipv4_rcv + + +lcore_process_packets (291,276 samples, 72.74%) +lcore_process_packets + + +rte_pktmbuf_append (447 samples, 0.11%) + + + +rte_pktmbuf_append (338 samples, 0.08%) + + + +tcp_send_csum (64 samples, 0.02%) + + + +rte_atomic32_dec_and_test (379 samples, 0.09%) + + + +validate_xmit_mbuf (69 samples, 0.02%) + + + +ip4_hdr (38 samples, 0.01%) + + + +rte_pktmbuf_adj (948 samples, 0.24%) + + + +dpvs_timer_update (99 samples, 0.02%) + + + +rte_atomic32_inc (2,965 samples, 0.74%) + + + +htons (80 samples, 0.02%) + + + +__dp_vs_conn_hash (8,838 samples, 2.21%) +_.. 
+ + +af_inet_hooks (155 samples, 0.04%) + + + +inet_addr_equal (65 samples, 0.02%) + + + +rte_raw_cksum (2,561 samples, 0.64%) + + + +dp_vs_out_xmit_fnat (30,978 samples, 7.74%) +dp_vs_out_.. + + +ixgbe_xmit_pkts (9,393 samples, 2.35%) +i.. + + +sa_pool_hash (821 samples, 0.21%) + + + +__nrand48_r (41 samples, 0.01%) + + + +rte_arch_bswap32 (1,345 samples, 0.34%) + + + +__list_del (95 samples, 0.02%) + + + +blklst_hashkey (2,368 samples, 0.59%) + + + +rte_pktmbuf_lastseg (71 samples, 0.02%) + + + +ip4_hdrlen (75 samples, 0.02%) + + + +rte_atomic32_set (184 samples, 0.05%) + + + +rte_atomic32_inc (578 samples, 0.14%) + + + +dp_vs_conn_hashkey (2,137 samples, 0.53%) + + + +rte_arch_bswap32 (120 samples, 0.03%) + + + +rte_arch_bswap16 (163 samples, 0.04%) + + + +rte_atomic32_inc (700 samples, 0.17%) + + + +rte_lcore_id (46 samples, 0.01%) + + + +rte_pktmbuf_tailroom (205 samples, 0.05%) + + + +mbuf_userdata (37 samples, 0.01%) + + + +rte_atomic32_dec (410 samples, 0.10%) + + + +list_empty (729 samples, 0.18%) + + + +rte_jhash_3words (6,972 samples, 1.74%) + + + +rte_eth_tx_burst (9,624 samples, 2.40%) +rt.. + + +dp_vs_save_xmit_info (2,293 samples, 0.57%) + + + +ntohl@plt (127 samples, 0.03%) + + + +rte_lcore_id (60 samples, 0.01%) + + + +INIT_LIST_HEAD (83 samples, 0.02%) + + + +rte_ipv4_phdr_cksum (37 samples, 0.01%) + + + +tcp_secure_sequence_number (1,258 samples, 0.31%) + + + +list_add_tail (923 samples, 0.23%) + + + +rte_socket_id (122 samples, 0.03%) + + + +[libcrypto.so.1.0.2k] (123 samples, 0.03%) + + + +neigh_hashkey (348 samples, 0.09%) + + + +neigh_key_cmp (88 samples, 0.02%) + + + +rte_get_main_lcore (49 samples, 0.01%) + + + +neigh_key_cmp (513 samples, 0.13%) + + + +lcore_job_xmit (136 samples, 0.03%) + + + +rte_ether_addr_copy (179 samples, 0.04%) + + + +dpvs_timer_update (9,289 samples, 2.32%) +d.. 
+ + +rte_atomic32_dec (602 samples, 0.15%) + + + +netif_xmit (142 samples, 0.04%) + + + +rte_lcore_id (569 samples, 0.14%) + + + +sa_fetch (22,936 samples, 5.73%) +sa_fetch + + +rte_pktmbuf_prepend (188 samples, 0.05%) + + + +__memset_sse2 (142 samples, 0.04%) + + + +rte_timer_manage (53,183 samples, 13.28%) +rte_timer_manage + + +mbuf_userdata (110 samples, 0.03%) + + + +dp_vs_conn_is_in_timer (40 samples, 0.01%) + + + +rte_is_zero_ether_addr (503 samples, 0.13%) + + + +rte_atomic32_dec (327 samples, 0.08%) + + + +__clock_gettime (392 samples, 0.10%) + + + +rte_atomic32_dec_and_test (384 samples, 0.10%) + + + +ip4_hdrlen (61 samples, 0.02%) + + + +rte_eal_get_configuration (37 samples, 0.01%) + + + +mbuf_userdata (44 samples, 0.01%) + + + +list_del (910 samples, 0.23%) + + + +rte_rdtsc (69 samples, 0.02%) + + + +timer_pending (66 samples, 0.02%) + + + +list_empty (88 samples, 0.02%) + + + +dp_vs_synproxy_dnat_handler (45 samples, 0.01%) + + + +neigh_confirm (4,681 samples, 1.17%) + + + +rte_ipv4_phdr_cksum (4,749 samples, 1.19%) + + + +ifa_lookup (11,232 samples, 2.81%) +if.. + + +inet_is_addr_any (629 samples, 0.16%) + + + +dp_vs_conn_cache_rt (162 samples, 0.04%) + + + +rte_pktmbuf_adj (276 samples, 0.07%) + + + +dp_vs_conn_alloc (1,939 samples, 0.48%) + + + +ip4_hdrlen (755 samples, 0.19%) + + + +ip4_hdrlen (554 samples, 0.14%) + + + +dp_vs_conn_is_template (40 samples, 0.01%) + + + +netif_deliver_mbuf (284,022 samples, 70.93%) +netif_deliver_mbuf + + +rte_atomic32_dec_and_test (387 samples, 0.10%) + + + +dp_vs_conn_refresh_timer (10,775 samples, 2.69%) +dp.. + + +rte_raw_cksum (59 samples, 0.01%) + + + +ip4_hdr (315 samples, 0.08%) + + + +dp_vs_redirect_init (59 samples, 0.01%) + + + +dpvs_timer_update (9,641 samples, 2.41%) +dp.. 
+ + +rte_atomic32_inc (3,326 samples, 0.83%) + + + +rte_atomic32_dec_and_test (470 samples, 0.12%) + + + +xmit_inbound (68,636 samples, 17.14%) +xmit_inbound + + +__memset_sse2 (801 samples, 0.20%) + + + +rte_mbuf_refcnt_read (79 samples, 0.02%) + + + +__dp_vs_pre_routing (9,502 samples, 2.37%) +_.. + + +pkt_type_tab_hashkey (348 samples, 0.09%) + + + +rte_atomic32_read (88 samples, 0.02%) + + + +dp_vs_proto_lookup (117 samples, 0.03%) + + + +rte_lcore_id (81 samples, 0.02%) + + + +rte_lcore_id (175 samples, 0.04%) + + + +netif_port_get (265 samples, 0.07%) + + + +tcp_out_save_seq (61 samples, 0.02%) + + + +rte_raw_cksum (1,762 samples, 0.44%) + + + +list_add_tail (245 samples, 0.06%) + + + +tcp_state_trans (139 samples, 0.03%) + + + +__list_del_entry (344 samples, 0.09%) + + + +netif_tx_burst (9,953 samples, 2.49%) +ne.. + + +dp_vs_whtlst_allow (4,494 samples, 1.12%) + + + +dp_vs_conn_get (29,944 samples, 7.48%) +dp_vs_conn.. + + +rte_lcore_id (41 samples, 0.01%) + + + +timeval_to_ticks (965 samples, 0.24%) + + + +inet_addr_ifa_get (13,947 samples, 3.48%) +ine.. + + +SHA1 (234 samples, 0.06%) + + + +slave_lcore_loop_func (67 samples, 0.02%) + + + +dp_vs_fill_iphdr (2,453 samples, 0.61%) + + + +inet_addr_ifa_get (13,911 samples, 3.47%) +ine.. + + +inet_addr_equal (679 samples, 0.17%) + + + +timer_sched_lock (52 samples, 0.01%) + + + +msg_slave_process (75 samples, 0.02%) + + + +ip4_hdr (281 samples, 0.07%) + + + +dp_vs_dest_is_valid (605 samples, 0.15%) + + + +dp_vs_fill_iphdr (117 samples, 0.03%) + + + +netif_rcv_mbuf (86 samples, 0.02%) + + + +pkt_type_get (162 samples, 0.04%) + + + +ip4_hdr (664 samples, 0.17%) + + + +inet_addr_equal (553 samples, 0.14%) + + + +rte_jhash_3words (50 samples, 0.01%) + + + +__memcpy_ssse3_back (91 samples, 0.02%) + + + +dp_vs_xmit_fnat (52,133 samples, 13.02%) +dp_vs_xmit_fnat + + +rte_atomic32_inc (998 samples, 0.25%) + + + +this_lcore_sched (362 samples, 0.09%) + + + +dp_vs_laddr_bind (26,882 samples, 6.71%) +dp_vs_lad.. 
+ + +tcp_fnat_in_handler (18,202 samples, 4.55%) +tcp_f.. + + +inet_addr_fold (168 samples, 0.04%) + + + +rte_get_timer_cycles (2,246 samples, 0.56%) + + + +dp_vs_conn_is_template (59 samples, 0.01%) + + + +dp_vs_redirect_get (72 samples, 0.02%) + + + +INET_HOOK (7,320 samples, 1.83%) +I.. + + +rte_lcore_id (55 samples, 0.01%) + + + +dp_vs_stats_conn (282 samples, 0.07%) + + + +list_add_tail (2,617 samples, 0.65%) + + + +rte_ipv4_hdr_len (55 samples, 0.01%) + + + +SHA1_Final (77 samples, 0.02%) + + + +af_inet_hooks (66 samples, 0.02%) + + + +dp_vs_conn_free (35 samples, 0.01%) + + + +tuplehash_to_conn (55 samples, 0.01%) + + + +inet_addr_equal (284 samples, 0.07%) + + + +dp_vs_dest_is_avail (45 samples, 0.01%) + + + +ip4_hdr (275 samples, 0.07%) + + + +rte_lcore_id (47 samples, 0.01%) + + + +__list_del (96 samples, 0.02%) + + + +rte_lcore_id (71 samples, 0.02%) + + + +ip4_hdrlen (498 samples, 0.12%) + + + +inet_addr_ifa_put (455 samples, 0.11%) + + + +route_local_hashkey (248 samples, 0.06%) + + + +mbuf_header_pointer (127 samples, 0.03%) + + + +rte_arch_bswap32 (219 samples, 0.05%) + + + +mbuf_header_pointer (349 samples, 0.09%) + + + +dp_vs_synproxy_dnat_handler (92 samples, 0.02%) + + + +dp_vs_conn_is_in_timer (46 samples, 0.01%) + + + +__dpvs_timer_sched (6,594 samples, 1.65%) + + + +syn_proxy_is_ack_storm (83 samples, 0.02%) + + + +af_inet_hooks (66 samples, 0.02%) + + + +syn_proxy_is_ack_storm (247 samples, 0.06%) + + + +dp_vs_service_hashkey (629 samples, 0.16%) + + + +ip4_hdrlen (205 samples, 0.05%) + + + +ip4_hdr (77 samples, 0.02%) + + + +__dp_vs_fast_xmit_fnat4 (25,318 samples, 6.32%) +__dp_vs_.. 
+ + +dp_vs_stats_in (1,639 samples, 0.41%) + + + +sa_pool_fetch (3,202 samples, 0.80%) + + + +timer_sched_lock (164 samples, 0.04%) + + + +rte_rdtsc (2,053 samples, 0.51%) + + + +tcp_hdr (1,035 samples, 0.26%) + + + +__memset_sse2 (61 samples, 0.02%) + + + +neigh_output (5,136 samples, 1.28%) + + + +ip4_hdr (61 samples, 0.02%) + + + +ip4_hdr (101 samples, 0.03%) + + + +dp_vs_conn_unbind_dest (1,075 samples, 0.27%) + + + +__dp_vs_xmit_fnat4 (51,330 samples, 12.82%) +__dp_vs_xmit_fnat4 + + +sa4_fetch (22,641 samples, 5.65%) +sa4_fetch + + +tcp_send_rst (68 samples, 0.02%) + + + +lrand48 (44 samples, 0.01%) + + + +tcp_in_init_seq (34 samples, 0.01%) + + + +ip4_hdr (73 samples, 0.02%) + + + +INIT_LIST_HEAD (422 samples, 0.11%) + + + +INET_HOOK (140 samples, 0.03%) + + + +__dp_vs_service_get (1,211 samples, 0.30%) + + + +rte_arch_bswap16 (423 samples, 0.11%) + + + +dpvs_job_loop (394,281 samples, 98.47%) +dpvs_job_loop + + +dp_vs_synproxy_ack_rcv (88 samples, 0.02%) + + + +list_empty (44 samples, 0.01%) + + + +rte_atomic16_read (41 samples, 0.01%) + + + +tcp_state_trans (9,054 samples, 2.26%) +t.. + + +netif_update_worker_loop_cnt (306 samples, 0.08%) + + + +dp_vs_xmit_fnat (143 samples, 0.04%) + + + +list_del (50 samples, 0.01%) + + + +__memset_sse2 (56 samples, 0.01%) + + + +__dp_vs_fast_xmit_fnat4 (83 samples, 0.02%) + + + +netif_port_get (433 samples, 0.11%) + + + +neigh_fill_mac (272 samples, 0.07%) + + + +rte_constant_bswap16 (231 samples, 0.06%) + + + +list_add_tail (119 samples, 0.03%) + + + +ip4_hdr (61 samples, 0.02%) + + + +neigh_lookup_entry (1,097 samples, 0.27%) + + + +dp_vs_conn_hashkey (7,759 samples, 1.94%) +d.. 
+ + +rte_pktmbuf_lastseg (38 samples, 0.01%) + + + +__rte_raw_cksum_reduce (261 samples, 0.07%) + + + +lcore_stats_burst (579 samples, 0.14%) + + + +dp_vs_conn_free (746 samples, 0.19%) + + + +list_del (1,336 samples, 0.33%) + + + +__list_del_entry (302 samples, 0.08%) + + + +inet_addr_equal (905 samples, 0.23%) + + + +rte_lcore_id (551 samples, 0.14%) + + + +rte_is_zero_ether_addr (91 samples, 0.02%) + + + +dp_vs_blklst_lookup (6,813 samples, 1.70%) + + + +ip4_hdr (169 samples, 0.04%) + + + +dp_vs_conn_unhash (43 samples, 0.01%) + + + +dp_vs_synproxy_syn_rcv (149 samples, 0.04%) + + + +dp_vs_synproxy_ack_rcv (53 samples, 0.01%) + + + +port_tab_hashkey (97 samples, 0.02%) + + + +tcp_send_csum (4,123 samples, 1.03%) + + + +list_add (3,280 samples, 0.82%) + + + +rte_is_zero_ether_addr (672 samples, 0.17%) + + + +validate_xmit_mbuf (198 samples, 0.05%) + + + +netif_port_get (103 samples, 0.03%) + + + +this_lcore_sched (42 samples, 0.01%) + + + +__dp_vs_fast_outxmit_fnat4 (26,311 samples, 6.57%) +__dp_vs_.. 
+ + +memset@plt (90 samples, 0.02%) + + + +rte_ether_addr_copy (253 samples, 0.06%) + + + +list_empty (93 samples, 0.02%) + + + +rte_atomic32_inc (294 samples, 0.07%) + + + +mbuf_userdata (47 samples, 0.01%) + + + +neigh_key_cmp (41 samples, 0.01%) + + + +rte_ipv4_phdr_cksum (1,546 samples, 0.39%) + + + +tcp_hdr (684 samples, 0.17%) + + + +ip4_hdrlen (153 samples, 0.04%) + + + +eth_type_parse (157 samples, 0.04%) + + + +SHA1_Update (98 samples, 0.02%) + + + +dp_vs_conn_set_timeout (316 samples, 0.08%) + + + +rte_atomic32_inc (3,087 samples, 0.77%) + + + +route_out_local_lookup (1,336 samples, 0.33%) + + + +tcp_conn_lookup (165 samples, 0.04%) + + + +rte_ipv4_phdr_cksum (3,114 samples, 0.78%) + + + +rte_atomic32_dec_and_test (389 samples, 0.10%) + + + +list_del (45 samples, 0.01%) + + + +rte_arch_bswap32 (47 samples, 0.01%) + + + +dpvs_timer_cancel_nolock (349 samples, 0.09%) + + + +rte_mbuf_refcnt_read (82 samples, 0.02%) + + + +rte_atomic32_set (52 samples, 0.01%) + + + +lcore_process_arp_ring (86 samples, 0.02%) + + + +rte_atomic32_dec (1,694 samples, 0.42%) + + + +htons (87 samples, 0.02%) + + + +ipv4_output_fin2 (6,673 samples, 1.67%) + + + +rte_atomic32_read (453 samples, 0.11%) + + + +tcp_out_adjust_seq (1,766 samples, 0.44%) + + + +dp_vs_save_outxmit_info (1,761 samples, 0.44%) + + + +tcp_secure_sequence_number (46 samples, 0.01%) + + + +dp_vs_conn_put (13,594 samples, 3.39%) +dp_.. 
+ + +mbuf_may_pull (37 samples, 0.01%) + + + +route4_output (2,479 samples, 0.62%) + + + +dp_vs_proto_lookup (66 samples, 0.02%) + + + +rte_pktmbuf_prepend (512 samples, 0.13%) + + + +get_level_ticks (60 samples, 0.01%) + + + +rte_lcore_id (89 samples, 0.02%) + + + +tcp_send_csum (70 samples, 0.02%) + + + +tcp_send_csum (6,355 samples, 1.59%) + + + +dp_vs_redirect_free (36 samples, 0.01%) + + + +rte_prefetch0 (222 samples, 0.06%) + + + +list_add_tail (151 samples, 0.04%) + + + +inet_addr_fold (89 samples, 0.02%) + + + +dp_vs_conn_set_timeout (67 samples, 0.02%) + + + +htonl (44 samples, 0.01%) + + + +rte_lcore_id (76 samples, 0.02%) + + + +timer_pending (37 samples, 0.01%) + + + +dp_vs_dest_is_avail (664 samples, 0.17%) + + + +dp_vs_conn_is_in_timer (350 samples, 0.09%) + + + +dp_vs_conn_clear_in_timer (37 samples, 0.01%) + + + +rte_ether_addr_copy (42 samples, 0.01%) + + + +neigh_hashkey (66 samples, 0.02%) + + + +tcp_out_init_seq (70 samples, 0.02%) + + + +rte_arch_bswap32 (188 samples, 0.05%) + + + +pkt_type_tab_hashkey (75 samples, 0.02%) + + + +dp_vs_conn_get_timeout (114 samples, 0.03%) + + + +__list_del (583 samples, 0.15%) + + + +validate_xmit_mbuf (123 samples, 0.03%) + + + +mbuf_header_pointer (141 samples, 0.04%) + + + +dev_get_idev (47 samples, 0.01%) + + + +rte_arch_bswap32 (93 samples, 0.02%) + + + +ifa_put (428 samples, 0.11%) + + + +rte_is_zero_ether_addr (99 samples, 0.02%) + + + +dp_vs_conn_is_template (251 samples, 0.06%) + + + +ip4_hdr (47 samples, 0.01%) + + + +rte_arch_bswap32 (34 samples, 0.01%) + + + +netif_xmit (93 samples, 0.02%) + + + +list_add_tail (130 samples, 0.03%) + + + +ixgbe_recv_pkts_bulk_alloc (5,559 samples, 1.39%) + + + +inet_addr_ifa_put (458 samples, 0.11%) + + + +dp_vs_dest_is_avail (53 samples, 0.01%) + + + +timer_pending (44 samples, 0.01%) + + + +tcp_out_adjust_mss (319 samples, 0.08%) + + + +dp_vs_fill_iphdr (141 samples, 0.04%) + + + +tcp_hdr (45 samples, 0.01%) + + + +SHA1_Init (268 samples, 0.07%) + + + 
+get_level_ticks (221 samples, 0.06%) + + + +INET_HOOK (34 samples, 0.01%) + + + +dp_vs_conn_get (215 samples, 0.05%) + + + +dp_vs_synproxy_syn_rcv (4,196 samples, 1.05%) + + + +rte_lcore_id (45 samples, 0.01%) + + + +rte_atomic32_inc (112 samples, 0.03%) + + + +rte_atomic32_read (222 samples, 0.06%) + + + +__dpvs_timer_sched (6,035 samples, 1.51%) + + + +dp_vs_conn_is_template (40 samples, 0.01%) + + + +lcore_stats_burst (70 samples, 0.02%) + + + +rte_raw_cksum (833 samples, 0.21%) + + + +tcp_conn_lookup (52,240 samples, 13.05%) +tcp_conn_lookup + + +ifa_lookup (11,397 samples, 2.85%) +if.. + + +__list_add (481 samples, 0.12%) + + + +rte_get_main_lcore (95 samples, 0.02%) + + + +ip4_hdr (106 samples, 0.03%) + + + +mbuf_header_pointer (96 samples, 0.02%) + + + +dp_vs_synproxy_dnat_handler (66 samples, 0.02%) + + + +tcp_hdr (86 samples, 0.02%) + + + +tcp_in_add_toa (3,069 samples, 0.77%) + + + +rte_pktmbuf_adj (204 samples, 0.05%) + + + +htonl@plt (38 samples, 0.01%) + + + +mbuf_may_pull (128 samples, 0.03%) + + + +inet_addr_fold (198 samples, 0.05%) + + + +lcore_process_redirect_ring (160 samples, 0.04%) + + + +ip4_is_frag (83 samples, 0.02%) + + + +dp_vs_stats_in (55 samples, 0.01%) + + + +netif_hard_xmit (78 samples, 0.02%) + + + +SHA1_Final (39 samples, 0.01%) + + + +rte_atomic32_dec_and_test (4,197 samples, 1.05%) + + + +rte_pktmbuf_trim (86 samples, 0.02%) + + + +rte_arch_bswap32 (197 samples, 0.05%) + + + +dp_vs_pre_routing (10,270 samples, 2.56%) +dp.. + + +__memset_sse2 (236 samples, 0.06%) + + + +rte_pktmbuf_prepend (35 samples, 0.01%) + + + +dp_vs_conn_set_in_timer (90 samples, 0.02%) + + + +ip4_hdr (77 samples, 0.02%) + + + +netif_port_get (3,459 samples, 0.86%) + + + +rte_atomic32_dec (8,008 samples, 2.00%) +r.. 
+ + +sha1_block_data_order (42 samples, 0.01%) + + + +rte_lcore_id (66 samples, 0.02%) + + + +__nrand48_r (142 samples, 0.04%) + + + +timeval_to_ticks (361 samples, 0.09%) + + + +list_del (917 samples, 0.23%) + + + +__rte_raw_cksum (464 samples, 0.12%) + + + +ip4_hdr (76 samples, 0.02%) + + + +__dp_vs_pre_routing (224 samples, 0.06%) + + + +list_del (34 samples, 0.01%) + + + +rte_atomic32_dec (44 samples, 0.01%) + + + +__strncmp_sse42 (209 samples, 0.05%) + + + +neigh_entry_state_trans (809 samples, 0.20%) + + + +__drand48_iterate (141 samples, 0.04%) + + + +__rte_raw_cksum_reduce (858 samples, 0.21%) + + + +lcore_process_redirect_ring (304 samples, 0.08%) + + + +__dp_vs_out_xmit_fnat4 (29,581 samples, 7.39%) +__dp_vs_ou.. + + +ip4_hdr (532 samples, 0.13%) + + + +rte_raw_cksum (86 samples, 0.02%) + + + +__dpvs_timer_sched (81 samples, 0.02%) + + + +dp_vs_schedule (51,256 samples, 12.80%) +dp_vs_schedule + + +ip4_hdrlen (1,216 samples, 0.30%) + + + +rte_mbuf_refcnt_read (58 samples, 0.01%) + + + +netif_rcv_mbuf (280,958 samples, 70.17%) +netif_rcv_mbuf + + +rte_get_main_lcore (34 samples, 0.01%) + + + +rte_ipv4_hdr_len (44 samples, 0.01%) + + + +rte_ipv4_hdr_len (90 samples, 0.02%) + + + +rte_lcore_id (55 samples, 0.01%) + + + +rte_atomic32_dec (1,814 samples, 0.45%) + + + +__dp_vs_in (243,067 samples, 60.70%) +__dp_vs_in + + +ip4_hdr (273 samples, 0.07%) + + + +rte_pktmbuf_headroom (38 samples, 0.01%) + + + +ip4_hdrlen (184 samples, 0.05%) + + + +rte_atomic32_dec (44 samples, 0.01%) + + + +netif_deliver_mbuf (226 samples, 0.06%) + + + +ip4_hdr (83 samples, 0.02%) + + + +tcp_fnat_in_handler (48 samples, 0.01%) + + + +dp_vs_redirect_ring_proc (171 samples, 0.04%) + + + +dev_get_idev (502 samples, 0.13%) + + + +rte_pktmbuf_lastseg (211 samples, 0.05%) + + + +tuplehash_to_conn (311 samples, 0.08%) + + + +sa_pool_release (4,241 samples, 1.06%) + + + +ip4_hdr (186 samples, 0.05%) + + + +inet_addr_fold (220 samples, 0.05%) + + + +pkt_type_get (5,304 samples, 1.32%) + + + 
+list_empty (489 samples, 0.12%) + + + +qsch_sched_all (509 samples, 0.13%) + + + +sa_release (24,466 samples, 6.11%) +sa_release + + +rte_arch_bswap16 (450 samples, 0.11%) + + + +list_empty (107 samples, 0.03%) + + + +rte_is_zero_ether_addr (66 samples, 0.02%) + + + +this_lcore_sched (380 samples, 0.09%) + + + +htonl (39 samples, 0.01%) + + + +rte_ether_addr_copy (432 samples, 0.11%) + + + +dp_vs_dest_is_avail (609 samples, 0.15%) + + + +htonl (287 samples, 0.07%) + + + +rte_arch_bswap16 (310 samples, 0.08%) + + + +dp_vs_conn_refresh_timer (63 samples, 0.02%) + + + +ifa_put (424 samples, 0.11%) + + + +OPENSSL_cleanse (336 samples, 0.08%) + + + +inet_addr_equal (3,393 samples, 0.85%) + + + +tcp_fnat_out_handler (16,173 samples, 4.04%) +tcp_.. + + +tcp_hdr (355 samples, 0.09%) + + + +dp_vs_conn_free_packets (140 samples, 0.03%) + + + +list_add_tail (42 samples, 0.01%) + + + +timeval_to_ticks (45 samples, 0.01%) + + + +mbuf_userdata (34 samples, 0.01%) + + + +[libcrypto.so.1.0.2k] (4,116 samples, 1.03%) + + + +ip4_hdrlen (122 samples, 0.03%) + + + +mbuf_may_pull (101 samples, 0.03%) + + + +lrand48 (53 samples, 0.01%) + + + +iftraf_pkt_out (38 samples, 0.01%) + + + +mbuf_may_pull (52 samples, 0.01%) + + + +mbuf_may_pull (63 samples, 0.02%) + + + +do_lcore_job (514 samples, 0.13%) + + + +rte_atomic32_read (422 samples, 0.11%) + + + +dp_vs_conn_is_template (34 samples, 0.01%) + + + +dp_vs_conn_fill_param (142 samples, 0.04%) + + + +ip4_is_frag (81 samples, 0.02%) + + + +rte_pktmbuf_adj (117 samples, 0.03%) + + + +put_laddr (429 samples, 0.11%) + + + +timer_sched_unlock (63 samples, 0.02%) + + + +do_lcore_job (386,978 samples, 96.64%) +do_lcore_job + + +htons (100 samples, 0.02%) + + + +tcp_conn_expire (3,638 samples, 0.91%) + + + +lcore_job_timer_manage (66 samples, 0.02%) + + + +timer_expire (52,130 samples, 13.02%) +timer_expire + + +rte_lcore_id (78 samples, 0.02%) + + + +ip4_hdr (266 samples, 0.07%) + + + +neigh_fill_mac (1,301 samples, 0.32%) + + + +timer_pending 
(50 samples, 0.01%) + + + +__list_del (56 samples, 0.01%) + + + +list_del (45 samples, 0.01%) + + + +inet_addr_equal (194 samples, 0.05%) + + + +dp_vs_dest_get_weight (43 samples, 0.01%) + + + +tcp_fnat_in_handler (10,584 samples, 2.64%) +tc.. + + +mbuf_userdata (142 samples, 0.04%) + + + +ip4_hdr (39 samples, 0.01%) + + + +dpvs_timer_sched (2,893 samples, 0.72%) + + + +route4_output (54 samples, 0.01%) + + + +dp_vs_conn_resend_packets (2,839 samples, 0.71%) + + + +lcore_process_arp_ring (3,664 samples, 0.92%) + + + +__list_del (546 samples, 0.14%) + + + +__list_del (37 samples, 0.01%) + + + +inet_addr_fold (45 samples, 0.01%) + + + +inet_addr_equal (3,713 samples, 0.93%) + + + +route4_put (595 samples, 0.15%) + + + +inet_is_addr_any (70 samples, 0.02%) + + + +rte_atomic32_inc (665 samples, 0.17%) + + + +dp_vs_stats_out (59 samples, 0.01%) + + + +lcore_job_xmit (12,510 samples, 3.12%) +lco.. + + +netif_rx_burst (7,929 samples, 1.98%) +n.. + + +rte_lcore_id (128 samples, 0.03%) + + + +idev_put (346 samples, 0.09%) + + + +[unknown] (1,430 samples, 0.36%) + + + +dp_vs_conn_bind_dest (1,242 samples, 0.31%) + + + +list_move_tail (540 samples, 0.13%) + + + +netif_update_worker_loop_cnt (74 samples, 0.02%) + + + +mbuf_header_pointer (714 samples, 0.18%) + + + +dp_vs_fast_xmit_fnat (41 samples, 0.01%) + + + +dp_vs_whtlst_allow (90 samples, 0.02%) + + + +__random (634 samples, 0.16%) + + + +ip_addr_netcmp (314 samples, 0.08%) + + + +tcp_in_adjust_seq (194 samples, 0.05%) + + + +dp_vs_conn_refresh_timer (10,395 samples, 2.60%) +dp.. + + +list_add_tail (2,121 samples, 0.53%) + + + +sa_pool_hash (923 samples, 0.23%) + + + +dp_vs_conn_hash (9,053 samples, 2.26%) +d.. 
+ + +dp_vs_conn_is_template (81 samples, 0.02%) + + + +dp_vs_save_xmit_info (58 samples, 0.01%) + + + +rte_lcore_id (62 samples, 0.02%) + + + +ip4_hdr (404 samples, 0.10%) + + + +ip4_hdr (76 samples, 0.02%) + + + +rte_atomic32_read (46 samples, 0.01%) + + + +blklst_hashkey (146 samples, 0.04%) + + + +__rte_raw_cksum (991 samples, 0.25%) + + + +netif_port_get (176 samples, 0.04%) + + + +ip4_hdr (61 samples, 0.02%) + + + +rte_arch_bswap16 (1,315 samples, 0.33%) + + + +__rte_jhash_3words (6,612 samples, 1.65%) + + + +rte_pktmbuf_prepend (335 samples, 0.08%) + + + +rte_pktmbuf_headroom (67 samples, 0.02%) + + + +dp_vs_redirect_alloc (305 samples, 0.08%) + + + +tcp_in_adjust_seq (347 samples, 0.09%) + + + +this_lcore_sched (124 samples, 0.03%) + + + +lcore_job_recv_fwd (308,158 samples, 76.96%) +lcore_job_recv_fwd + + +dev_get_idev (548 samples, 0.14%) + + + +whtlst_hashkey (119 samples, 0.03%) + + + +rte_arch_bswap32 (76 samples, 0.02%) + + + +idev_put (377 samples, 0.09%) + + + +rte_get_main_lcore (82 samples, 0.02%) + + + +ip4_hdr (529 samples, 0.13%) + + + +ip4_hdr (89 samples, 0.02%) + + + +rte_pktmbuf_headroom (89 samples, 0.02%) + + + +dp_vs_conn_refresh_timer (55 samples, 0.01%) + + + +rte_jhash_3words (1,908 samples, 0.48%) + + + +this_lcore_sched (144 samples, 0.04%) + + + +rte_atomic32_inc (397 samples, 0.10%) + + + +ipv4_output_fin (6,815 samples, 1.70%) + + + +mbuf_userdata (39 samples, 0.01%) + + + +dp_vs_conn_detach_timer (601 samples, 0.15%) + + + +__list_add (407 samples, 0.10%) + + + +ip4_hdrlen (1,411 samples, 0.35%) + + + +dp_vs_conn_is_template (70 samples, 0.02%) + + + +__vdso_clock_gettime (239 samples, 0.06%) + + + +__dpvs_timer_sched (85 samples, 0.02%) + + + +rte_lcore_id (41 samples, 0.01%) + + + +netif_rx_burst (157 samples, 0.04%) + + + +eth_type_parse (1,427 samples, 0.36%) + + + +neigh_hashkey (620 samples, 0.15%) + + + +dp_vs_dest_put (436 samples, 0.11%) + + + +dp_vs_laddr_unbind (25,798 samples, 6.44%) +dp_vs_la.. 
+ + +mbuf_userdata (36 samples, 0.01%) + + + +seq_before (111 samples, 0.03%) + + + +tcp_state_idx (94 samples, 0.02%) + + + +rte_timer_tick_cb (52,979 samples, 13.23%) +rte_timer_tick_cb + + +timer_pending (200 samples, 0.05%) + + + +tcp_out_save_seq (1,063 samples, 0.27%) + + + +__dpvs_timer_sched (2,338 samples, 0.58%) + + + +__dp_vs_fast_outxmit_fnat4 (68 samples, 0.02%) + + + +__memset_sse2 (583 samples, 0.15%) + + + +iftraf_pkt_out (36 samples, 0.01%) + + + +ip4_hdrlen (49 samples, 0.01%) + + + +dp_vs_fast_outxmit_fnat (26,968 samples, 6.73%) +dp_vs_fas.. + + +mbuf_userdata_reset (285 samples, 0.07%) + + + +dp_vs_conn_attach_timer (3,352 samples, 0.84%) + + + +dp_vs_redirect_ring_proc (65 samples, 0.02%) + + + +xmit_outbound (47,282 samples, 11.81%) +xmit_outbound + + +timer_sched_unlock (223 samples, 0.06%) + + + +sa_pool_destroy (4,354 samples, 1.09%) + + + +rte_pktmbuf_headroom (94 samples, 0.02%) + + + +ip4_hdr (114 samples, 0.03%) + + + +list_del (108 samples, 0.03%) + + + +rte_arch_bswap16 (293 samples, 0.07%) + + + +inet_addr_equal (75 samples, 0.02%) + + + +ip4_hdrlen (190 samples, 0.05%) + + + +rte_atomic32_dec (40 samples, 0.01%) + + + +net_cmp (470 samples, 0.12%) + + + +rte_pktmbuf_prepend (349 samples, 0.09%) + + + +route_out_net_lookup (1,009 samples, 0.25%) + + + +timer_pending (212 samples, 0.05%) + + + +dp_vs_dest_is_overload (38 samples, 0.01%) + + + +dp_vs_conn_is_in_timer (360 samples, 0.09%) + + + +mbuf_header_pointer (722 samples, 0.18%) + + + + diff --git a/test/release/v1.9.2/pics/worker8.svg b/test/release/v1.9.2/pics/worker8.svg new file mode 100644 index 000000000..906853a55 --- /dev/null +++ b/test/release/v1.9.2/pics/worker8.svg @@ -0,0 +1,11556 @@ + + + + + + + + + + + + + + +Flame Graph + +Reset Zoom +Search +ic + + + +dp_vs_conn_hashkey (5,063 samples, 0.19%) + + + +rte_atomic32_inc (270 samples, 0.01%) + + + +dp_vs_conn_expire (35,766 samples, 1.36%) + + + +dp_vs_conn_unhash (6,591 samples, 0.25%) + + + +__list_add (358 
samples, 0.01%) + + + +INET_HOOK (192,037 samples, 7.29%) +INET_HOOK + + +dpvs_timer_update (6,369 samples, 0.24%) + + + +this_lcore_sched (258 samples, 0.01%) + + + +neigh_hashkey (237 samples, 0.01%) + + + +__rte_jhash_3words (4,221 samples, 0.16%) + + + +dp_vs_in (181,755 samples, 6.90%) +dp_vs_in + + +dp_vs_fill_iphdr (2,094 samples, 0.08%) + + + +mbuf_may_pull (488 samples, 0.02%) + + + +idev_put (746 samples, 0.03%) + + + +dp_vs_save_xmit_info (1,616 samples, 0.06%) + + + +timeval_to_ticks (277 samples, 0.01%) + + + +__list_add (286 samples, 0.01%) + + + +rte_atomic32_inc (538 samples, 0.02%) + + + +eth_type_parse (1,309 samples, 0.05%) + + + +futex_wait (766 samples, 0.03%) + + + +futex_wait_setup (452 samples, 0.02%) + + + +rte_eth_rx_burst (11,062 samples, 0.42%) + + + +list_empty (2,191 samples, 0.08%) + + + +rte_atomic32_inc (348 samples, 0.01%) + + + +ixgbe_xmit_pkts (6,854 samples, 0.26%) + + + +__dp_vs_fast_xmit_fnat4 (18,888 samples, 0.72%) + + + +__rte_raw_cksum_reduce (378 samples, 0.01%) + + + +ip4_is_frag (340 samples, 0.01%) + + + +list_add_tail (1,172 samples, 0.04%) + + + +dp_vs_conn_hash (6,174 samples, 0.23%) + + + +system_call_fastpath (914 samples, 0.03%) + + + +rte_atomic32_dec (473 samples, 0.02%) + + + +rte_arch_bswap32 (968 samples, 0.04%) + + + +__rte_raw_cksum_reduce (548 samples, 0.02%) + + + +tcp_secure_sequence_number (844 samples, 0.03%) + + + +ip4_hdrlen (934 samples, 0.04%) + + + +rte_is_zero_ether_addr (545 samples, 0.02%) + + + +rte_arch_bswap32 (282 samples, 0.01%) + + + +inet_addr_equal (629 samples, 0.02%) + + + +rte_atomic32_dec (464 samples, 0.02%) + + + +rte_atomic32_dec (1,540 samples, 0.06%) + + + +sa_pool_fetch (2,329 samples, 0.09%) + + + +system_call_after_swapgs (249 samples, 0.01%) + + + +dp_vs_stats_in (1,337 samples, 0.05%) + + + +route_out_net_lookup (705 samples, 0.03%) + + + +dp_vs_dest_put (305 samples, 0.01%) + + + +inet_is_addr_any (498 samples, 0.02%) + + + +__list_del (261 samples, 0.01%) + + + 
+netif_deliver_mbuf (207,019 samples, 7.86%) +netif_deliv.. + + +__dp_vs_pre_routing (7,110 samples, 0.27%) + + + +ixgbe_xmit_pkts (7,026 samples, 0.27%) + + + +dp_vs_fill_iphdr (1,763 samples, 0.07%) + + + +sa_pool_release (2,976 samples, 0.11%) + + + +rte_eth_rx_burst (11,013 samples, 0.42%) + + + +ixgbe_recv_pkts_bulk_alloc (7,763 samples, 0.29%) + + + +rte_atomic32_dec_and_test (295 samples, 0.01%) + + + +dp_vs_service_lookup (964 samples, 0.04%) + + + +this_lcore_sched (259 samples, 0.01%) + + + +ip4_hdr (358 samples, 0.01%) + + + +dp_vs_rr_schedule (919 samples, 0.03%) + + + +ifa_put (322 samples, 0.01%) + + + +dp_vs_conn_get (17,032 samples, 0.65%) + + + +dp_vs_dest_is_avail (396 samples, 0.02%) + + + +__dp_vs_xmit_fnat4 (37,337 samples, 1.42%) + + + +netif_port_get (255 samples, 0.01%) + + + +neigh_hashkey (651 samples, 0.02%) + + + +neigh_key_cmp (328 samples, 0.01%) + + + +INIT_LIST_HEAD (336 samples, 0.01%) + + + +__rte_raw_cksum (303 samples, 0.01%) + + + +dp_vs_in (179,772 samples, 6.83%) +dp_vs_in + + +idev_put (761 samples, 0.03%) + + + +INIT_LIST_HEAD (343 samples, 0.01%) + + + +sa_pool_hash (466 samples, 0.02%) + + + +rte_atomic32_dec_and_test (3,399 samples, 0.13%) + + + +inet_addr_ifa_put (366 samples, 0.01%) + + + +ip4_hdr (336 samples, 0.01%) + + + +dpvs_timer_update (5,908 samples, 0.22%) + + + +timer_expire (36,390 samples, 1.38%) + + + +pkt_type_get (3,557 samples, 0.14%) + + + +mbuf_may_pull (344 samples, 0.01%) + + + +dp_vs_service_lookup (1,390 samples, 0.05%) + + + +port_tab_hashkey (285 samples, 0.01%) + + + +tcp_in_add_toa (8,086 samples, 0.31%) + + + +neigh_fill_mac (634 samples, 0.02%) + + + +rte_is_zero_ether_addr (462 samples, 0.02%) + + + +dev_get_idev (1,436 samples, 0.05%) + + + +rte_atomic32_inc (1,253 samples, 0.05%) + + + +rte_ipv4_phdr_cksum (1,061 samples, 0.04%) + + + +dpvs_timer_update (6,650 samples, 0.25%) + + + +neigh_fill_mac (677 samples, 0.03%) + + + +rte_atomic32_inc (1,438 samples, 0.05%) + + + 
+dp_vs_service_hashkey (411 samples, 0.02%) + + + +rte_atomic32_inc (1,306 samples, 0.05%) + + + +ip4_hdr (312 samples, 0.01%) + + + +netif_xmit (3,567 samples, 0.14%) + + + +rte_lcore_id (379 samples, 0.01%) + + + +dp_vs_whtlst_allow (4,134 samples, 0.16%) + + + +rte_atomic32_read (314 samples, 0.01%) + + + +rte_pktmbuf_prepend (389 samples, 0.01%) + + + +[unknown] (870 samples, 0.03%) + + + +rte_arch_bswap16 (535 samples, 0.02%) + + + +__dpvs_timer_sched (3,853 samples, 0.15%) + + + +dp_vs_pre_routing (7,901 samples, 0.30%) + + + +dp_vs_service_lookup (941 samples, 0.04%) + + + +list_add_tail (622 samples, 0.02%) + + + +__memset_sse2 (534 samples, 0.02%) + + + +dp_vs_conn_get (17,251 samples, 0.66%) + + + +ip4_hdrlen (412 samples, 0.02%) + + + +netif_port_get (297 samples, 0.01%) + + + +rte_atomic32_inc (1,375 samples, 0.05%) + + + +dp_vs_conn_hash (6,206 samples, 0.24%) + + + +try_to_wake_up (239 samples, 0.01%) + + + +neigh_fill_mac (703 samples, 0.03%) + + + +inet_addr_equal (2,109 samples, 0.08%) + + + +__dp_vs_service_get (1,217 samples, 0.05%) + + + +rte_atomic32_dec_and_test (278 samples, 0.01%) + + + +dp_vs_schedule (44,455 samples, 1.69%) + + + +dp_vs_blklst_lookup (4,786 samples, 0.18%) + + + +dp_vs_stats_conn (241 samples, 0.01%) + + + +tcp_send_csum (1,556 samples, 0.06%) + + + +rte_atomic32_dec (736 samples, 0.03%) + + + +swapper (11,948 samples, 0.45%) + + + +rte_atomic32_dec_and_test (242 samples, 0.01%) + + + +rte_lcore_id (391 samples, 0.01%) + + + +tcp_in_adjust_seq (267 samples, 0.01%) + + + +neigh_fill_mac (755 samples, 0.03%) + + + +tcp_state_idx (838 samples, 0.03%) + + + +dpvs_time_rand_delay (511 samples, 0.02%) + + + +sys_futex (895 samples, 0.03%) + + + +dp_vs_conn_bind_dest (1,063 samples, 0.04%) + + + +rte_atomic32_dec_and_test (280 samples, 0.01%) + + + +list_del (790 samples, 0.03%) + + + +dp_vs_xmit_fnat (38,217 samples, 1.45%) + + + +lcore_process_redirect_ring (465 samples, 0.02%) + + + +rte_get_tsc_cycles (4,304 samples, 0.16%) + 
+ + +route4_put (374 samples, 0.01%) + + + +dp_vs_service_hashkey (423 samples, 0.02%) + + + +inet_addr_equal (662 samples, 0.03%) + + + +__rte_raw_cksum (885 samples, 0.03%) + + + +rte_ipv4_phdr_cksum (2,218 samples, 0.08%) + + + +lcore_job_timer_manage (47,572 samples, 1.81%) +l.. + + +tcp_send_csum (2,956 samples, 0.11%) + + + +__dp_vs_service_get (1,202 samples, 0.05%) + + + +futex_wait (719 samples, 0.03%) + + + +__dp_vs_in (177,214 samples, 6.73%) +__dp_vs_in + + +netif_xmit (2,420 samples, 0.09%) + + + +__lll_lock_wait_private (294 samples, 0.01%) + + + +netif_deliver_mbuf (207,978 samples, 7.90%) +netif_deliv.. + + +rte_atomic32_inc (323 samples, 0.01%) + + + +net_cmp (277 samples, 0.01%) + + + +list_del (825 samples, 0.03%) + + + +system_call_after_swapgs (542 samples, 0.02%) + + + +rte_atomic32_dec_and_test (276 samples, 0.01%) + + + +af_inet_hooks (315 samples, 0.01%) + + + +mbuf_header_pointer (271 samples, 0.01%) + + + +rte_rdtsc (4,123 samples, 0.16%) + + + +lcore_process_packets (266 samples, 0.01%) + + + +rte_atomic32_dec_and_test (269 samples, 0.01%) + + + +dp_vs_synproxy_syn_rcv (3,468 samples, 0.13%) + + + +tcp_in_adjust_seq (275 samples, 0.01%) + + + +rte_arch_bswap32 (280 samples, 0.01%) + + + +tcp_secure_sequence_number (799 samples, 0.03%) + + + +dp_vs_blklst_lookup (4,701 samples, 0.18%) + + + +rte_pktmbuf_prepend (441 samples, 0.02%) + + + +dpvs_timer_sched (2,598 samples, 0.10%) + + + +neigh_confirm (3,699 samples, 0.14%) + + + +dp_vs_conn_put (8,891 samples, 0.34%) + + + +__list_add (379 samples, 0.01%) + + + +tcp_conn_expire (2,415 samples, 0.09%) + + + +dp_vs_proto_lookup (421 samples, 0.02%) + + + +dp_vs_conn_refresh_timer (7,408 samples, 0.28%) + + + +__lll_lock_wait_private (1,362 samples, 0.05%) + + + +af_inet_hooks (269 samples, 0.01%) + + + +rte_atomic32_dec_and_test (267 samples, 0.01%) + + + +dp_vs_service_lookup (964 samples, 0.04%) + + + +__list_add (361 samples, 0.01%) + + + +ipv4_rcv (198,582 samples, 7.54%) +ipv4_rcv + + 
+list_empty (355 samples, 0.01%) + + + +dp_vs_conn_attach_timer (2,822 samples, 0.11%) + + + +dp_vs_stats_out (1,420 samples, 0.05%) + + + +tcp_hdr (687 samples, 0.03%) + + + +rte_raw_cksum (1,158 samples, 0.04%) + + + +ipv4_output (4,872 samples, 0.19%) + + + +__list_add (438 samples, 0.02%) + + + +mbuf_header_pointer (424 samples, 0.02%) + + + +__dp_vs_conn_hash (5,938 samples, 0.23%) + + + +ip4_is_frag (315 samples, 0.01%) + + + +dp_vs_conn_refresh_timer (7,216 samples, 0.27%) + + + +rte_is_zero_ether_addr (361 samples, 0.01%) + + + +lcore-worker-6 (327,311 samples, 12.43%) +lcore-worker-6 + + +dpvs_job_loop (320,814 samples, 12.18%) +dpvs_job_loop + + +system_call_after_swapgs (491 samples, 0.02%) + + + +tcp_hdr (270 samples, 0.01%) + + + +rte_ipv4_phdr_cksum (3,303 samples, 0.13%) + + + +neigh_key_cmp (447 samples, 0.02%) + + + +dp_vs_fast_outxmit_fnat (19,098 samples, 0.73%) + + + +rte_arch_bswap16 (988 samples, 0.04%) + + + +rte_lcore_id (525 samples, 0.02%) + + + +rte_eth_tx_burst (7,237 samples, 0.27%) + + + +tcp_in_remove_ts (623 samples, 0.02%) + + + +tcp_conn_lookup (33,669 samples, 1.28%) + + + +inet_addr_ifa_put (298 samples, 0.01%) + + + +sys_futex (849 samples, 0.03%) + + + +rte_eth_tx_burst (7,369 samples, 0.28%) + + + +system_call_fastpath (827 samples, 0.03%) + + + +rte_is_zero_ether_addr (568 samples, 0.02%) + + + +rte_prefetch0 (296 samples, 0.01%) + + + +lcore_process_packets (215,222 samples, 8.17%) +lcore_proce.. 
+ + +idev_put (802 samples, 0.03%) + + + +sa_fetch (17,698 samples, 0.67%) + + + +dp_vs_conn_expire (35,794 samples, 1.36%) + + + +__dp_vs_conn_hash (6,170 samples, 0.23%) + + + +dp_vs_service_lookup (933 samples, 0.04%) + + + +list_empty (415 samples, 0.02%) + + + +__list_add (488 samples, 0.02%) + + + +rte_arch_bswap16 (276 samples, 0.01%) + + + +__clock_gettime (261 samples, 0.01%) + + + +ipv4_output_fin2 (4,165 samples, 0.16%) + + + +timeval_to_ticks (697 samples, 0.03%) + + + +netif_xmit (1,313 samples, 0.05%) + + + +dp_vs_conn_resend_packets (975 samples, 0.04%) + + + +dev_get_idev (1,209 samples, 0.05%) + + + +list_del (1,639 samples, 0.06%) + + + +inet_addr_ifa_put (369 samples, 0.01%) + + + +rte_atomic32_inc (338 samples, 0.01%) + + + +[libcrypto.so.1.0.2k] (305 samples, 0.01%) + + + +rte_lcore_id (423 samples, 0.02%) + + + +ipv4_output_fin (4,414 samples, 0.17%) + + + +dp_vs_conn_hashkey (4,988 samples, 0.19%) + + + +rte_prefetch0 (270 samples, 0.01%) + + + +list_add (2,394 samples, 0.09%) + + + +rte_pktmbuf_adj (663 samples, 0.03%) + + + +tcp_hdr (312 samples, 0.01%) + + + +route_out_net_lookup (698 samples, 0.03%) + + + +tcp_fnat_in_handler (7,405 samples, 0.28%) + + + +sa_pool_release (3,133 samples, 0.12%) + + + +__dp_vs_fast_outxmit_fnat4 (18,065 samples, 0.69%) + + + +inet_addr_equal (446 samples, 0.02%) + + + +dp_vs_fast_xmit_fnat (19,082 samples, 0.72%) + + + +__rte_jhash_3words (4,266 samples, 0.16%) + + + +rte_atomic32_dec (4,786 samples, 0.18%) + + + +__dp_vs_fast_outxmit_fnat4 (17,873 samples, 0.68%) + + + +dp_vs_proto_lookup (371 samples, 0.01%) + + + +dp_vs_conn_hash (6,116 samples, 0.23%) + + + +__dpvs_timer_sched (3,861 samples, 0.15%) + + + +rte_jhash_3words (4,624 samples, 0.18%) + + + +__memset_sse2 (591 samples, 0.02%) + + + +__rte_raw_cksum (909 samples, 0.03%) + + + +sa_pool_fetch (2,423 samples, 0.09%) + + + +try_to_wake_up (240 samples, 0.01%) + + + +tcp_fnat_in_handler (13,548 samples, 0.51%) + + + +inet_addr_ifa_put (334 samples, 
0.01%) + + + +rte_atomic32_dec (284 samples, 0.01%) + + + +__dp_vs_pre_routing (7,116 samples, 0.27%) + + + +blklst_hashkey (1,576 samples, 0.06%) + + + +lcore_process_arp_ring (5,309 samples, 0.20%) + + + +route4_output (1,638 samples, 0.06%) + + + +lcore_job_timer_manage (47,721 samples, 1.81%) +l.. + + +rte_atomic32_inc (1,084 samples, 0.04%) + + + +rte_atomic32_dec (723 samples, 0.03%) + + + +netif_update_worker_loop_cnt (391 samples, 0.01%) + + + +tcp_in_init_seq (1,779 samples, 0.07%) + + + +do_lcore_job (309,680 samples, 11.76%) +do_lcore_job + + +netif_xmit (3,483 samples, 0.13%) + + + +__memset_sse2 (375 samples, 0.01%) + + + +__dpvs_timer_sched (2,294 samples, 0.09%) + + + +ip4_hdrlen (880 samples, 0.03%) + + + +INIT_LIST_HEAD (345 samples, 0.01%) + + + +rte_atomic32_dec (1,485 samples, 0.06%) + + + +__dpvs_timer_sched (4,123 samples, 0.16%) + + + +tcp_fnat_in_handler (13,606 samples, 0.52%) + + + +mbuf_header_pointer (456 samples, 0.02%) + + + +rte_atomic32_dec (694 samples, 0.03%) + + + +dpvs_time_rand_delay (484 samples, 0.02%) + + + +msg_slave_process (5,039 samples, 0.19%) + + + +rte_is_zero_ether_addr (402 samples, 0.02%) + + + +rte_lcore_id (582 samples, 0.02%) + + + +rte_atomic32_inc (481 samples, 0.02%) + + + +dp_vs_stats_out (1,407 samples, 0.05%) + + + +inet_addr_ifa_get (11,006 samples, 0.42%) + + + +tcp_conn_expire (2,561 samples, 0.10%) + + + +netif_port_get (318 samples, 0.01%) + + + +[libcrypto.so.1.0.2k] (300 samples, 0.01%) + + + +rte_eth_tx_burst (7,372 samples, 0.28%) + + + +ip4_hdrlen (333 samples, 0.01%) + + + +INET_HOOK (190,131 samples, 7.22%) +INET_HOOK + + +inet_addr_equal (431 samples, 0.02%) + + + +dp_vs_pre_routing (7,808 samples, 0.30%) + + + +dp_vs_stats_in (1,280 samples, 0.05%) + + + +rte_prefetch0 (312 samples, 0.01%) + + + +netif_hard_xmit (1,968 samples, 0.07%) + + + +list_empty (361 samples, 0.01%) + + + +tcp_fnat_out_handler (11,425 samples, 0.43%) + + + +dp_vs_conn_unhash (6,675 samples, 0.25%) + + + +mbuf_may_pull 
(440 samples, 0.02%) + + + +tcp_fnat_out_handler (11,020 samples, 0.42%) + + + +list_del (1,600 samples, 0.06%) + + + +ixgbe_recv_pkts_bulk_alloc (7,644 samples, 0.29%) + + + +dev_get_idev (1,391 samples, 0.05%) + + + +dp_vs_rr_schedule (930 samples, 0.04%) + + + +rte_atomic32_inc (2,761 samples, 0.10%) + + + +dp_vs_conn_hashkey (1,528 samples, 0.06%) + + + +netif_xmit (1,190 samples, 0.05%) + + + +rte_atomic32_dec_and_test (300 samples, 0.01%) + + + +netif_hard_xmit (1,860 samples, 0.07%) + + + +tcp_fnat_out_handler (11,337 samples, 0.43%) + + + +put_laddr (303 samples, 0.01%) + + + +rte_pktmbuf_prepend (446 samples, 0.02%) + + + +__dp_vs_out_xmit_fnat4 (21,469 samples, 0.82%) + + + +dpvs_timer_update (6,165 samples, 0.23%) + + + +this_lcore_sched (243 samples, 0.01%) + + + +__list_del_entry (264 samples, 0.01%) + + + +rte_arch_bswap32 (950 samples, 0.04%) + + + +rte_arch_bswap16 (990 samples, 0.04%) + + + +__lll_unlock_wake_private (1,660 samples, 0.06%) + + + +rte_atomic32_dec_and_test (296 samples, 0.01%) + + + +get_level_ticks (381 samples, 0.01%) + + + +netif_port_get (278 samples, 0.01%) + + + +system_call_after_swapgs (243 samples, 0.01%) + + + +INET_HOOK (4,707 samples, 0.18%) + + + +tcp_in_adjust_seq (347 samples, 0.01%) + + + +tcp_state_trans (7,731 samples, 0.29%) + + + +ifa_lookup (7,488 samples, 0.28%) + + + +whtlst_hashkey (1,400 samples, 0.05%) + + + +tcp_send_csum (3,133 samples, 0.12%) + + + +rte_is_zero_ether_addr (519 samples, 0.02%) + + + +ipv4_output_fin2 (4,288 samples, 0.16%) + + + +dp_vs_dest_is_avail (410 samples, 0.02%) + + + +list_move_tail (518 samples, 0.02%) + + + +rte_atomic32_inc (1,700 samples, 0.06%) + + + +neigh_fill_mac (664 samples, 0.03%) + + + +__list_add (476 samples, 0.02%) + + + +__dp_vs_fast_outxmit_fnat4 (18,266 samples, 0.69%) + + + +rte_atomic32_inc (319 samples, 0.01%) + + + +tcp_in_adjust_seq (335 samples, 0.01%) + + + +system_call_fastpath (1,127 samples, 0.04%) + + + +lcore_process_arp_ring (5,217 samples, 0.20%) + 
+ + +dp_vs_conn_alloc (1,284 samples, 0.05%) + + + +tcp_fnat_in_handler (7,729 samples, 0.29%) + + + +ip4_hdr (388 samples, 0.01%) + + + +[unknown] (1,837 samples, 0.07%) + + + +neigh_entry_state_trans (610 samples, 0.02%) + + + +netif_port_get (275 samples, 0.01%) + + + +qsch_sched_all (225 samples, 0.01%) + + + +neigh_output (3,397 samples, 0.13%) + + + +rte_atomic32_read (297 samples, 0.01%) + + + +rte_timer_manage (37,204 samples, 1.41%) + + + +tcp_send_csum (3,029 samples, 0.12%) + + + +dp_vs_synproxy_syn_rcv (3,311 samples, 0.13%) + + + +timeval_to_ticks (638 samples, 0.02%) + + + +lcore_process_arp_ring (5,492 samples, 0.21%) + + + +__clock_gettime (290 samples, 0.01%) + + + +netif_deliver_mbuf (207,071 samples, 7.86%) +netif_deliv.. + + +inet_addr_equal (2,579 samples, 0.10%) + + + +__lll_lock_wait_private (1,435 samples, 0.05%) + + + +__dp_vs_fast_outxmit_fnat4 (18,937 samples, 0.72%) + + + +sa_pool_release (2,969 samples, 0.11%) + + + +dp_vs_conn_hashkey (1,563 samples, 0.06%) + + + +system_call_after_swapgs (314 samples, 0.01%) + + + +ip4_hdr (368 samples, 0.01%) + + + +mbuf_header_pointer (364 samples, 0.01%) + + + +tcp_send_csum (4,524 samples, 0.17%) + + + +inet_addr_equal (2,461 samples, 0.09%) + + + +futex_wait_setup (500 samples, 0.02%) + + + +[libcrypto.so.1.0.2k] (308 samples, 0.01%) + + + +dp_vs_conn_is_in_timer (291 samples, 0.01%) + + + +sys_futex (864 samples, 0.03%) + + + +dp_vs_conn_hashkey (5,188 samples, 0.20%) + + + +rte_atomic32_dec (4,778 samples, 0.18%) + + + +rte_pktmbuf_trim (581 samples, 0.02%) + + + +tcp_send_csum (4,615 samples, 0.18%) + + + +__list_add (284 samples, 0.01%) + + + +ip4_hdrlen (949 samples, 0.04%) + + + +mbuf_may_pull (551 samples, 0.02%) + + + +netif_tx_burst (8,328 samples, 0.32%) + + + +ipv4_rcv (196,523 samples, 7.46%) +ipv4_rcv + + +do_lcore_job (745 samples, 0.03%) + + + +__schedule (1,752 samples, 0.07%) + + + +list_add_tail (1,142 samples, 0.04%) + + + +INET_HOOK (5,345 samples, 0.20%) + + + 
+rte_atomic32_dec_and_test (283 samples, 0.01%) + + + +lcore_process_redirect_ring (226 samples, 0.01%) + + + +ip4_hdr (362 samples, 0.01%) + + + +rte_ipv4_phdr_cksum (3,325 samples, 0.13%) + + + +neigh_confirm (3,654 samples, 0.14%) + + + +put_laddr (278 samples, 0.01%) + + + +route_out_local_lookup (839 samples, 0.03%) + + + +tcp_secure_sequence_number (848 samples, 0.03%) + + + +rte_is_zero_ether_addr (361 samples, 0.01%) + + + +mbuf_header_pointer (239 samples, 0.01%) + + + +rte_atomic32_inc (486 samples, 0.02%) + + + +__get_laddr (7,882 samples, 0.30%) + + + +rte_atomic32_inc (1,343 samples, 0.05%) + + + +eth_type_parse (1,284 samples, 0.05%) + + + +tcp_in_adjust_seq (258 samples, 0.01%) + + + +inet_addr_equal (424 samples, 0.02%) + + + +tcp_out_save_seq (750 samples, 0.03%) + + + +neigh_hashkey (647 samples, 0.02%) + + + +qsch_sched_all (744 samples, 0.03%) + + + +rte_is_zero_ether_addr (525 samples, 0.02%) + + + +rte_atomic32_inc (2,813 samples, 0.11%) + + + +inet_addr_ifa_get (10,757 samples, 0.41%) + + + +dp_vs_conn_put (8,684 samples, 0.33%) + + + +rte_ipv4_phdr_cksum (1,051 samples, 0.04%) + + + +whtlst_hashkey (1,356 samples, 0.05%) + + + +rte_arch_bswap16 (342 samples, 0.01%) + + + +qsch_sched_all (677 samples, 0.03%) + + + +lcore_stats_burst (1,016 samples, 0.04%) + + + +rte_atomic32_inc (483 samples, 0.02%) + + + +eth_type_parse (1,293 samples, 0.05%) + + + +lcore_job_xmit (12,072 samples, 0.46%) + + + +dp_vs_synproxy_syn_rcv (3,389 samples, 0.13%) + + + +tcp_conn_sched (46,251 samples, 1.76%) + + + +dp_vs_rr_schedule (974 samples, 0.04%) + + + +list_empty (2,239 samples, 0.09%) + + + +inet_addr_equal (2,096 samples, 0.08%) + + + +ifa_put (287 samples, 0.01%) + + + +neigh_lookup_entry (687 samples, 0.03%) + + + +dp_vs_xmit_fnat (36,948 samples, 1.40%) + + + +rte_is_zero_ether_addr (441 samples, 0.02%) + + + +lcore_process_packets (215,475 samples, 8.18%) +lcore_proce.. 
+ + +dev_get_idev (1,440 samples, 0.05%) + + + +lcore_process_arp_ring (4,960 samples, 0.19%) + + + +system_call_after_swapgs (251 samples, 0.01%) + + + +mbuf_header_pointer (269 samples, 0.01%) + + + +__dp_vs_xmit_fnat4 (36,761 samples, 1.40%) + + + +futex_wait_setup (528 samples, 0.02%) + + + +netif_port_get (268 samples, 0.01%) + + + +do_futex (815 samples, 0.03%) + + + +list_move_tail (357 samples, 0.01%) + + + +dp_vs_conn_detach_timer (360 samples, 0.01%) + + + +mbuf_may_pull (601 samples, 0.02%) + + + +netif_tx_burst (8,348 samples, 0.32%) + + + +__vdso_clock_gettime (470 samples, 0.02%) + + + +dp_vs_conn_is_in_timer (238 samples, 0.01%) + + + +timer_expire (37,092 samples, 1.41%) + + + +OPENSSL_cleanse (224 samples, 0.01%) + + + +list_move_tail (361 samples, 0.01%) + + + +dp_vs_fill_iphdr (2,023 samples, 0.08%) + + + +rte_raw_cksum (573 samples, 0.02%) + + + +tcp_in_adjust_seq (343 samples, 0.01%) + + + +lcore_process_packets (216,815 samples, 8.23%) +lcore_proce.. + + +rte_raw_cksum (1,242 samples, 0.05%) + + + +sa4_fetch (17,415 samples, 0.66%) + + + +dp_vs_out_xmit_fnat (22,038 samples, 0.84%) + + + +OPENSSL_cleanse (285 samples, 0.01%) + + + +tcp_send_csum (1,580 samples, 0.06%) + + + +dp_vs_fill_iphdr (1,746 samples, 0.07%) + + + +__dpvs_timer_sched (2,274 samples, 0.09%) + + + +[libcrypto.so.1.0.2k] (2,773 samples, 0.11%) + + + +ifa_lookup (7,728 samples, 0.29%) + + + +dp_vs_conn_hashkey (5,119 samples, 0.19%) + + + +__rte_raw_cksum (312 samples, 0.01%) + + + +tcp_fnat_in_handler (7,867 samples, 0.30%) + + + +dp_vs_service_lookup (939 samples, 0.04%) + + + +rte_atomic32_dec (1,471 samples, 0.06%) + + + +__rte_jhash_3words (1,350 samples, 0.05%) + + + +__get_laddr (8,095 samples, 0.31%) + + + +tcp_hdr (290 samples, 0.01%) + + + +_raw_spin_unlock_irqrestore (239 samples, 0.01%) + + + +rte_atomic32_inc (236 samples, 0.01%) + + + +dp_vs_out_xmit_fnat (21,351 samples, 0.81%) + + + +dpvs_timer_sched (2,729 samples, 0.10%) + + + +tcp_send_csum (1,508 samples, 
0.06%) + + + +dp_vs_redirect_ring_proc (227 samples, 0.01%) + + + +msg_slave_process (5,081 samples, 0.19%) + + + +dp_vs_conn_unhash (6,497 samples, 0.25%) + + + +dp_vs_fast_outxmit_fnat (18,728 samples, 0.71%) + + + +__memset_sse2 (389 samples, 0.01%) + + + +msg_slave_process (5,292 samples, 0.20%) + + + +tcp_conn_lookup (33,597 samples, 1.28%) + + + +ipv4_output (4,967 samples, 0.19%) + + + +netif_hard_xmit (1,010 samples, 0.04%) + + + +dp_vs_fast_outxmit_fnat (18,767 samples, 0.71%) + + + +list_add_tail (757 samples, 0.03%) + + + +rte_atomic32_inc (451 samples, 0.02%) + + + +dp_vs_whtlst_allow (4,627 samples, 0.18%) + + + +rte_pktmbuf_append (301 samples, 0.01%) + + + +dp_vs_stats_out (1,318 samples, 0.05%) + + + +dpvs_time_rand_delay (312 samples, 0.01%) + + + +dpvs_job_loop (321,131 samples, 12.19%) +dpvs_job_loop + + +do_futex (1,076 samples, 0.04%) + + + +ip4_hdrlen (344 samples, 0.01%) + + + +ip4_is_frag (543 samples, 0.02%) + + + +rte_is_zero_ether_addr (435 samples, 0.02%) + + + +netif_xmit (2,411 samples, 0.09%) + + + +__list_del_entry (267 samples, 0.01%) + + + +dp_vs_conn_unbind_dest (724 samples, 0.03%) + + + +__dp_vs_fast_xmit_fnat4 (18,852 samples, 0.72%) + + + +idev_put (824 samples, 0.03%) + + + +netif_hard_xmit (3,035 samples, 0.12%) + + + +whtlst_hashkey (1,319 samples, 0.05%) + + + +route4_put (376 samples, 0.01%) + + + +qsch_sched_all (811 samples, 0.03%) + + + +pkt_type_get (3,633 samples, 0.14%) + + + +list_del (829 samples, 0.03%) + + + +rte_jhash_3words (1,355 samples, 0.05%) + + + +__drand48_iterate (618 samples, 0.02%) + + + +ipv4_output (5,118 samples, 0.19%) + + + +dpvs_time_rand_delay (527 samples, 0.02%) + + + +schedule_preempt_disabled (1,781 samples, 0.07%) + + + +dp_vs_laddr_unbind (19,205 samples, 0.73%) + + + +tcp_in_init_seq (1,711 samples, 0.06%) + + + +dp_vs_service_lookup (981 samples, 0.04%) + + + +rte_arch_bswap32 (1,009 samples, 0.04%) + + + +__dp_vs_fast_xmit_fnat4 (18,733 samples, 0.71%) + + + +system_call_fastpath (850 
samples, 0.03%) + + + +ip4_is_frag (560 samples, 0.02%) + + + +idev_put (826 samples, 0.03%) + + + +qsch_sched_all (729 samples, 0.03%) + + + +inet_addr_ifa_put (342 samples, 0.01%) + + + +__rte_raw_cksum_reduce (554 samples, 0.02%) + + + +system_call_after_swapgs (385 samples, 0.01%) + + + +rte_atomic32_inc (330 samples, 0.01%) + + + +system_call_after_swapgs (475 samples, 0.02%) + + + +sa_pool_hash (395 samples, 0.01%) + + + +dp_vs_service_hashkey (378 samples, 0.01%) + + + +__list_del (537 samples, 0.02%) + + + +ip4_hdr (244 samples, 0.01%) + + + +__list_add (248 samples, 0.01%) + + + +rte_get_tsc_cycles (4,051 samples, 0.15%) + + + +rte_atomic32_read (283 samples, 0.01%) + + + +rte_arch_bswap16 (630 samples, 0.02%) + + + +ifa_lookup (7,963 samples, 0.30%) + + + +list_del (818 samples, 0.03%) + + + +rte_lcore_id (433 samples, 0.02%) + + + +dp_vs_dest_is_avail (561 samples, 0.02%) + + + +tcp_send_csum (2,997 samples, 0.11%) + + + +ip4_hdr (326 samples, 0.01%) + + + +wake_up_q (243 samples, 0.01%) + + + +get_level_ticks (255 samples, 0.01%) + + + +__rte_jhash_3words (1,328 samples, 0.05%) + + + +inet_addr_ifa_put (341 samples, 0.01%) + + + +rte_eth_tx_burst (7,546 samples, 0.29%) + + + +dp_vs_dest_is_avail (546 samples, 0.02%) + + + +sa_pool_hash (390 samples, 0.01%) + + + +dp_vs_out_xmit_fnat (20,883 samples, 0.79%) + + + +tcp_in_adjust_seq (397 samples, 0.02%) + + + +INET_HOOK (5,256 samples, 0.20%) + + + +lcore_job_recv_fwd (240,432 samples, 9.13%) +lcore_job_rec.. 
+ + +ifa_put (326 samples, 0.01%) + + + +__memset_sse2 (382 samples, 0.01%) + + + +dp_vs_laddr_bind (26,783 samples, 1.02%) + + + +dp_vs_synproxy_snat_handler (434 samples, 0.02%) + + + +sa_pool_hash (443 samples, 0.02%) + + + +sys_futex (806 samples, 0.03%) + + + +dp_vs_conn_hash (6,394 samples, 0.24%) + + + +list_del (791 samples, 0.03%) + + + +__clock_gettime (257 samples, 0.01%) + + + +rte_raw_cksum (521 samples, 0.02%) + + + +rte_atomic32_dec (264 samples, 0.01%) + + + +tcp_state_trans (7,626 samples, 0.29%) + + + +rte_arch_bswap16 (324 samples, 0.01%) + + + +idev_put (706 samples, 0.03%) + + + +__random (3,075 samples, 0.12%) + + + +netif_xmit (3,806 samples, 0.14%) + + + +sa_fetch (17,686 samples, 0.67%) + + + +net_cmp (295 samples, 0.01%) + + + +dp_vs_conn_hash (6,300 samples, 0.24%) + + + +rte_pktmbuf_append (335 samples, 0.01%) + + + +INIT_LIST_HEAD (306 samples, 0.01%) + + + +__list_del_entry (299 samples, 0.01%) + + + +rte_ipv4_phdr_cksum (1,009 samples, 0.04%) + + + +__vdso_clock_gettime (514 samples, 0.02%) + + + +ip4_hdrlen (377 samples, 0.01%) + + + +rte_atomic32_inc (1,454 samples, 0.06%) + + + +__dpvs_timer_sched (2,095 samples, 0.08%) + + + +rte_atomic32_dec_and_test (252 samples, 0.01%) + + + +__dp_vs_pre_routing (7,213 samples, 0.27%) + + + +netif_port_get (298 samples, 0.01%) + + + +list_move_tail (385 samples, 0.01%) + + + +__dp_vs_fast_outxmit_fnat4 (18,578 samples, 0.71%) + + + +ipv4_output_fin (4,278 samples, 0.16%) + + + +rte_atomic32_dec (792 samples, 0.03%) + + + +do_lcore_job (309,786 samples, 11.76%) +do_lcore_job + + +dp_vs_conn_hashkey (1,545 samples, 0.06%) + + + +sys_futex (839 samples, 0.03%) + + + +dp_vs_conn_resend_packets (959 samples, 0.04%) + + + +dpvs_timer_sched (2,652 samples, 0.10%) + + + +ip4_hdrlen (801 samples, 0.03%) + + + +dp_vs_synproxy_snat_handler (379 samples, 0.01%) + + + +__memset_sse2 (592 samples, 0.02%) + + + +ip4_hdr (368 samples, 0.01%) + + + +__rte_raw_cksum_reduce (546 samples, 0.02%) + + + 
+dpvs_timer_update (5,965 samples, 0.23%) + + + +dp_vs_conn_is_in_timer (288 samples, 0.01%) + + + +tcp_conn_sched (46,692 samples, 1.77%) + + + +rte_arch_bswap32 (295 samples, 0.01%) + + + +inet_addr_equal (388 samples, 0.01%) + + + +dp_vs_laddr_unbind (19,099 samples, 0.73%) + + + +dp_vs_conn_put (9,187 samples, 0.35%) + + + +mbuf_may_pull (497 samples, 0.02%) + + + +system_call_after_swapgs (520 samples, 0.02%) + + + +dp_vs_synproxy_snat_handler (428 samples, 0.02%) + + + +tcp_in_add_toa (2,247 samples, 0.09%) + + + +__random_r (1,114 samples, 0.04%) + + + +do_lcore_job (672 samples, 0.03%) + + + +sa4_fetch (17,821 samples, 0.68%) + + + +neigh_hashkey (259 samples, 0.01%) + + + +ixgbe_recv_pkts_bulk_alloc (7,451 samples, 0.28%) + + + +rte_arch_bswap16 (535 samples, 0.02%) + + + +neigh_key_cmp (649 samples, 0.02%) + + + +ifa_put (324 samples, 0.01%) + + + +INIT_LIST_HEAD (359 samples, 0.01%) + + + +__dp_vs_pre_routing (7,044 samples, 0.27%) + + + +rte_atomic32_inc (261 samples, 0.01%) + + + +netif_rx_burst (11,996 samples, 0.46%) + + + +tcp_state_idx (786 samples, 0.03%) + + + +__lll_lock_wait_private (251 samples, 0.01%) + + + +system_call_fastpath (1,202 samples, 0.05%) + + + +dp_vs_synproxy_syn_rcv (3,382 samples, 0.13%) + + + +__list_add (246 samples, 0.01%) + + + +tcp_in_add_toa (2,246 samples, 0.09%) + + + +rte_lcore_id (341 samples, 0.01%) + + + +tcp_in_remove_ts (547 samples, 0.02%) + + + +dp_vs_conn_new (42,261 samples, 1.60%) + + + +dp_vs_schedule (45,085 samples, 1.71%) + + + +dp_vs_in (178,528 samples, 6.78%) +dp_vs_in + + +rte_raw_cksum (599 samples, 0.02%) + + + +rte_atomic32_dec (255 samples, 0.01%) + + + +__list_add (527 samples, 0.02%) + + + +dp_vs_conn_hashkey (1,558 samples, 0.06%) + + + +ipv4_output (5,010 samples, 0.19%) + + + +futex_wait_setup (467 samples, 0.02%) + + + +ip4_hdr (321 samples, 0.01%) + + + +__lll_lock_wait_private (1,356 samples, 0.05%) + + + +ip4_hdr (344 samples, 0.01%) + + + +rte_eth_tx_burst (7,545 samples, 0.29%) + + + 
+system_call_after_swapgs (251 samples, 0.01%) + + + +pkt_type_get (3,425 samples, 0.13%) + + + +dp_vs_dest_is_avail (472 samples, 0.02%) + + + +rte_ipv4_phdr_cksum (2,304 samples, 0.09%) + + + +ip4_hdr (315 samples, 0.01%) + + + +netif_xmit (2,441 samples, 0.09%) + + + +ixgbe_xmit_pkts (7,045 samples, 0.27%) + + + +rte_atomic32_dec_and_test (265 samples, 0.01%) + + + +put_laddr (307 samples, 0.01%) + + + +__dp_vs_in (178,867 samples, 6.79%) +__dp_vs_in + + +dp_vs_conn_put (8,841 samples, 0.34%) + + + +timer_expire (36,320 samples, 1.38%) + + + +dp_vs_dest_is_avail (376 samples, 0.01%) + + + +inet_addr_equal (437 samples, 0.02%) + + + +rte_pktmbuf_prepend (251 samples, 0.01%) + + + +rte_get_timer_cycles (4,464 samples, 0.17%) + + + +list_del (782 samples, 0.03%) + + + +dp_vs_service_lookup (1,285 samples, 0.05%) + + + +rte_arch_bswap16 (234 samples, 0.01%) + + + +dp_vs_conn_expire (36,490 samples, 1.39%) + + + +__random_r (831 samples, 0.03%) + + + +ip4_is_frag (289 samples, 0.01%) + + + +list_empty (2,500 samples, 0.09%) + + + +netif_xmit (2,359 samples, 0.09%) + + + +lcore_process_packets (257 samples, 0.01%) + + + +ifa_lookup (7,684 samples, 0.29%) + + + +rte_atomic32_inc (1,590 samples, 0.06%) + + + +xmit_outbound (32,530 samples, 1.24%) + + + +xmit_inbound (48,280 samples, 1.83%) +x.. 
+ + +dpvs_job_loop (322,590 samples, 12.25%) +dpvs_job_loop + + +tcp_in_adjust_seq (363 samples, 0.01%) + + + +dp_vs_dest_is_avail (631 samples, 0.02%) + + + +mbuf_header_pointer (418 samples, 0.02%) + + + +rte_ether_addr_copy (476 samples, 0.02%) + + + +dp_vs_fast_xmit_fnat (19,389 samples, 0.74%) + + + +dp_vs_conn_unhash (6,635 samples, 0.25%) + + + +dp_vs_conn_bind_dest (1,036 samples, 0.04%) + + + +sa_pool_destroy (3,514 samples, 0.13%) + + + +list_move_tail (383 samples, 0.01%) + + + +inet_addr_equal (2,324 samples, 0.09%) + + + +mbuf_header_pointer (397 samples, 0.02%) + + + +mbuf_header_pointer (371 samples, 0.01%) + + + +dp_vs_schedule (44,644 samples, 1.70%) + + + +OPENSSL_cleanse (255 samples, 0.01%) + + + +neigh_key_cmp (418 samples, 0.02%) + + + +rte_atomic32_dec (802 samples, 0.03%) + + + +ip4_hdrlen (488 samples, 0.02%) + + + +ip4_hdrlen (876 samples, 0.03%) + + + +lcore-worker-4 (327,176 samples, 12.42%) +lcore-worker-4 + + +rte_atomic32_dec_and_test (289 samples, 0.01%) + + + +mbuf_header_pointer (488 samples, 0.02%) + + + +[libcrypto.so.1.0.2k] (2,770 samples, 0.11%) + + + +dp_vs_conn_refresh_timer (7,196 samples, 0.27%) + + + +ip4_hdr (406 samples, 0.02%) + + + +rte_atomic32_read (329 samples, 0.01%) + + + +inet_addr_ifa_get (10,610 samples, 0.40%) + + + +rte_atomic32_inc (618 samples, 0.02%) + + + +dp_vs_save_outxmit_info (1,393 samples, 0.05%) + + + +dp_vs_conn_hashkey (1,560 samples, 0.06%) + + + +[libcrypto.so.1.0.2k] (2,920 samples, 0.11%) + + + +rte_is_zero_ether_addr (340 samples, 0.01%) + + + +ifa_lookup (7,782 samples, 0.30%) + + + +whtlst_hashkey (1,369 samples, 0.05%) + + + +__lll_lock_wait_private (249 samples, 0.01%) + + + +idev_put (864 samples, 0.03%) + + + +rte_is_zero_ether_addr (361 samples, 0.01%) + + + +__lll_unlock_wake_private (1,774 samples, 0.07%) + + + +netif_hard_xmit (2,997 samples, 0.11%) + + + +neigh_hashkey (240 samples, 0.01%) + + + +__dp_vs_in (179,330 samples, 6.81%) +__dp_vs_in + + +tcp_fnat_in_handler (7,686 
samples, 0.29%) + + + +qsch_sched_all (711 samples, 0.03%) + + + +timeval_to_ticks (672 samples, 0.03%) + + + +rte_atomic32_inc (263 samples, 0.01%) + + + +mbuf_header_pointer (460 samples, 0.02%) + + + +lcore_job_recv_fwd (240,394 samples, 9.13%) +lcore_job_rec.. + + +dp_vs_conn_is_in_timer (256 samples, 0.01%) + + + +rte_ether_addr_copy (431 samples, 0.02%) + + + +__memset_sse2 (341 samples, 0.01%) + + + +rte_lcore_id (353 samples, 0.01%) + + + +dp_vs_fill_iphdr (1,778 samples, 0.07%) + + + +list_empty (317 samples, 0.01%) + + + +tcp_out_save_seq (805 samples, 0.03%) + + + +dp_vs_stats_in (1,347 samples, 0.05%) + + + +neigh_lookup_entry (779 samples, 0.03%) + + + +rte_timer_manage (37,211 samples, 1.41%) + + + +__lll_unlock_wake_private (542 samples, 0.02%) + + + +af_inet_hooks (253 samples, 0.01%) + + + +netif_port_get (269 samples, 0.01%) + + + +rte_arch_bswap16 (257 samples, 0.01%) + + + +rte_pktmbuf_append (288 samples, 0.01%) + + + +tcp_in_add_toa (8,175 samples, 0.31%) + + + +rte_timer_manage (37,736 samples, 1.43%) + + + +rte_raw_cksum (1,707 samples, 0.06%) + + + +rte_atomic32_inc (499 samples, 0.02%) + + + +dp_vs_conn_unbind_dest (782 samples, 0.03%) + + + +dp_vs_dest_is_avail (554 samples, 0.02%) + + + +tcp_out_adjust_seq (1,445 samples, 0.05%) + + + +dp_vs_out_xmit_fnat (21,091 samples, 0.80%) + + + +ipv4_output_fin (4,488 samples, 0.17%) + + + +do_futex (745 samples, 0.03%) + + + +[libcrypto.so.1.0.2k] (300 samples, 0.01%) + + + +rte_atomic32_inc (2,815 samples, 0.11%) + + + +dp_vs_synproxy_snat_handler (420 samples, 0.02%) + + + +sa_release (18,274 samples, 0.69%) + + + +rte_arch_bswap16 (261 samples, 0.01%) + + + +lcore_stats_burst (1,039 samples, 0.04%) + + + +ip4_hdrlen (521 samples, 0.02%) + + + +__clock_gettime (613 samples, 0.02%) + + + +sa_pool_hash (432 samples, 0.02%) + + + +ip4_hdrlen (872 samples, 0.03%) + + + +tcp_secure_sequence_number (958 samples, 0.04%) + + + +__dp_vs_pre_routing (7,289 samples, 0.28%) + + + +sa_pool_destroy (3,515 
samples, 0.13%) + + + +INET_HOOK (4,561 samples, 0.17%) + + + +timeval_to_ticks (285 samples, 0.01%) + + + +rte_atomic32_dec_and_test (296 samples, 0.01%) + + + +__random_r (1,049 samples, 0.04%) + + + +rte_atomic32_inc (374 samples, 0.01%) + + + +futex_wait (747 samples, 0.03%) + + + +__memset_sse2 (300 samples, 0.01%) + + + +get_level_ticks (236 samples, 0.01%) + + + +dp_vs_fill_iphdr (1,986 samples, 0.08%) + + + +rte_ether_addr_copy (353 samples, 0.01%) + + + +timeval_to_ticks (729 samples, 0.03%) + + + +rte_ether_addr_copy (357 samples, 0.01%) + + + +__list_add (485 samples, 0.02%) + + + +tcp_hdr (686 samples, 0.03%) + + + +rte_pktmbuf_prepend (398 samples, 0.02%) + + + +rte_atomic32_dec (435 samples, 0.02%) + + + +eth_addr_equal (979 samples, 0.04%) + + + +tcp_fnat_in_handler (7,840 samples, 0.30%) + + + +route4_output (1,791 samples, 0.07%) + + + +sa_pool_release (3,113 samples, 0.12%) + + + +dp_vs_conn_refresh_timer (6,714 samples, 0.25%) + + + +tcp_hdr (464 samples, 0.02%) + + + +dp_vs_conn_alloc (1,353 samples, 0.05%) + + + +mbuf_header_pointer (519 samples, 0.02%) + + + +route4_put (346 samples, 0.01%) + + + +ip4_hdrlen (431 samples, 0.02%) + + + +sa_pool_destroy (3,446 samples, 0.13%) + + + +rte_atomic32_dec (772 samples, 0.03%) + + + +tcp_in_add_toa (8,202 samples, 0.31%) + + + +tcp_hdr (285 samples, 0.01%) + + + +OPENSSL_cleanse (280 samples, 0.01%) + + + +inet_addr_equal (2,069 samples, 0.08%) + + + +dp_vs_out_xmit_fnat (21,359 samples, 0.81%) + + + +netif_hard_xmit (1,842 samples, 0.07%) + + + +__dp_vs_service_get (873 samples, 0.03%) + + + +netif_xmit (2,302 samples, 0.09%) + + + +rte_pktmbuf_lastseg (237 samples, 0.01%) + + + +msg_slave_process (5,026 samples, 0.19%) + + + +sa_fetch (18,047 samples, 0.69%) + + + +futex_wake (802 samples, 0.03%) + + + +rte_atomic32_dec (825 samples, 0.03%) + + + +ip4_hdrlen (885 samples, 0.03%) + + + +rte_pktmbuf_append (296 samples, 0.01%) + + + +dp_vs_stats_out (1,360 samples, 0.05%) + + + +netif_deliver_mbuf 
(208,933 samples, 7.93%) +netif_deliv.. + + +list_empty (403 samples, 0.02%) + + + +__rte_raw_cksum (322 samples, 0.01%) + + + +__memset_sse2 (377 samples, 0.01%) + + + +rte_prefetch0 (322 samples, 0.01%) + + + +rte_atomic32_inc (477 samples, 0.02%) + + + +dp_vs_conn_expire (36,299 samples, 1.38%) + + + +rte_atomic32_dec_and_test (308 samples, 0.01%) + + + +rte_atomic32_dec (265 samples, 0.01%) + + + +tcp_send_csum (1,534 samples, 0.06%) + + + +netif_tx_burst (8,393 samples, 0.32%) + + + +tcp_send_csum (4,563 samples, 0.17%) + + + +dp_vs_conn_free (544 samples, 0.02%) + + + +list_del (792 samples, 0.03%) + + + +ip4_hdrlen (457 samples, 0.02%) + + + +lcore_stats_burst (1,135 samples, 0.04%) + + + +rte_jhash_3words (1,417 samples, 0.05%) + + + +inet_addr_equal (447 samples, 0.02%) + + + +rte_atomic32_dec (704 samples, 0.03%) + + + +rte_rdtsc (4,452 samples, 0.17%) + + + +__list_del (250 samples, 0.01%) + + + +rte_is_zero_ether_addr (570 samples, 0.02%) + + + +xmit_outbound (34,432 samples, 1.31%) + + + +lcore-worker-5 (327,539 samples, 12.44%) +lcore-worker-5 + + +futex_wake (929 samples, 0.04%) + + + +whtlst_hashkey (1,326 samples, 0.05%) + + + +dp_vs_conn_get (17,373 samples, 0.66%) + + + +__memset_sse2 (379 samples, 0.01%) + + + +rte_eth_tx_burst (7,340 samples, 0.28%) + + + +ipv4_output_fin2 (4,190 samples, 0.16%) + + + +dp_vs_whtlst_allow (4,487 samples, 0.17%) + + + +do_lcore_job (309,752 samples, 11.76%) +do_lcore_job + + +ip4_hdrlen (341 samples, 0.01%) + + + +sa_release (18,413 samples, 0.70%) + + + +inet_addr_ifa_put (316 samples, 0.01%) + + + +netif_port_get (266 samples, 0.01%) + + + +rte_is_zero_ether_addr (490 samples, 0.02%) + + + +qsch_sched_all (270 samples, 0.01%) + + + +neigh_confirm (3,924 samples, 0.15%) + + + +xmit_outbound (33,457 samples, 1.27%) + + + +__list_del_entry (320 samples, 0.01%) + + + +futex_wait (620 samples, 0.02%) + + + +rte_lcore_id (326 samples, 0.01%) + + + +__list_del (511 samples, 0.02%) + + + +dp_vs_conn_set_timeout (253 
samples, 0.01%) + + + +netif_update_worker_loop_cnt (450 samples, 0.02%) + + + +sa_release (18,277 samples, 0.69%) + + + +__dp_vs_conn_hash (6,134 samples, 0.23%) + + + +rte_atomic32_inc (395 samples, 0.01%) + + + +dpvs_timer_update (6,354 samples, 0.24%) + + + +rte_atomic32_inc (431 samples, 0.02%) + + + +dp_vs_conn_set_timeout (667 samples, 0.03%) + + + +rte_eth_rx_burst (11,261 samples, 0.43%) + + + +rte_atomic32_inc (1,280 samples, 0.05%) + + + +rte_atomic32_inc (1,105 samples, 0.04%) + + + +sa_pool_hash (411 samples, 0.02%) + + + +mbuf_may_pull (363 samples, 0.01%) + + + +ip4_hdrlen (929 samples, 0.04%) + + + +dp_vs_rr_schedule (869 samples, 0.03%) + + + +route_out_net_lookup (717 samples, 0.03%) + + + +do_futex (1,105 samples, 0.04%) + + + +rte_rdtsc (4,049 samples, 0.15%) + + + +__dp_vs_fast_outxmit_fnat4 (18,779 samples, 0.71%) + + + +ip4_hdrlen (915 samples, 0.03%) + + + +inet_addr_equal (690 samples, 0.03%) + + + +blklst_hashkey (1,585 samples, 0.06%) + + + +rte_arch_bswap32 (941 samples, 0.04%) + + + +tcp_secure_sequence_number (832 samples, 0.03%) + + + +mbuf_header_pointer (386 samples, 0.01%) + + + +[libcrypto.so.1.0.2k] (260 samples, 0.01%) + + + +dp_vs_dest_put (326 samples, 0.01%) + + + +lcore_job_xmit (12,238 samples, 0.46%) + + + +lcore_stats_burst (1,091 samples, 0.04%) + + + +dp_vs_conn_is_in_timer (288 samples, 0.01%) + + + +rte_is_zero_ether_addr (376 samples, 0.01%) + + + +dp_vs_conn_bind_dest (1,029 samples, 0.04%) + + + +port_tab_hashkey (296 samples, 0.01%) + + + +netif_xmit (1,189 samples, 0.05%) + + + +rte_atomic32_dec (449 samples, 0.02%) + + + +eal_thread_loop (322,287 samples, 12.24%) +eal_thread_loop + + +netif_tx_burst (8,363 samples, 0.32%) + + + +rte_atomic32_dec (747 samples, 0.03%) + + + +dp_vs_conn_expire (35,551 samples, 1.35%) + + + +xmit_inbound (49,383 samples, 1.88%) +x.. 
+ + +qsch_sched_all (263 samples, 0.01%) + + + +dp_vs_conn_unbind_dest (735 samples, 0.03%) + + + +ip4_hdrlen (940 samples, 0.04%) + + + +neigh_output (3,574 samples, 0.14%) + + + +tcp_fnat_in_handler (7,290 samples, 0.28%) + + + +neigh_fill_mac (772 samples, 0.03%) + + + +dp_vs_save_xmit_info (1,558 samples, 0.06%) + + + +ip4_hdrlen (835 samples, 0.03%) + + + +do_lcore_job (685 samples, 0.03%) + + + +rte_atomic32_read (313 samples, 0.01%) + + + +netif_xmit (1,207 samples, 0.05%) + + + +whtlst_hashkey (1,344 samples, 0.05%) + + + +this_lcore_sched (246 samples, 0.01%) + + + +dp_vs_conn_hash (6,176 samples, 0.23%) + + + +INET_HOOK (190,305 samples, 7.23%) +INET_HOOK + + +sa_pool_release (2,933 samples, 0.11%) + + + +timeval_to_ticks (286 samples, 0.01%) + + + +rte_atomic32_dec (291 samples, 0.01%) + + + +dp_vs_conn_get (17,397 samples, 0.66%) + + + +__rte_raw_cksum_reduce (353 samples, 0.01%) + + + +ifa_put (321 samples, 0.01%) + + + +sa_pool_hash (450 samples, 0.02%) + + + +inet_addr_ifa_get (11,270 samples, 0.43%) + + + +ip4_hdr (225 samples, 0.01%) + + + +idev_put (800 samples, 0.03%) + + + +inet_addr_equal (254 samples, 0.01%) + + + +__lll_lock_wait_private (257 samples, 0.01%) + + + +dp_vs_conn_put (9,365 samples, 0.36%) + + + +futex_wait (686 samples, 0.03%) + + + +eal_thread_loop (321,988 samples, 12.23%) +eal_thread_loop + + +sys_futex (1,209 samples, 0.05%) + + + +dp_vs_dest_is_valid (541 samples, 0.02%) + + + +rte_is_zero_ether_addr (349 samples, 0.01%) + + + +__dp_vs_service_get (1,247 samples, 0.05%) + + + +list_del (812 samples, 0.03%) + + + +neigh_lookup_entry (792 samples, 0.03%) + + + +rte_jhash_3words (1,395 samples, 0.05%) + + + +rte_atomic32_dec_and_test (253 samples, 0.01%) + + + +rte_is_zero_ether_addr (401 samples, 0.02%) + + + +pkt_type_get (3,626 samples, 0.14%) + + + +tcp_conn_sched (46,842 samples, 1.78%) + + + +rte_arch_bswap32 (295 samples, 0.01%) + + + +__rte_raw_cksum_reduce (384 samples, 0.01%) + + + +rte_atomic32_dec (787 samples, 
0.03%) + + + +dp_vs_pre_routing (7,674 samples, 0.29%) + + + +rte_atomic32_inc (2,813 samples, 0.11%) + + + +__list_add (473 samples, 0.02%) + + + +__rte_raw_cksum (317 samples, 0.01%) + + + +get_level_ticks (390 samples, 0.01%) + + + +ip4_hdr (233 samples, 0.01%) + + + +dpvs_timer_update (6,398 samples, 0.24%) + + + +system_call_fastpath (876 samples, 0.03%) + + + +dp_vs_stats_out (1,325 samples, 0.05%) + + + +rte_atomic32_inc (1,787 samples, 0.07%) + + + +__list_del (560 samples, 0.02%) + + + +dp_vs_stats_conn (277 samples, 0.01%) + + + +neigh_key_cmp (338 samples, 0.01%) + + + +dpvs_time_rand_delay (513 samples, 0.02%) + + + +inet_addr_ifa_put (300 samples, 0.01%) + + + +rte_atomic32_dec (834 samples, 0.03%) + + + +rte_is_zero_ether_addr (500 samples, 0.02%) + + + +af_inet_hooks (273 samples, 0.01%) + + + +lcore_process_arp_ring (5,574 samples, 0.21%) + + + +tcp_state_idx (782 samples, 0.03%) + + + +__list_add (461 samples, 0.02%) + + + +inet_addr_equal (2,111 samples, 0.08%) + + + +lcore_job_xmit (12,454 samples, 0.47%) + + + +rte_atomic32_dec_and_test (3,348 samples, 0.13%) + + + +rte_atomic32_inc (1,744 samples, 0.07%) + + + +dp_vs_pre_routing (7,713 samples, 0.29%) + + + +net_cmp (313 samples, 0.01%) + + + +tcp_out_adjust_seq (1,348 samples, 0.05%) + + + +__dpvs_timer_sched (4,259 samples, 0.16%) + + + +__dp_vs_in (177,407 samples, 6.74%) +__dp_vs_in + + +sa_release (18,455 samples, 0.70%) + + + +tcp_in_init_seq (1,734 samples, 0.07%) + + + +__list_del (250 samples, 0.01%) + + + +rte_timer_manage (38,007 samples, 1.44%) + + + +do_lcore_job (733 samples, 0.03%) + + + +ip4_hdrlen (856 samples, 0.03%) + + + +dp_vs_service_hashkey (361 samples, 0.01%) + + + +tcp_conn_lookup (34,792 samples, 1.32%) + + + +dpvs_job_loop (321,175 samples, 12.20%) +dpvs_job_loop + + +port_tab_hashkey (271 samples, 0.01%) + + + +__vdso_clock_gettime (494 samples, 0.02%) + + + +dp_vs_conn_get (17,179 samples, 0.65%) + + + +dp_vs_laddr_unbind (19,284 samples, 0.73%) + + + 
+__clock_gettime (640 samples, 0.02%) + + + +__rte_raw_cksum_reduce (566 samples, 0.02%) + + + +dp_vs_conn_hashkey (1,486 samples, 0.06%) + + + +rte_arch_bswap16 (255 samples, 0.01%) + + + +__lll_lock_wait_private (251 samples, 0.01%) + + + +get_level_ticks (412 samples, 0.02%) + + + +INET_HOOK (193,970 samples, 7.37%) +INET_HOOK + + +tcp_state_trans (7,889 samples, 0.30%) + + + +futex_wake (835 samples, 0.03%) + + + +rte_atomic32_inc (1,851 samples, 0.07%) + + + +__dp_vs_conn_hash (5,991 samples, 0.23%) + + + +__clock_gettime (273 samples, 0.01%) + + + +dpvs_timer_sched (2,712 samples, 0.10%) + + + +lcore-worker-8 (327,368 samples, 12.43%) +lcore-worker-8 + + +tcp_out_adjust_seq (1,555 samples, 0.06%) + + + +__random (3,090 samples, 0.12%) + + + +ipv4_rcv (198,114 samples, 7.52%) +ipv4_rcv + + +dp_vs_conn_hashkey (5,004 samples, 0.19%) + + + +ip4_hdr (317 samples, 0.01%) + + + +rte_get_tsc_cycles (4,790 samples, 0.18%) + + + +futex_wake (758 samples, 0.03%) + + + +do_lcore_job (309,343 samples, 11.75%) +do_lcore_job + + +__get_laddr (8,017 samples, 0.30%) + + + +dp_vs_conn_get (17,040 samples, 0.65%) + + + +route_out_local_lookup (970 samples, 0.04%) + + + +dp_vs_conn_put (9,442 samples, 0.36%) + + + +tcp_conn_expire (2,435 samples, 0.09%) + + + +__drand48_iterate (554 samples, 0.02%) + + + +__dp_vs_xmit_fnat4 (37,213 samples, 1.41%) + + + +__dpvs_timer_sched (4,236 samples, 0.16%) + + + +dp_vs_blklst_lookup (4,700 samples, 0.18%) + + + +rte_rdtsc (4,236 samples, 0.16%) + + + +netif_hard_xmit (3,022 samples, 0.11%) + + + +dp_vs_stats_in (1,343 samples, 0.05%) + + + +dp_vs_conn_free (544 samples, 0.02%) + + + +inet_addr_ifa_get (10,891 samples, 0.41%) + + + +rte_atomic32_dec (287 samples, 0.01%) + + + +tcp_state_trans (7,688 samples, 0.29%) + + + +dpvs_timer_update (6,295 samples, 0.24%) + + + +qsch_sched_all (289 samples, 0.01%) + + + +tcp_fnat_in_handler (13,990 samples, 0.53%) + + + +rte_arch_bswap16 (345 samples, 0.01%) + + + +rte_eth_rx_burst (11,313 samples, 
0.43%) + + + +system_call_after_swapgs (396 samples, 0.02%) + + + +tcp_fnat_in_handler (13,741 samples, 0.52%) + + + +system_call_after_swapgs (542 samples, 0.02%) + + + +mbuf_may_pull (502 samples, 0.02%) + + + +route4_output (1,657 samples, 0.06%) + + + +__list_del_entry (286 samples, 0.01%) + + + +INET_HOOK (4,682 samples, 0.18%) + + + +rte_rdtsc (4,299 samples, 0.16%) + + + +rte_pktmbuf_prepend (240 samples, 0.01%) + + + +rte_pktmbuf_prepend (247 samples, 0.01%) + + + +dp_vs_laddr_bind (26,721 samples, 1.01%) + + + +rte_arch_bswap16 (507 samples, 0.02%) + + + +neigh_key_cmp (347 samples, 0.01%) + + + +dp_vs_dest_is_valid (555 samples, 0.02%) + + + +__list_del (543 samples, 0.02%) + + + +do_futex (858 samples, 0.03%) + + + +dp_vs_proto_lookup (337 samples, 0.01%) + + + +__dpvs_timer_sched (3,680 samples, 0.14%) + + + +__list_del (601 samples, 0.02%) + + + +netif_update_worker_loop_cnt (438 samples, 0.02%) + + + +eth_type_parse (1,277 samples, 0.05%) + + + +ip4_hdrlen (446 samples, 0.02%) + + + +OPENSSL_cleanse (239 samples, 0.01%) + + + +dp_vs_conn_detach_timer (397 samples, 0.02%) + + + +rte_jhash_3words (4,533 samples, 0.17%) + + + +mbuf_header_pointer (370 samples, 0.01%) + + + +dp_vs_stats_out (1,391 samples, 0.05%) + + + +tcp_secure_sequence_number (827 samples, 0.03%) + + + +list_del (858 samples, 0.03%) + + + +list_add_tail (689 samples, 0.03%) + + + +try_to_wake_up (295 samples, 0.01%) + + + +__random_r (857 samples, 0.03%) + + + +rte_atomic32_dec (444 samples, 0.02%) + + + +__rte_jhash_3words (4,394 samples, 0.17%) + + + +dp_vs_dest_is_avail (601 samples, 0.02%) + + + +ip4_hdr (335 samples, 0.01%) + + + +ip4_hdr (416 samples, 0.02%) + + + +inet_addr_equal (682 samples, 0.03%) + + + +sa_pool_fetch (2,373 samples, 0.09%) + + + +tcp_secure_sequence_number (848 samples, 0.03%) + + + +dp_vs_dest_is_avail (528 samples, 0.02%) + + + +system_call_fastpath (1,236 samples, 0.05%) + + + +rte_arch_bswap16 (285 samples, 0.01%) + + + +dp_vs_stats_out (1,367 samples, 
0.05%) + + + +rte_atomic32_dec_and_test (324 samples, 0.01%) + + + +ip4_hdr (364 samples, 0.01%) + + + +__get_laddr (8,291 samples, 0.31%) + + + +netif_rx_burst (11,888 samples, 0.45%) + + + +dp_vs_service_lookup (1,267 samples, 0.05%) + + + +netif_port_get (2,504 samples, 0.10%) + + + +__dp_vs_in (180,924 samples, 6.87%) +__dp_vs_in + + +netif_rx_burst (12,196 samples, 0.46%) + + + +__dp_vs_service_get (848 samples, 0.03%) + + + +__list_add (277 samples, 0.01%) + + + +eal_thread_loop (321,718 samples, 12.22%) +eal_thread_loop + + +netif_update_worker_loop_cnt (418 samples, 0.02%) + + + +rte_lcore_id (358 samples, 0.01%) + + + +lcore_job_xmit (12,230 samples, 0.46%) + + + +ip4_hdr (260 samples, 0.01%) + + + +lcore_process_arp_ring (5,625 samples, 0.21%) + + + +rte_atomic32_inc (558 samples, 0.02%) + + + +dp_vs_conn_is_in_timer (283 samples, 0.01%) + + + +OPENSSL_cleanse (275 samples, 0.01%) + + + +dpvs_job_loop (321,267 samples, 12.20%) +dpvs_job_loop + + +inet_addr_ifa_put (340 samples, 0.01%) + + + +netif_rx_burst (11,822 samples, 0.45%) + + + +dp_vs_laddr_bind (26,646 samples, 1.01%) + + + +__dp_vs_fast_outxmit_fnat4 (18,247 samples, 0.69%) + + + +rte_ipv4_phdr_cksum (2,212 samples, 0.08%) + + + +list_empty (2,144 samples, 0.08%) + + + +tcp_send_csum (4,670 samples, 0.18%) + + + +dp_vs_service_hashkey (324 samples, 0.01%) + + + +rte_atomic32_inc (1,396 samples, 0.05%) + + + +__list_del (250 samples, 0.01%) + + + +__clock_gettime (270 samples, 0.01%) + + + +__lll_unlock_wake_private (1,799 samples, 0.07%) + + + +idev_put (718 samples, 0.03%) + + + +__rte_raw_cksum (965 samples, 0.04%) + + + +rte_pktmbuf_trim (580 samples, 0.02%) + + + +route4_output (1,694 samples, 0.06%) + + + +get_level_ticks (236 samples, 0.01%) + + + +tcp_in_add_toa (8,186 samples, 0.31%) + + + +ip4_hdr (418 samples, 0.02%) + + + +tcp_fnat_in_handler (7,486 samples, 0.28%) + + + +inet_addr_equal (2,075 samples, 0.08%) + + + +dp_vs_save_outxmit_info (1,367 samples, 0.05%) + + + 
+dp_vs_service_hashkey (349 samples, 0.01%) + + + +mbuf_header_pointer (481 samples, 0.02%) + + + +rte_atomic32_dec (4,748 samples, 0.18%) + + + +tcp_out_adjust_mss (281 samples, 0.01%) + + + +rte_arch_bswap16 (255 samples, 0.01%) + + + +dp_vs_blklst_lookup (4,533 samples, 0.17%) + + + +tcp_in_remove_ts (580 samples, 0.02%) + + + +dp_vs_xmit_fnat (36,898 samples, 1.40%) + + + +dp_vs_service_hashkey (347 samples, 0.01%) + + + +dp_vs_conn_set_timeout (647 samples, 0.02%) + + + +rte_atomic32_dec_and_test (277 samples, 0.01%) + + + +inet_addr_equal (696 samples, 0.03%) + + + +__rte_raw_cksum (626 samples, 0.02%) + + + +dp_vs_stats_conn (252 samples, 0.01%) + + + +__list_del (512 samples, 0.02%) + + + +tcp_fnat_in_handler (13,674 samples, 0.52%) + + + +this_lcore_sched (262 samples, 0.01%) + + + +dp_vs_proto_lookup (407 samples, 0.02%) + + + +netif_hard_xmit (1,914 samples, 0.07%) + + + +INET_HOOK (4,822 samples, 0.18%) + + + +netif_update_worker_loop_cnt (428 samples, 0.02%) + + + +list_add (2,272 samples, 0.09%) + + + +dp_vs_conn_put (9,111 samples, 0.35%) + + + +[libcrypto.so.1.0.2k] (2,850 samples, 0.11%) + + + +rte_atomic32_dec (240 samples, 0.01%) + + + +__get_laddr (8,054 samples, 0.31%) + + + +dp_vs_conn_alloc (1,484 samples, 0.06%) + + + +netif_xmit (2,358 samples, 0.09%) + + + +neigh_confirm (3,693 samples, 0.14%) + + + +rte_arch_bswap16 (1,015 samples, 0.04%) + + + +rte_eth_tx_burst (7,405 samples, 0.28%) + + + +route4_put (398 samples, 0.02%) + + + +list_move_tail (388 samples, 0.01%) + + + +netif_port_get (283 samples, 0.01%) + + + +sys_futex (1,163 samples, 0.04%) + + + +blklst_hashkey (1,603 samples, 0.06%) + + + +ip4_hdr (338 samples, 0.01%) + + + +ip4_hdr (313 samples, 0.01%) + + + +sa_fetch (17,671 samples, 0.67%) + + + +rte_atomic32_inc (269 samples, 0.01%) + + + +sys_futex (812 samples, 0.03%) + + + +dp_vs_conn_unbind_dest (763 samples, 0.03%) + + + +list_del (861 samples, 0.03%) + + + +rte_atomic32_dec_and_test (341 samples, 0.01%) + + + 
+sa_pool_release (3,115 samples, 0.12%) + + + +list_move_tail (579 samples, 0.02%) + + + +dp_vs_conn_refresh_timer (7,511 samples, 0.29%) + + + +neigh_fill_mac (756 samples, 0.03%) + + + +__list_add (425 samples, 0.02%) + + + +ifa_lookup (7,719 samples, 0.29%) + + + +__lll_unlock_wake_private (1,744 samples, 0.07%) + + + +neigh_hashkey (270 samples, 0.01%) + + + +tcp_hdr (702 samples, 0.03%) + + + +rte_atomic32_dec (836 samples, 0.03%) + + + +dp_vs_conn_detach_timer (387 samples, 0.01%) + + + +__rte_jhash_3words (4,354 samples, 0.17%) + + + +sys_futex (1,335 samples, 0.05%) + + + +tcp_in_add_toa (8,099 samples, 0.31%) + + + +tcp_in_add_toa (2,201 samples, 0.08%) + + + +inet_addr_ifa_get (10,771 samples, 0.41%) + + + +mbuf_may_pull (595 samples, 0.02%) + + + +eth_addr_equal (1,015 samples, 0.04%) + + + +net_cmp (323 samples, 0.01%) + + + +__drand48_iterate (624 samples, 0.02%) + + + +dp_vs_service_hashkey (387 samples, 0.01%) + + + +ixgbe_xmit_pkts (7,051 samples, 0.27%) + + + +__list_add (243 samples, 0.01%) + + + +rte_atomic32_dec (300 samples, 0.01%) + + + +ipv4_output_fin (4,442 samples, 0.17%) + + + +rte_prefetch0 (290 samples, 0.01%) + + + +timeval_to_ticks (755 samples, 0.03%) + + + +rte_pktmbuf_append (323 samples, 0.01%) + + + +sa_fetch (17,499 samples, 0.66%) + + + +rte_atomic32_inc (2,791 samples, 0.11%) + + + +dp_vs_synproxy_snat_handler (413 samples, 0.02%) + + + +_raw_spin_unlock_irqrestore (273 samples, 0.01%) + + + +OPENSSL_cleanse (248 samples, 0.01%) + + + +sa_pool_hash (519 samples, 0.02%) + + + +rte_eth_rx_burst (11,151 samples, 0.42%) + + + +get_level_ticks (402 samples, 0.02%) + + + +__clock_gettime (300 samples, 0.01%) + + + +inet_addr_ifa_get (10,712 samples, 0.41%) + + + +__rte_jhash_3words (1,324 samples, 0.05%) + + + +inet_addr_equal (667 samples, 0.03%) + + + +mbuf_may_pull (376 samples, 0.01%) + + + +sa_fetch (17,606 samples, 0.67%) + + + +put_laddr (298 samples, 0.01%) + + + +[libcrypto.so.1.0.2k] (2,891 samples, 0.11%) + + + 
+lcore-worker-7 (327,700 samples, 12.44%) +lcore-worker-7 + + +timer_expire (36,884 samples, 1.40%) + + + +ip4_hdr (346 samples, 0.01%) + + + +INET_HOOK (190,969 samples, 7.25%) +INET_HOOK + + +rte_arch_bswap16 (616 samples, 0.02%) + + + +dp_vs_fill_iphdr (1,748 samples, 0.07%) + + + +netif_tx_burst (8,534 samples, 0.32%) + + + +rte_atomic32_inc (1,298 samples, 0.05%) + + + +do_lcore_job (310,894 samples, 11.81%) +do_lcore_job + + +ip4_hdr (410 samples, 0.02%) + + + +rte_atomic32_read (274 samples, 0.01%) + + + +ipv4_output_fin (4,529 samples, 0.17%) + + + +tcp_state_idx (791 samples, 0.03%) + + + +rte_atomic32_dec_and_test (261 samples, 0.01%) + + + +get_level_ticks (439 samples, 0.02%) + + + +rte_atomic32_dec (469 samples, 0.02%) + + + +inet_addr_fold (226 samples, 0.01%) + + + +rte_atomic32_inc (1,662 samples, 0.06%) + + + +start_secondary (11,948 samples, 0.45%) + + + +ip_addr_netcmp (232 samples, 0.01%) + + + +do_futex (1,089 samples, 0.04%) + + + +tcp_state_idx (727 samples, 0.03%) + + + +dp_vs_laddr_unbind (19,134 samples, 0.73%) + + + +inet_addr_equal (423 samples, 0.02%) + + + +eth_addr_equal (993 samples, 0.04%) + + + +sys_futex (1,095 samples, 0.04%) + + + +timeval_to_ticks (282 samples, 0.01%) + + + +lcore_job_timer_manage (46,565 samples, 1.77%) + + + +ipv4_output_fin2 (4,313 samples, 0.16%) + + + +neigh_hashkey (258 samples, 0.01%) + + + +tcp_out_save_seq (792 samples, 0.03%) + + + +dp_vs_conn_refresh_timer (7,324 samples, 0.28%) + + + +rte_atomic32_dec_and_test (281 samples, 0.01%) + + + +rte_atomic32_inc (1,672 samples, 0.06%) + + + +rte_atomic32_inc (497 samples, 0.02%) + + + +rte_atomic32_inc (1,778 samples, 0.07%) + + + +ipv4_output (5,131 samples, 0.19%) + + + +dpvs_time_rand_delay (519 samples, 0.02%) + + + +__rte_jhash_3words (4,359 samples, 0.17%) + + + +rte_atomic32_dec_and_test (3,381 samples, 0.13%) + + + +xmit_outbound (32,746 samples, 1.24%) + + + +rte_lcore_id (522 samples, 0.02%) + + + +rte_is_zero_ether_addr (553 samples, 0.02%) + + + 
+xmit_outbound (33,889 samples, 1.29%) + + + +netif_tx_burst (8,446 samples, 0.32%) + + + +rte_raw_cksum (523 samples, 0.02%) + + + +xmit_outbound (32,014 samples, 1.22%) + + + +dpvs_timer_update (6,319 samples, 0.24%) + + + +sa_pool_release (2,973 samples, 0.11%) + + + +inet_is_addr_any (468 samples, 0.02%) + + + +list_move_tail (525 samples, 0.02%) + + + +dp_vs_dest_is_avail (468 samples, 0.02%) + + + +dp_vs_save_xmit_info (1,577 samples, 0.06%) + + + +dp_vs_pre_routing (7,845 samples, 0.30%) + + + +ip4_hdr (328 samples, 0.01%) + + + +dp_vs_service_lookup (1,332 samples, 0.05%) + + + +dp_vs_service_hashkey (422 samples, 0.02%) + + + +netif_port_get (357 samples, 0.01%) + + + +inet_addr_ifa_get (11,627 samples, 0.44%) + + + +rte_pktmbuf_trim (576 samples, 0.02%) + + + +rte_raw_cksum (1,138 samples, 0.04%) + + + +__list_add (266 samples, 0.01%) + + + +__random (2,749 samples, 0.10%) + + + +dp_vs_conn_put (9,551 samples, 0.36%) + + + +netif_rcv_mbuf (204,585 samples, 7.77%) +netif_rcv_.. 
+ + +rte_atomic32_inc (362 samples, 0.01%) + + + +netif_hard_xmit (1,910 samples, 0.07%) + + + +__lll_unlock_wake_private (475 samples, 0.02%) + + + +eal_thread_loop (322,143 samples, 12.23%) +eal_thread_loop + + +__dp_vs_service_get (1,209 samples, 0.05%) + + + +dp_vs_conn_new (42,158 samples, 1.60%) + + + +rte_pktmbuf_adj (606 samples, 0.02%) + + + +rte_arch_bswap16 (591 samples, 0.02%) + + + +neigh_fill_mac (721 samples, 0.03%) + + + +__lll_unlock_wake_private (520 samples, 0.02%) + + + +ipv4_output (4,852 samples, 0.18%) + + + +ifa_put (290 samples, 0.01%) + + + +dp_vs_service_hashkey (359 samples, 0.01%) + + + +[libcrypto.so.1.0.2k] (2,735 samples, 0.10%) + + + +netif_xmit (1,242 samples, 0.05%) + + + +ip4_hdr (224 samples, 0.01%) + + + +tcp_send_csum (1,497 samples, 0.06%) + + + +timeval_to_ticks (681 samples, 0.03%) + + + +ifa_lookup (7,659 samples, 0.29%) + + + +list_empty (365 samples, 0.01%) + + + +rte_atomic32_inc (1,118 samples, 0.04%) + + + +rte_atomic32_inc (1,451 samples, 0.06%) + + + +ip4_hdr (337 samples, 0.01%) + + + +tick_nohz_idle_enter (977 samples, 0.04%) + + + +ifa_put (294 samples, 0.01%) + + + +dp_vs_service_hashkey (400 samples, 0.02%) + + + +netif_rcv_mbuf (206,726 samples, 7.85%) +netif_rcv_m.. 
+ + +dev_get_idev (1,488 samples, 0.06%) + + + +ip4_is_frag (601 samples, 0.02%) + + + +dp_vs_conn_unbind_dest (736 samples, 0.03%) + + + +ip4_is_frag (588 samples, 0.02%) + + + +tcp_hdr (699 samples, 0.03%) + + + +route_out_local_lookup (859 samples, 0.03%) + + + +list_empty (370 samples, 0.01%) + + + +neigh_entry_state_trans (615 samples, 0.02%) + + + +__lll_lock_wait_private (1,337 samples, 0.05%) + + + +ip4_hdrlen (891 samples, 0.03%) + + + +rte_lcore_id (322 samples, 0.01%) + + + +sa_pool_destroy (3,456 samples, 0.13%) + + + +dp_vs_conn_unbind_dest (759 samples, 0.03%) + + + +__list_del (246 samples, 0.01%) + + + +sa_pool_fetch (2,282 samples, 0.09%) + + + +tcp_in_adjust_seq (270 samples, 0.01%) + + + +list_del (794 samples, 0.03%) + + + +dp_vs_conn_set_timeout (228 samples, 0.01%) + + + +dp_vs_fast_outxmit_fnat (18,333 samples, 0.70%) + + + +dp_vs_conn_new (42,230 samples, 1.60%) + + + +dp_vs_conn_hashkey (1,557 samples, 0.06%) + + + +__memset_sse2 (285 samples, 0.01%) + + + +dp_vs_conn_is_in_timer (282 samples, 0.01%) + + + +msg_slave_process (4,868 samples, 0.18%) + + + +rte_lcore_id (356 samples, 0.01%) + + + +tcp_out_adjust_mss (255 samples, 0.01%) + + + +ip4_is_frag (593 samples, 0.02%) + + + +inet_addr_equal (414 samples, 0.02%) + + + +dp_vs_whtlst_allow (4,194 samples, 0.16%) + + + +dp_vs_dest_is_avail (378 samples, 0.01%) + + + +__rte_jhash_3words (4,418 samples, 0.17%) + + + +mbuf_userdata_reset (231 samples, 0.01%) + + + +rte_atomic32_read (333 samples, 0.01%) + + + +tcp_send_csum (3,044 samples, 0.12%) + + + +rte_is_zero_ether_addr (578 samples, 0.02%) + + + +timeval_to_ticks (709 samples, 0.03%) + + + +rte_atomic32_dec (247 samples, 0.01%) + + + +port_tab_hashkey (267 samples, 0.01%) + + + +dpvs_timer_sched (2,684 samples, 0.10%) + + + +route_out_net_lookup (666 samples, 0.03%) + + + +tcp_hdr (298 samples, 0.01%) + + + +inet_addr_equal (558 samples, 0.02%) + + + +__lll_unlock_wake_private (1,712 samples, 0.07%) + + + +tcp_out_adjust_mss (299 
samples, 0.01%) + + + +netif_hard_xmit (2,826 samples, 0.11%) + + + +ipv4_output (5,071 samples, 0.19%) + + + +list_del (856 samples, 0.03%) + + + +rte_lcore_id (368 samples, 0.01%) + + + +__list_del (585 samples, 0.02%) + + + +slave_lcore_loop_func (5,348 samples, 0.20%) + + + +list_empty (2,067 samples, 0.08%) + + + +ip4_hdr (428 samples, 0.02%) + + + +lcore_job_recv_fwd (240,000 samples, 9.11%) +lcore_job_rec.. + + +rte_pktmbuf_trim (610 samples, 0.02%) + + + +netif_port_get (341 samples, 0.01%) + + + +rte_is_zero_ether_addr (411 samples, 0.02%) + + + +xmit_outbound (32,611 samples, 1.24%) + + + +rte_lcore_id (301 samples, 0.01%) + + + +sa_pool_destroy (3,400 samples, 0.13%) + + + +dp_vs_conn_put (9,100 samples, 0.35%) + + + +timeval_to_ticks (714 samples, 0.03%) + + + +netif_hard_xmit (2,807 samples, 0.11%) + + + +dp_vs_save_outxmit_info (1,447 samples, 0.05%) + + + +rte_pktmbuf_append (274 samples, 0.01%) + + + +lcore_process_packets (250 samples, 0.01%) + + + +rte_ether_addr_copy (460 samples, 0.02%) + + + +inet_addr_equal (2,465 samples, 0.09%) + + + +dp_vs_conn_unhash (6,408 samples, 0.24%) + + + +__memset_sse2 (381 samples, 0.01%) + + + +put_laddr (292 samples, 0.01%) + + + +sa4_fetch (17,465 samples, 0.66%) + + + +dp_vs_fast_xmit_fnat (19,219 samples, 0.73%) + + + +INET_HOOK (5,395 samples, 0.20%) + + + +lcore_process_packets (212,991 samples, 8.09%) +lcore_proce.. 
+ + +rte_atomic32_inc (1,683 samples, 0.06%) + + + +list_empty (381 samples, 0.01%) + + + +lcore_process_redirect_ring (421 samples, 0.02%) + + + +dp_vs_conn_refresh_timer (7,040 samples, 0.27%) + + + +this_lcore_sched (239 samples, 0.01%) + + + +lcore_job_xmit (12,338 samples, 0.47%) + + + +rte_arch_bswap16 (282 samples, 0.01%) + + + +system_call_fastpath (1,195 samples, 0.05%) + + + +inet_addr_equal (427 samples, 0.02%) + + + +netif_port_get (270 samples, 0.01%) + + + +netif_hard_xmit (1,121 samples, 0.04%) + + + +neigh_key_cmp (839 samples, 0.03%) + + + +__dp_vs_in (178,729 samples, 6.79%) +__dp_vs_in + + +dev_get_idev (1,424 samples, 0.05%) + + + +[unknown] (877 samples, 0.03%) + + + +__list_add (364 samples, 0.01%) + + + +do_futex (1,251 samples, 0.05%) + + + +dpvs_timer_update (6,498 samples, 0.25%) + + + +inet_addr_equal (394 samples, 0.01%) + + + +INET_HOOK (4,857 samples, 0.18%) + + + +tuplehash_to_conn (224 samples, 0.01%) + + + +tcp_fnat_out_handler (11,029 samples, 0.42%) + + + +dp_vs_laddr_unbind (19,357 samples, 0.74%) + + + +lcore_process_redirect_ring (444 samples, 0.02%) + + + +rte_atomic32_inc (1,254 samples, 0.05%) + + + +__list_del (530 samples, 0.02%) + + + +inet_addr_equal (609 samples, 0.02%) + + + +rte_atomic32_inc (1,751 samples, 0.07%) + + + +tcp_state_trans (7,513 samples, 0.29%) + + + +dp_vs_service_hashkey (419 samples, 0.02%) + + + +tcp_out_save_seq (722 samples, 0.03%) + + + +rte_pktmbuf_append (230 samples, 0.01%) + + + +__rte_raw_cksum_reduce (371 samples, 0.01%) + + + +dp_vs_conn_alloc (1,378 samples, 0.05%) + + + +list_add_tail (1,496 samples, 0.06%) + + + +neigh_key_cmp (651 samples, 0.02%) + + + +__memset_sse2 (255 samples, 0.01%) + + + +dp_vs_dest_is_avail (627 samples, 0.02%) + + + +rte_jhash_3words (1,402 samples, 0.05%) + + + +__vdso_clock_gettime (513 samples, 0.02%) + + + +rte_arch_bswap16 (642 samples, 0.02%) + + + +__rte_raw_cksum (671 samples, 0.03%) + + + +INET_HOOK (5,127 samples, 0.19%) + + + +af_inet_hooks (257 
samples, 0.01%) + + + +ixgbe_recv_pkts_bulk_alloc (7,749 samples, 0.29%) + + + +__dp_vs_pre_routing (7,210 samples, 0.27%) + + + +rte_is_zero_ether_addr (345 samples, 0.01%) + + + +netif_port_get (328 samples, 0.01%) + + + +tcp_fnat_in_handler (7,580 samples, 0.29%) + + + +get_level_ticks (235 samples, 0.01%) + + + +route4_put (398 samples, 0.02%) + + + +neigh_hashkey (245 samples, 0.01%) + + + +__list_add (251 samples, 0.01%) + + + +xmit_inbound (49,864 samples, 1.89%) +x.. + + +rte_atomic32_dec (485 samples, 0.02%) + + + +neigh_output (3,343 samples, 0.13%) + + + +dev_get_idev (1,191 samples, 0.05%) + + + +__rte_jhash_3words (1,267 samples, 0.05%) + + + +dp_vs_conn_hashkey (5,113 samples, 0.19%) + + + +ip4_hdrlen (926 samples, 0.04%) + + + +rte_atomic32_dec_and_test (273 samples, 0.01%) + + + +rte_atomic32_dec (4,776 samples, 0.18%) + + + +slave_lcore_loop_func (6,126 samples, 0.23%) + + + +rte_atomic32_read (299 samples, 0.01%) + + + +rte_atomic32_dec_and_test (3,337 samples, 0.13%) + + + +INET_HOOK (190,121 samples, 7.22%) +INET_HOOK + + +rte_arch_bswap16 (315 samples, 0.01%) + + + +__drand48_iterate (586 samples, 0.02%) + + + +ipv4_output_fin (4,395 samples, 0.17%) + + + +neigh_entry_state_trans (550 samples, 0.02%) + + + +rte_atomic32_inc (347 samples, 0.01%) + + + +inet_addr_ifa_put (309 samples, 0.01%) + + + +__dpvs_timer_sched (3,659 samples, 0.14%) + + + +dp_vs_conn_new (42,374 samples, 1.61%) + + + +__memset_sse2 (306 samples, 0.01%) + + + +sa_pool_hash (419 samples, 0.02%) + + + +ip4_hdrlen (508 samples, 0.02%) + + + +mbuf_header_pointer (381 samples, 0.01%) + + + +tcp_conn_lookup (34,763 samples, 1.32%) + + + +rte_atomic32_dec_and_test (321 samples, 0.01%) + + + +inet_addr_fold (250 samples, 0.01%) + + + +dp_vs_conn_attach_timer (3,033 samples, 0.12%) + + + +ixgbe_xmit_pkts (7,043 samples, 0.27%) + + + +list_move_tail (374 samples, 0.01%) + + + +rte_atomic32_inc (255 samples, 0.01%) + + + +inet_addr_equal (647 samples, 0.02%) + + + +list_add (2,412 
samples, 0.09%) + + + +netif_deliver_mbuf (209,234 samples, 7.95%) +netif_deliv.. + + +rte_atomic32_dec (801 samples, 0.03%) + + + +__dp_vs_service_get (871 samples, 0.03%) + + + +list_empty (368 samples, 0.01%) + + + +lcore_stats_burst (1,091 samples, 0.04%) + + + +dp_vs_dest_is_valid (531 samples, 0.02%) + + + +dp_vs_conn_free (564 samples, 0.02%) + + + +tcp_in_adjust_seq (265 samples, 0.01%) + + + +rte_pktmbuf_adj (604 samples, 0.02%) + + + +dp_vs_conn_set_timeout (656 samples, 0.02%) + + + +route4_put (391 samples, 0.01%) + + + +rte_arch_bswap16 (555 samples, 0.02%) + + + +neigh_key_cmp (358 samples, 0.01%) + + + +tcp_out_save_seq (773 samples, 0.03%) + + + +dp_vs_fill_iphdr (2,071 samples, 0.08%) + + + +sys_futex (1,149 samples, 0.04%) + + + +dp_vs_conn_hashkey (1,580 samples, 0.06%) + + + +rte_ipv4_phdr_cksum (1,016 samples, 0.04%) + + + +dp_vs_dest_is_avail (588 samples, 0.02%) + + + +do_futex (688 samples, 0.03%) + + + +[libcrypto.so.1.0.2k] (267 samples, 0.01%) + + + +rte_timer_manage (38,110 samples, 1.45%) + + + +ip4_hdrlen (976 samples, 0.04%) + + + +rte_atomic32_dec (4,781 samples, 0.18%) + + + +ip4_hdr (369 samples, 0.01%) + + + +neigh_entry_state_trans (638 samples, 0.02%) + + + +rte_pktmbuf_trim (577 samples, 0.02%) + + + +dp_vs_in (179,579 samples, 6.82%) +dp_vs_in + + +tcp_in_adjust_seq (267 samples, 0.01%) + + + +dp_vs_conn_bind_dest (1,022 samples, 0.04%) + + + +rte_jhash_3words (4,604 samples, 0.17%) + + + +rte_atomic32_inc (1,233 samples, 0.05%) + + + +__vdso_clock_gettime (511 samples, 0.02%) + + + +__list_del (504 samples, 0.02%) + + + +try_to_wake_up (274 samples, 0.01%) + + + +dp_vs_proto_lookup (408 samples, 0.02%) + + + +dp_vs_fast_xmit_fnat (19,000 samples, 0.72%) + + + +inet_addr_equal (418 samples, 0.02%) + + + +rte_lcore_id (349 samples, 0.01%) + + + +netif_port_get (350 samples, 0.01%) + + + +list_empty (2,379 samples, 0.09%) + + + +poll_idle (1,833 samples, 0.07%) + + + +mbuf_may_pull (505 samples, 0.02%) + + + +dp_vs_out_xmit_fnat 
(22,163 samples, 0.84%) + + + +__get_laddr (8,268 samples, 0.31%) + + + +lcore_process_redirect_ring (244 samples, 0.01%) + + + +dp_vs_save_outxmit_info (1,372 samples, 0.05%) + + + +rte_is_zero_ether_addr (394 samples, 0.01%) + + + +dp_vs_out_xmit_fnat (22,532 samples, 0.86%) + + + +INET_HOOK (4,733 samples, 0.18%) + + + +rte_ether_addr_copy (356 samples, 0.01%) + + + +ixgbe_recv_pkts_bulk_alloc (7,595 samples, 0.29%) + + + +dp_vs_redirect_ring_proc (232 samples, 0.01%) + + + +dp_vs_dest_is_valid (544 samples, 0.02%) + + + +netif_rx_burst (11,850 samples, 0.45%) + + + +xmit_inbound (49,615 samples, 1.88%) +x.. + + +tcp_out_adjust_seq (1,380 samples, 0.05%) + + + +ifa_lookup (7,644 samples, 0.29%) + + + +rte_atomic32_inc (1,704 samples, 0.06%) + + + +netif_rcv_mbuf (204,280 samples, 7.76%) +netif_rcv_.. + + +af_inet_hooks (302 samples, 0.01%) + + + +rte_raw_cksum (586 samples, 0.02%) + + + +tcp_hdr (640 samples, 0.02%) + + + +__lll_unlock_wake_private (1,763 samples, 0.07%) + + + +lcore_process_packets (266 samples, 0.01%) + + + +pkt_type_get (3,383 samples, 0.13%) + + + +dp_vs_laddr_unbind (19,178 samples, 0.73%) + + + +dp_vs_service_lookup (948 samples, 0.04%) + + + +ip4_hdr (226 samples, 0.01%) + + + +dp_vs_save_outxmit_info (1,460 samples, 0.06%) + + + +ip4_hdr (307 samples, 0.01%) + + + +dp_vs_conn_is_in_timer (269 samples, 0.01%) + + + +rte_ipv4_phdr_cksum (2,297 samples, 0.09%) + + + +rte_is_zero_ether_addr (526 samples, 0.02%) + + + +dp_vs_dest_is_avail (596 samples, 0.02%) + + + +inet_is_addr_any (537 samples, 0.02%) + + + +put_laddr (319 samples, 0.01%) + + + +slave_lcore_loop_func (5,426 samples, 0.21%) + + + +futex_wait_setup (452 samples, 0.02%) + + + +dp_vs_schedule (44,711 samples, 1.70%) + + + +system_call_after_swapgs (294 samples, 0.01%) + + + +INIT_LIST_HEAD (360 samples, 0.01%) + + + +rte_ether_addr_copy (421 samples, 0.02%) + + + +ifa_lookup (7,305 samples, 0.28%) + + + +dp_vs_conn_is_in_timer (271 samples, 0.01%) + + + +tcp_in_init_seq (1,760 
samples, 0.07%) + + + +lcore_process_packets (214,812 samples, 8.16%) +lcore_proce.. + + +netif_rx_burst (12,159 samples, 0.46%) + + + +eth_type_parse (1,345 samples, 0.05%) + + + +rte_pktmbuf_prepend (406 samples, 0.02%) + + + +dp_vs_dest_is_avail (347 samples, 0.01%) + + + +rte_atomic32_inc (1,324 samples, 0.05%) + + + +rte_jhash_3words (1,396 samples, 0.05%) + + + +ifa_put (309 samples, 0.01%) + + + +tcp_in_remove_ts (621 samples, 0.02%) + + + +rte_atomic32_dec (251 samples, 0.01%) + + + +lcore_job_recv_fwd (240,919 samples, 9.15%) +lcore_job_rec.. + + +rte_lcore_id (502 samples, 0.02%) + + + +__memset_sse2 (587 samples, 0.02%) + + + +dp_vs_fill_iphdr (1,800 samples, 0.07%) + + + +rte_arch_bswap16 (1,021 samples, 0.04%) + + + +__rte_raw_cksum (669 samples, 0.03%) + + + +list_add_tail (740 samples, 0.03%) + + + +futex_wake (830 samples, 0.03%) + + + +rte_lcore_id (357 samples, 0.01%) + + + +rte_jhash_3words (4,490 samples, 0.17%) + + + +dp_vs_conn_hashkey (5,188 samples, 0.20%) + + + +neigh_hashkey (239 samples, 0.01%) + + + +ifa_lookup (7,172 samples, 0.27%) + + + +rte_lcore_id (326 samples, 0.01%) + + + +rte_lcore_id (392 samples, 0.01%) + + + +__memset_sse2 (338 samples, 0.01%) + + + +sa_pool_hash (407 samples, 0.02%) + + + +dp_vs_laddr_unbind (19,254 samples, 0.73%) + + + +eal_thread_loop (322,207 samples, 12.24%) +eal_thread_loop + + +route_out_local_lookup (927 samples, 0.04%) + + + +list_del (1,438 samples, 0.05%) + + + +netif_port_get (351 samples, 0.01%) + + + +mbuf_header_pointer (501 samples, 0.02%) + + + +tcp_conn_lookup (34,447 samples, 1.31%) + + + +dp_vs_pre_routing (7,618 samples, 0.29%) + + + +list_del (1,595 samples, 0.06%) + + + +tcp_out_adjust_mss (287 samples, 0.01%) + + + +__list_del (569 samples, 0.02%) + + + +dp_vs_conn_free (529 samples, 0.02%) + + + +system_call_after_swapgs (257 samples, 0.01%) + + + +rte_atomic32_dec (4,552 samples, 0.17%) + + + +system_call_after_swapgs (545 samples, 0.02%) + + + +do_lcore_job (794 samples, 0.03%) + + 
+ +netif_hard_xmit (1,047 samples, 0.04%) + + + +tcp_hdr (443 samples, 0.02%) + + + +rte_atomic32_dec (1,551 samples, 0.06%) + + + +rte_atomic32_dec (281 samples, 0.01%) + + + +timeval_to_ticks (739 samples, 0.03%) + + + +__rte_raw_cksum (936 samples, 0.04%) + + + +__drand48_iterate (610 samples, 0.02%) + + + +dp_vs_conn_is_in_timer (246 samples, 0.01%) + + + +dp_vs_fill_iphdr (1,744 samples, 0.07%) + + + +[libcrypto.so.1.0.2k] (2,905 samples, 0.11%) + + + +ip4_hdr (357 samples, 0.01%) + + + +lcore_job_timer_manage (46,854 samples, 1.78%) + + + +rte_atomic32_read (308 samples, 0.01%) + + + +dp_vs_conn_resend_packets (979 samples, 0.04%) + + + +dp_vs_conn_unbind_dest (752 samples, 0.03%) + + + +tcp_state_trans (7,720 samples, 0.29%) + + + +ixgbe_xmit_pkts (7,138 samples, 0.27%) + + + +__dpvs_timer_sched (4,141 samples, 0.16%) + + + +tcp_in_add_toa (8,341 samples, 0.32%) + + + +tcp_send_csum (4,495 samples, 0.17%) + + + +dp_vs_conn_bind_dest (1,078 samples, 0.04%) + + + +rte_timer_tick_cb (36,901 samples, 1.40%) + + + +neigh_lookup_entry (726 samples, 0.03%) + + + +dp_vs_pre_routing (7,750 samples, 0.29%) + + + +list_del (791 samples, 0.03%) + + + +wake_up_q (247 samples, 0.01%) + + + +dp_vs_conn_expire (36,644 samples, 1.39%) + + + +rte_raw_cksum (1,639 samples, 0.06%) + + + +lcore_job_recv_fwd (238,627 samples, 9.06%) +lcore_job_rec.. 
+ + +rte_ether_addr_copy (470 samples, 0.02%) + + + +rte_atomic32_dec (4,607 samples, 0.17%) + + + +[unknown] (1,781 samples, 0.07%) + + + +rte_atomic32_dec_and_test (3,279 samples, 0.12%) + + + +mbuf_may_pull (311 samples, 0.01%) + + + +list_move_tail (355 samples, 0.01%) + + + +dp_vs_blklst_lookup (4,504 samples, 0.17%) + + + +idev_put (804 samples, 0.03%) + + + +__dp_vs_service_get (1,159 samples, 0.04%) + + + +rte_arch_bswap16 (297 samples, 0.01%) + + + +__dpvs_timer_sched (2,313 samples, 0.09%) + + + +this_lcore_sched (227 samples, 0.01%) + + + +lcore_job_timer_manage (46,612 samples, 1.77%) + + + +dp_vs_stats_conn (286 samples, 0.01%) + + + +net_cmp (317 samples, 0.01%) + + + +msg_slave_process (5,182 samples, 0.20%) + + + +__rte_raw_cksum (315 samples, 0.01%) + + + +dp_vs_save_outxmit_info (1,441 samples, 0.05%) + + + +lcore_job_xmit (12,087 samples, 0.46%) + + + +__dpvs_timer_sched (3,967 samples, 0.15%) + + + +__dp_vs_in (177,705 samples, 6.75%) +__dp_vs_in + + +rcu_idle_exit (335 samples, 0.01%) + + + +dp_vs_conn_put (9,492 samples, 0.36%) + + + +rte_atomic32_inc (1,728 samples, 0.07%) + + + +timeval_to_ticks (261 samples, 0.01%) + + + +dp_vs_fast_xmit_fnat (19,552 samples, 0.74%) + + + +mbuf_header_pointer (386 samples, 0.01%) + + + +neigh_output (3,470 samples, 0.13%) + + + +dp_vs_synproxy_snat_handler (418 samples, 0.02%) + + + +sa_pool_release (2,918 samples, 0.11%) + + + +netif_hard_xmit (1,015 samples, 0.04%) + + + +rte_jhash_3words (4,484 samples, 0.17%) + + + +tcp_out_save_seq (806 samples, 0.03%) + + + +rte_lcore_id (346 samples, 0.01%) + + + +dp_vs_xmit_fnat (37,319 samples, 1.42%) + + + +__rte_raw_cksum (648 samples, 0.02%) + + + +dp_vs_save_xmit_info (1,588 samples, 0.06%) + + + +list_add_tail (1,108 samples, 0.04%) + + + +__rte_raw_cksum (647 samples, 0.02%) + + + +netif_hard_xmit (978 samples, 0.04%) + + + +tcp_in_add_toa (2,086 samples, 0.08%) + + + +netif_port_get (282 samples, 0.01%) + + + +[unknown] (1,872 samples, 0.07%) + + + 
+__dpvs_timer_sched (2,216 samples, 0.08%) + + + +get_level_ticks (400 samples, 0.02%) + + + +ip4_hdrlen (914 samples, 0.03%) + + + +dp_vs_conn_detach_timer (386 samples, 0.01%) + + + +rte_lcore_id (323 samples, 0.01%) + + + +netif_update_worker_loop_cnt (465 samples, 0.02%) + + + +dp_vs_conn_set_timeout (231 samples, 0.01%) + + + +__rte_raw_cksum (849 samples, 0.03%) + + + +sa_pool_hash (431 samples, 0.02%) + + + +ip4_hdr (357 samples, 0.01%) + + + +__dp_vs_conn_hash (6,064 samples, 0.23%) + + + +dp_vs_conn_detach_timer (374 samples, 0.01%) + + + +dp_vs_synproxy_syn_rcv (3,417 samples, 0.13%) + + + +rte_pktmbuf_trim (555 samples, 0.02%) + + + +qsch_sched_all (293 samples, 0.01%) + + + +dp_vs_in (178,970 samples, 6.80%) +dp_vs_in + + +dp_vs_dest_is_avail (496 samples, 0.02%) + + + +rte_pktmbuf_adj (673 samples, 0.03%) + + + +tcp_send_csum (4,615 samples, 0.18%) + + + +ip4_hdrlen (849 samples, 0.03%) + + + +rte_atomic32_dec_and_test (292 samples, 0.01%) + + + +mbuf_header_pointer (268 samples, 0.01%) + + + +dpvs_timer_update (6,394 samples, 0.24%) + + + +dp_vs_conn_resend_packets (926 samples, 0.04%) + + + +timeval_to_ticks (678 samples, 0.03%) + + + +netif_port_get (2,497 samples, 0.09%) + + + +dp_vs_laddr_bind (26,538 samples, 1.01%) + + + +list_add_tail (1,528 samples, 0.06%) + + + +netif_port_get (2,480 samples, 0.09%) + + + +netif_rcv_mbuf (208,420 samples, 7.91%) +netif_rcv_m.. 
+ + +sa4_fetch (17,836 samples, 0.68%) + + + +__dp_vs_out_xmit_fnat4 (21,362 samples, 0.81%) + + + +dp_vs_conn_free (556 samples, 0.02%) + + + +do_lcore_job (704 samples, 0.03%) + + + +neigh_confirm (3,873 samples, 0.15%) + + + +do_futex (1,106 samples, 0.04%) + + + +lcore_stats_burst (1,096 samples, 0.04%) + + + +list_add_tail (1,493 samples, 0.06%) + + + +dp_vs_conn_alloc (1,324 samples, 0.05%) + + + +ipv4_rcv (198,778 samples, 7.55%) +ipv4_rcv + + +dp_vs_service_hashkey (377 samples, 0.01%) + + + +rte_atomic32_inc (2,735 samples, 0.10%) + + + +tcp_out_adjust_seq (1,411 samples, 0.05%) + + + +rte_timer_manage (37,668 samples, 1.43%) + + + +route_out_local_lookup (925 samples, 0.04%) + + + +rte_arch_bswap16 (301 samples, 0.01%) + + + +dp_vs_out_xmit_fnat (21,737 samples, 0.83%) + + + +__vdso_clock_gettime (488 samples, 0.02%) + + + +rte_atomic32_dec_and_test (275 samples, 0.01%) + + + +sa_fetch (17,712 samples, 0.67%) + + + +tcp_conn_expire (2,421 samples, 0.09%) + + + +rte_raw_cksum (1,660 samples, 0.06%) + + + +dp_vs_fill_iphdr (1,922 samples, 0.07%) + + + +ixgbe_recv_pkts_bulk_alloc (7,698 samples, 0.29%) + + + +lcore-worker-3 (327,781 samples, 12.45%) +lcore-worker-3 + + +__lll_unlock_wake_private (396 samples, 0.02%) + + + +dp_vs_save_xmit_info (1,632 samples, 0.06%) + + + +list_add_tail (705 samples, 0.03%) + + + +dp_vs_dest_put (326 samples, 0.01%) + + + +timeval_to_ticks (663 samples, 0.03%) + + + +ifa_put (350 samples, 0.01%) + + + +__list_del_entry (284 samples, 0.01%) + + + +__rte_jhash_3words (1,330 samples, 0.05%) + + + +__random (2,954 samples, 0.11%) + + + +rte_lcore_id (501 samples, 0.02%) + + + +__list_del (503 samples, 0.02%) + + + +dp_vs_conn_attach_timer (2,956 samples, 0.11%) + + + +mbuf_may_pull (623 samples, 0.02%) + + + +netif_deliver_mbuf (210,937 samples, 8.01%) +netif_deliv.. 
+ + +sys_futex (737 samples, 0.03%) + + + +dp_vs_conn_refresh_timer (7,136 samples, 0.27%) + + + +dpvs_job_loop (321,334 samples, 12.20%) +dpvs_job_loop + + +__dp_vs_out_xmit_fnat4 (20,726 samples, 0.79%) + + + +ifa_lookup (7,300 samples, 0.28%) + + + +tcp_secure_sequence_number (869 samples, 0.03%) + + + +rte_atomic32_dec (281 samples, 0.01%) + + + +ipv4_rcv (200,467 samples, 7.61%) +ipv4_rcv + + +dp_vs_synproxy_snat_handler (391 samples, 0.01%) + + + +rte_atomic32_inc (313 samples, 0.01%) + + + +dp_vs_dest_is_valid (509 samples, 0.02%) + + + +__dp_vs_fast_xmit_fnat4 (18,670 samples, 0.71%) + + + +tcp_conn_sched (46,152 samples, 1.75%) + + + +slave_lcore_loop_func (5,347 samples, 0.20%) + + + +ip4_hdrlen (858 samples, 0.03%) + + + +ifa_put (298 samples, 0.01%) + + + +list_empty (388 samples, 0.01%) + + + +rte_is_zero_ether_addr (514 samples, 0.02%) + + + +neigh_hashkey (631 samples, 0.02%) + + + +__list_add (470 samples, 0.02%) + + + +dp_vs_conn_detach_timer (356 samples, 0.01%) + + + +__rte_raw_cksum_reduce (577 samples, 0.02%) + + + +__clock_gettime (630 samples, 0.02%) + + + +rte_atomic32_dec_and_test (301 samples, 0.01%) + + + +rte_lcore_id (493 samples, 0.02%) + + + +futex_wake (779 samples, 0.03%) + + + +dp_vs_dest_is_valid (583 samples, 0.02%) + + + +inet_addr_ifa_put (326 samples, 0.01%) + + + +ip4_hdr (269 samples, 0.01%) + + + +tcp_conn_lookup (34,159 samples, 1.30%) + + + +rte_atomic32_inc (1,410 samples, 0.05%) + + + +list_empty (2,090 samples, 0.08%) + + + +dp_vs_fast_xmit_fnat (19,024 samples, 0.72%) + + + +eth_addr_equal (988 samples, 0.04%) + + + +__dpvs_timer_sched (4,075 samples, 0.15%) + + + +rte_eth_rx_burst (225 samples, 0.01%) + + + +rte_atomic32_inc (256 samples, 0.01%) + + + +ip4_is_frag (553 samples, 0.02%) + + + +whtlst_hashkey (1,333 samples, 0.05%) + + + +eal_thread_loop (322,229 samples, 12.24%) +eal_thread_loop + + +rte_get_timer_cycles (4,753 samples, 0.18%) + + + +dp_vs_fill_iphdr (2,047 samples, 0.08%) + + + +rte_atomic32_dec (776 
samples, 0.03%) + + + +__clock_gettime (596 samples, 0.02%) + + + +rte_atomic32_dec_and_test (260 samples, 0.01%) + + + +route4_output (1,617 samples, 0.06%) + + + +rte_atomic32_dec_and_test (260 samples, 0.01%) + + + +lcore_process_redirect_ring (469 samples, 0.02%) + + + +dpvs_time_rand_delay (499 samples, 0.02%) + + + +inet_addr_ifa_put (311 samples, 0.01%) + + + +neigh_output (3,472 samples, 0.13%) + + + +rte_get_tsc_cycles (4,240 samples, 0.16%) + + + +inet_addr_ifa_get (10,853 samples, 0.41%) + + + +__dp_vs_service_get (1,268 samples, 0.05%) + + + +rte_atomic32_dec (812 samples, 0.03%) + + + +rte_timer_tick_cb (37,533 samples, 1.43%) + + + +ip4_hdrlen (904 samples, 0.03%) + + + +rte_atomic32_inc (465 samples, 0.02%) + + + +dp_vs_dest_put (324 samples, 0.01%) + + + +dp_vs_conn_get (17,358 samples, 0.66%) + + + +dp_vs_conn_put (8,844 samples, 0.34%) + + + +xmit_inbound (48,224 samples, 1.83%) +x.. + + +__dp_vs_service_get (1,146 samples, 0.04%) + + + +dp_vs_stats_conn (258 samples, 0.01%) + + + +ip4_hdr (311 samples, 0.01%) + + + +list_del (810 samples, 0.03%) + + + +__vdso_clock_gettime (548 samples, 0.02%) + + + +dp_vs_save_outxmit_info (1,479 samples, 0.06%) + + + +rte_atomic32_dec (847 samples, 0.03%) + + + +dpvs_time_rand_delay (334 samples, 0.01%) + + + +dp_vs_fast_xmit_fnat (19,031 samples, 0.72%) + + + +do_lcore_job (311,518 samples, 11.83%) +do_lcore_job + + +rte_lcore_id (358 samples, 0.01%) + + + +dp_vs_conn_is_in_timer (290 samples, 0.01%) + + + +tcp_in_adjust_seq (288 samples, 0.01%) + + + +__dp_vs_xmit_fnat4 (36,619 samples, 1.39%) + + + +timer_expire (36,829 samples, 1.40%) + + + +dp_vs_stats_conn (281 samples, 0.01%) + + + +__get_laddr (8,345 samples, 0.32%) + + + +futex_wait (714 samples, 0.03%) + + + +this_lcore_sched (245 samples, 0.01%) + + + +__rte_jhash_3words (1,277 samples, 0.05%) + + + +sa_release (18,191 samples, 0.69%) + + + +rte_get_tsc_cycles (4,456 samples, 0.17%) + + + +mbuf_may_pull (315 samples, 0.01%) + + + 
+rte_atomic32_dec_and_test (279 samples, 0.01%) + + + +rte_timer_manage (37,047 samples, 1.41%) + + + +neigh_confirm (3,594 samples, 0.14%) + + + +INIT_LIST_HEAD (296 samples, 0.01%) + + + +inet_addr_equal (711 samples, 0.03%) + + + +rte_pktmbuf_prepend (236 samples, 0.01%) + + + +netif_rx_burst (11,791 samples, 0.45%) + + + +neigh_entry_state_trans (591 samples, 0.02%) + + + +rte_ipv4_phdr_cksum (3,363 samples, 0.13%) + + + +neigh_entry_state_trans (637 samples, 0.02%) + + + +__list_del (241 samples, 0.01%) + + + +__dpvs_timer_sched (2,257 samples, 0.09%) + + + +dp_vs_conn_refresh_timer (7,187 samples, 0.27%) + + + +rte_pktmbuf_trim (609 samples, 0.02%) + + + +dp_vs_conn_new (42,338 samples, 1.61%) + + + +rte_ether_addr_copy (441 samples, 0.02%) + + + +netif_hard_xmit (1,917 samples, 0.07%) + + + +dp_vs_conn_refresh_timer (7,197 samples, 0.27%) + + + +ip4_hdr (258 samples, 0.01%) + + + +dp_vs_service_lookup (1,346 samples, 0.05%) + + + +tcp_hdr (473 samples, 0.02%) + + + +rte_atomic32_inc (233 samples, 0.01%) + + + +__clock_gettime (303 samples, 0.01%) + + + +rte_pktmbuf_trim (573 samples, 0.02%) + + + +rte_atomic32_dec (234 samples, 0.01%) + + + +rte_pktmbuf_adj (640 samples, 0.02%) + + + +dpvs_timer_update (6,651 samples, 0.25%) + + + +rte_arch_bswap16 (294 samples, 0.01%) + + + +ip4_hdrlen (903 samples, 0.03%) + + + +__dpvs_timer_sched (2,100 samples, 0.08%) + + + +route4_output (1,728 samples, 0.07%) + + + +rte_lcore_id (362 samples, 0.01%) + + + +system_call_after_swapgs (249 samples, 0.01%) + + + +INET_HOOK (4,578 samples, 0.17%) + + + +__get_laddr (7,840 samples, 0.30%) + + + +netif_rcv_mbuf (206,811 samples, 7.85%) +netif_rcv_m.. + + +dpvs_timer_update (6,197 samples, 0.24%) + + + +dp_vs_dest_is_avail (499 samples, 0.02%) + + + +rte_get_timer_cycles (4,379 samples, 0.17%) + + + +tcp_send_csum (4,515 samples, 0.17%) + + + +mbuf_may_pull (456 samples, 0.02%) + + + +netif_deliver_mbuf (209,478 samples, 7.95%) +netif_deliv.. 
+ + +tcp_hdr (638 samples, 0.02%) + + + +this_lcore_sched (243 samples, 0.01%) + + + +dp_vs_dest_is_avail (389 samples, 0.01%) + + + +dp_vs_service_lookup (1,402 samples, 0.05%) + + + +neigh_hashkey (672 samples, 0.03%) + + + +lcore_process_packets (304 samples, 0.01%) + + + +rte_timer_manage (37,992 samples, 1.44%) + + + +dp_vs_dest_is_valid (538 samples, 0.02%) + + + +netif_tx_burst (8,192 samples, 0.31%) + + + +this_lcore_sched (237 samples, 0.01%) + + + +netif_hard_xmit (2,864 samples, 0.11%) + + + +__rte_raw_cksum (257 samples, 0.01%) + + + +__lll_lock_wait_private (314 samples, 0.01%) + + + +rte_arch_bswap16 (310 samples, 0.01%) + + + +ipv4_output_fin2 (4,451 samples, 0.17%) + + + +dp_vs_fill_iphdr (2,034 samples, 0.08%) + + + +inet_is_addr_any (513 samples, 0.02%) + + + +inet_addr_equal (630 samples, 0.02%) + + + +list_empty (425 samples, 0.02%) + + + +mbuf_may_pull (646 samples, 0.02%) + + + +dpvs_time_rand_delay (303 samples, 0.01%) + + + +list_add_tail (1,179 samples, 0.04%) + + + +rte_lcore_id (499 samples, 0.02%) + + + +ifa_lookup (8,194 samples, 0.31%) + + + +do_futex (793 samples, 0.03%) + + + +dp_vs_schedule (44,616 samples, 1.69%) + + + +rte_atomic32_dec (629 samples, 0.02%) + + + +ipv4_output_fin (4,374 samples, 0.17%) + + + +do_futex (1,114 samples, 0.04%) + + + +netif_port_get (2,454 samples, 0.09%) + + + +route4_put (417 samples, 0.02%) + + + +ip4_hdr (333 samples, 0.01%) + + + +rte_arch_bswap16 (345 samples, 0.01%) + + + +dpvs_job_loop (321,297 samples, 12.20%) +dpvs_job_loop + + +tcp_state_trans (7,881 samples, 0.30%) + + + +rte_jhash_3words (4,668 samples, 0.18%) + + + +tcp_conn_sched (46,304 samples, 1.76%) + + + +rte_atomic32_inc (1,268 samples, 0.05%) + + + +netif_xmit (3,753 samples, 0.14%) + + + +netif_rcv_mbuf (206,441 samples, 7.84%) +netif_rcv_m.. 
+ + +slave_lcore_loop_func (5,520 samples, 0.21%) + + + +sa_pool_hash (470 samples, 0.02%) + + + +rte_lcore_id (343 samples, 0.01%) + + + +rte_pktmbuf_append (299 samples, 0.01%) + + + +ixgbe_recv_pkts_bulk_alloc (7,666 samples, 0.29%) + + + +rte_pktmbuf_prepend (284 samples, 0.01%) + + + +dp_vs_conn_unhash (6,517 samples, 0.25%) + + + +system_call_fastpath (1,364 samples, 0.05%) + + + +rte_atomic32_dec (855 samples, 0.03%) + + + +tcp_hdr (660 samples, 0.03%) + + + +netif_update_worker_loop_cnt (437 samples, 0.02%) + + + +ip4_hdrlen (496 samples, 0.02%) + + + +[unknown] (925 samples, 0.04%) + + + +list_add_tail (1,122 samples, 0.04%) + + + +rte_pktmbuf_append (239 samples, 0.01%) + + + +rte_atomic32_inc (495 samples, 0.02%) + + + +inet_addr_fold (224 samples, 0.01%) + + + +blklst_hashkey (1,641 samples, 0.06%) + + + +netif_rcv_mbuf (204,478 samples, 7.76%) +netif_rcv_.. + + +dp_vs_conn_free (541 samples, 0.02%) + + + +tcp_hdr (450 samples, 0.02%) + + + +rte_prefetch0 (308 samples, 0.01%) + + + +[unknown] (920 samples, 0.03%) + + + +list_add_tail (1,501 samples, 0.06%) + + + +net_cmp (304 samples, 0.01%) + + + +[libcrypto.so.1.0.2k] (2,783 samples, 0.11%) + + + +[libcrypto.so.1.0.2k] (268 samples, 0.01%) + + + +slave_lcore_loop_func (5,387 samples, 0.20%) + + + +route_out_net_lookup (677 samples, 0.03%) + + + +do_futex (1,117 samples, 0.04%) + + + +inet_addr_equal (2,403 samples, 0.09%) + + + +dp_vs_stats_in (1,382 samples, 0.05%) + + + +ip4_hdrlen (870 samples, 0.03%) + + + +ipv4_rcv (196,449 samples, 7.46%) +ipv4_rcv + + +tcp_send_csum (1,466 samples, 0.06%) + + + +_raw_spin_unlock_irqrestore (238 samples, 0.01%) + + + +rte_arch_bswap16 (993 samples, 0.04%) + + + +ip4_hdrlen (874 samples, 0.03%) + + + +__dp_vs_xmit_fnat4 (37,473 samples, 1.42%) + + + +__rte_raw_cksum_reduce (405 samples, 0.02%) + + + +rte_atomic32_inc (536 samples, 0.02%) + + + +netif_port_get (262 samples, 0.01%) + + + +rte_atomic32_inc (317 samples, 0.01%) + + + +__rte_raw_cksum (621 samples, 
0.02%) + + + +dpvs_time_rand_delay (521 samples, 0.02%) + + + +rte_pktmbuf_adj (638 samples, 0.02%) + + + +ip4_is_frag (555 samples, 0.02%) + + + +dp_vs_conn_refresh_timer (7,159 samples, 0.27%) + + + +dp_vs_stats_in (1,347 samples, 0.05%) + + + +__random (2,972 samples, 0.11%) + + + +rte_atomic32_dec_and_test (280 samples, 0.01%) + + + +mbuf_may_pull (347 samples, 0.01%) + + + +system_call_fastpath (1,189 samples, 0.05%) + + + +rte_atomic32_read (302 samples, 0.01%) + + + +inet_addr_equal (392 samples, 0.01%) + + + +dpvs_time_rand_delay (325 samples, 0.01%) + + + +rte_ether_addr_copy (348 samples, 0.01%) + + + +dp_vs_conn_attach_timer (2,814 samples, 0.11%) + + + +do_futex (796 samples, 0.03%) + + + +__memset_sse2 (278 samples, 0.01%) + + + +__dp_vs_fast_xmit_fnat4 (18,675 samples, 0.71%) + + + +rte_lcore_id (415 samples, 0.02%) + + + +__lll_lock_wait_private (1,380 samples, 0.05%) + + + +tcp_conn_lookup (33,578 samples, 1.28%) + + + +__rte_raw_cksum (623 samples, 0.02%) + + + +list_add (2,417 samples, 0.09%) + + + +rte_atomic32_read (285 samples, 0.01%) + + + +dpvs_time_rand_delay (304 samples, 0.01%) + + + +get_level_ticks (373 samples, 0.01%) + + + +neigh_key_cmp (643 samples, 0.02%) + + + +lcore_job_recv_fwd (239,024 samples, 9.08%) +lcore_job_rec.. 
+ + +inet_addr_equal (2,335 samples, 0.09%) + + + +__rte_raw_cksum (308 samples, 0.01%) + + + +rte_lcore_id (351 samples, 0.01%) + + + +route_out_net_lookup (708 samples, 0.03%) + + + +pkt_type_get (3,599 samples, 0.14%) + + + +__drand48_iterate (531 samples, 0.02%) + + + +dp_vs_proto_lookup (345 samples, 0.01%) + + + +rte_arch_bswap16 (269 samples, 0.01%) + + + +tcp_in_remove_ts (574 samples, 0.02%) + + + +rte_rdtsc (4,060 samples, 0.15%) + + + +rte_arch_bswap16 (1,037 samples, 0.04%) + + + +sa_pool_fetch (2,376 samples, 0.09%) + + + +rte_pktmbuf_prepend (395 samples, 0.01%) + + + +list_empty (366 samples, 0.01%) + + + +netif_xmit (3,483 samples, 0.13%) + + + +lcore_process_redirect_ring (417 samples, 0.02%) + + + +netif_rx_burst (11,619 samples, 0.44%) + + + +inet_addr_ifa_get (10,824 samples, 0.41%) + + + +system_call_fastpath (1,233 samples, 0.05%) + + + +dp_vs_fast_outxmit_fnat (18,538 samples, 0.70%) + + + +rte_is_zero_ether_addr (394 samples, 0.01%) + + + +dpvs_time_rand_delay (332 samples, 0.01%) + + + +rte_ether_addr_copy (464 samples, 0.02%) + + + +mbuf_header_pointer (285 samples, 0.01%) + + + +cpu_startup_entry (11,821 samples, 0.45%) + + + +__dpvs_timer_sched (4,130 samples, 0.16%) + + + +rte_raw_cksum (1,791 samples, 0.07%) + + + +rte_atomic32_dec (224 samples, 0.01%) + + + +tcp_fnat_out_handler (11,172 samples, 0.42%) + + + +__dp_vs_service_get (852 samples, 0.03%) + + + +lcore_process_redirect_ring (420 samples, 0.02%) + + + +OPENSSL_cleanse (275 samples, 0.01%) + + + +neigh_confirm (3,782 samples, 0.14%) + + + +OPENSSL_cleanse (253 samples, 0.01%) + + + +route_out_local_lookup (982 samples, 0.04%) + + + +dp_vs_xmit_fnat (37,765 samples, 1.43%) + + + +rte_get_tsc_cycles (4,126 samples, 0.16%) + + + +mbuf_may_pull (490 samples, 0.02%) + + + +__rte_raw_cksum_reduce (393 samples, 0.01%) + + + +__rte_raw_cksum_reduce (555 samples, 0.02%) + + + +rte_ipv4_phdr_cksum (1,026 samples, 0.04%) + + + +list_add_tail (1,194 samples, 0.05%) + + + +af_inet_hooks 
(294 samples, 0.01%) + + + +ip4_hdrlen (451 samples, 0.02%) + + + +port_tab_hashkey (293 samples, 0.01%) + + + +idev_put (787 samples, 0.03%) + + + +mbuf_may_pull (610 samples, 0.02%) + + + +ipv4_rcv (196,639 samples, 7.47%) +ipv4_rcv + + +__rte_jhash_3words (4,328 samples, 0.16%) + + + +list_add_tail (735 samples, 0.03%) + + + +list_move_tail (521 samples, 0.02%) + + + +ip4_is_frag (556 samples, 0.02%) + + + +lcore_job_timer_manage (47,306 samples, 1.80%) + + + +neigh_lookup_entry (859 samples, 0.03%) + + + +dp_vs_redirect_ring_proc (224 samples, 0.01%) + + + +this_lcore_sched (276 samples, 0.01%) + + + +slave_lcore_loop_func (5,179 samples, 0.20%) + + + +rte_timer_tick_cb (37,859 samples, 1.44%) + + + +__dp_vs_conn_hash (6,217 samples, 0.24%) + + + +lcore_process_redirect_ring (431 samples, 0.02%) + + + +inet_addr_equal (428 samples, 0.02%) + + + +dp_vs_conn_refresh_timer (7,101 samples, 0.27%) + + + +__rte_raw_cksum_reduce (648 samples, 0.02%) + + + +sa_release (18,237 samples, 0.69%) + + + +ip4_is_frag (310 samples, 0.01%) + + + +rte_atomic32_inc (1,308 samples, 0.05%) + + + +INET_HOOK (5,187 samples, 0.20%) + + + +lcore_job_xmit (12,228 samples, 0.46%) + + + +dp_vs_laddr_bind (26,609 samples, 1.01%) + + + +__rte_jhash_3words (1,345 samples, 0.05%) + + + +lcore_process_redirect_ring (427 samples, 0.02%) + + + +timer_expire (36,199 samples, 1.37%) + + + +dp_vs_in (178,035 samples, 6.76%) +dp_vs_in + + +rte_ipv4_phdr_cksum (2,171 samples, 0.08%) + + + +netif_xmit (2,384 samples, 0.09%) + + + +dp_vs_conn_is_in_timer (262 samples, 0.01%) + + + +dp_vs_conn_hash (6,343 samples, 0.24%) + + + +__rte_jhash_3words (1,348 samples, 0.05%) + + + +rte_pktmbuf_prepend (231 samples, 0.01%) + + + +__dp_vs_fast_xmit_fnat4 (19,177 samples, 0.73%) + + + +whtlst_hashkey (1,251 samples, 0.05%) + + + +all (2,633,369 samples, 100%) + + + +tcp_state_idx (698 samples, 0.03%) + + + +netif_port_get (2,435 samples, 0.09%) + + + +dp_vs_conn_new (42,703 samples, 1.62%) + + + +mbuf_may_pull 
(477 samples, 0.02%) + + + +__dp_vs_xmit_fnat4 (36,325 samples, 1.38%) + + + +[unknown] (871 samples, 0.03%) + + + +port_tab_hashkey (294 samples, 0.01%) + + + +dp_vs_conn_bind_dest (1,010 samples, 0.04%) + + + +dp_vs_conn_is_in_timer (291 samples, 0.01%) + + + +arch_cpu_idle (7,869 samples, 0.30%) + + + +ifa_lookup (7,383 samples, 0.28%) + + + +dp_vs_blklst_lookup (4,602 samples, 0.17%) + + + +rte_timer_tick_cb (37,056 samples, 1.41%) + + + +sa_pool_destroy (3,443 samples, 0.13%) + + + +_raw_spin_unlock_irqrestore (294 samples, 0.01%) + + + +dp_vs_synproxy_syn_rcv (3,441 samples, 0.13%) + + + +dp_vs_conn_free (583 samples, 0.02%) + + + +lcore_job_recv_fwd (238,565 samples, 9.06%) +lcore_job_rec.. + + +rte_arch_bswap16 (274 samples, 0.01%) + + + +dp_vs_dest_put (317 samples, 0.01%) + + + +rte_get_timer_cycles (4,555 samples, 0.17%) + + + +netif_port_get (396 samples, 0.02%) + + + +dp_vs_fill_iphdr (1,741 samples, 0.07%) + + + +tcp_conn_expire (2,555 samples, 0.10%) + + + +__lll_unlock_wake_private (491 samples, 0.02%) + + + +dp_vs_save_xmit_info (1,563 samples, 0.06%) + + + +dp_vs_service_lookup (1,332 samples, 0.05%) + + + +ip4_is_frag (320 samples, 0.01%) + + + +lcore_stats_burst (1,152 samples, 0.04%) + + + +list_add_tail (1,211 samples, 0.05%) + + + +dp_vs_stats_conn (260 samples, 0.01%) + + + +__clock_gettime (631 samples, 0.02%) + + + +dp_vs_stats_in (1,377 samples, 0.05%) + + + +netif_xmit (1,138 samples, 0.04%) + + + +rte_ether_addr_copy (343 samples, 0.01%) + + + +netif_hard_xmit (1,941 samples, 0.07%) + + + +inet_addr_ifa_put (319 samples, 0.01%) + + + +slave_lcore_loop_func (5,612 samples, 0.21%) + + + +eth_addr_equal (1,028 samples, 0.04%) + + + +list_add_tail (225 samples, 0.01%) + + + +__memset_sse2 (580 samples, 0.02%) + + + +inet_addr_ifa_put (308 samples, 0.01%) + + + +rte_eth_rx_burst (10,932 samples, 0.42%) + + + +tcp_in_add_toa (8,157 samples, 0.31%) + + + +this_lcore_sched (239 samples, 0.01%) + + + +dp_vs_dest_is_avail (528 samples, 0.02%) + + 
+ +rte_ether_addr_copy (334 samples, 0.01%) + + + +do_lcore_job (809 samples, 0.03%) + + + +dp_vs_pre_routing (7,748 samples, 0.29%) + + + +dp_vs_conn_set_timeout (670 samples, 0.03%) + + + +qsch_sched_all (307 samples, 0.01%) + + + +dp_vs_fill_iphdr (1,626 samples, 0.06%) + + + +inet_addr_equal (628 samples, 0.02%) + + + +neigh_key_cmp (694 samples, 0.03%) + + + +neigh_key_cmp (741 samples, 0.03%) + + + +blklst_hashkey (1,663 samples, 0.06%) + + + +ip4_hdrlen (363 samples, 0.01%) + + + +tcp_conn_expire (2,531 samples, 0.10%) + + + +rte_atomic32_dec (811 samples, 0.03%) + + + +rte_raw_cksum (1,223 samples, 0.05%) + + + +timeval_to_ticks (277 samples, 0.01%) + + + +inet_addr_ifa_get (11,089 samples, 0.42%) + + + +__clock_gettime (598 samples, 0.02%) + + + +qsch_sched_all (718 samples, 0.03%) + + + +route4_output (1,716 samples, 0.07%) + + + +dp_vs_save_xmit_info (1,586 samples, 0.06%) + + + +ifa_put (280 samples, 0.01%) + + + +rte_atomic32_inc (591 samples, 0.02%) + + + +list_add (2,497 samples, 0.09%) + + + +lcore_job_timer_manage (47,788 samples, 1.81%) +l.. 
+ + +__lll_lock_wait_private (1,327 samples, 0.05%) + + + +dpvs_timer_sched (2,678 samples, 0.10%) + + + +dp_vs_dest_is_avail (589 samples, 0.02%) + + + +rte_atomic32_inc (1,364 samples, 0.05%) + + + +mbuf_header_pointer (425 samples, 0.02%) + + + +mbuf_may_pull (654 samples, 0.02%) + + + +eal_thread_loop (323,390 samples, 12.28%) +eal_thread_loop + + +mbuf_header_pointer (513 samples, 0.02%) + + + +rte_timer_tick_cb (37,600 samples, 1.43%) + + + +rte_atomic32_inc (1,608 samples, 0.06%) + + + +tcp_out_save_seq (722 samples, 0.03%) + + + +futex_wait_setup (484 samples, 0.02%) + + + +__random (2,916 samples, 0.11%) + + + +msg_slave_process (5,048 samples, 0.19%) + + + +__dp_vs_service_get (846 samples, 0.03%) + + + +list_empty (362 samples, 0.01%) + + + +tcp_state_trans (7,669 samples, 0.29%) + + + +__dp_vs_out_xmit_fnat4 (20,474 samples, 0.78%) + + + +net_cmp (343 samples, 0.01%) + + + +qsch_sched_all (282 samples, 0.01%) + + + +ip4_hdrlen (457 samples, 0.02%) + + + +tcp_conn_expire (2,574 samples, 0.10%) + + + +__clock_gettime (292 samples, 0.01%) + + + +__drand48_iterate (619 samples, 0.02%) + + + +rte_raw_cksum (1,716 samples, 0.07%) + + + +inet_addr_equal (653 samples, 0.02%) + + + +OPENSSL_cleanse (253 samples, 0.01%) + + + +dp_vs_conn_put (9,149 samples, 0.35%) + + + +mbuf_header_pointer (279 samples, 0.01%) + + + +rte_atomic32_inc (565 samples, 0.02%) + + + +dpvs_timer_update (6,396 samples, 0.24%) + + + +sys_futex (1,155 samples, 0.04%) + + + +ip4_hdrlen (501 samples, 0.02%) + + + +inet_addr_ifa_get (11,238 samples, 0.43%) + + + +netif_xmit (3,670 samples, 0.14%) + + + +INET_HOOK (5,287 samples, 0.20%) + + + +idev_put (774 samples, 0.03%) + + + +pkt_type_get (3,359 samples, 0.13%) + + + +list_move_tail (520 samples, 0.02%) + + + +list_del (1,530 samples, 0.06%) + + + +dp_vs_dest_is_avail (534 samples, 0.02%) + + + +lcore-worker-1 (328,863 samples, 12.49%) +lcore-worker-1 + + +timeval_to_ticks (292 samples, 0.01%) + + + +rte_timer_tick_cb (37,857 samples, 
1.44%) + + + +__random (2,973 samples, 0.11%) + + + +tcp_in_init_seq (1,787 samples, 0.07%) + + + +timeval_to_ticks (268 samples, 0.01%) + + + +dp_vs_whtlst_allow (4,333 samples, 0.16%) + + + +ip4_hdr (226 samples, 0.01%) + + + +rte_atomic32_dec_and_test (316 samples, 0.01%) + + + +xmit_outbound (33,923 samples, 1.29%) + + + +__random_r (919 samples, 0.03%) + + + +dpvs_time_rand_delay (472 samples, 0.02%) + + + +wake_up_q (279 samples, 0.01%) + + + +rte_atomic32_read (262 samples, 0.01%) + + + +netif_port_get (312 samples, 0.01%) + + + +rte_is_zero_ether_addr (479 samples, 0.02%) + + + +__rte_jhash_3words (4,203 samples, 0.16%) + + + +qsch_sched_all (707 samples, 0.03%) + + + +idev_put (789 samples, 0.03%) + + + +dev_get_idev (1,541 samples, 0.06%) + + + +dp_vs_service_hashkey (354 samples, 0.01%) + + + +sa_pool_hash (422 samples, 0.02%) + + + +sys_futex (915 samples, 0.03%) + + + +inet_is_addr_any (536 samples, 0.02%) + + + +mbuf_header_pointer (516 samples, 0.02%) + + + +OPENSSL_cleanse (267 samples, 0.01%) + + + +list_add_tail (1,271 samples, 0.05%) + + + +rte_arch_bswap16 (308 samples, 0.01%) + + + +rte_arch_bswap16 (641 samples, 0.02%) + + + +rte_get_timer_cycles (4,445 samples, 0.17%) + + + +rte_atomic32_dec_and_test (3,331 samples, 0.13%) + + + +lcore_process_arp_ring (5,337 samples, 0.20%) + + + +__dp_vs_fast_xmit_fnat4 (18,660 samples, 0.71%) + + + +neigh_lookup_entry (859 samples, 0.03%) + + + +ixgbe_xmit_pkts (7,145 samples, 0.27%) + + + +inet_addr_equal (406 samples, 0.02%) + + + +__dp_vs_out_xmit_fnat4 (21,069 samples, 0.80%) + + + +inet_addr_ifa_get (10,746 samples, 0.41%) + + + +mbuf_may_pull (324 samples, 0.01%) + + + +dp_vs_dest_is_valid (546 samples, 0.02%) + + + +rte_arch_bswap32 (272 samples, 0.01%) + + + +__clock_gettime (629 samples, 0.02%) + + + +netif_hard_xmit (944 samples, 0.04%) + + + +tcp_fnat_out_handler (11,218 samples, 0.43%) + + + +__dp_vs_service_get (1,204 samples, 0.05%) + + + +dpvs_job_loop (321,509 samples, 12.21%) 
+dpvs_job_loop + + +ip4_hdrlen (506 samples, 0.02%) + + + +__drand48_iterate (566 samples, 0.02%) + + + +neigh_lookup_entry (757 samples, 0.03%) + + + +rte_arch_bswap16 (242 samples, 0.01%) + + + +ip4_hdrlen (379 samples, 0.01%) + + + +[libcrypto.so.1.0.2k] (301 samples, 0.01%) + + + +rte_arch_bswap32 (928 samples, 0.04%) + + + +__random_r (1,075 samples, 0.04%) + + + +ifa_put (284 samples, 0.01%) + + + +neigh_hashkey (692 samples, 0.03%) + + + +idev_put (839 samples, 0.03%) + + + +list_empty (357 samples, 0.01%) + + + +tcp_out_adjust_mss (273 samples, 0.01%) + + + +rte_atomic32_dec_and_test (296 samples, 0.01%) + + + +timeval_to_ticks (696 samples, 0.03%) + + + +dp_vs_rr_schedule (963 samples, 0.04%) + + + +list_add (2,415 samples, 0.09%) + + + +__dpvs_timer_sched (4,290 samples, 0.16%) + + + +inet_addr_equal (2,153 samples, 0.08%) + + + +list_move_tail (575 samples, 0.02%) + + + +lcore_stats_burst (1,126 samples, 0.04%) + + + +lcore_process_arp_ring (5,505 samples, 0.21%) + + + +__list_del (523 samples, 0.02%) + + + +dp_vs_xmit_fnat (38,073 samples, 1.45%) + + + +__memset_sse2 (257 samples, 0.01%) + + + +dpvs_timer_sched (2,506 samples, 0.10%) + + + +dp_vs_service_hashkey (461 samples, 0.02%) + + + +mbuf_may_pull (304 samples, 0.01%) + + + +__list_add (420 samples, 0.02%) + + + +do_lcore_job (309,084 samples, 11.74%) +do_lcore_job + + +list_add_tail (1,473 samples, 0.06%) + + + +rte_prefetch0 (293 samples, 0.01%) + + + +__list_add (377 samples, 0.01%) + + + +dp_vs_in (178,255 samples, 6.77%) +dp_vs_in + + +futex_wait (815 samples, 0.03%) + + + +rte_atomic32_dec (792 samples, 0.03%) + + + +dp_vs_whtlst_allow (4,149 samples, 0.16%) + + + +list_add_tail (1,500 samples, 0.06%) + + + +rte_ipv4_phdr_cksum (3,284 samples, 0.12%) + + + +mbuf_header_pointer (460 samples, 0.02%) + + + +rte_arch_bswap16 (668 samples, 0.03%) + + + +dp_vs_dest_put (342 samples, 0.01%) + + + +__dp_vs_conn_hash (6,018 samples, 0.23%) + + + +ixgbe_recv_pkts_bulk_alloc (7,818 samples, 0.30%) + + 
+ +list_empty (397 samples, 0.02%) + + + +sa_pool_hash (429 samples, 0.02%) + + + +get_level_ticks (380 samples, 0.01%) + + + +dp_vs_conn_put (8,751 samples, 0.33%) + + + +tcp_fnat_out_handler (10,908 samples, 0.41%) + + + +tcp_in_remove_ts (534 samples, 0.02%) + + + +rte_atomic32_dec_and_test (243 samples, 0.01%) + + + +xmit_inbound (47,933 samples, 1.82%) +x.. + + +dp_vs_dest_put (307 samples, 0.01%) + + + +rte_get_tsc_cycles (4,115 samples, 0.16%) + + + +sa_pool_hash (397 samples, 0.02%) + + + +rte_atomic32_inc (586 samples, 0.02%) + + + +lcore_job_timer_manage (47,384 samples, 1.80%) + + + +rte_atomic32_dec (1,497 samples, 0.06%) + + + +dp_vs_service_hashkey (405 samples, 0.02%) + + + +rte_atomic32_dec (795 samples, 0.03%) + + + +system_call_fastpath (760 samples, 0.03%) + + + +put_laddr (306 samples, 0.01%) + + + +rte_atomic32_inc (379 samples, 0.01%) + + + +dp_vs_fast_outxmit_fnat (19,278 samples, 0.73%) + + + +qsch_sched_all (775 samples, 0.03%) + + + +dp_vs_synproxy_snat_handler (414 samples, 0.02%) + + + +do_lcore_job (790 samples, 0.03%) + + + +rte_get_timer_cycles (4,406 samples, 0.17%) + + + +mbuf_header_pointer (250 samples, 0.01%) + + + +neigh_lookup_entry (749 samples, 0.03%) + + + +dp_vs_conn_is_in_timer (289 samples, 0.01%) + + + +rte_raw_cksum (1,231 samples, 0.05%) + + + +dp_vs_conn_detach_timer (369 samples, 0.01%) + + + +netif_rx_burst (225 samples, 0.01%) + + + +rte_lcore_id (373 samples, 0.01%) + + + +neigh_output (3,259 samples, 0.12%) + + + +port_tab_hashkey (275 samples, 0.01%) + + + +rte_rdtsc (4,107 samples, 0.16%) + + + +__memset_sse2 (295 samples, 0.01%) + + + +rte_eth_rx_burst (10,803 samples, 0.41%) + + + +__list_del_entry (272 samples, 0.01%) + + + +ip4_hdrlen (883 samples, 0.03%) + + + +rte_raw_cksum (577 samples, 0.02%) + + + +rte_atomic32_inc (1,563 samples, 0.06%) + + + +dp_vs_laddr_bind (27,207 samples, 1.03%) + + + +rte_atomic32_inc (1,709 samples, 0.06%) + + + +dev_get_idev (1,447 samples, 0.05%) + + + +neigh_key_cmp (332 
samples, 0.01%) + + + +__memset_sse2 (386 samples, 0.01%) + + + +neigh_confirm (3,541 samples, 0.13%) + + + +dp_vs_stats_out (1,376 samples, 0.05%) + + + +ip4_hdrlen (411 samples, 0.02%) + + + +inet_addr_equal (665 samples, 0.03%) + + + +neigh_output (3,323 samples, 0.13%) + + + +lcore_process_packets (270 samples, 0.01%) + + + +dp_vs_conn_set_timeout (656 samples, 0.02%) + + + +__random_r (944 samples, 0.04%) + + + +__rte_raw_cksum_reduce (350 samples, 0.01%) + + + +neigh_hashkey (686 samples, 0.03%) + + + +tcp_hdr (295 samples, 0.01%) + + + +tcp_hdr (293 samples, 0.01%) + + + +list_move_tail (526 samples, 0.02%) + + + +rte_atomic32_read (277 samples, 0.01%) + + + +eth_type_parse (1,249 samples, 0.05%) + + + +__dp_vs_out_xmit_fnat4 (21,588 samples, 0.82%) + + + +tcp_state_idx (752 samples, 0.03%) + + + +dp_vs_conn_set_timeout (635 samples, 0.02%) + + + +dp_vs_service_lookup (1,349 samples, 0.05%) + + + +netif_rx_burst (246 samples, 0.01%) + + + +rte_atomic32_inc (346 samples, 0.01%) + + + +timeval_to_ticks (684 samples, 0.03%) + + + +dp_vs_conn_expire (36,456 samples, 1.38%) + + + +do_lcore_job (309,192 samples, 11.74%) +do_lcore_job + + +dp_vs_conn_resend_packets (1,060 samples, 0.04%) + + + +dp_vs_conn_attach_timer (2,998 samples, 0.11%) + + + +__lll_lock_wait_private (1,211 samples, 0.05%) + + + +__lll_unlock_wake_private (545 samples, 0.02%) + + + +__dpvs_timer_sched (3,771 samples, 0.14%) + + + +__rte_raw_cksum (271 samples, 0.01%) + + + +rte_atomic32_dec (273 samples, 0.01%) + + + +rte_atomic32_inc (313 samples, 0.01%) + + + +sa_fetch (17,988 samples, 0.68%) + + + +rte_ether_addr_copy (311 samples, 0.01%) + + + +dpvs_time_rand_delay (307 samples, 0.01%) + + + +rte_atomic32_dec (4,851 samples, 0.18%) + + + +__rte_raw_cksum (827 samples, 0.03%) + + + +rte_ipv4_phdr_cksum (1,063 samples, 0.04%) + + + +netif_hard_xmit (1,922 samples, 0.07%) + + + +ip4_hdr (329 samples, 0.01%) + + + +rte_arch_bswap16 (311 samples, 0.01%) + + + +do_futex (846 samples, 0.03%) + + + 
+inet_addr_equal (2,449 samples, 0.09%) + + + +dev_get_idev (1,480 samples, 0.06%) + + + +ip4_hdrlen (334 samples, 0.01%) + + + +neigh_output (3,409 samples, 0.13%) + + + +rte_eth_rx_burst (11,039 samples, 0.42%) + + + +rte_atomic32_dec (226 samples, 0.01%) + + + +sa4_fetch (17,516 samples, 0.67%) + + + +dp_vs_conn_refresh_timer (6,923 samples, 0.26%) + + + +ip4_hdr (399 samples, 0.02%) + + + +rte_ipv4_phdr_cksum (3,297 samples, 0.13%) + + + +inet_is_addr_any (509 samples, 0.02%) + + + +__lll_unlock_wake_private (1,713 samples, 0.07%) + + + +rte_arch_bswap16 (257 samples, 0.01%) + + + +list_empty (372 samples, 0.01%) + + + +dp_vs_blklst_lookup (4,685 samples, 0.18%) + + + +tcp_in_add_toa (2,045 samples, 0.08%) + + + +mbuf_header_pointer (487 samples, 0.02%) + + + +futex_wake (833 samples, 0.03%) + + + +eth_type_parse (1,319 samples, 0.05%) + + + +futex_wake (827 samples, 0.03%) + + + +netif_port_get (2,430 samples, 0.09%) + + + +get_level_ticks (405 samples, 0.02%) + + + +__dp_vs_xmit_fnat4 (37,807 samples, 1.44%) + + + +rte_raw_cksum (1,250 samples, 0.05%) + + + +dp_vs_conn_is_in_timer (297 samples, 0.01%) + + + +rte_atomic32_inc (1,324 samples, 0.05%) + + + +futex_wait_setup (460 samples, 0.02%) + + + +netif_port_get (273 samples, 0.01%) + + + +rte_timer_tick_cb (37,052 samples, 1.41%) + + + +timeval_to_ticks (669 samples, 0.03%) + + + +sys_futex (1,182 samples, 0.04%) + + + +netif_rcv_mbuf (205,462 samples, 7.80%) +netif_rcv_m.. 
+ + +lcore_process_packets (256 samples, 0.01%) + + + +tcp_in_init_seq (1,837 samples, 0.07%) + + + +dp_vs_conn_unhash (6,206 samples, 0.24%) + + + +get_level_ticks (368 samples, 0.01%) + + + +dp_vs_conn_hash (6,097 samples, 0.23%) + + + +dp_vs_conn_free (505 samples, 0.02%) + + + +blklst_hashkey (1,658 samples, 0.06%) + + + +__dp_vs_service_get (878 samples, 0.03%) + + + +netif_port_get (2,508 samples, 0.10%) + + + +netif_tx_burst (8,340 samples, 0.32%) + + + +dev_get_idev (1,592 samples, 0.06%) + + + +mbuf_header_pointer (465 samples, 0.02%) + + + +finish_task_switch (1,119 samples, 0.04%) + + + +netif_xmit (1,234 samples, 0.05%) + + + +tcp_hdr (485 samples, 0.02%) + + + +sa_pool_fetch (2,302 samples, 0.09%) + + + +[unknown] (800 samples, 0.03%) + + + +rte_atomic32_dec_and_test (3,354 samples, 0.13%) + + + +INIT_LIST_HEAD (331 samples, 0.01%) + + + +rte_atomic32_dec (251 samples, 0.01%) + + + +lcore_process_redirect_ring (225 samples, 0.01%) + + + +tcp_state_idx (763 samples, 0.03%) + + + +rte_prefetch0 (316 samples, 0.01%) + + + +dp_vs_whtlst_allow (4,425 samples, 0.17%) + + + +ip4_hdr (421 samples, 0.02%) + + + +ifa_put (298 samples, 0.01%) + + + +tcp_in_add_toa (2,279 samples, 0.09%) + + + +rte_arch_bswap32 (924 samples, 0.04%) + + + +dp_vs_service_lookup (980 samples, 0.04%) + + + +rte_raw_cksum (619 samples, 0.02%) + + + +ip4_hdrlen (944 samples, 0.04%) + + + +neigh_key_cmp (324 samples, 0.01%) + + + +rte_atomic32_dec (286 samples, 0.01%) + + + +dp_vs_in (180,174 samples, 6.84%) +dp_vs_in + + +tcp_out_adjust_mss (270 samples, 0.01%) + + + +sa4_fetch (17,483 samples, 0.66%) + + + +tcp_hdr (484 samples, 0.02%) + + + +[unknown] (834 samples, 0.03%) + + + +list_add_tail (686 samples, 0.03%) + + + +rte_pktmbuf_prepend (232 samples, 0.01%) + + + +OPENSSL_cleanse (306 samples, 0.01%) + + + +futex_wait (778 samples, 0.03%) + + + +rte_atomic32_dec_and_test (261 samples, 0.01%) + + + +rte_lcore_id (335 samples, 0.01%) + + + +net_cmp (326 samples, 0.01%) + + + 
+inet_is_addr_any (496 samples, 0.02%) + + + +dev_get_idev (1,173 samples, 0.04%) + + + +af_inet_hooks (291 samples, 0.01%) + + + +[unknown] (1,778 samples, 0.07%) + + + +dp_vs_dest_is_avail (378 samples, 0.01%) + + + +netif_xmit (2,412 samples, 0.09%) + + + +wake_up_q (297 samples, 0.01%) + + + +ipv4_output (5,028 samples, 0.19%) + + + +tcp_send_csum (2,961 samples, 0.11%) + + + +rte_atomic32_dec (762 samples, 0.03%) + + + +tcp_hdr (465 samples, 0.02%) + + + +dp_vs_conn_get (17,275 samples, 0.66%) + + + +neigh_hashkey (277 samples, 0.01%) + + + +dp_vs_conn_put (8,974 samples, 0.34%) + + + +__dpvs_timer_sched (3,915 samples, 0.15%) + + + +tcp_in_add_toa (2,192 samples, 0.08%) + + + +get_level_ticks (425 samples, 0.02%) + + + +rte_pktmbuf_lastseg (229 samples, 0.01%) + + + +timer_expire (37,128 samples, 1.41%) + + + +rte_atomic32_inc (1,365 samples, 0.05%) + + + +__dpvs_timer_sched (3,797 samples, 0.14%) + + + +__random_r (854 samples, 0.03%) + + + +mbuf_may_pull (561 samples, 0.02%) + + + +dev_get_idev (1,324 samples, 0.05%) + + + +ip4_is_frag (323 samples, 0.01%) + + + +eal_thread_loop (321,966 samples, 12.23%) +eal_thread_loop + + +tcp_fnat_in_handler (13,676 samples, 0.52%) + + + +rte_arch_bswap32 (273 samples, 0.01%) + + + +sa_pool_destroy (3,468 samples, 0.13%) + + + +route_out_net_lookup (692 samples, 0.03%) + + + +ip4_hdr (330 samples, 0.01%) + + + +rte_arch_bswap32 (949 samples, 0.04%) + + + +__list_del (543 samples, 0.02%) + + + +lcore_process_packets (213,095 samples, 8.09%) +lcore_proce.. 
+ + +rte_jhash_3words (4,674 samples, 0.18%) + + + +netif_update_worker_loop_cnt (389 samples, 0.01%) + + + +neigh_hashkey (628 samples, 0.02%) + + + +dp_vs_conn_set_timeout (251 samples, 0.01%) + + + +sys_futex (1,181 samples, 0.04%) + + + +__list_del (335 samples, 0.01%) + + + +rte_raw_cksum (1,160 samples, 0.04%) + + + +__dp_vs_conn_hash (5,965 samples, 0.23%) + + + +tcp_conn_expire (2,599 samples, 0.10%) + + + +get_level_ticks (386 samples, 0.01%) + + + +dp_vs_conn_set_timeout (257 samples, 0.01%) + + + +list_add_tail (1,486 samples, 0.06%) + + + +__list_del (558 samples, 0.02%) + + + +rte_ipv4_phdr_cksum (3,336 samples, 0.13%) + + + +get_level_ticks (372 samples, 0.01%) + + + +rte_arch_bswap16 (296 samples, 0.01%) + + + +rte_lcore_id (373 samples, 0.01%) + + + +tcp_fnat_in_handler (13,881 samples, 0.53%) + + + +rte_atomic32_dec (1,544 samples, 0.06%) + + + +idev_put (643 samples, 0.02%) + + + +tcp_hdr (419 samples, 0.02%) + + + +dev_get_idev (1,504 samples, 0.06%) + + + +lcore_process_packets (252 samples, 0.01%) + + + +dp_vs_conn_bind_dest (1,044 samples, 0.04%) + + + +tcp_conn_sched (46,275 samples, 1.76%) + + + +rte_atomic32_inc (237 samples, 0.01%) + + + +rte_is_zero_ether_addr (514 samples, 0.02%) + + + +rte_arch_bswap16 (490 samples, 0.02%) + + + +dp_vs_conn_detach_timer (377 samples, 0.01%) + + + +route_out_local_lookup (940 samples, 0.04%) + + + +ip4_hdr (337 samples, 0.01%) + + + +idev_put (743 samples, 0.03%) + + + +rte_jhash_3words (4,600 samples, 0.17%) + + + +lcore_job_xmit (12,376 samples, 0.47%) + + + +do_futex (1,038 samples, 0.04%) + + + +dp_vs_rr_schedule (916 samples, 0.03%) + + + +netif_port_get (347 samples, 0.01%) + + + +dp_vs_conn_attach_timer (3,036 samples, 0.12%) + + + +tcp_in_init_seq (1,813 samples, 0.07%) + + + +tcp_out_adjust_seq (1,451 samples, 0.06%) + + + +tcp_conn_lookup (34,098 samples, 1.29%) + + + +system_call_fastpath (944 samples, 0.04%) + + + +list_add_tail (1,496 samples, 0.06%) + + + +dp_vs_xmit_fnat (37,154 samples, 
1.41%) + + + +__list_del (288 samples, 0.01%) + + + +[unknown] (1,857 samples, 0.07%) + + + +rte_lcore_id (410 samples, 0.02%) + + + +cpuidle_enter_state (7,354 samples, 0.28%) + + + +dp_vs_conn_bind_dest (1,126 samples, 0.04%) + + + +lcore_process_packets (213,859 samples, 8.12%) +lcore_proce.. + + +ixgbe_xmit_pkts (6,909 samples, 0.26%) + + + +ifa_lookup (7,448 samples, 0.28%) + + + +__dp_vs_pre_routing (7,057 samples, 0.27%) + + + +dp_vs_conn_new (42,757 samples, 1.62%) + + + +start_cpu (11,948 samples, 0.45%) + + + +rte_atomic32_dec_and_test (255 samples, 0.01%) + + + +__dp_vs_out_xmit_fnat4 (20,282 samples, 0.77%) + + + +rte_atomic32_inc (2,805 samples, 0.11%) + + + +neigh_entry_state_trans (605 samples, 0.02%) + + + +inet_is_addr_any (544 samples, 0.02%) + + + +rte_lcore_id (356 samples, 0.01%) + + + +list_del (1,432 samples, 0.05%) + + + +rte_arch_bswap16 (535 samples, 0.02%) + + + +lcore-worker-2 (327,670 samples, 12.44%) +lcore-worker-2 + + +ip4_hdr (349 samples, 0.01%) + + + +INET_HOOK (191,708 samples, 7.28%) +INET_HOOK + + +rte_atomic32_inc (599 samples, 0.02%) + + + +dp_vs_schedule (45,224 samples, 1.72%) + + + +cpuidle_idle_call (7,494 samples, 0.28%) + + + +neigh_key_cmp (632 samples, 0.02%) + + + +list_empty (2,336 samples, 0.09%) + + + +dp_vs_conn_unhash (6,455 samples, 0.25%) + + + +rte_eth_tx_burst (7,374 samples, 0.28%) + + + +dp_vs_conn_resend_packets (1,009 samples, 0.04%) + + + +dpvs_timer_update (6,585 samples, 0.25%) + + + +rte_arch_bswap32 (298 samples, 0.01%) + + + +inet_addr_ifa_get (10,354 samples, 0.39%) + + + +__dp_vs_in (178,112 samples, 6.76%) +__dp_vs_in + + +__lll_lock_wait_private (1,469 samples, 0.06%) + + + +sa_pool_fetch (2,383 samples, 0.09%) + + + +dp_vs_blklst_lookup (4,713 samples, 0.18%) + + + +__dp_vs_fast_xmit_fnat4 (18,986 samples, 0.72%) + + + +__random (2,906 samples, 0.11%) + + + +dp_vs_conn_is_in_timer (286 samples, 0.01%) + + + +rte_arch_bswap16 (912 samples, 0.03%) + + + +tcp_conn_sched (46,913 samples, 1.78%) + 
+ + +__dp_vs_fast_outxmit_fnat4 (19,008 samples, 0.72%) + + + +tcp_in_adjust_seq (338 samples, 0.01%) + + + +route4_output (1,775 samples, 0.07%) + + + +eth_addr_equal (1,037 samples, 0.04%) + + + +rte_ipv4_phdr_cksum (3,354 samples, 0.13%) + + + +dp_vs_conn_resend_packets (897 samples, 0.03%) + + + +__list_del (567 samples, 0.02%) + + + +dp_vs_proto_lookup (404 samples, 0.02%) + + + +rte_ether_addr_copy (331 samples, 0.01%) + + + +ifa_lookup (7,906 samples, 0.30%) + + + +eth_type_parse (1,292 samples, 0.05%) + + + +ip4_hdrlen (358 samples, 0.01%) + + + +rte_pktmbuf_prepend (441 samples, 0.02%) + + + +ip4_is_frag (319 samples, 0.01%) + + + +dp_vs_schedule (44,575 samples, 1.69%) + + + +[unknown] (1,784 samples, 0.07%) + + + +rte_pktmbuf_append (230 samples, 0.01%) + + + +__lll_unlock_wake_private (1,897 samples, 0.07%) + + + +INET_HOOK (5,116 samples, 0.19%) + + + +rte_atomic32_dec (1,482 samples, 0.06%) + + + +sa4_fetch (17,283 samples, 0.66%) + + + +rte_atomic32_dec_and_test (320 samples, 0.01%) + + + +dp_vs_conn_alloc (1,354 samples, 0.05%) + + + +mbuf_header_pointer (254 samples, 0.01%) + + + +rte_arch_bswap16 (1,051 samples, 0.04%) + + + +dp_vs_conn_attach_timer (3,006 samples, 0.11%) + + + +__memset_sse2 (575 samples, 0.02%) + + + +tcp_fnat_in_handler (13,672 samples, 0.52%) + + + +inet_addr_equal (411 samples, 0.02%) + + + +dp_vs_conn_new (42,892 samples, 1.63%) + + + +dp_vs_synproxy_syn_rcv (3,276 samples, 0.12%) + + + +list_del (853 samples, 0.03%) + + + +rte_raw_cksum (589 samples, 0.02%) + + + +INET_HOOK (5,330 samples, 0.20%) + + + +rte_timer_tick_cb (37,940 samples, 1.44%) + + + +__vdso_clock_gettime (478 samples, 0.02%) + + + +tcp_in_adjust_seq (374 samples, 0.01%) + + + +dp_vs_proto_lookup (395 samples, 0.01%) + + + +tcp_conn_sched (46,285 samples, 1.76%) + + + +netif_xmit (1,195 samples, 0.05%) + + + +dp_vs_xmit_fnat (38,356 samples, 1.46%) + + + +rte_pktmbuf_append (236 samples, 0.01%) + + + +xmit_inbound (48,523 samples, 1.84%) +x.. 
+ + +tcp_out_adjust_mss (279 samples, 0.01%) + + + +port_tab_hashkey (271 samples, 0.01%) + + + +__lll_unlock_wake_private (385 samples, 0.01%) + + + +dp_vs_laddr_bind (26,674 samples, 1.01%) + + + +lcore_process_packets (212,686 samples, 8.08%) +lcore_proce.. + + +dp_vs_stats_conn (289 samples, 0.01%) + + + +netif_hard_xmit (1,023 samples, 0.04%) + + + +list_del (766 samples, 0.03%) + + + +rte_arch_bswap16 (512 samples, 0.02%) + + + +rte_pktmbuf_append (265 samples, 0.01%) + + + +__dp_vs_service_get (857 samples, 0.03%) + + + +inet_addr_equal (600 samples, 0.02%) + + + +__rte_raw_cksum_reduce (569 samples, 0.02%) + + + +__lll_unlock_wake_private (542 samples, 0.02%) + + + +__clock_gettime (755 samples, 0.03%) + + + +rte_jhash_3words (1,408 samples, 0.05%) + + + +rte_atomic32_inc (369 samples, 0.01%) + + + +dp_vs_conn_set_timeout (640 samples, 0.02%) + + + +list_add (2,490 samples, 0.09%) + + + +tcp_out_adjust_mss (308 samples, 0.01%) + + + +dp_vs_conn_unbind_dest (760 samples, 0.03%) + + + +tcp_hdr (646 samples, 0.02%) + + + +rte_pktmbuf_prepend (258 samples, 0.01%) + + + +dp_vs_conn_attach_timer (2,927 samples, 0.11%) + + + +sa_pool_destroy (3,452 samples, 0.13%) + + + +inet_addr_equal (650 samples, 0.02%) + + + +inet_addr_equal (407 samples, 0.02%) + + + +__lll_lock_wait_private (243 samples, 0.01%) + + + +rte_atomic32_read (336 samples, 0.01%) + + + +inet_addr_equal (2,126 samples, 0.08%) + + + +ifa_put (301 samples, 0.01%) + + + +dp_vs_conn_put (8,931 samples, 0.34%) + + + +rte_atomic32_inc (290 samples, 0.01%) + + + +rte_atomic32_dec (1,493 samples, 0.06%) + + + +route_out_local_lookup (898 samples, 0.03%) + + + +rte_pktmbuf_adj (628 samples, 0.02%) + + + +inet_addr_ifa_get (11,073 samples, 0.42%) + + + +dpvs_timer_update (6,110 samples, 0.23%) + + + +ipv4_output_fin (4,248 samples, 0.16%) + + + +rte_atomic32_dec (447 samples, 0.02%) + + + +netif_xmit (3,539 samples, 0.13%) + + + +neigh_key_cmp (718 samples, 0.03%) + + + +rte_atomic32_dec (774 samples, 0.03%) 
+ + + +tcp_fnat_out_handler (10,775 samples, 0.41%) + + + +_raw_spin_unlock_irqrestore (576 samples, 0.02%) + + + +rte_atomic32_dec_and_test (278 samples, 0.01%) + + + +tcp_hdr (282 samples, 0.01%) + + + +get_level_ticks (391 samples, 0.01%) + + + +dp_vs_fast_outxmit_fnat (19,462 samples, 0.74%) + + + +inet_addr_ifa_get (11,444 samples, 0.43%) + + + +rte_ipv4_phdr_cksum (2,334 samples, 0.09%) + + + +rte_get_timer_cycles (5,059 samples, 0.19%) + + + +timer_expire (37,184 samples, 1.41%) + + + +tcp_in_adjust_seq (249 samples, 0.01%) + + + +sa_release (18,394 samples, 0.70%) + + + +rte_arch_bswap16 (689 samples, 0.03%) + + + +rte_lcore_id (362 samples, 0.01%) + + + +rte_atomic32_read (267 samples, 0.01%) + + + +blklst_hashkey (1,608 samples, 0.06%) + + + +xmit_inbound (49,087 samples, 1.86%) +x.. + + +netif_hard_xmit (2,937 samples, 0.11%) + + + +__dp_vs_xmit_fnat4 (36,442 samples, 1.38%) + + + +__lll_lock_wait_private (249 samples, 0.01%) + + + +rte_lcore_id (366 samples, 0.01%) + + + +system_call_fastpath (888 samples, 0.03%) + + + +rte_is_zero_ether_addr (543 samples, 0.02%) + + + +tcp_send_csum (1,538 samples, 0.06%) + + + +rte_is_zero_ether_addr (472 samples, 0.02%) + + + +dp_vs_rr_schedule (931 samples, 0.04%) + + + +route4_put (379 samples, 0.01%) + + + +dpvs_time_rand_delay (325 samples, 0.01%) + + + +dpvs_timer_sched (2,525 samples, 0.10%) + + + +rte_atomic32_inc (1,716 samples, 0.07%) + + + +list_add_tail (672 samples, 0.03%) + + + +rte_arch_bswap32 (909 samples, 0.03%) + + + +pkt_type_get (3,551 samples, 0.13%) + + + +ipv4_output_fin2 (4,349 samples, 0.17%) + + + +dev_get_idev (1,439 samples, 0.05%) + + + +neigh_hashkey (662 samples, 0.03%) + + + +netif_hard_xmit (1,042 samples, 0.04%) + + + +rte_atomic32_dec_and_test (315 samples, 0.01%) + + + +eth_addr_equal (965 samples, 0.04%) + + + +msg_slave_process (5,829 samples, 0.22%) + + + +system_call_fastpath (815 samples, 0.03%) + + + +__list_add (357 samples, 0.01%) + + + +rte_arch_bswap16 (610 samples, 
0.02%) + + + +[unknown] (1,664 samples, 0.06%) + + + +mbuf_header_pointer (449 samples, 0.02%) + + + +netif_port_get (279 samples, 0.01%) + + + +rte_jhash_3words (1,413 samples, 0.05%) + + + +tcp_in_adjust_seq (368 samples, 0.01%) + + + +rte_ipv4_phdr_cksum (3,378 samples, 0.13%) + + + +dp_vs_rr_schedule (949 samples, 0.04%) + + + +dp_vs_conn_put (8,998 samples, 0.34%) + + + +tcp_send_csum (1,552 samples, 0.06%) + + + +qsch_sched_all (277 samples, 0.01%) + + + +dp_vs_fast_outxmit_fnat (19,453 samples, 0.74%) + + + +__rte_raw_cksum (886 samples, 0.03%) + + + +__dpvs_timer_sched (2,285 samples, 0.09%) + + + +rte_atomic32_inc (339 samples, 0.01%) + + + +rte_pktmbuf_prepend (398 samples, 0.02%) + + + +rte_raw_cksum (1,730 samples, 0.07%) + + + +rte_lcore_id (351 samples, 0.01%) + + + +__rte_raw_cksum (616 samples, 0.02%) + + + +rte_lcore_id (386 samples, 0.01%) + + + +system_call_fastpath (1,209 samples, 0.05%) + + + +get_level_ticks (239 samples, 0.01%) + + + +rte_raw_cksum (1,729 samples, 0.07%) + + + +rte_get_tsc_cycles (4,065 samples, 0.15%) + + + +rte_rdtsc (4,785 samples, 0.18%) + + + +inet_addr_ifa_put (313 samples, 0.01%) + + + +rte_atomic32_dec (878 samples, 0.03%) + + + +rte_arch_bswap16 (255 samples, 0.01%) + + + +get_level_ticks (432 samples, 0.02%) + + + +dp_vs_conn_alloc (1,347 samples, 0.05%) + + + +dp_vs_conn_resend_packets (940 samples, 0.04%) + + + +INET_HOOK (192,260 samples, 7.30%) +INET_HOOK + + +dev_get_idev (1,560 samples, 0.06%) + + + +sa_release (18,356 samples, 0.70%) + + + +__dp_vs_out_xmit_fnat4 (20,734 samples, 0.79%) + + + +ipv4_rcv (197,490 samples, 7.50%) +ipv4_rcv + + +dp_vs_conn_refresh_timer (6,605 samples, 0.25%) + + + +futex_wait_setup (530 samples, 0.02%) + + + +__rte_raw_cksum_reduce (399 samples, 0.02%) + + + +rte_arch_bswap32 (285 samples, 0.01%) + + + +[unknown] (1,833 samples, 0.07%) + + + +rte_atomic32_inc (314 samples, 0.01%) + + + +list_del (1,628 samples, 0.06%) + + + +__dpvs_timer_sched (4,045 samples, 0.15%) + + + 
+timeval_to_ticks (658 samples, 0.02%) + + + +put_laddr (276 samples, 0.01%) + + + +this_lcore_sched (247 samples, 0.01%) + + + +ip4_hdr (295 samples, 0.01%) + + + +ip4_hdrlen (415 samples, 0.02%) + + + +ip4_hdr (425 samples, 0.02%) + + + +dp_vs_schedule (45,142 samples, 1.71%) + + + +ip4_hdrlen (522 samples, 0.02%) + + + +dp_vs_laddr_unbind (19,276 samples, 0.73%) + + + +mbuf_header_pointer (471 samples, 0.02%) + + + +tcp_out_adjust_seq (1,401 samples, 0.05%) + + + +inet_addr_equal (2,148 samples, 0.08%) + + + +tcp_out_save_seq (745 samples, 0.03%) + + + +dp_vs_whtlst_allow (4,162 samples, 0.16%) + + + +[unknown] (906 samples, 0.03%) + + + +rte_is_zero_ether_addr (485 samples, 0.02%) + + + +rte_atomic32_dec_and_test (3,300 samples, 0.13%) + + + +dp_vs_dest_is_avail (431 samples, 0.02%) + + + +tcp_send_csum (4,579 samples, 0.17%) + + + +__list_del_entry (260 samples, 0.01%) + + + +list_move_tail (359 samples, 0.01%) + + + +rte_ipv4_phdr_cksum (984 samples, 0.04%) + + + +neigh_entry_state_trans (622 samples, 0.02%) + + + +this_lcore_sched (244 samples, 0.01%) + + + +ip4_hdr (330 samples, 0.01%) + + + +rte_lcore_id (512 samples, 0.02%) + + + +__dp_vs_service_get (845 samples, 0.03%) + + + +netif_port_get (2,405 samples, 0.09%) + + + +dp_vs_conn_alloc (1,383 samples, 0.05%) + + + +dp_vs_conn_set_timeout (690 samples, 0.03%) + + + +netif_hard_xmit (2,832 samples, 0.11%) + + + +rte_atomic32_inc (368 samples, 0.01%) + + + +tcp_in_remove_ts (557 samples, 0.02%) + + + +list_add (2,398 samples, 0.09%) + + + +inet_addr_ifa_put (317 samples, 0.01%) + + + +sa_pool_hash (399 samples, 0.02%) + + + +rte_atomic32_read (298 samples, 0.01%) + + + +__memset_sse2 (596 samples, 0.02%) + + + +timeval_to_ticks (697 samples, 0.03%) + + + +dp_vs_fill_iphdr (2,083 samples, 0.08%) + + + +dp_vs_save_xmit_info (1,664 samples, 0.06%) + + + +rte_jhash_3words (1,323 samples, 0.05%) + + + +dp_vs_laddr_bind (26,326 samples, 1.00%) + + + +tcp_out_adjust_seq (1,437 samples, 0.05%) + + + 
+ipv4_output_fin2 (4,378 samples, 0.17%) + + + +OPENSSL_cleanse (265 samples, 0.01%) + + + +dp_vs_conn_refresh_timer (7,392 samples, 0.28%) + + + +rte_pktmbuf_adj (621 samples, 0.02%) + + + +get_level_ticks (393 samples, 0.01%) + + + +do_futex (762 samples, 0.03%) + + + +__rte_raw_cksum (928 samples, 0.04%) + + + +netif_xmit (3,654 samples, 0.14%) + + + +rte_raw_cksum (1,721 samples, 0.07%) + + + +rte_pktmbuf_append (318 samples, 0.01%) + + + +rte_atomic32_inc (1,479 samples, 0.06%) + + + +list_del (1,426 samples, 0.05%) + + + +rte_is_zero_ether_addr (493 samples, 0.02%) + + + +INET_HOOK (4,770 samples, 0.18%) + + + +dp_vs_save_outxmit_info (1,321 samples, 0.05%) + + + +__list_del (562 samples, 0.02%) + + + +rte_raw_cksum (1,146 samples, 0.04%) + + + +rte_atomic32_dec_and_test (288 samples, 0.01%) + + + +ifa_lookup (7,251 samples, 0.28%) + + + +eth_addr_equal (1,006 samples, 0.04%) + + + +rte_atomic32_inc (1,346 samples, 0.05%) + + + +route_out_net_lookup (662 samples, 0.03%) + + + +ip4_hdrlen (542 samples, 0.02%) + + + +netif_deliver_mbuf (206,739 samples, 7.85%) +netif_deliv.. + + +tcp_send_csum (3,083 samples, 0.12%) + + + +dp_vs_conn_hashkey (5,128 samples, 0.19%) + + + +lcore_job_recv_fwd (238,961 samples, 9.07%) +lcore_job_rec.. 
+ + +ipv4_output_fin2 (4,274 samples, 0.16%) + + + +ifa_put (307 samples, 0.01%) + + + +netif_port_get (266 samples, 0.01%) + + + +ip4_is_frag (322 samples, 0.01%) + + + +rte_eth_rx_burst (236 samples, 0.01%) + + + +rte_pktmbuf_append (241 samples, 0.01%) + + + +tick_nohz_idle_exit (521 samples, 0.02%) + + + +ip4_hdr (328 samples, 0.01%) + + + +rte_atomic32_inc (396 samples, 0.02%) + + + +sa4_fetch (17,545 samples, 0.67%) + + + +rte_ipv4_phdr_cksum (2,312 samples, 0.09%) + + + +dp_vs_fast_xmit_fnat (19,244 samples, 0.73%) + + + +list_move_tail (535 samples, 0.02%) + + + +tcp_in_init_seq (1,921 samples, 0.07%) + + + +__dp_vs_pre_routing (7,178 samples, 0.27%) + + + +__memset_sse2 (596 samples, 0.02%) + + + +rte_atomic32_inc (419 samples, 0.02%) + + + +rte_ipv4_phdr_cksum (2,153 samples, 0.08%) + + + +rte_ipv4_phdr_cksum (1,105 samples, 0.04%) + + + +tcp_in_add_toa (2,095 samples, 0.08%) + + + +blklst_hashkey (1,584 samples, 0.06%) + + + +sa_pool_fetch (2,339 samples, 0.09%) + + + +mbuf_header_pointer (488 samples, 0.02%) + + + +tcp_send_csum (3,096 samples, 0.12%) + + + +dp_vs_conn_refresh_timer (6,982 samples, 0.27%) + + + +dp_vs_synproxy_syn_rcv (3,482 samples, 0.13%) + + + +dp_vs_dest_put (327 samples, 0.01%) + + + +tcp_in_add_toa (7,911 samples, 0.30%) + + + +inet_addr_equal (2,439 samples, 0.09%) + + + +rte_atomic32_dec_and_test (307 samples, 0.01%) + + + +rte_ether_addr_copy (415 samples, 0.02%) + + + +ifa_put (298 samples, 0.01%) + + + +rte_atomic32_inc (2,848 samples, 0.11%) + + + +rte_arch_bswap16 (527 samples, 0.02%) + + + +tcp_in_remove_ts (556 samples, 0.02%) + + + +rte_get_timer_cycles (4,623 samples, 0.18%) + + + +dp_vs_conn_expire (36,298 samples, 1.38%) + + + +dp_vs_stats_in (1,264 samples, 0.05%) + + + + diff --git a/test/tc/ipset_cls.sh b/test/tc/ipset_cls.sh new file mode 100755 index 000000000..5dbb28535 --- /dev/null +++ b/test/tc/ipset_cls.sh @@ -0,0 +1,375 @@ +#!/bin/env bash + +###### +# Notes: +# 1. 
restart dpvs first and then run the script +# 2. two linux servers are needed at least, one for dpvs, and one for both rs and client. +# 3. this is an interactive script, you should check the result of each set type +# according to the commented command at each pause. +###### + +iface=dpdk0 +dpip=../../bin/dpip +ipvsadm=../../bin/ipvsadm + +trap cleanup SIGINT SIGTERM EXIT + +function init() +{ + $dpip addr add 192.168.88.12/24 dev $iface + $dpip addr add 192.168.88.112/24 dev $iface + $dpip addr add 2001::112/64 dev $iface + $dpip addr add 2001::1:112/64 dev $iface + $dpip link set $iface tc-ingress on + $dpip link set $iface tc-egress on + + $dpip addr add 192.168.88.1/32 dev $iface + $ipvsadm -At 192.168.88.1:80 -s wrr + $ipvsadm -at 192.168.88.1:80 -r 192.168.88.15:80 -w 100 -b + $ipvsadm -at 192.168.88.1:80 -r 192.168.88.115:80 -w 100 -b + $ipvsadm -Pt 192.168.88.1:80 -z 192.168.88.241 -F $iface + $ipvsadm -At 192.168.88.1:8080 -s wrr + $ipvsadm -at 192.168.88.1:8080 -r 192.168.88.15:80 -w 100 -b + $ipvsadm -at 192.168.88.1:8080 -r 192.168.88.115:80 -w 100 -b + $ipvsadm -Pt 192.168.88.1:8080 -z 192.168.88.242 -F $iface + + $dpip addr add 2001::1 dev $iface + $ipvsadm -At [2001::1]:80 -s wlc + $ipvsadm -at [2001::1]:80 -r 192.168.88.15:80 -b + $ipvsadm -at [2001::1]:80 -r 192.168.88.115:80 -b + $ipvsadm -Pt [2001::1]:80 -z 192.168.88.243 -F $iface + $ipvsadm -At [2001::1]:8080 -s wlc + $ipvsadm -at [2001::1]:8080 -r 192.168.88.15:80 -b + $ipvsadm -at [2001::1]:8080 -r 192.168.88.115:80 -b + $ipvsadm -Pt [2001::1]:8080 -z 192.168.88.244 -F $iface +} + +function cleanup() +{ + $dpip link set $iface tc-ingress off + $dpip link set $iface tc-egress off + $dpip addr del 192.168.88.12/24 dev $iface + $dpip addr del 192.168.88.112/24 dev $iface + $dpip addr del 2001::112/64 dev $iface + $dpip addr del 2001::1:112/64 dev $iface + + $dpip addr del 192.168.88.1/32 dev $iface + $ipvsadm -Qt 192.168.88.1:80 -z 192.168.88.241 -F $iface + $ipvsadm -Dt 192.168.88.1:80 + 
$ipvsadm -Qt 192.168.88.1:8080 -z 192.168.88.242 -F $iface + $ipvsadm -Dt 192.168.88.1:8080 + + $dpip addr del 2001::1 dev $iface + $ipvsadm -Qt [2001::1]:80 -z 192.168.88.243 -F $iface + $ipvsadm -Dt [2001::1]:80 + $ipvsadm -Qt [2001::1]:8080 -z 192.168.88.244 -F $iface + $ipvsadm -Dt [2001::1]:8080 +} + +function next() +{ + while true + do + read -p "continue next test? (yes|no|exit) -- " ans + if [ _$ans == _yes ]; then + break + elif [ _$ans == _exit ]; then + exit + else + sleep 1 + fi + done +} + +function bitmap_ip() +{ + $dpip ipset create foo bitmap:ip range 192.168.0.0/16 + $dpip qsch add dev $iface ingress pfifo_fast + $dpip cls add dev $iface qsch ingress handle 1:1 ipset match foo,src target drop + $dpip cls add dev $iface qsch ingress handle 1:2 ipset match foo,dst target drop + $dpip ipset add foo 192.168.88.15 # client + $dpip ipset add foo 192.168.88.12 # dpvs + # ping -c 3 192.168.88.12 -m 1 -I 192.168.88.15 # fail + # ping -c 3 192.168.88.112 -m 1 -I 192.168.88.15 # fail + # ping -c 3 192.168.88.12 -m 1 -I 192.168.88.115 # fail + # ping -c 3 192.168.88.112 -m 1 -I 192.168.88.115 # ok +} + +function bitmap_ip_clean() +{ + $dpip cls del dev $iface qsch ingress handle 1:1 + $dpip cls del dev $iface qsch ingress handle 1:2 + $dpip qsch del dev $iface ingress + $dpip ipset destroy foo +} + +function bitmap_port() +{ + $dpip ipset create foo bitmap:port range 0-65535 + $dpip qsch add dev $iface ingress pfifo limit 1024 + $dpip cls add dev $iface qsch ingress handle 1:1 ipset match foo,dst target drop + # curl 192.168.88.1:80 # ok + $dpip ipset add foo tcp:80 + # curl 192.168.88.1:80 # fail + # curl 192.168.88.1:8080 # ok + # curl -g [2001::1]:80 # ok +} + +function bitmap_port_clean() +{ + $dpip cls del dev $iface qsch ingress handle 1:1 + $dpip qsch del dev $iface ingress + $dpip ipset destroy foo +} + +function bitmap_ip_mac() +{ + $dpip ipset create foo bitmap:ip,mac range 192.168.88.0/24 + $dpip qsch add dev $iface ingress pfifo limit 1024 + $dpip 
cls add dev $iface qsch ingress handle 1:1 ipset match foo,src target drop + $dpip ipset add foo 192.168.88.15,a0:36:9f:9d:5d:10 + # ping -c 3 -m 1 192.168.88.112 -I 192.168.88.15 # fail + # ping -c 3 -m 1 192.168.88.112 -I 192.168.88.115 # ok +} + +function bitmap_ip_mac_clean() +{ + $dpip cls del dev $iface qsch ingress handle 1:1 + $dpip qsch del dev $iface ingress + $dpip ipset destroy foo +} + +function hash_ip() +{ + $dpip ipset -6 create bar hash:ip + $dpip qsch add dev $iface ingress pfifo limit 4096 + $dpip cls add dev $iface qsch ingress handle 1:1 pkttype ipv6 prio 100 ipset match bar,src target drop + $dpip cls add dev $iface qsch ingress handle 1:2 pkttype ipv6 prio 101 ipset match bar,dst target drop + $dpip ipset add bar 2001::15 # client + $dpip ipset add bar 2001::1:112 # dpvs + # ping6 -c 3 2001::112 -m 1 -I 2001::15 # fail + # ping6 -c 3 2001::112 -m 1 -I 2001::1:15 # ok + # ping6 -c 3 2001::1:112 -m 1 -I 2001::1:15 # fail + # ping6 -c 3 2001::1:112 -m 1 -I 2001::15 # fail +} + +function hash_ip_clean() +{ + $dpip cls del dev $iface qsch ingress handle 1:1 + $dpip cls del dev $iface qsch ingress handle 1:2 + $dpip qsch del dev $iface ingress + $dpip ipset destroy bar +} + +function hash_ip_port() +{ + $dpip ipset create foo hash:ip,port + $dpip ipset -6 create bar hash:ip,port + $dpip qsch add dev $iface ingress pfifo_fast limit 1024 + $dpip cls add dev $iface qsch ingress handle 1:1 pkttype ipv4 prio 100 ipset match foo,dst target drop + $dpip cls add dev $iface qsch ingress handle 1:2 pkttype ipv6 ipset prio 101 match bar,dst target drop + $dpip ipset add foo 192.168.88.1,tcp:80 + $dpip ipset add bar 2001::1,tcp:8080 + # curl -g 192.168.88.1:80 # fail + # curl -g 192.168.88.1:8080 # ok + # curl -g [2001::1]:80 # ok + # curl -g [2001::1]:8080 # fail +} + +function hash_ip_port_clean() +{ + $dpip cls del dev $iface qsch ingress handle 1:1 + $dpip cls del dev $iface qsch ingress handle 1:2 + $dpip qsch del dev $iface ingress + $dpip ipset destroy 
foo + $dpip ipset destroy bar +} + +function hash_ip_port_ip() +{ + $dpip ipset create foo hash:ip,port,ip + $dpip ipset -6 create bar hash:ip,port,ip + $dpip qsch add dev $iface ingress pfifo_fast limit 1024 + $dpip cls add dev $iface qsch ingress handle 1:1 pkttype ipv4 prio 100 ipset match foo,dst target drop + $dpip cls add dev $iface qsch ingress handle 1:2 pkttype ipv6 ipset prio 101 match bar,dst target drop + $dpip ipset add foo 192.168.88.15,tcp:80,192.168.88.1 + $dpip ipset add bar 2001::15,tcp:8080,2001::1 + # curl -g 192.168.88.1:80 # from 192.168.88.15, fail + # curl -g 192.168.88.1:80 # from 192.168.88.115, ok + # curl -g 192.168.88.1:8080 # ok + # curl -g [2001::1]:80 # ok + # curl -g [2001::1]:8080 # from 2001::15, fail + # curl -g [2001::1]:8080 # from 2001::1:15, ok +} + +function hash_ip_port_ip_clean() +{ + $dpip cls del dev $iface qsch ingress handle 1:1 + $dpip cls del dev $iface qsch ingress handle 1:2 + $dpip qsch del dev $iface ingress + $dpip ipset destroy foo + $dpip ipset destroy bar +} + +function hash_ip_port_net() +{ + $dpip ipset create foo hash:ip,port,net + $dpip ipset -6 create bar hash:ip,port,net + $dpip qsch add dev $iface ingress pfifo_fast limit 1024 + $dpip cls add dev $iface qsch ingress handle 1:1 pkttype ipv4 prio 100 ipset match foo,dst target drop + $dpip cls add dev $iface qsch ingress handle 1:2 pkttype ipv6 ipset prio 101 match bar,dst target drop + $dpip ipset add foo 192.168.88.1,tcp:80,192.168.88.0/26 # note: net always corresponds to mbuf source!
+ $dpip ipset add bar 2001::1,tcp:8080,2001::/120 + # curl -g 192.168.88.1:80 # from 192.168.88.15, fail + # curl -g 192.168.88.1:80 # from 192.168.88.115, ok + # curl -g 192.168.88.1:8080 # ok + # curl -g [2001::1]:80 # ok + # curl -g [2001::1]:8080 # from 2001::15, fail + # curl -g [2001::1]:8080 # from 2001::1:15, ok +} + +function hash_ip_port_net_clean() +{ + $dpip cls del dev $iface qsch ingress handle 1:1 + $dpip cls del dev $iface qsch ingress handle 1:2 + $dpip qsch del dev $iface ingress + $dpip ipset destroy foo + $dpip ipset destroy bar +} + +function hash_net() +{ + $dpip ipset create foo hash:net + $dpip qsch add dev $iface ingress pfifo limit 4096 + $dpip cls add dev $iface qsch ingress handle 1:1 pkttype ipv4 prio 100 ipset match foo,src target drop + $dpip cls add dev $iface qsch ingress handle 1:2 pkttype ipv4 prio 101 ipset match foo,dst target drop + $dpip ipset add foo 192.168.88.0/26 + # ping -c 3 192.168.88.12 -m 1 -I 192.168.88.15 # fail + # ping -c 3 192.168.88.12 -m 1 -I 192.168.88.115 # fail + # ping -c 3 192.168.88.112 -m 1 -I 192.168.88.15 # fail + # ping -c 3 192.168.88.112 -m 1 -I 192.168.88.115 # ok +} + +function hash_net_clean() +{ + $dpip cls del dev $iface qsch ingress handle 1:1 + $dpip cls del dev $iface qsch ingress handle 1:2 + $dpip qsch del dev $iface ingress + $dpip ipset destroy foo +} + +function hash_net_port() +{ + $dpip ipset -6 create bar hash:net,port + $dpip qsch add dev $iface ingress pfifo limit 4096 + $dpip cls add dev $iface qsch ingress handle 1:1 pkttype ipv6 ipset match bar,dst target drop + $dpip ipset add bar 2001::/120,tcp:80 + # curl -g [2001::1]:8080 # ok + # curl -g [2001::1]:80 # fail +} + +function hash_net_port_clean() +{ + $dpip cls del dev $iface qsch ingress handle 1:1 + $dpip qsch del dev $iface ingress + $dpip ipset destroy bar +} + +function hash_net_port_iface() +{ + $dpip ipset create foo hash:net,port,iface + $dpip qsch add dev $iface ingress pfifo_fast limit 1024 + $dpip cls add dev $iface 
qsch ingress handle 1:1 pkttype ipv4 ipset match foo,dst target drop + $dpip ipset add foo 192.168.88.1,tcp:80,dpdk0 + # curl 192.168.88.1:80 # fail + # curl 192.168.88.1:8080 # ok +} + +function hash_net_port_iface_clean() +{ + $dpip cls del dev $iface qsch ingress handle 1:1 + $dpip qsch del dev $iface ingress + $dpip ipset destroy foo +} + +function hash_net_port_net() +{ + $dpip ipset create foo hash:net,port,net + $dpip qsch add dev $iface ingress pfifo_fast limit 1024 + $dpip cls add dev $iface qsch ingress handle 1:1 pkttype ipv4 ipset match foo,dst target drop + $dpip ipset add foo 192.168.88.0/26,tcp:8080,192.168.88.1/32 + # curl -g 192.168.88.1:80 # ok + # curl -g 192.168.88.1:8080 # from 192.168.88.15, fail + # curl -g 192.168.88.1:8080 # from 192.168.88.115, ok +} + +function hash_net_port_net_clean() +{ + $dpip cls del dev $iface qsch ingress handle 1:1 + $dpip qsch del dev $iface ingress + $dpip ipset destroy foo +} + +################################################ + +init + +echo "---------> start tc cls ipset test <---------" +next + +echo "bitmap:ip" +bitmap_ip +next +bitmap_ip_clean + +echo "bitmap:port" +bitmap_port +next +bitmap_port_clean + +echo "bitmap:ip,mac" +bitmap_ip_mac +next +bitmap_ip_mac_clean + +echo "hash:ip" +hash_ip +next +hash_ip_clean + +echo "hash:ip,port" +hash_ip_port +next +hash_ip_port_clean + +echo "hash:ip,port,ip" +hash_ip_port_ip +next +hash_ip_port_ip_clean + +echo "hash:ip,port,net" +hash_ip_port_net +next +hash_ip_port_net_clean + +echo "hash:net" +hash_net +next +hash_net_clean + +echo "hash:net,port" +hash_net_port +next +hash_net_port_clean + +echo "hash:net,port,net" +hash_net_port_net +next +hash_net_port_net_clean + +echo "hash:net,port,iface" +hash_net_port_iface +next +hash_net_port_iface_clean + +echo "---------> end tc cls ipset test <---------" diff --git a/tools/dpip/Makefile b/tools/dpip/Makefile index 8459f60a3..e1bbe21e4 100644 --- a/tools/dpip/Makefile +++ b/tools/dpip/Makefile @@ -23,18 +23,25 @@ 
TARGET = build/dpip CFLAGS = -g -O0 CFLAGS += -Wall -Werror -Wstrict-prototypes -Wmissing-prototypes +CFLAGS += -Wno-address-of-packed-member CFLAGS += -I ../../include CFLAGS += -I ../keepalived/keepalived/include +ifneq ("$(wildcard ../../src/VERSION)","") +VERSION_STRING := $(shell ../../src/VERSION) +else +VERSION_STRING := $(shell git describe --tags --always) +endif + LIBS = -lnuma -DEFS = -D DPVS_MAX_LCORE=64 +DEFS = -D DPVS_MAX_LCORE=64 -D DPIP_VERSION=\"$(VERSION_STRING)\" CFLAGS += $(DEFS) -OBJS = dpip.o utils.o route.o addr.o neigh.o link.o vlan.o \ - qsch.o cls.o tunnel.o ipset.o ipv6.o iftraf.o eal_mem.o ../../src/common.o \ - ../keepalived/keepalived/check/sockopt.o +OBJS = ipset.o dpip.o utils.o route.o addr.o neigh.o link.o vlan.o \ + qsch.o cls.o tunnel.o ipset.o ipv6.o iftraf.o eal_mem.o flow.o \ + ../../src/common.o ../keepalived/keepalived/check/sockopt.o all: $(TARGET) diff --git a/tools/dpip/cls.c b/tools/dpip/cls.c index 718d3d8cc..cab722a68 100644 --- a/tools/dpip/cls.c +++ b/tools/dpip/cls.c @@ -40,8 +40,8 @@ static void cls_help(void) "\n" "Parameters:\n" " PKTTYPE := { ipv4 | ipv6 | vlan }\n" - " CLS_TYPE := { match }\n" - " COPTIONS := { MATCH_OPTS }\n" + " CLS_TYPE := { match | ipset }\n" + " COPTIONS := { MATCH_OPTS | SET_OPTS }\n" " PRIO := NUMBER\n" "\n" "Match options:\n" @@ -55,6 +55,10 @@ static void cls_help(void) " RANGE := ADDR[-ADDR][:PORT[-PORT]]\n" " IIF := \"iif=IFNAME\"\n" " OIF := \"oif=IFNAME\"\n" + "Set options:\n" + " SET_OPTS := match IPSET { target { CHILD_QSCH | drop } }\n" + " IPSET := SETNAME{,TARGET }\n" + " TARGET := \"{ src | dst }\"\n" "\n" "Examples:\n" " dpip cls show dev dpdk0 qsch 1:\n" @@ -63,6 +67,7 @@ static void cls_help(void) " dpip cls add dev dpdk0 qsch 1: handle 1:10 \\\n" " match pattern 'tcp,from=192.168.0.1:1-1024,oif=eth1'\\\n" " target 1:1\n" + " dpip cls add dev dpdk0 qsch root ipset match denyset,src target drop\n" " dpip cls del dev dpdk0 qsch 1: handle 1:10\n" ); } @@ -93,11 +98,48 @@ 
static void cls_dump_param(const char *ifname, const union tc_param *param, printf("%s target %s", dump_match(m->proto, &m->match, patt, sizeof(patt)), result); + } else if (strcmp(cls->kind, "ipset") == 0) { + char result[32], target[16]; + const struct tc_cls_ipset_copt *set = &cls->copt.set; + + if (set->result.drop) + snprintf(result, sizeof(result), "%s", "drop"); + else + snprintf(result, sizeof(result), "%s", + tc_handle_itoa(set->result.sch_id, target, sizeof(target))); + printf("ipset match %s,%s target %s", set->setname, + set->dst_match ? "dst" : "src", result); } printf("\n"); } +/* + * Parse an ipset match argument of the form SETNAME[,{src|dst}]. + * The comma in args is overwritten with NUL, the set name (which must be + * shorter than IPSET_MAXNAMELEN) is copied NUL-terminated into setname, and + * *dst_match is set for "dst" (it stays false for "src" or no direction). + * Returns EDPVS_OK, or EDPVS_INVAL for an empty/oversized name or an + * unknown direction. + */ +static inline int parse_cls_ipset(const char *args, char *setname, bool *dst_match) +{ + size_t len; + char *dir; + + *dst_match = false; // default false + + dir = strchr(args, ','); + if (dir) { + *dir++ = '\0'; + /* exact match only: "srcfoo"/"dstfoo" are not valid directions */ + if (strcmp(dir, "src") == 0) + *dst_match = false; + else if (strcmp(dir, "dst") == 0) + *dst_match = true; + else + return EDPVS_INVAL; + } + + len = strlen(args); + if (!len || len >= IPSET_MAXNAMELEN) + return EDPVS_INVAL; + /* len < IPSET_MAXNAMELEN, so copy the name together with its NUL terminator */ + memcpy(setname, args, len + 1); + + return EDPVS_OK; +} + static int cls_parse(struct dpip_obj *obj, struct dpip_conf *cf) { struct tc_conf *conf = obj->param; @@ -136,8 +178,10 @@ static int cls_parse(struct dpip_obj *obj, struct dpip_conf *cf) } else if (strcmp(CURRARG(cf), "prio") == 0) { NEXTARG_CHECK(cf, CURRARG(cf)); param->priority = atoi(CURRARG(cf)); - } else if (strcmp(CURRARG(cf), "match") == 0) { + } else if ((strcmp(CURRARG(cf), "match") == 0) && (!param->kind[0])) { snprintf(param->kind, TCNAMESIZ, "%s", "match"); + } else if (strcmp(CURRARG(cf), "ipset") == 0) { + snprintf(param->kind, TCNAMESIZ, "%s", "ipset"); } else { /* kind must be set adead then COPTIONS */ if (strcmp(param->kind, "match") == 0) { struct tc_cls_match_copt *m = &param->copt.match; @@ -156,6 +200,21 @@ static int cls_parse(struct dpip_obj *obj, struct dpip_conf *cf) else m->result.sch_id = tc_handle_atoi(CURRARG(cf)); } + } else if (strcmp(param->kind, 
"ipset") == 0) { + struct tc_cls_ipset_copt *set = ¶m->copt.set; + if (strcmp(CURRARG(cf), "match") == 0) { + NEXTARG_CHECK(cf, CURRARG(cf)); + if (parse_cls_ipset(CURRARG(cf), set->setname, &set->dst_match) != EDPVS_OK) { + fprintf(stderr, "invalid ipset match: %s\n", CURRARG(cf)); + return EDPVS_INVAL; + } + } else if (strcmp(CURRARG(cf), "target") == 0) { + NEXTARG_CHECK(cf, CURRARG(cf)); + if (strcmp(CURRARG(cf), "drop") == 0) + set->result.drop = true; + else + set->result.sch_id = tc_handle_atoi(CURRARG(cf)); + } } else { fprintf(stderr, "invalid/miss cls type: '%s'\n", param->kind); return EDPVS_INVAL; @@ -192,6 +251,8 @@ static int cls_check(const struct dpip_obj *obj, dpip_cmd_t cmd) fprintf(stderr, "invalid match pattern.\n"); return EDPVS_INVAL; } + } else if (strcmp(param->kind, "ipset") == 0) { + // TODO: check the existence of ipset? } else { fprintf(stderr, "invalid cls kind.\n"); return EDPVS_INVAL; @@ -212,6 +273,8 @@ static int cls_check(const struct dpip_obj *obj, dpip_cmd_t cmd) fprintf(stderr, "invalid match pattern.\n"); return EDPVS_INVAL; } + } else if (strcmp(param->kind, "ipset") == 0) { + // TODO: check the existence of ipset? 
} else { fprintf(stderr, "invalid cls kind.\n"); return EDPVS_INVAL; diff --git a/tools/dpip/dpip.c b/tools/dpip/dpip.c index bd16fee3d..754ec2aa4 100644 --- a/tools/dpip/dpip.c +++ b/tools/dpip/dpip.c @@ -33,9 +33,10 @@ static void usage(void) "Usage:\n" " "DPIP_NAME" [OPTIONS] OBJECT { COMMAND | help }\n" "Parameters:\n" - " OBJECT := { link | addr | route | neigh | vlan | tunnel |\n" - " qsch | cls | ipv6 | iftraf | eal-mem }\n" - " COMMAND := { add | del | change | replace | show | flush | enable | disable }\n" + " OBJECT := { link | addr | route | neigh | vlan | tunnel | qsch | cls |\n" + " ipv6 | iftraf | eal-mem | ipset | flow }\n" + " COMMAND := { create | destroy | add | del | show (list) | set (change) |\n" + " replace | flush | test | enable | disable }\n" "Options:\n" " -v, --verbose\n" " -h, --help\n" @@ -44,6 +45,7 @@ static void usage(void) " -6, --family=inet6\n" " -s, --stats, statistics\n" " -C, --color\n" + " -F, --force\n" ); } @@ -62,6 +64,7 @@ static struct dpip_obj *dpip_obj_get(const char *name) static int parse_args(int argc, char *argv[], struct dpip_conf *conf) { int opt; + bool show_usage = false; struct dpip_obj *obj; struct option opts[] = { {"verbose", no_argument, NULL, 'v'}, @@ -73,6 +76,7 @@ static int parse_args(int argc, char *argv[], struct dpip_conf *conf) {"color", no_argument, NULL, 'C'}, {"interval", required_argument, NULL, 'i'}, {"count", required_argument, NULL, 'c'}, + {"force", no_argument, NULL, 'F'}, {NULL, 0, NULL, 0}, }; @@ -84,14 +88,14 @@ static int parse_args(int argc, char *argv[], struct dpip_conf *conf) exit(0); } - while ((opt = getopt_long(argc, argv, "vhV46f:si:c:C", opts, NULL)) != -1) { + while ((opt = getopt_long(argc, argv, "vhV46f:si:c:CDF", opts, NULL)) != -1) { switch (opt) { case 'v': conf->verbose = 1; break; case 'h': - usage(); - exit(0); + show_usage = true; + break; case 'V': printf(DPIP_NAME"-"DPIP_VERSION"\n"); exit(0); @@ -121,7 +125,10 @@ static int parse_args(int argc, char *argv[], struct 
dpip_conf *conf) conf->count = atoi(optarg); break; case 'C': - conf->color = true; + conf->color = true; + break; + case 'F': + conf->force = true; break; case '?': default: @@ -143,7 +150,7 @@ static int parse_args(int argc, char *argv[], struct dpip_conf *conf) argv += optind; conf->obj = argv[0]; - if (argc < 2) { + if (argc < 2 || show_usage) { obj = dpip_obj_get(conf->obj); if (obj && obj->help) obj->help(); @@ -152,11 +159,17 @@ static int parse_args(int argc, char *argv[], struct dpip_conf *conf) exit(1); } - if (strcmp(argv[1], "add") == 0 || - strcmp(argv[1], "enable") == 0) + if (strcmp(argv[1], "create") == 0) + conf->cmd = DPIP_CMD_CREATE; + else if (strcmp(argv[1], "destroy") == 0) + conf->cmd = DPIP_CMD_DESTROY; + else if (strcmp(argv[1], "enable") == 0) + conf->cmd = DPIP_CMD_ENABLE; + else if (strcmp(argv[1], "disable") == 0) + conf->cmd = DPIP_CMD_DISABLE; + else if (strcmp(argv[1], "add") == 0) conf->cmd = DPIP_CMD_ADD; - else if (strcmp(argv[1], "del") == 0 || - strcmp(argv[1], "disable") == 0) + else if (strcmp(argv[1], "del") == 0) conf->cmd = DPIP_CMD_DEL; else if (strcmp(argv[1], "set") == 0 || strcmp(argv[1], "change") == 0) @@ -168,6 +181,8 @@ static int parse_args(int argc, char *argv[], struct dpip_conf *conf) conf->cmd = DPIP_CMD_REPLACE; else if (strcmp(argv[1], "flush") == 0) conf->cmd = DPIP_CMD_FLUSH; + else if (strcmp(argv[1], "test") == 0) + conf->cmd = DPIP_CMD_TEST; else if (strcmp(argv[1], "help") == 0) conf->cmd = DPIP_CMD_HELP; else { diff --git a/tools/dpip/dpip.h b/tools/dpip/dpip.h index dac51a5cf..c507540de 100644 --- a/tools/dpip/dpip.h +++ b/tools/dpip/dpip.h @@ -21,9 +21,15 @@ #include "utils.h" #define DPIP_NAME "dpip" +#ifndef DPIP_VERSION #define DPIP_VERSION "v1.0.0" +#endif typedef enum dpip_cmd_e { + DPIP_CMD_ENABLE, + DPIP_CMD_DISABLE, + DPIP_CMD_CREATE, + DPIP_CMD_DESTROY, DPIP_CMD_ADD, DPIP_CMD_DEL, DPIP_CMD_SET, @@ -31,6 +37,7 @@ typedef enum dpip_cmd_e { DPIP_CMD_REPLACE, DPIP_CMD_FLUSH, DPIP_CMD_HELP, + 
DPIP_CMD_TEST, } dpip_cmd_t; struct dpip_conf { @@ -40,6 +47,7 @@ struct dpip_conf { int interval; int count; bool color; + bool force; char *obj; dpip_cmd_t cmd; int argc; diff --git a/tools/dpip/flow.c b/tools/dpip/flow.c new file mode 100644 index 000000000..0c961be09 --- /dev/null +++ b/tools/dpip/flow.c @@ -0,0 +1,176 @@ +/* + * DPVS is a software load balancer (Virtual Server) based on DPDK. + * + * Copyright (C) 2021 iQIYI (www.iqiyi.com). + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ +#include "dpip.h" +#include "sockopt.h" +#include "conf/kni.h" +#include "conf/sockopts.h" + +enum dpip_flow_type { + DPIP_FLOW_TYPE_KNI = 101, +}; + +struct dpip_flow_param { + enum dpip_flow_type type; + union { + struct kni_conf_param kni; + } flow; +}; + +static void flow_help(void) +{ + fprintf(stderr, + "Usage:\n" + " dpip flow { add | del } type FLOWTYPE OPTS dev STRING\n" + " dpip flow { show | flush } type FLOWTYPE dev STRING\n" + "Parameters:\n" + " FLOWTYPE := { kni }\n" + " OPTS := { KNI_IP_ADDRESS }\n" + "Examples:\n" + " dpip flow add type kni dev dpdk0 192.168.88.12\n" + " dpip flow get type kni dev dpdk0\n" + ); +} + +static int kni_flow_parse_args(struct dpip_conf *conf, struct dpip_flow_param *param) +{ + param->flow.kni.type = KNI_DTYPE_ADDR_FLOW; + while (conf->argc > 0) { + if (strcmp(conf->argv[0], "dev") == 0) { + NEXTARG_CHECK(conf, "dev"); + snprintf(param->flow.kni.ifname, sizeof(param->flow.kni.ifname), + "%s", conf->argv[0]); + } else { + if 
(inet_pton_try(¶m->flow.kni.data.flow.af, conf->argv[0], + ¶m->flow.kni.data.flow.addr) <= 0) + return EDPVS_INVAL; + } + NEXTARG(conf); + } + + if (conf->argc > 0) { + fprintf(stderr, "too many arguments\n"); + return EDPVS_INVAL; + } + + return EDPVS_OK; +} + +static int flow_parse_args(struct dpip_conf *conf, struct dpip_flow_param *param) +{ + memset(param, 0, sizeof(struct dpip_flow_param)); + + if ((conf->argc > 1) && (strcmp(conf->argv[0], "type") == 0)) { + NEXTARG_CHECK(conf, "type"); + if (strcmp(conf->argv[0], "kni") == 0) { + param->type = DPIP_FLOW_TYPE_KNI; + NEXTARG(conf); + } + } + + if (!param->type) { + fprintf(stderr, "missing flow type\n"); + return EDPVS_INVAL; + } + + switch (param->type) { + case DPIP_FLOW_TYPE_KNI: + return kni_flow_parse_args(conf, param); + default: + return EDPVS_NOTSUPP; + } + + return EDPVS_NOTSUPP; +} + +static int kni_flow_do_cmd(dpip_cmd_t cmd, struct dpip_conf *conf, + struct kni_conf_param *param) +{ + int i, err = EDPVS_OK; + struct kni_info *info; + size_t outlen; + char buf[64]; + + switch (conf->cmd) { + case DPIP_CMD_ADD: + return dpvs_setsockopt(SOCKOPT_SET_KNI_ADD, param, sizeof(*param)); + case DPIP_CMD_DEL: + return dpvs_setsockopt(SOCKOPT_SET_KNI_DEL, param, sizeof(*param)); + case DPIP_CMD_FLUSH: + return dpvs_setsockopt(SOCKOPT_SET_KNI_FLUSH, param, sizeof(*param)); + case DPIP_CMD_SHOW: + err = dpvs_getsockopt(SOCKOPT_GET_KNI_LIST, param, sizeof(*param), + (void **)&info, &outlen); + break; + default: + return EDPVS_NOTSUPP; + } + + // Only SOCKOPT_GET_KNI_LIST arrives here + if (err != EDPVS_OK) + return err; + if (outlen < sizeof(*info) || outlen < sizeof(*info) + + info->len * sizeof(struct kni_addr_flow_entry)) { + fprintf(stderr, "corrupted response\n"); + dpvs_sockopt_msg_free(info); + return EDPVS_INVAL; + } + + for (i = 0; i < info->len; i++) { + if (info->entries[i].type != KNI_DTYPE_ADDR_FLOW) { + fprintf(stderr, "unexpectd kni data type %d\n", info->entries[i].type); + continue; + } + 
printf("kni addr flow %s dev %s\n", inet_ntop(info->entries[i].data.flow.af, + &info->entries[i].data.flow.addr, buf, sizeof(buf)), info->entries[i].ifname); + } + + dpvs_sockopt_msg_free(info); + return EDPVS_OK; +} + +static int flow_do_cmd(struct dpip_obj *obj, dpip_cmd_t cmd, + struct dpip_conf *conf) +{ + struct dpip_flow_param param; + + if (flow_parse_args(conf, ¶m) != EDPVS_OK) + return EDPVS_INVAL; + + switch (param.type) { + case DPIP_FLOW_TYPE_KNI: + return kni_flow_do_cmd(cmd, conf, ¶m.flow.kni); + default: + return EDPVS_NOTSUPP; + } +} + +struct dpip_obj dpip_flow = { + .name = "flow", + .help = flow_help, + .do_cmd = flow_do_cmd, +}; + +static void __init addr_init(void) +{ + dpip_register_obj(&dpip_flow); +} + +static void __exit addr_exit(void) +{ + dpip_unregister_obj(&dpip_flow); +} diff --git a/tools/dpip/iftraf.c b/tools/dpip/iftraf.c index 9d97bf5c1..f89cf8497 100644 --- a/tools/dpip/iftraf.c +++ b/tools/dpip/iftraf.c @@ -95,9 +95,9 @@ static int iftraf_do_cmd(struct dpip_obj *obj, dpip_cmd_t cmd, return EDPVS_INVAL; switch (conf->cmd) { - case DPIP_CMD_ADD: + case DPIP_CMD_ENABLE: return dpvs_setsockopt(SOCKOPT_SET_IFTRAF_ADD, &iftraf_param, sizeof(iftraf_param)); - case DPIP_CMD_DEL: + case DPIP_CMD_DISABLE: return dpvs_setsockopt(SOCKOPT_SET_IFTRAF_DEL, &iftraf_param, sizeof(iftraf_param)); case DPIP_CMD_SHOW: err = dpvs_getsockopt(SOCKOPT_GET_IFTRAF_SHOW, &iftraf_conf, sizeof(iftraf_conf), diff --git a/tools/dpip/ipset.c b/tools/dpip/ipset.c index d8af048eb..baeee9257 100644 --- a/tools/dpip/ipset.c +++ b/tools/dpip/ipset.c @@ -15,149 +15,1506 @@ * GNU General Public License for more details. 
* */ -#include -#include -#include "conf/common.h" #include "dpip.h" #include "conf/ipset.h" -#include "linux_ipv6.h" #include "sockopt.h" -static void ipset_help(void) +#define HEADER_LEN 1024 +#define MEMBER_LEN 1024 + +typedef int (*sort_compare_func) + (int af, const struct ipset_member *m1, const struct ipset_member *m2); + +struct ipset_type { + char *name; + int (* parse)(char *arg); + int (* check)(void); + void (* dump_header)(char *buf, struct ipset_info *info); + int (* dump_member)(char *buf, struct ipset_member *m, int af); + sort_compare_func sort_compare; +}; + +// All supported ipset types +#define MAX_TYPE_NUM 64 +struct ipset_type types[MAX_TYPE_NUM]; + +static struct ipset_param param; + +static char *query_str; + +/* True when all 128 bits of the address are zero. */ +static inline bool +ipv6_addr_any(const struct in6_addr *a) +{ + return !(a->s6_addr32[0] | a->s6_addr32[1] | + a->s6_addr32[2] | a->s6_addr32[3]); +} + +/* + * True only when all four 32-bit words of a1 and a2 match. The per-word + * XOR differences are OR-ed together, so a mismatch in any single word + * makes the addresses unequal. + */ +static inline bool +ipv6_addr_equal(const struct in6_addr *a1, const struct in6_addr *a2) +{ + return !((a1->s6_addr32[0] ^ a2->s6_addr32[0]) | + (a1->s6_addr32[1] ^ a2->s6_addr32[1]) | + (a1->s6_addr32[2] ^ a2->s6_addr32[2]) | + (a1->s6_addr32[3] ^ a2->s6_addr32[3])); +} + +/* + * Non-zero when all six bytes of the MAC address are zero. Reads byte by + * byte, so the mac pointer needs no particular alignment. + */ +static inline int +is_zero_mac_addr(const uint8_t *mac) +{ + return !(mac[0] | mac[1] | mac[2] | mac[3] | mac[4] | mac[5]); +} + +static int +types_string(char buf[], size_t bufsiz, int tokens_per_line, const char *prompt) { - fprintf(stderr, + int i, j; + int indent, typelen, linelen, totallen; + + indent = strlen(prompt); + if (tokens_per_line < 2 * indent || bufsiz < tokens_per_line) + return EDPVS_INVAL; + linelen = indent; + totallen = snprintf(buf, bufsiz, "%s", prompt); + if (totallen >= bufsiz) + return EDPVS_NOMEM; + + for (i = 0; i < NELEMS(types); i++) { + if (!types[i].name) + break; + typelen = snprintf(&buf[totallen], bufsiz - totallen - 1, + i > 0 ? 
" | %s" : "%s", types[i].name); + totallen += typelen; + if (totallen >= bufsiz) + return EDPVS_NOMEM; + linelen += typelen; + if (linelen < tokens_per_line) + continue; + + if (totallen + indent + 1 >= bufsiz) + return EDPVS_NOMEM; + buf[totallen++] = '\n'; + for (j = 0; j < indent; j++) + buf[totallen++] = ' '; + linelen = indent; + } + snprintf(&buf[totallen], bufsiz - totallen - 1, "%s", " }"); + + return EDPVS_OK; +} + +static void +ipset_help(void) +{ + char type_names[1024]; + if (types_string(type_names, sizeof(type_names), 80, " TYPE := { ") != EDPVS_OK) + fprintf(stderr, "Warn: Failed to get all ipset types."); + fprintf(stderr, "Usage:\n" - " dpip gfwip { add | del } IPs\n" - " dpip gfwip show\n" - " dpip gfwip flush\n" + " dpip ipset create SETNAME TYPE [ OPTIONS ]\n" + " dpip ipset destroy SETNAME\n" + " dpip ipset { add | del | test } SETNAME ENTRY [ ADTOPTS ]\n" + " dpip ipset { show | flush } [ SETNAME ]\n" + "Parameters:\n" + "%s\n" + " ENTRY := combinations of one or more comma seperated tokens below,\n" + " { { IP | NET } | PORT | MAC | IFACE }\n" + " IP := ipv4 or ipv6 string literal\n" + " NET := \"{ IP/prefix | IP(range from)[-IP(range end)] }\"\n" + " MAC := 6 bytes MAC address string literal\n" + " PORT := \"[{ tcp | udp | icmp | icmp6 }:]port1[-port2]\"\n" + " OPTIONS := { comment | range NET | hashsize NUM | maxelem NUM }\n" + " ADTOPTS := { comment STRING | unmatch (for add only) }\n" + " flag := { -F(--force) | { -4 | -6 } | -v }\n" + "Examples:\n" + " dpip ipset create foo bitmap:ip range 192.168.0.0/16 comment\n" + " dpip ipset add foo 192.168.0.1-192.168.0.5 comment \"test entry\"\n" + " dpip ipset show foo\n" + " dpip ipset flush foo\n" + " dpip ipset destroy foo\n" + " dpip -6 ipset create bar hash:net,port,iface hashsize 300 maxelem 1000\n" + " dpip ipset add bar 2001:beef::/64,udp:100,dpdk0\n" + " dpip -v ipset test bar 2001:beef::abcd,udp:100,dpdk0\n" + " dpip ipset del bar 2001:beef::/64,udp:100,dpdk0\n" + " dpip ipset destroy 
bar\n", type_names ); } -static int ipset_parse_args(struct dpip_conf *conf, struct dp_vs_multi_ipset_conf **ips_conf, int *ips_size) +/* ========================== parse =========================== */ +/* { ip1-ip2 | ip/cidr } */ +static int +addr_arg_parse(char *arg, struct inet_addr_range *range, uint8_t *cidr) { - char *ipaddr = NULL; - int ipset_size; - int index = 0; - struct dp_vs_multi_ipset_conf *ips; + char *ip1, *ip2, *sep; + int *af = ¶m.option.family; - if (conf->cmd == DPIP_CMD_FLUSH || conf->cmd == DPIP_CMD_SHOW) { - if (conf->argc != 0) - return -1; - else - return 0; + /* ip/cidr */ + if (cidr && (sep = strstr(arg, "/"))) { + *sep++ = '\0'; + *cidr = atoi(sep); + + if (inet_pton(AF_INET6, arg, &range->min_addr.in6) <= 0) { + if (inet_pton(AF_INET, arg, &range->min_addr.in) <= 0) + return EDPVS_INVAL; + *af = AF_INET; + } else { + *af = AF_INET6; + } + + range->max_addr = range->min_addr; + return EDPVS_OK; } - - if (conf->argc <= 0) { - fprintf(stderr, "no arguments\n"); - return -1; + + /* ip1-ip2 */ + ip1 = arg; + ip2 = strrchr(arg, '-'); + if (ip2) + *ip2++ = '\0'; + if (strlen(ip1) && inet_pton(AF_INET6, ip1, &range->min_addr.in6) > 0) { + if (ip2 && strlen(ip2)) { + if (inet_pton(AF_INET6, ip2, &range->max_addr.in6) <= 0) + return EDPVS_INVAL; + } else { + range->max_addr = range->min_addr; + } + *af = AF_INET6; + } else { + if (strlen(ip1) && inet_pton(AF_INET, ip1, &range->min_addr.in) <= 0) + return EDPVS_INVAL; + if (ip2 && strlen(ip2)) { + if (inet_pton(AF_INET, ip2, &range->max_addr.in) <= 0) + return EDPVS_INVAL; + if (ntohl(range->max_addr.in.s_addr) < ntohl(range->min_addr.in.s_addr)) + range->max_addr = range->min_addr; + } else { + range->max_addr = range->min_addr; + } + *af = AF_INET; } - ipset_size = sizeof(struct dp_vs_multi_ipset_conf) + conf->argc*sizeof(struct dp_vs_ipset_conf); - *ips_conf = malloc(ipset_size); - if (*ips_conf == NULL) { - fprintf(stderr, "no memory\n"); - return -1; - } - memset(*ips_conf, 0, ipset_size); - 
ips = *ips_conf; - - ips->num = conf->argc; + return EDPVS_OK; +} + +/* + * Parse "[ {tcp | udp | icmp | icmp6 }: ]port1[-port2]". + * Stores the protocol in param.proto (0 when no known prefix is present) and + * the port bounds in range->min_port / range->max_port. arg is modified in + * place (':' and '-' are replaced by NUL). Returns EDPVS_OK, or EDPVS_INVAL + * when a port is outside 0..65535 or port2 is smaller than port1. + */ +static int +port_arg_parse(char *arg, struct inet_addr_range *range) +{ + char *proto = arg, *sep, *port1, *port2; + int portval; + + if (!strncmp(proto, "tcp", 3)) + param.proto = IPPROTO_TCP; + else if (!strncmp(proto, "udp", 3)) + param.proto = IPPROTO_UDP; + /* test "icmp6" before "icmp": the shorter prefix also matches "icmp6" */ + else if (!strncmp(proto, "icmp6", 5)) + param.proto = IPPROTO_ICMPV6; + else if (!strncmp(proto, "icmp", 4)) + param.proto = IPPROTO_ICMP; + else + param.proto = 0; + + if ((sep = strchr(arg, ':')) != NULL) { + *sep++ = '\0'; + arg = sep; + } + + port1 = arg; + portval = atoi(port1); + if (portval < 0 || portval > 65535) + return EDPVS_INVAL; + range->max_port = range->min_port = portval; + + sep = strchr(arg, '-'); + if (sep) { + *sep++ = '\0'; + port2 = sep; + portval = atoi(port2); + if (portval < range->min_port || portval > 65535) + return EDPVS_INVAL; + range->max_port = portval; + } + + return EDPVS_OK; +} + +/* option parse */ +static inline int +create_opt_parse(struct dpip_conf *conf) +{ + struct ipset_option *opt = &param.option; + opt->family = (conf->af == AF_INET6) ? 
AF_INET6 : AF_INET; + while (conf->argc > 0) { - ipaddr = conf->argv[0]; - ips->ipset_conf[index].af = AF_INET; - if (inet_pton_try(&conf->af, ipaddr, &ips->ipset_conf[index].addr) <= 0) - { - fprintf(stderr, "bad IP\n"); - free(ips); - return -1; + /* bitmap type MUST specify range */ + if (!strcmp(CURRARG(conf), "range")) { + NEXTARG_CHECK(conf, CURRARG(conf)); + if (strstr(param.type, "ip")) { + if (addr_arg_parse(CURRARG(conf), ¶m.range, ¶m.cidr) < 0) + return EDPVS_INVAL; + } else if (strstr(param.type, "port")) + if (port_arg_parse(CURRARG(conf), ¶m.range) < 0) + return EDPVS_INVAL; + } else if (!strcmp(CURRARG(conf), "comment")) { + opt->create.comment = true; + } else if (!strcmp(CURRARG(conf), "hashsize")) { + NEXTARG_CHECK(conf, CURRARG(conf)); + opt->create.hashsize = atoi(CURRARG(conf)); + } else if (!strcmp(CURRARG(conf), "maxelem")) { + NEXTARG_CHECK(conf, CURRARG(conf)); + opt->create.maxelem = atoi(CURRARG(conf)); + } else { + return EDPVS_NOTSUPP; } - index++; NEXTARG(conf); } + return EDPVS_OK; +} - if (conf->argc > 0) { - fprintf(stderr, "too many arguments\n"); - free(ips); - return -1; +static inline int +add_del_opt_parse(struct dpip_conf *conf) +{ + while(conf->argc > 0) { + if (strcmp(CURRARG(conf), "comment") == 0) { + NEXTARG_CHECK(conf, CURRARG(conf)); + strncpy(param.comment, CURRARG(conf), IPSET_MAXCOMLEN); + } else if (strcmp(CURRARG(conf), "nomatch") == 0) { + param.option.add.nomatch = true; + }else { + return EDPVS_NOTSUPP; + } + NEXTARG(conf); } - *ips_size = ipset_size; - return 0; + + if (conf->force) + param.flag |= IPSET_F_FORCE; + + return EDPVS_OK; } -static int ipset_dump(const struct dp_vs_ipset_conf *ipconf) +static int +net_parse(char *arg) { - char ip[64]; - printf("%s\n", inet_ntop(ipconf->af, &ipconf->addr, ip, sizeof(ip))? 
ip: ""); - return 0; + return addr_arg_parse(arg, &param.range, &param.cidr); +} + +/* + * Split params in place on ',' and store up to maxsegs token pointers in + * segs[]. The token count is written to *segnum when segnum is non-NULL. + * Returns EDPVS_INVAL as soon as more than maxsegs tokens are found, without + * ever writing past segs[maxsegs - 1]; EDPVS_OK otherwise. + */ +static inline int +seg_parse(char *params, int maxsegs, int *segnum, char **segs) +{ + int i = 0; + char *start, *sp, *arg; + + for (start = params; (arg = strtok_r(start, ",", &sp)); start = NULL) { + /* check the bound before storing: segs[] has only maxsegs slots */ + if (i >= maxsegs) + return EDPVS_INVAL; + segs[i++] = arg; + } + + if (segnum) + *segnum = i; + + return EDPVS_OK; +} + +/* ip, mac */ +static int +ipmac_parse(char *arg) +{ + int i, segnum; + char *segs[2]; + unsigned int mac[6] = { 0 }; + + if (seg_parse(arg, 2, &segnum, segs) < 0) + return EDPVS_INVAL; + + /* an empty argument yields no tokens, leaving segs[0] unset */ + if (segnum < 1 || net_parse(segs[0]) < 0) + return EDPVS_INVAL; + + /* a MAC is valid only when all six bytes were converted */ + if (segnum > 1 && sscanf(segs[1], "%02X:%02X:%02X:%02X:%02X:%02X", + &mac[0], &mac[1], &mac[2], &mac[3], &mac[4], &mac[5]) != 6) + return EDPVS_INVAL; + + for (i = 0; i < 6; i++) + param.mac[i] = mac[i]; + + return EDPVS_OK; +} + +static int +port_parse(char *arg) +{ + if (port_arg_parse(arg, &param.range) < 0) + return EDPVS_INVAL; + + // bitmap:port supports protocol tcp, udp only + if (param.proto != IPPROTO_TCP && + param.proto != IPPROTO_UDP) { + fprintf(stderr, "bitmap:port should specified protocol tcp or udp\n"); + return EDPVS_INVAL; + } + + return EDPVS_OK; +} + +static int +netport_parse(char *arg) +{ + int segnum; + char *segs[2]; + + if (seg_parse(arg, 2, &segnum, segs) < 0) + return EDPVS_INVAL; + if (segnum != 2) + return EDPVS_INVAL; + + if (addr_arg_parse(segs[0], &param.range, &param.cidr) < 0) + return EDPVS_INVAL; + + if (port_arg_parse(segs[1], &param.range) < 0) + return EDPVS_INVAL; + + return EDPVS_OK; +} + +/* net, port, iface */ +static int +netportiface_parse(char *arg) +{ + int segnum; + char *segs[3]; + + if (seg_parse(arg, 3, &segnum, segs) < 0) + return EDPVS_INVAL; + if (segnum != 3) + return EDPVS_INVAL; + + if (addr_arg_parse(segs[0], &param.range, &param.cidr) < 0) + return EDPVS_INVAL; + + if (port_arg_parse(segs[1], &param.range) < 0) + return EDPVS_INVAL; + + /* param is zero-initialised static storage: copying at most IFNAMSIZ - 1 bytes keeps the name NUL-terminated */ + strncpy(param.iface, segs[2], IFNAMSIZ - 1); + + return EDPVS_OK; 
+} + +static int +ipport_parse(char *arg) +{ + int segnum; + char *segs[2]; + + if (seg_parse(arg, 2, &segnum, segs) < 0) + return EDPVS_INVAL; + + if (addr_arg_parse(segs[0], ¶m.range, ¶m.cidr) < 0) + return EDPVS_INVAL; + + if (segnum > 1 && port_arg_parse(segs[1], ¶m.range) < 0) + return EDPVS_INVAL; + + return EDPVS_OK; +} + +static int +ipportip_parse(char *arg) +{ + int segnum; + char *segs[3]; + + if (seg_parse(arg, 3, &segnum, segs) < 0) + return EDPVS_INVAL; + if (segnum != 3) + return EDPVS_INVAL; + + if (addr_arg_parse(segs[0], ¶m.range, ¶m.cidr) < 0) + return EDPVS_INVAL; + + if (port_arg_parse(segs[1], ¶m.range) < 0) + return EDPVS_INVAL; + + if (addr_arg_parse(segs[2], ¶m.range2, ¶m.cidr2) < 0) + return EDPVS_INVAL; + + return EDPVS_OK; } -static int ipset_do_cmd(struct dpip_obj *obj, dpip_cmd_t cmd, - struct dpip_conf *conf) +static int +ipportnet_parse(char *arg) { - struct dp_vs_multi_ipset_conf *ips_conf; - struct dp_vs_ipset_conf_array *array; - size_t size, i; - int ips_size, err; + int segnum; + char *segs[3]; + + if (seg_parse(arg, 3, &segnum, segs) < 0) + return EDPVS_INVAL; + if (segnum != 3) + return EDPVS_INVAL; - if ((ipset_parse_args(conf, &ips_conf, &ips_size)) != 0) + if (addr_arg_parse(segs[2], ¶m.range, ¶m.cidr) < 0) return EDPVS_INVAL; + if (port_arg_parse(segs[1], ¶m.range) < 0) + return EDPVS_INVAL; + + if (addr_arg_parse(segs[0], ¶m.range2, ¶m.cidr2) < 0) + return EDPVS_INVAL; + + return EDPVS_OK; +} + +static int +netportnetport_parse(char *arg) +{ + uint8_t proto; + int segnum; + char *segs[4]; + + if (seg_parse(arg, 4, &segnum, segs) < 0) + return EDPVS_INVAL; + if (segnum != 4) + return EDPVS_INVAL; + + if (addr_arg_parse(segs[0], ¶m.range, ¶m.cidr) < 0) + return EDPVS_INVAL; + + if (port_arg_parse(segs[1], ¶m.range) < 0) + return EDPVS_INVAL; + proto = param.proto; + + if (addr_arg_parse(segs[2], ¶m.range2, ¶m.cidr2) < 0) + return EDPVS_INVAL; + + if (port_arg_parse(segs[3], ¶m.range2) < 0) + return EDPVS_INVAL; + + if 
(param.proto != proto) { + fprintf(stderr, "Error: port protocol doesn't match\n"); + return EDPVS_INVAL; + } + + return EDPVS_OK; +} + +static int +get_info_array(struct ipset_info_array **array) +{ + size_t size; + + int err = dpvs_getsockopt(SOCKOPT_GET_IPSET_LIST, ¶m, sizeof(param), + (void **)array, &size); + if (err != 0) + return EDPVS_INVAL; + + if (size < 0) { + fprintf(stderr, "corrupted response.\n"); + dpvs_sockopt_msg_free(*array); + return EDPVS_INVAL; + } + + return EDPVS_OK; +} + +static inline int +get_type_idx_from_type(char *type) +{ + int i; + + for (i = 0; i < NELEMS(types); i++) { + if (!types[i].name) + break; + if (!strcmp(types[i].name, type)) + return i; + } + + return EDPVS_NOTSUPP; +} + +static int +get_type_idx_remote(void) +{ + static int tyidx = -1; + struct ipset_info *info; + struct ipset_info_array *array; + + if (tyidx >= 0) + return tyidx; + + if (get_info_array(&array) < 0) { + return EDPVS_NOTEXIST; + } + info = &array->infos[0]; + + tyidx = get_type_idx_from_type(info->type); + dpvs_sockopt_msg_free(array); + + return tyidx; +} + +static int +get_type_idx(void) +{ + if (param.opcode == IPSET_OP_CREATE) + return get_type_idx_from_type(param.type); + return get_type_idx_remote(); +} + +static int +ipset_parse(struct dpip_obj *obj, struct dpip_conf *conf) +{ + int type_idx; + switch (conf->cmd) { - case DPIP_CMD_ADD: - err = dpvs_setsockopt(SOCKOPT_SET_IPSET_ADD, ips_conf, ips_size); - free(ips_conf); - return err; - - case DPIP_CMD_DEL: - err = dpvs_setsockopt(SOCKOPT_SET_IPSET_DEL, ips_conf, ips_size); - free(ips_conf); - return err; - - case DPIP_CMD_FLUSH: - return dpvs_setsockopt(SOCKOPT_SET_IPSET_FLUSH, NULL, 0); - - case DPIP_CMD_SHOW: - err = dpvs_getsockopt(SOCKOPT_GET_IPSET_SHOW, NULL, 0, (void **)&array, &size); - if (err != 0) - return err; - - if (size < sizeof(*array) - || size != sizeof(*array) + \ - array->nipset * sizeof(struct dp_vs_ipset_conf)) { - fprintf(stderr, "corrupted response.\n"); - 
dpvs_sockopt_msg_free(array); + case DPIP_CMD_CREATE: + param.opcode = IPSET_OP_CREATE; + break; + case DPIP_CMD_DESTROY: + param.opcode = IPSET_OP_DESTROY; + break; + case DPIP_CMD_ADD: + param.opcode = IPSET_OP_ADD; + break; + case DPIP_CMD_DEL: + param.opcode = IPSET_OP_DEL; + break; + case DPIP_CMD_FLUSH: + param.opcode = IPSET_OP_FLUSH; + break; + case DPIP_CMD_SHOW: + param.opcode = IPSET_OP_LIST; + break; + case DPIP_CMD_TEST: + param.opcode = IPSET_OP_TEST; + break; + default: + param.opcode = IPSET_OP_MAX; + break; + } + + /* list all sets */ + if (conf->argc == 0) { + if (conf->cmd == DPIP_CMD_SHOW) + return EDPVS_OK; + return EDPVS_INVAL; + } + + /* operate on specific set; reject names that do not fit instead of overflowing param.name */ + if (snprintf(param.name, sizeof(param.name), "%s", CURRARG(conf)) >= (int)sizeof(param.name)) + return EDPVS_INVAL; + NEXTARG(conf); + switch (conf->cmd) { + case DPIP_CMD_FLUSH: + case DPIP_CMD_DESTROY: + case DPIP_CMD_SHOW: + if (conf->argc == 0) + return EDPVS_OK; + return EDPVS_INVAL; + case DPIP_CMD_CREATE: + if (conf->argc < 1) + return EDPVS_INVAL; + /* same bound check for the type name */ + if (snprintf(param.type, sizeof(param.type), "%s", CURRARG(conf)) >= (int)sizeof(param.type)) + return EDPVS_INVAL; + NEXTARG(conf); + if (create_opt_parse(conf) < 0) + return EDPVS_INVAL; + return EDPVS_OK; + case DPIP_CMD_ADD: + case DPIP_CMD_DEL: + case DPIP_CMD_TEST: + if ((conf->argc < 1)) + return EDPVS_INVAL; + type_idx = get_type_idx(); + if (type_idx < 0) + return EDPVS_INVAL; + if (conf->verbose) { + /* keep an untouched copy of the entry: the type parser below splits the argument in place */ + query_str = malloc(strlen(CURRARG(conf)) + 1); + if (!query_str) + return EDPVS_NOMEM; + strcpy(query_str, CURRARG(conf)); + } + /* type specific arg parsing */ + if (types[type_idx].parse && + (types[type_idx].parse(CURRARG(conf)) < 0)) + return EDPVS_INVAL; + if (conf->cmd == DPIP_CMD_TEST) + return EDPVS_OK; + NEXTARG(conf); + return add_del_opt_parse(conf); + default: + return EDPVS_INVAL; + } + return EDPVS_NOTSUPP; +} + +/* =========================== check ============================ */ + +static int +bitmap_check(void) +{ + if (param.option.family == AF_INET6) { + fprintf(stderr, "bitmap doesn't support ipv6\n"); + return EDPVS_NOTSUPP; + } + + if (param.opcode != IPSET_OP_CREATE) + return 
EDPVS_OK; + + if (strstr(param.type, "ip")) { + if (ntohl(param.range.min_addr.in.s_addr) > ntohl(param.range.max_addr.in.s_addr) || + param.range.max_addr.in.s_addr == 0) { + fprintf(stderr, "bitmap's IP range MUST be specified\n"); return EDPVS_INVAL; } - + } + if (strstr(param.type, "port")) { + if (param.range.min_port > param.range.max_port || + param.range.max_port == 0) { + fprintf(stderr, "bitmap's port range MUST be specified\n"); + return EDPVS_INVAL; + } + } - if (array->nipset) - printf("IPset gfwip has %d members:\n", array->nipset); - else - printf("IPset gfwip has no members.\n"); - - for (i = 0; i < array->nipset; i++) - ipset_dump(&array->ips[i]); + return EDPVS_OK; +} - dpvs_sockopt_msg_free(array); +static int +hash_ip_check(void) +{ + if (param.opcode != IPSET_OP_ADD && param.opcode != IPSET_OP_DEL) return EDPVS_OK; - default: - return EDPVS_NOTSUPP; + + if (param.option.family == AF_INET6) { + if (param.cidr || param.cidr2) { + fprintf(stderr, "ipv6 cidr is not supported by the set type\n"); + return EDPVS_INVAL; + } + } else if (param.option.family == AF_INET) { + if ((param.cidr > 0 && param.cidr < 16) || + (param.cidr2 > 0 && param.cidr2 < 16)) { + fprintf(stderr, "ipv4 cidr shouldn't be less than 16\n"); + return EDPVS_INVAL; + } + if (ntohl(param.range.max_addr.in.s_addr) != 0) { + if (ntohl(param.range.max_addr.in.s_addr) < + ntohl(param.range.min_addr.in.s_addr)) { + fprintf(stderr, "invalid ipv4 range\n"); + return EDPVS_INVAL; + } + if (ntohl(param.range.max_addr.in.s_addr) - + ntohl(param.range.min_addr.in.s_addr) > 65536) { + fprintf(stderr, "ip range shouldn't be greater than 65536\n"); + return EDPVS_INVAL; + } + } + if (ntohl(param.range2.max_addr.in.s_addr) != 0) { + if (ntohl(param.range2.max_addr.in.s_addr) < + ntohl(param.range2.min_addr.in.s_addr)) { + fprintf(stderr, "invalid ipv4 range\n"); + return EDPVS_INVAL; + } + if (ntohl(param.range2.max_addr.in.s_addr) - + ntohl(param.range2.min_addr.in.s_addr) > 65536) { + 
fprintf(stderr, "ip range shouldn't be greater than 65536\n"); + return EDPVS_INVAL; + } + } + } + + return EDPVS_OK; +} + +static int +hash_net_check(void) +{ + if (param.opcode != IPSET_OP_ADD && param.opcode != IPSET_OP_DEL) + return EDPVS_OK; + + if (param.option.family == AF_INET) { + if (ntohl(param.range.max_addr.in.s_addr) != 0) { + if (ntohl(param.range.max_addr.in.s_addr) < + ntohl(param.range.min_addr.in.s_addr)) { + fprintf(stderr, "invalid ipv4 range\n"); + return EDPVS_INVAL; + } + } + if (ntohl(param.range2.max_addr.in.s_addr) != 0) { + if (ntohl(param.range2.max_addr.in.s_addr) < + ntohl(param.range2.min_addr.in.s_addr)) { + fprintf(stderr, "invalid ipv4 range\n"); + return EDPVS_INVAL; + } + } } + + return EDPVS_OK; +} + +static int +hash_ipnet_check(void) +{ + if (param.opcode != IPSET_OP_ADD && param.opcode != IPSET_OP_DEL) + return EDPVS_OK; + + if (param.option.family == AF_INET6) { + if (param.cidr2) { + fprintf(stderr, "hash:ip,port doesn't support ipv6 cidr\n"); + return EDPVS_INVAL; + } + } else if (param.option.family == AF_INET) { + if (param.cidr > 0 && param.cidr < 16) { + fprintf(stderr, "ipv4 cidr shouldn't be less than 16\n"); + return EDPVS_INVAL; + } + if (ntohl(param.range.max_addr.in.s_addr) != 0) { + if (ntohl(param.range.max_addr.in.s_addr) < + ntohl(param.range.min_addr.in.s_addr)) { + fprintf(stderr, "invalid ipv4 range\n"); + return EDPVS_INVAL; + } + if (ntohl(param.range.max_addr.in.s_addr) - + ntohl(param.range.min_addr.in.s_addr) > 65536) { + fprintf(stderr, "ipv6 range shouldn't be greater than 65536\n"); + return EDPVS_INVAL; + } + } + } + + return EDPVS_OK; +} + +static int +ipset_check(const struct dpip_obj *obj, dpip_cmd_t cmd) +{ + int type_idx; + + if (param.opcode == IPSET_OP_TEST) { + if (param.cidr || param.cidr2) { + fprintf(stderr, "Warning: ignore cidr settings for ipset test\n"); + param.cidr = param.cidr2 = 0; + } + } + + if (param.option.family == AF_INET6) { + if ((!ipv6_addr_any(¶m.range.max_addr.in6) 
&& + !ipv6_addr_equal(¶m.range.min_addr.in6, ¶m.range.max_addr.in6)) || + (!ipv6_addr_any(¶m.range2.min_addr.in6) && + !(ipv6_addr_equal(¶m.range2.min_addr.in6, ¶m.range2.max_addr.in6)))) { + fprintf(stderr, "ipv6 range is not supported\n"); + return EDPVS_INVAL; + } + } + + type_idx = get_type_idx(); + if (type_idx < 0) { + if (param.opcode == IPSET_OP_LIST) + return EDPVS_OK; + return EDPVS_INVAL; + } + + /* type specific check */ + if (types[type_idx].check) + return types[type_idx].check(); + + return EDPVS_OK; +} + +/* =========================== dump ============================ */ + +static void +bitmap_dump_header(char *buf, struct ipset_info *info) +{ + char range[128]; + char addr[INET6_ADDRSTRLEN], addr2[INET6_ADDRSTRLEN]; + + if (info->bitmap.cidr) { + inet_ntop(AF_INET, &info->bitmap.range.min_addr, + addr, INET_ADDRSTRLEN); + sprintf(range, "%s/%d", addr, info->bitmap.cidr); + } else { + if (!strcmp(info->type, "bitmap:ip")) { + inet_ntop(AF_INET, &info->bitmap.range.min_addr, + addr, INET_ADDRSTRLEN); + inet_ntop(AF_INET, &info->bitmap.range.max_addr, + addr2, INET_ADDRSTRLEN); + sprintf(range, "%s-%s", addr, addr2); + } + if (!strcmp(info->type, "bitmap:port")) { + sprintf(range, "tcp|udp:%d-%d", info->bitmap.range.min_port, + info->bitmap.range.max_port); + } + } + sprintf(buf, "range %s %s", range, info->comment? "comment" : ""); +} + +static void +hash_dump_header(char *buf, struct ipset_info *info) +{ + sprintf(buf, "family %s hashsize %d maxelem %d %s", + info->af == AF_INET? "inet" : "inet6", + info->hash.hashsize, info->hash.maxelem, + info->comment? 
"comment" : ""); +} + +static inline int +dump_comment(char *buf, char *comment) +{ + int n; + + if (strlen(comment)) { + n = sprintf(buf, "comment \"%s\"\n", comment); + } else { + n = sprintf(buf, "\n"); + } + return n; +} + +static const char* +proto_string(uint8_t proto) +{ + switch(proto) { + case IPPROTO_TCP: + return "tcp"; + case IPPROTO_UDP: + return "udp"; + case IPPROTO_ICMP: + return "icmp"; + case IPPROTO_ICMPV6: + return "icmp6"; + default: + return "unspec"; + } +} + +static int +net_dump_member(char *buf, struct ipset_member *member, int af) +{ + int n = 0; + char addr[INET6_ADDRSTRLEN]; + + inet_ntop(af, &member->addr.in6, addr, INET6_ADDRSTRLEN); + if (member->cidr) { + n = sprintf(buf, "%s/%d ", addr, member->cidr); + } else { + n = sprintf(buf, "%s ", addr); + } + + if (member->nomatch) + n += sprintf(buf + n, "nomatch "); + n += sprintf(buf + n , " "); + + n += dump_comment(buf + n, member->comment); + + return n; +} + +static int +ipmac_dump_member(char *buf, struct ipset_member *member, int af) +{ + int n; + char addr[INET_ADDRSTRLEN]; + uint8_t *mac = member->mac; + + inet_ntop(AF_INET, &member->addr.in, addr, INET_ADDRSTRLEN); + if (!is_zero_mac_addr(mac)) { + n = sprintf(buf, "%s,%02X:%02X:%02X:%02X:%02X:%02X ", addr, + mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]); + } else { + n = sprintf(buf, "%s", addr); + } + + n += dump_comment(buf + n, member->comment); + + return n; +} + +static int +port_dump_member(char *buf, struct ipset_member *member, int af) +{ + int n; + + n = sprintf(buf, "%s:%d ", proto_string(member->proto), member->port); + + n += dump_comment(buf + n, member->comment); + + return n; +} + +static int +ipport_dump_member(char *buf, struct ipset_member *member, int af) +{ + int n; + char addr[INET6_ADDRSTRLEN]; + + inet_ntop(af, &member->addr, addr, INET6_ADDRSTRLEN); + + n = sprintf(buf, "%s,%s:%d ", addr, proto_string(member->proto), member->port); + + n += dump_comment(buf + n, member->comment); + + return n; +} + 
+static int +netport_dump_member(char *buf, struct ipset_member *member, int af) +{ + int n; + char addr[INET6_ADDRSTRLEN]; + + inet_ntop(af, &member->addr, addr, INET6_ADDRSTRLEN); + + n = sprintf(buf, "%s/%d,%s:%d ", addr, member->cidr, + proto_string(member->proto), member->port); + + if (member->nomatch) + n += sprintf(buf + n, "nomatch "); + n += sprintf(buf + n , " "); + + n += dump_comment(buf + n, member->comment); + + return n; +} + +static int +netportiface_dump_member(char *buf, struct ipset_member *member, int af) +{ + int n; + char addr[INET6_ADDRSTRLEN]; + + inet_ntop(af, &member->addr, addr, INET6_ADDRSTRLEN); + + n = sprintf(buf, "%s/%d,%s:%d,%s ", addr, member->cidr, + proto_string(member->proto), + member->port, member->iface); + + if (member->nomatch) + n += sprintf(buf + n, "nomatch "); + n += sprintf(buf + n , " "); + + n += dump_comment(buf + n, member->comment); + + return n; +} + +static int +ipportip_dump_member(char *buf, struct ipset_member *member, int af) +{ + int n; + char addr[INET6_ADDRSTRLEN], addr2[INET6_ADDRSTRLEN]; + + inet_ntop(af, &member->addr, addr, INET6_ADDRSTRLEN); + inet_ntop(af, &member->addr2, addr2, INET6_ADDRSTRLEN); + + n = sprintf(buf, "%s,%s:%d,%s ", addr, + proto_string(member->proto), member->port, addr2); + + n += dump_comment(buf + n, member->comment); + + return n; +} + +static int +ipportnet_dump_member(char *buf, struct ipset_member *member, int af) +{ + int n; + char addr[INET6_ADDRSTRLEN], addr2[INET6_ADDRSTRLEN]; + + inet_ntop(af, &member->addr, addr, INET6_ADDRSTRLEN); + inet_ntop(af, &member->addr2, addr2, INET6_ADDRSTRLEN); + + n = sprintf(buf, "%s,%s:%d,%s/%d ", addr2, + proto_string(member->proto), member->port, addr, member->cidr); + + if (member->nomatch) + n += sprintf(buf + n, "nomatch "); + n += sprintf(buf + n , " "); + + n += dump_comment(buf + n, member->comment); + + return n; +} + +static int +netportnet_dump_member(char *buf, struct ipset_member *member, int af) +{ + int n; + char 
addr[INET6_ADDRSTRLEN], addr2[INET6_ADDRSTRLEN]; + + inet_ntop(af, &member->addr, addr, INET6_ADDRSTRLEN); + inet_ntop(af, &member->addr2, addr2, INET6_ADDRSTRLEN); + + n = sprintf(buf, "%s/%d,%s:%d,%s/%d ", addr, member->cidr, + proto_string(member->proto), member->port, addr2, member->cidr2); + + if (member->nomatch) + n += sprintf(buf + n, "nomatch "); + n += sprintf(buf + n , " "); + + n += dump_comment(buf + n, member->comment); + + return n; +} + +static int +netportnetport_dump_member(char *buf, struct ipset_member *member, int af) +{ + int n; + char addr[INET6_ADDRSTRLEN], addr2[INET6_ADDRSTRLEN]; + + inet_ntop(af, &member->addr, addr, INET6_ADDRSTRLEN); + inet_ntop(af, &member->addr2, addr2, INET6_ADDRSTRLEN); + + n = sprintf(buf, "%s/%d,%s:%d,%s/%d,%s:%d ", addr, member->cidr, + proto_string(member->proto), member->port, + addr2, member->cidr2, + proto_string(member->proto), member->port2); + + if (member->nomatch) + n += sprintf(buf + n, "nomatch "); + n += sprintf(buf + n , " "); + + n += dump_comment(buf + n, member->comment); + + return n; +} + +static void +ipset_info_dump(struct ipset_info *info, bool sort) +{ + int i, type, n = 0; + struct ipset_member *member; + char header[HEADER_LEN], *members; + + type = get_type_idx(); + + /* header */ + types[type].dump_header(header, info); + + /* members */ + if (info->entries) + members = malloc(info->entries * MEMBER_LEN); + else + members = ""; + + if (sort && types[type].sort_compare) { + // sort the ipset + int i, j, min; + struct ipset_member swap; + struct ipset_member *members = (struct ipset_member*)info->members; + sort_compare_func sort_compare = types[type].sort_compare; + + for (i = 0; i < info->entries - 1; i++) { + min = i; + for (j = i + 1; j < info->entries; j++) { + if (sort_compare(info->af, &members[min], &members[j]) > 0) + min = j; + } + if (min != i) { + memcpy(&swap, &members[min], sizeof(struct ipset_member)); + memcpy(&members[min], &members[i], sizeof(struct ipset_member)); + 
memcpy(&members[i], &swap, sizeof(struct ipset_member)); + } + } + } + + member = info->members; + for (i = 0; i < info->entries; i++) { + n += types[type].dump_member(members + n, member, info->af); + member++; + } + + fprintf(stdout, + "Name: %s\n" + "Type: %s\n" + "Header: %s\n" + "Size in memory: %d\n" + "References: %d\n" + "Number of entries: %d\n" + "Members:\n%s", + info->name, info->type, header, (int)info->size, + info->references, info->entries, members); + + if (info->entries) + free(members); + + return; +} + +static void +ipset_sockopt_msg_dump(struct ipset_info_array *array, bool sort) +{ + int i; + void *ptr; + struct ipset_info *info; + + ptr = (void *)array + sizeof(*array) + array->nipset * sizeof(*info); + for (i = 0; i < array->nipset; i++) { + info = &array->infos[i]; + info->members = ptr; + + ipset_info_dump(info, sort); + fprintf(stdout, "\n"); + + ptr += info->entries * sizeof(struct ipset_member); + } +} + +static int +ipset_do_cmd(struct dpip_obj *obj, dpip_cmd_t cmd, struct dpip_conf *conf) +{ + int err; + + switch (cmd) { + case DPIP_CMD_CREATE: + case DPIP_CMD_DESTROY: + case DPIP_CMD_ADD: + case DPIP_CMD_DEL: + case DPIP_CMD_FLUSH: + return dpvs_setsockopt(SOCKOPT_SET_IPSET, ¶m, sizeof(param)); + case DPIP_CMD_TEST: + { + int *result; + size_t len; + err = dpvs_getsockopt(SOCKOPT_GET_IPSET_TEST, ¶m, sizeof(param), (void **)&result, &len); + if (err != EDPVS_OK || len != sizeof(*result) || *result < 0) { + fprintf(stderr, "set test failed\n"); + return err ? 
err : EDPVS_INVAL; + } + if (conf->verbose) { + if (*result) + fprintf(stdout, "%s is in set %s\n", query_str, param.name); + else + fprintf(stdout, "%s is NOT in set %s\n", query_str, param.name); + free(query_str); + } else { + if (*result) + fprintf(stdout, "true\n"); + else + fprintf(stdout, "false\n"); + } + dpvs_sockopt_msg_free(result); + return EDPVS_OK; + } + case DPIP_CMD_SHOW: + { + struct ipset_info_array *array; + if (get_info_array(&array) < 0) + return EDPVS_INVAL; + ipset_sockopt_msg_dump(array, !!conf->verbose); + dpvs_sockopt_msg_free(array); + return EDPVS_OK; + } + default: + return EDPVS_NOTSUPP; + } +} + +/* =========================== sort ============================ */ + +static int +cidr_compare(const uint8_t cidr1, const uint8_t cidr2) +{ + if (cidr1 > cidr2) + return 1; + if (cidr1 < cidr2) + return -1; + return 0; +} + +static int +ip_addr_compare(int af, const union inet_addr *addr1, const union inet_addr *addr2) +{ + if (af == AF_INET) { + if (ntohl(addr1->in.s_addr) > ntohl(addr2->in.s_addr)) + return 1; + if (ntohl(addr1->in.s_addr) < ntohl(addr2->in.s_addr)) + return -1; + return 0; + } + if (af == AF_INET6) { + int i; + for (i = 0; i < 16; i++) { + if (addr1->in6.s6_addr[i] > addr2->in6.s6_addr[i]) + return 1; + if (addr1->in6.s6_addr[i] < addr2->in6.s6_addr[i]) + return -1; + } + return 0; + } + return 0; +} + +static int +port_compare(const __be16 port1, const __be16 port2) +{ + if (port1 == port2) + return 0; + if (port1 < port2) + return -1; + return 1; +} + +static int +ip_sort_compare(int af, const struct ipset_member *m1, const struct ipset_member *m2) +{ + return ip_addr_compare(af, &m1->addr, &m2->addr); } +static int +net_sort_compare(int af, const struct ipset_member *m1, const struct ipset_member *m2) +{ + int res; + + res = cidr_compare(m1->cidr, m2->cidr); + if (res) + return -1 * res; + + return ip_addr_compare(af, &m1->addr, &m2->addr); +} + +static int +ipport_sort_compare(int af, const struct ipset_member *m1, 
const struct ipset_member *m2) +{ + int res; + + res = ip_sort_compare(af, m1, m2); + if (res) + return res; + + return port_compare(m1->port, m2->port); +} + +static int +ipportip_sort_compare(int af, const struct ipset_member *m1, const struct ipset_member *m2) +{ + int res; + + res = ip_addr_compare(af, &m1->addr, &m2->addr); + if (res) + return res; + + res = port_compare(m1->port, m2->port); + if (res) + return res; + + return ip_addr_compare(af, &m1->addr2, &m2->addr2); +} + +static int +netport_sort_compare(int af, const struct ipset_member *m1, const struct ipset_member *m2) +{ + int res; + + res = cidr_compare(m1->cidr, m2->cidr); + if (res) + return -1 * res; + + res = ip_addr_compare(af, &m1->addr, &m2->addr); + if (res) + return res; + + return port_compare(m1->port, m2->port); +} + +static int +netportiface_sort_compare(int af, const struct ipset_member *m1, const struct ipset_member *m2) +{ + int res; + + res = cidr_compare(m1->cidr, m2->cidr); + if (res) + return -1 * res; + + res = ip_addr_compare(af, &m1->addr, &m2->addr); + if (res) + return res; + + res = port_compare(m1->port, m2->port); + if (res) + return res; + + return strncmp(m1->iface, m2->iface, IFNAMSIZ); +} + +static int +ipportnet_sort_compare(int af, const struct ipset_member *m1, const struct ipset_member *m2) +{ + int res; + + res = ip_addr_compare(af, &m1->addr2, &m2->addr2); + if (res) + return res; + + res = cidr_compare(m1->cidr, m2->cidr); + if (res) + return -1 * res; + + res = ip_addr_compare(af, &m1->addr, &m2->addr); + if (res) + return res; + + return port_compare(m1->port, m2->port); +} + +static int +netportnet_sort_compare(int af, const struct ipset_member *m1, const struct ipset_member *m2) +{ + int res; + + res = cidr_compare(m1->cidr, m2->cidr); + if (res) + return -1 * res; + + res = cidr_compare(m1->cidr2, m2->cidr2); + if (res) + return -1 * res; + + res = ip_addr_compare(af, &m1->addr, &m2->addr); + if (res) + return res; + + res = ip_addr_compare(af, &m1->addr2, 
&m2->addr2); + if (res) + return res; + + return port_compare(m1->port, m2->port); +} + +static int +netportnetport_sort_compare(int af, const struct ipset_member *m1, const struct ipset_member *m2) +{ + int res; + + res = cidr_compare(m1->cidr, m2->cidr); + if (res) + return -1 * res; + + res = cidr_compare(m1->cidr2, m2->cidr2); + if (res) + return -1 * res; + + res = ip_addr_compare(af, &m1->addr, &m2->addr); + if (res) + return res; + + res = ip_addr_compare(af, &m1->addr2, &m2->addr2); + if (res) + return res; + + res = port_compare(m1->port, m2->port); + if (res) + return res; + + return port_compare(m1->port2, m2->port2); +} + +struct ipset_type types[MAX_TYPE_NUM] = { + { + .name = "bitmap:ip", + .parse = net_parse, + .check = bitmap_check, + .dump_header = bitmap_dump_header, + .dump_member = net_dump_member + }, + { + .name = "bitmap:ip,mac", + .parse = ipmac_parse, + .check = bitmap_check, + .dump_header = bitmap_dump_header, + .dump_member = ipmac_dump_member + }, + { + .name = "bitmap:port", + .parse = port_parse, + .check = bitmap_check, + .dump_header = bitmap_dump_header, + .dump_member = port_dump_member + }, + { + .name = "hash:ip", + .parse = net_parse, + .check = hash_ip_check, + .dump_header = hash_dump_header, + .dump_member = net_dump_member, + .sort_compare = ip_sort_compare + }, + { + .name = "hash:net", + .parse = net_parse, + .check = hash_net_check, + .dump_header = hash_dump_header, + .dump_member = net_dump_member, + .sort_compare = net_sort_compare + }, + { + .name = "hash:ip,port", + .parse = ipport_parse, + .check = hash_ip_check, + .dump_header = hash_dump_header, + .dump_member = ipport_dump_member, + .sort_compare = ipport_sort_compare + }, + { + .name = "hash:net,port", + .parse = netport_parse, + .check = hash_net_check, + .dump_header = hash_dump_header, + .dump_member = netport_dump_member, + .sort_compare = netport_sort_compare + }, + { + .name = "hash:net,port,iface", + .parse = netportiface_parse, + .check = 
hash_net_check, + .dump_header = hash_dump_header, + .dump_member = netportiface_dump_member, + .sort_compare = netportiface_sort_compare + }, + { + .name = "hash:ip,port,ip", + .parse = ipportip_parse, + .check = hash_ip_check, + .dump_header = hash_dump_header, + .dump_member = ipportip_dump_member, + .sort_compare = ipportip_sort_compare + }, + { + .name = "hash:ip,port,net", + .parse = ipportnet_parse, + .check = hash_ipnet_check, + .dump_header = hash_dump_header, + .dump_member = ipportnet_dump_member, + .sort_compare = ipportnet_sort_compare + }, + { + .name = "hash:net,port,net", + .parse = ipportip_parse, + .check = hash_net_check, + .dump_header = hash_dump_header, + .dump_member = netportnet_dump_member, + .sort_compare = netportnet_sort_compare + }, + { + .name = "hash:net,port,net,port", + .parse = netportnetport_parse, + .check = hash_net_check, + .dump_header = hash_dump_header, + .dump_member = netportnetport_dump_member, + .sort_compare = netportnetport_sort_compare + } +}; + struct dpip_obj dpip_ipset = { - .name = "gfwip", + .name = "ipset", + .param = ¶m, .help = ipset_help, + .parse = ipset_parse, + .check = ipset_check, .do_cmd = ipset_do_cmd, }; -static void __init ipset_init(void) +static void __init dpip_ipset_init(void) { dpip_register_obj(&dpip_ipset); -} +} -static void __exit ipset_exit(void) +static void __exit dpip_ipset_exit(void) { dpip_unregister_obj(&dpip_ipset); } - diff --git a/tools/ipvsadm/ipvsadm.c b/tools/ipvsadm/ipvsadm.c index 6f74782ce..5e98c546b 100644 --- a/tools/ipvsadm/ipvsadm.c +++ b/tools/ipvsadm/ipvsadm.c @@ -21,7 +21,7 @@ * Wensong Zhang : added the long options * Wensong Zhang : added the hostname and portname input * Wensong Zhang : added the hostname and portname output - * Lars Marowsky-Brée : added persistence granularity support + * Lars Marowsky-Brée : added persistence granularity support * Julian Anastasov : fixed the (null) print for unknown services * Wensong Zhang : added the port_to_anyname function * 
Horms : added option to read commands from stdin @@ -1379,6 +1379,8 @@ static int parse_match_snat(const char *buf, ipvs_service_t *svc) int r; bool range = false; bool af = false; + struct inet_addr_range ip_range; + int ip_af = 0; snprintf(params, sizeof(params), "%s", buf); @@ -1414,9 +1416,17 @@ static int parse_match_snat(const char *buf, ipvs_service_t *svc) } else if (strcmp(key, "src-range") == 0) { range = true; snprintf(svc->user.srange, sizeof(svc->user.srange), "%s", val); + if (svc->af == 0) { + inet_addr_range_parse(svc->user.srange, &ip_range, &ip_af); + svc->af = ip_af; + } } else if (strcmp(key, "dst-range") == 0) { range = true; snprintf(svc->user.drange, sizeof(svc->user.drange), "%s", val); + if (svc->af == 0) { + inet_addr_range_parse(svc->user.drange, &ip_range, &ip_af); + svc->af = ip_af; + } } else if (strcmp(key, "iif") == 0) { snprintf(svc->user.iifname, sizeof(svc->user.iifname), "%s", val); } else if (strcmp(key, "oif") == 0) { @@ -1709,7 +1719,7 @@ static void list_conn(int is_template, unsigned int format) for (i = 0; i < conn_array->nconns; i++) print_conn_entry(&conn_array->array[i], format); req.whence = conn_array->curcid; - more = conn_array->resl & GET_IPVS_CONN_FLAG_MORE; + more = conn_array->resl & GET_IPVS_CONN_RESL_MORE; free(conn_array); if (!more) break; diff --git a/tools/keepalived/keepalived/check/check_udp.c b/tools/keepalived/keepalived/check/check_udp.c index 5f1f3c6f6..16a7ec336 100644 --- a/tools/keepalived/keepalived/check/check_udp.c +++ b/tools/keepalived/keepalived/check/check_udp.c @@ -312,7 +312,7 @@ udp_check_thread(thread_ref_t thread) if (recv_buf) FREE(recv_buf); - return; + return 0; } static int @@ -331,7 +331,7 @@ udp_connect_thread(thread_ref_t thread) if (!checker->enabled) { thread_add_timer(thread->master, udp_connect_thread, checker, checker->delay_loop); - return; + return 0; } if ((fd = socket(co->dst.ss_family, SOCK_DGRAM | SOCK_CLOEXEC | SOCK_NONBLOCK, IPPROTO_UDP)) == -1) { @@ -339,7 +339,7 @@ 
udp_connect_thread(thread_ref_t thread) thread_add_timer(thread->master, udp_connect_thread, checker, checker->delay_loop); - return; + return 0; } status = udp_bind_connect(fd, co, udp_check->payload, udp_check->payload_len); @@ -350,7 +350,7 @@ udp_connect_thread(thread_ref_t thread) udp_epilog(thread, false); } - return; + return 0; } #ifdef THREAD_DUMP diff --git a/tools/keepalived/keepalived/check/ipvswrapper.c b/tools/keepalived/keepalived/check/ipvswrapper.c index 0f9d41327..e88296e3c 100755 --- a/tools/keepalived/keepalived/check/ipvswrapper.c +++ b/tools/keepalived/keepalived/check/ipvswrapper.c @@ -67,6 +67,9 @@ ipvs_get_laddr_group_by_name(char *gname, list l) element e; local_addr_group *laddr_group; + if (!gname) + return NULL; + LIST_FOREACH(l, laddr_group, e) { if (!strcmp(laddr_group->gname, gname)) return laddr_group; diff --git a/tools/keepalived/keepalived/check/ipwrapper.c b/tools/keepalived/keepalived/check/ipwrapper.c index 9ac45a284..915fb940b 100755 --- a/tools/keepalived/keepalived/check/ipwrapper.c +++ b/tools/keepalived/keepalived/check/ipwrapper.c @@ -309,6 +309,9 @@ clear_laddr_group(local_addr_group *laddr_group, virtual_server_t *vs) element e; local_addr_entry *laddr_entry; + if (!laddr_group) + return; + LIST_FOREACH(laddr_group->addr_ip, laddr_entry, e) { if (!ipvs_laddr_remove_entry(vs, laddr_entry)) return; @@ -323,9 +326,6 @@ clear_laddr_group(local_addr_group *laddr_group, virtual_server_t *vs) void clear_services(void) { - if (!check_data) - return; - element e; virtual_server_t *vs; local_addr_group *laddr_group; diff --git a/tools/keepalived/keepalived/check/libipvs.c b/tools/keepalived/keepalived/check/libipvs.c index a5105d9ec..02ec05c12 100644 --- a/tools/keepalived/keepalived/check/libipvs.c +++ b/tools/keepalived/keepalived/check/libipvs.c @@ -132,7 +132,7 @@ static void ipvs_service_entry_2_user(const ipvs_service_entry_t *entry, ipvs_se strcpy(rule->user.srange, entry->user.srange); strcpy(rule->user.drange, 
entry->user.drange); strcpy(rule->user.iifname, entry->user.iifname); - strcpy(rule->user.iifname, entry->user.iifname); + strcpy(rule->user.oifname, entry->user.oifname); } struct ip_vs_getinfo g_ipvs_info; @@ -690,7 +690,7 @@ ipvs_get_service(ipvs_service_t *hint, lcoreid_t cid) ipvs_service_entry_t *svc; size_t len, len_rcv; dpvs_service_entry_t dpvs_svc, *dpvs_svc_rcv; - struct dp_vs_service_user dpvs_app, *dpvs_app_ptr; + struct dp_vs_service_entry *dpvs_svc_entry; ipvs_func = ipvs_get_service; @@ -704,10 +704,8 @@ ipvs_get_service(ipvs_service_t *hint, lcoreid_t cid) len_rcv = sizeof(*dpvs_svc_rcv); memset(&dpvs_svc, 0, len); - dpvs_app_ptr = &dpvs_app; - memset(dpvs_app_ptr, 0, sizeof(dpvs_app)); - IPVS_2_DPVS(dpvs_app_ptr, hint); - memcpy(&dpvs_svc, dpvs_app_ptr, sizeof(dpvs_app)); + dpvs_svc_entry = &dpvs_svc.user; + IPVS_2_DPVS(dpvs_svc_entry, hint); dpvs_svc.user.cid = cid; if (dpvs_getsockopt(cpu2opt_svc(cid, DPVS_SO_GET_SERVICE), @@ -964,7 +962,7 @@ struct ip_vs_get_laddrs *ipvs_get_laddrs(ipvs_service_entry_t *svc, lcoreid_t ci snprintf(conf.srange, sizeof(conf.srange), "%s", svc->user.srange); snprintf(conf.drange, sizeof(conf.drange), "%s", svc->user.drange); snprintf(conf.iifname, sizeof(conf.iifname), "%s", svc->user.iifname); - snprintf(conf.iifname, sizeof(conf.oifname), "%s", svc->user.oifname); + snprintf(conf.oifname, sizeof(conf.oifname), "%s", svc->user.oifname); if (dpvs_getsockopt(cpu2opt_laddr(cid, SOCKOPT_GET_LADDR_GETALL), &conf, sizeof(conf), (void **)&result, &res_size) != 0) diff --git a/tools/keepalived/keepalived/include/ip_vs.h b/tools/keepalived/keepalived/include/ip_vs.h index e8a7e0bc2..90d8a6672 100644 --- a/tools/keepalived/keepalived/include/ip_vs.h +++ b/tools/keepalived/keepalived/include/ip_vs.h @@ -22,7 +22,7 @@ // ///////////////////////////////////////////////////////////////////////////////////////// -#define IP_VS_VERSION_CODE 0x010808 /* DPVS v1.8.8 */ +#define IP_VS_VERSION_CODE 0x010902 /* DPVS v1.9.2 */ #define 
NVERSION(version) \ (version >> 16) & 0xFF, \ (version >> 8) & 0xFF, \ @@ -85,6 +85,19 @@ #define IP_VS_SO_GET_LADDRS (IP_VS_BASE_CTL+8) #define IP_VS_SO_GET_MAX IP_VS_SO_GET_LADDRS +/* Tunnel types */ +enum { + IP_VS_CONN_F_TUNNEL_TYPE_IPIP = 0, /* IPIP */ + IP_VS_CONN_F_TUNNEL_TYPE_GUE, /* GUE */ + IP_VS_CONN_F_TUNNEL_TYPE_GRE, /* GRE */ + IP_VS_CONN_F_TUNNEL_TYPE_MAX, +}; + +/* Tunnel encapsulation flags */ +#define IP_VS_TUNNEL_ENCAP_FLAG_NOCSUM (0) +#define IP_VS_TUNNEL_ENCAP_FLAG_CSUM (1 << 0) +#define IP_VS_TUNNEL_ENCAP_FLAG_REMCSUM (1 << 1) + /* * The struct ip_vs_service_user and struct ip_vs_dest_user are * used to set IPVS rules through setsockopt. diff --git a/tools/keepalived/lib/notify.c b/tools/keepalived/lib/notify.c index 894a144ee..67f8cc3a6 100644 --- a/tools/keepalived/lib/notify.c +++ b/tools/keepalived/lib/notify.c @@ -462,6 +462,8 @@ script_killall(thread_master_t *m, int signo, bool requeue) rb_for_each_entry_cached(thread, &m->child, n) { c_pgid = getpgid(thread->u.c.pid); + if (c_pgid <= 0) + continue; if (c_pgid != p_pgid) kill(-c_pgid, signo); else {