From 8a8bf227a27178b1aa11739169abcf952d088ca6 Mon Sep 17 00:00:00 2001 From: ywc689 Date: Thu, 20 May 2021 14:29:25 +0800 Subject: [PATCH 01/41] IPVS: fix ipvs rr/wrr/wlc problem of uneven load distribution across dests. Different workers should start schedule algorith from the dests that are evenly distributed across the whole dest list. It can avoid the clustering of connections across dests on the early phase after the service setup, especially for such scheduling methods as rr/wrr/wlc. Signed-off-by: ywc689 --- include/ipvs/sched.h | 3 ++- src/ipvs/ip_vs_rr.c | 7 +++---- src/ipvs/ip_vs_sched.c | 23 +++++++++++++++++++++++ src/ipvs/ip_vs_wlc.c | 22 +++++++++++++++++----- src/ipvs/ip_vs_wrr.c | 4 ++-- 5 files changed, 47 insertions(+), 12 deletions(-) diff --git a/include/ipvs/sched.h b/include/ipvs/sched.h index 72e691b62..e26e24c3f 100644 --- a/include/ipvs/sched.h +++ b/include/ipvs/sched.h @@ -27,7 +27,6 @@ struct dp_vs_iphdr; struct dp_vs_scheduler { struct list_head n_list; char *name; -// rte_atomic32_t refcnt; struct dp_vs_dest * (*schedule)(struct dp_vs_service *svc, @@ -52,6 +51,8 @@ int dp_vs_unbind_scheduler(struct dp_vs_service *svc); int dp_vs_gcd_weight(struct dp_vs_service *svc); +struct list_head * dp_vs_sched_first_dest(const struct dp_vs_service *svc); + void dp_vs_scheduler_put(struct dp_vs_scheduler *scheduler); int register_dp_vs_scheduler(struct dp_vs_scheduler *scheduler); diff --git a/src/ipvs/ip_vs_rr.c b/src/ipvs/ip_vs_rr.c index 469f841fd..0fa1b6558 100644 --- a/src/ipvs/ip_vs_rr.c +++ b/src/ipvs/ip_vs_rr.c @@ -20,15 +20,15 @@ static int dp_vs_rr_init_svc(struct dp_vs_service *svc) { - svc->sched_data = &svc->dests; + svc->sched_data = dp_vs_sched_first_dest(svc); + return EDPVS_OK; } static int dp_vs_rr_update_svc(struct dp_vs_service *svc, struct dp_vs_dest *dest __rte_unused, sockoptid_t opt __rte_unused) { - svc->sched_data = &svc->dests; - return EDPVS_OK; + return dp_vs_rr_init_svc(svc); } /* @@ -68,7 +68,6 @@ static struct 
dp_vs_dest *dp_vs_rr_schedule(struct dp_vs_service *svc, static struct dp_vs_scheduler dp_vs_rr_scheduler = { .name = "rr", /* name */ -// .refcnt = ATOMIC_INIT(0), .n_list = LIST_HEAD_INIT(dp_vs_rr_scheduler.n_list), .init_service = dp_vs_rr_init_svc, .update_service = dp_vs_rr_update_svc, diff --git a/src/ipvs/ip_vs_sched.c b/src/ipvs/ip_vs_sched.c index 0cd37d6cc..f84321e78 100644 --- a/src/ipvs/ip_vs_sched.c +++ b/src/ipvs/ip_vs_sched.c @@ -119,6 +119,29 @@ int dp_vs_gcd_weight(struct dp_vs_service *svc) return g ? g : 1; } +/* + * Different workers should start schedule algorith from the dests that are evenly distributed + * across the whole dest list. It can avoid the clustering of connections across dests on the + * early phase after the service setup, especially for such scheduling methods as rr/wrr/wlc. + */ +struct list_head * dp_vs_sched_first_dest(const struct dp_vs_service *svc) +{ + int i, cid, loc; + struct list_head *ini; + + cid = rte_lcore_id(); + ini = svc->dests.next; + loc = (svc->num_dests / g_slave_lcore_num ?: 1) * g_lcore_index[cid] % (svc->num_dests ?: 1); + + for (i = 0; i < loc; i++) { + ini = ini->next; + if (unlikely(ini == &svc->dests)) + ini = ini->next; + } + + return ini; +} + /* * Lookup scheduler and try to load it if it doesn't exist */ diff --git a/src/ipvs/ip_vs_wlc.c b/src/ipvs/ip_vs_wlc.c index 550853ada..1d695fbb0 100644 --- a/src/ipvs/ip_vs_wlc.c +++ b/src/ipvs/ip_vs_wlc.c @@ -26,9 +26,11 @@ static inline unsigned int dp_vs_wlc_dest_overhead(struct dp_vs_dest *dest) static struct dp_vs_dest *dp_vs_wlc_schedule(struct dp_vs_service *svc, const struct rte_mbuf *mbuf, const struct dp_vs_iphdr *iph __rte_unused) { + struct list_head *first, *cur; struct dp_vs_dest *dest, *least; unsigned int loh, doh; + first = dp_vs_sched_first_dest(svc); /* * We calculate the load of each dest server as follows: * (dest overhead) / dest->weight @@ -36,26 +38,36 @@ static struct dp_vs_dest *dp_vs_wlc_schedule(struct dp_vs_service *svc, * The 
server with weight=0 is quiesced and will not receive any * new connections. */ - - list_for_each_entry(dest, &svc->dests, n_list) { + cur = first; + do { + if (unlikely(cur == &svc->dests)) { + cur = cur->next; + continue; + } + dest = list_entry(cur, struct dp_vs_dest, n_list); if (dp_vs_dest_is_valid(dest)) { least = dest; loh = dp_vs_wlc_dest_overhead(least); goto nextstage; } - } + cur = cur->next; + } while (cur != first); + return NULL; /* * Find the destination with the least load. */ nextstage: - list_for_each_entry_continue(dest, &svc->dests, n_list) { + for (cur = cur->next; cur != first; cur = cur->next) { + if (unlikely(cur == &svc->dests)) + continue; + dest = list_entry(cur, struct dp_vs_dest, n_list); if (dest->flags & DPVS_DEST_F_OVERLOAD) continue; doh = dp_vs_wlc_dest_overhead(dest); if (loh * rte_atomic16_read(&dest->weight) > - doh * rte_atomic16_read(&least->weight)) { + doh * rte_atomic16_read(&least->weight)) { least = dest; loh = doh; } diff --git a/src/ipvs/ip_vs_wrr.c b/src/ipvs/ip_vs_wrr.c index a0a4551fb..cbe163e17 100644 --- a/src/ipvs/ip_vs_wrr.c +++ b/src/ipvs/ip_vs_wrr.c @@ -54,7 +54,7 @@ static int dp_vs_wrr_init_svc(struct dp_vs_service *svc) if (mark == NULL) { return EDPVS_NOMEM; } - mark->cl = &svc->dests; + mark->cl = dp_vs_sched_first_dest(svc); mark->cw = 0; mark->mw = dp_vs_wrr_max_weight(svc); mark->di = dp_vs_gcd_weight(svc); @@ -78,7 +78,7 @@ static int dp_vs_wrr_update_svc(struct dp_vs_service *svc, { struct dp_vs_wrr_mark *mark = svc->sched_data; - mark->cl = &svc->dests; + mark->cl = dp_vs_sched_first_dest(svc); mark->mw = dp_vs_wrr_max_weight(svc); mark->di = dp_vs_gcd_weight(svc); if (mark->cw > mark->mw) From f6414b0d55035bb4f2ff004f0f9c034b820ed46b Mon Sep 17 00:00:00 2001 From: weiyanhua Date: Thu, 27 May 2021 09:35:51 +0800 Subject: [PATCH 02/41] Add UDP_CHECK health checkers 1.The MISC_CHECK method consumes more CPU resources,When reaching hundreds of RSs, the CPU usage will be close to 100%. 
2.The UDP_CHECK method has less CPU usage,The CPU usage is less than 100% at 10,000 RSs. use example: real_server 10.xxx.xxx.xxx 8000 { weight 1 inhibit_on_failure UDP_CHECK { retry 3 connect_timeout 5 connect_port 8000 payload hello world require_reply hello world min_reply_length 3 max_reply_length 15 } !UDP_CHECK } !real_server --- tools/keepalived/keepalived/check/Makefile.am | 2 +- tools/keepalived/keepalived/check/check_api.c | 2 + tools/keepalived/keepalived/check/check_udp.c | 363 ++++++++++++++++++ tools/keepalived/keepalived/core/layer4.c | 272 +++++++++++++ .../keepalived/keepalived/include/check_udp.h | 48 +++ tools/keepalived/keepalived/include/layer4.h | 6 + tools/keepalived/lib/align.h | 111 ++++++ tools/keepalived/lib/vector.c | 28 ++ tools/keepalived/lib/vector.h | 1 + 9 files changed, 832 insertions(+), 1 deletion(-) create mode 100644 tools/keepalived/keepalived/check/check_udp.c create mode 100644 tools/keepalived/keepalived/include/check_udp.h create mode 100644 tools/keepalived/lib/align.h diff --git a/tools/keepalived/keepalived/check/Makefile.am b/tools/keepalived/keepalived/check/Makefile.am index 27d1ed314..8b904dc1f 100644 --- a/tools/keepalived/keepalived/check/Makefile.am +++ b/tools/keepalived/keepalived/check/Makefile.am @@ -14,7 +14,7 @@ noinst_LIBRARIES = libcheck.a libcheck_a_SOURCES = \ check_daemon.c check_data.c check_parser.c \ - check_api.c check_tcp.c check_http.c check_ssl.c \ + check_api.c check_tcp.c check_udp.c check_http.c check_ssl.c \ check_smtp.c check_misc.c check_dns.c check_print.c \ ipwrapper.c ipvswrapper.c libipvs.c sockopt.c diff --git a/tools/keepalived/keepalived/check/check_api.c b/tools/keepalived/keepalived/check/check_api.c index 3eec3dcee..ad09fdd08 100644 --- a/tools/keepalived/keepalived/check/check_api.c +++ b/tools/keepalived/keepalived/check/check_api.c @@ -38,6 +38,7 @@ #include "check_misc.h" #include "check_smtp.h" #include "check_tcp.h" +#include "check_udp.h" #include "check_http.h" #include 
"check_ssl.h" #include "check_dns.h" @@ -683,6 +684,7 @@ install_checkers_keyword(void) install_misc_check_keyword(); install_smtp_check_keyword(); install_tcp_check_keyword(); + install_udp_check_keyword(); install_http_check_keyword(); install_ssl_check_keyword(); install_dns_check_keyword(); diff --git a/tools/keepalived/keepalived/check/check_udp.c b/tools/keepalived/keepalived/check/check_udp.c new file mode 100644 index 000000000..5f1f3c6f6 --- /dev/null +++ b/tools/keepalived/keepalived/check/check_udp.c @@ -0,0 +1,363 @@ +/* + * Soft: Keepalived is a failover program for the LVS project + * . It monitor & manipulate + * a loadbalanced server pool using multi-layer checks. + * + * Part: UDP checker. + * + * Author: Jie Liu, + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * See the GNU General Public License for more details. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ * + * Copyright (C) 2019-2019 Alexandre Cassen, + */ + +#include "config.h" + +/* system includes */ +#include +#include + +/* local includes */ +#include "scheduler.h" +#include "check_udp.h" +#include "check_api.h" +#include "memory.h" +#include "ipwrapper.h" +#include "layer4.h" +#include "logger.h" +#include "global_data.h" +#include "smtp.h" +#include "utils.h" +#include "parser.h" + +static int udp_connect_thread(thread_ref_t); + +/* Configuration stream handling */ +static void +free_udp_check(checker_t *checker) +{ + udp_check_t *udp_check = CHECKER_ARG(checker); + + FREE_PTR(udp_check->payload); + FREE_PTR(udp_check->reply_data); + FREE_PTR(udp_check->reply_mask); + FREE(checker->co); + FREE(checker->data); + FREE(checker); +} + +static void +dump_udp_check(FILE *fp, const checker_t *checker) +{ + udp_check_t *udp_check = CHECKER_ARG(checker); + + conf_write(fp, " Keepalive method = UDP_CHECK"); + dump_checker_opts(fp, checker); + + if (udp_check->payload) + conf_write(fp, " Payload len = %u", udp_check->payload_len); + else + conf_write(fp, " Payload specified = no"); + + conf_write(fp, " Require reply = %s", udp_check->require_reply ? "yes" : "no"); + if (udp_check->require_reply) { + conf_write(fp, " Min reply length = %u", udp_check->min_reply_len); + conf_write(fp, " Max reply length = %u", udp_check->max_reply_len); + conf_write(fp, " Reply data len = %u", udp_check->reply_len); + if (udp_check->reply_data) + conf_write(fp, " Reply data mask = %s", udp_check->reply_mask ? 
"yes" : "no"); + } +} + +static bool +compare_udp_check(const checker_t *a, const checker_t *b) +{ + return compare_conn_opts(a->co, b->co); +} + +//static const checker_funcs_t udp_checker_funcs = { CHECKER_UDP, free_udp_check, dump_udp_check, compare_udp_check, NULL }; + +static void +udp_check_handler(__attribute__((unused)) const vector_t *strvec) +{ + udp_check_t *udp_check = MALLOC(sizeof (udp_check_t)); + + udp_check->min_reply_len = 0; + udp_check->max_reply_len = UINT8_MAX; + + /* queue new checker */ + queue_checker(free_udp_check, dump_udp_check, udp_connect_thread, + compare_udp_check, udp_check, CHECKER_NEW_CO(), true); +} + +static void +payload_handler(const vector_t *strvec) +{ + udp_check_t *udp_check = CHECKER_GET(); + char *hex_str; + + if (vector_size(strvec) == 1) { + report_config_error(CONFIG_GENERAL_ERROR, "UDP_CHECK payload requires a payload"); + return; + } + + hex_str = make_strvec_str(strvec, 1); + udp_check->payload = STRDUP((const char*)hex_str); + udp_check->payload_len = strlen(hex_str); //read_hex_str(hex_str, &udp_check->payload, NULL); + if (!udp_check->payload_len) + report_config_error(CONFIG_GENERAL_ERROR, "Invalid hex string for UDP_CHECK payload"); + + FREE_ONLY(hex_str); +} + +static void +require_reply_handler(const vector_t *strvec) +{ + udp_check_t *udp_check = CHECKER_GET(); + char *hex_str; + + udp_check->require_reply = true; + + if (vector_size(strvec) == 1) + return; + + hex_str = make_strvec_str(strvec, 1); + udp_check->reply_data = STRDUP((const char*)hex_str); + udp_check->reply_len = strlen(hex_str); //read_hex_str(hex_str, &udp_check->reply_data, &udp_check->reply_mask); + if (!udp_check->reply_len) + report_config_error(CONFIG_GENERAL_ERROR, "Invalid hex string for UDP_CHECK reply"); + + FREE_ONLY(hex_str); +} + +static void +min_length_handler(const vector_t *strvec) +{ + udp_check_t *udp_check = CHECKER_GET(); + unsigned len; + + if (!read_unsigned_strvec(strvec, 1, &len, 0, UINT16_MAX, false)) { + 
report_config_error(CONFIG_GENERAL_ERROR, "UDP_CHECK min length %s not valid - must be between 0 & %d", strvec_slot(strvec, 1), UINT16_MAX); + return; + } + + udp_check->min_reply_len = len; +} + +static void +max_length_handler(const vector_t *strvec) +{ + udp_check_t *udp_check = CHECKER_GET(); + unsigned len; + + if (!read_unsigned_strvec(strvec, 1, &len, 0, UINT16_MAX, false)) { + report_config_error(CONFIG_GENERAL_ERROR, "UDP_CHECK max length %s not valid - must be between 0 & %d", strvec_slot(strvec, 1), UINT16_MAX); + return; + } + + udp_check->max_reply_len = len; +} + +static void +udp_check_end_handler(void) +{ + udp_check_t *udp_check = CHECKER_GET(); + + if (!check_conn_opts(CHECKER_GET_CO())) { + dequeue_new_checker(); + return; + } + + if (udp_check->min_reply_len > udp_check->max_reply_len) + report_config_error(CONFIG_GENERAL_ERROR, "UDP_CHECK min_reply length %d > max_reply_length %d - will always fail", + udp_check->min_reply_len, udp_check->max_reply_len); +} + +void +install_udp_check_keyword(void) +{ + /* We don't want some common keywords */ + install_keyword("UDP_CHECK", &udp_check_handler); + install_sublevel(); + install_checker_common_keywords(true); + install_keyword("payload", &payload_handler); + install_keyword("require_reply", &require_reply_handler); + install_keyword("min_reply_length", &min_length_handler); + install_keyword("max_reply_length", &max_length_handler); + install_sublevel_end_handler(udp_check_end_handler); + install_sublevel_end(); +} + +static void +udp_epilog(thread_ref_t thread, bool is_success) +{ + checker_t *checker; + unsigned long delay; + bool checker_was_up; + bool rs_was_alive; + + checker = THREAD_ARG(thread); + + delay = checker->delay_loop; + if (is_success || ((checker->is_up || !checker->has_run) && checker->retry_it >= checker->retry)) { + checker->retry_it = 0; + + if (is_success && (!checker->is_up || !checker->has_run)) { + log_message(LOG_INFO, "UDP connection to %s success." 
+ , FMT_CHK(checker)); + checker_was_up = checker->is_up; + rs_was_alive = checker->rs->alive; + update_svr_checker_state(UP, checker); + if (checker->rs->smtp_alert && !checker_was_up && + (rs_was_alive != checker->rs->alive || !global_data->no_checker_emails)) + smtp_alert(SMTP_MSG_RS, checker, NULL, + "=> UDP CHECK succeed on service <="); + } else if (!is_success && + (checker->is_up || !checker->has_run)) { + if (checker->retry && checker->has_run) + log_message(LOG_INFO + , "UDP_CHECK on service %s failed after %u retries." + , FMT_CHK(checker) + , checker->retry); + else + log_message(LOG_INFO + , "UDP_CHECK on service %s failed." + , FMT_CHK(checker)); + checker_was_up = checker->is_up; + rs_was_alive = checker->rs->alive; + update_svr_checker_state(DOWN, checker); + if (checker->rs->smtp_alert && checker_was_up && + (rs_was_alive != checker->rs->alive || !global_data->no_checker_emails)) + smtp_alert(SMTP_MSG_RS, checker, NULL, + "=> UDP CHECK failed on service <="); + } + } else if (checker->is_up) { + delay = checker->delay_before_retry; + ++checker->retry_it; + } + + checker->has_run = true; + + thread_add_timer(thread->master, udp_connect_thread, checker, delay); +} + +static bool +check_udp_reply(const uint8_t *recv_data, size_t len, const udp_check_t *udp_check) +{ + unsigned i; + unsigned check_len; + + if (len < udp_check->min_reply_len || + len > udp_check->max_reply_len) + return true; + + /* We only checker lesser of len and udp_check->reply_len octets */ + check_len = udp_check->reply_len; + if (len < check_len) + check_len = len; + + /* Check the received data matches */ + for (i = 0; i < check_len; i++) { + if ((recv_data[i] ^ udp_check->reply_data[i])) + return true; + } + + /* Success */ + return false; +} + +static int +udp_check_thread(thread_ref_t thread) +{ + checker_t *checker = THREAD_ARG(thread); + udp_check_t *udp_check = CHECKER_ARG(checker); + int status; + uint8_t *recv_buf = NULL; + size_t len = 0; + + len = 
udp_check->require_reply ? (udp_check->max_reply_len + 1) : 1; + recv_buf = MALLOC(len); + + status = udp_socket_state(thread->u.f.fd, thread, recv_buf, &len); + + thread_close_fd(thread); + + if (status == connect_success) { + /* coverity[var_deref_model] - udp_check->reply_data is only set if udp_check->require_reply is set */ + if (udp_check->reply_data && check_udp_reply(recv_buf, len, udp_check)) { + if (checker->is_up && + (global_data->checker_log_all_failures || checker->log_all_failures)) + log_message(LOG_INFO, "UDP check to %s reply data mismatch." + , FMT_CHK(checker)); + udp_epilog(thread, false); + } else + udp_epilog(thread, true); + } else { + if (checker->is_up && + (global_data->checker_log_all_failures || checker->log_all_failures)) + log_message(LOG_INFO, "UDP connection to %s failed." + , FMT_CHK(checker)); + udp_epilog(thread, false); + } + + if (recv_buf) + FREE(recv_buf); + + return; +} + +static int +udp_connect_thread(thread_ref_t thread) +{ + checker_t *checker = THREAD_ARG(thread); + udp_check_t *udp_check = CHECKER_ARG(checker); + conn_opts_t *co = checker->co; + int fd; + int status; + + /* + * Register a new checker thread & return + * if checker is disabled + */ + if (!checker->enabled) { + thread_add_timer(thread->master, udp_connect_thread, checker, + checker->delay_loop); + return; + } + + if ((fd = socket(co->dst.ss_family, SOCK_DGRAM | SOCK_CLOEXEC | SOCK_NONBLOCK, IPPROTO_UDP)) == -1) { + log_message(LOG_INFO, "UDP connect fail to create socket. 
Rescheduling."); + thread_add_timer(thread->master, udp_connect_thread, checker, + checker->delay_loop); + + return; + } + + status = udp_bind_connect(fd, co, udp_check->payload, udp_check->payload_len); + + /* handle udp connection status & register check worker thread */ + if (udp_check_state(fd, status, thread, udp_check_thread, co->connection_to)) { + close(fd); + udp_epilog(thread, false); + } + + return; +} + +#ifdef THREAD_DUMP +void +register_check_udp_addresses(void) +{ + register_thread_address("udp_check_thread", udp_check_thread); + register_thread_address("udp_connect_thread", udp_connect_thread); +} +#endif diff --git a/tools/keepalived/keepalived/core/layer4.c b/tools/keepalived/keepalived/core/layer4.c index a450fb106..94c475a0a 100644 --- a/tools/keepalived/keepalived/core/layer4.c +++ b/tools/keepalived/keepalived/core/layer4.c @@ -23,12 +23,53 @@ #include "config.h" +#include #include #include +#include +#include +#include +#ifdef ERRQUEUE_NEEDS_SYS_TIME +#include +#endif +#include +#include #include "layer4.h" #include "logger.h" #include "scheduler.h" +#ifdef _WITH_LVS_ +#include "check_api.h" +#endif +#include "bitops.h" +#include "utils.h" +#include "align.h" + +#ifdef _WITH_LVS_ +#define UDP_BUFSIZE 32 +#endif + +#ifdef _WITH_LVS_ +void +set_buf(char *buf, size_t buf_len) +{ + const char *str = "keepalived check - "; + size_t str_len = strlen(str); + char *p = buf; + + /* We need to overwrite the send buffer to avoid leaking + * stack content. 
*/ + + while (buf_len >= str_len) { + memcpy(p, str, str_len); + p += str_len; + buf_len -= str_len; + } + + if (buf_len) + memcpy(p, str, buf_len); +} +#endif #ifndef _WITH_LVS_ static @@ -178,4 +219,235 @@ socket_connection_state(int fd, enum connect_result status, thread_ref_t thread, return true; } + +enum connect_result +udp_bind_connect(int fd, conn_opts_t *co, uint8_t *payload, uint16_t payload_len) +{ + socklen_t addrlen; + ssize_t ret; + const struct sockaddr_storage *addr = &co->dst; + const struct sockaddr_storage *bind_addr = &co->bindto; + char buf[UDP_BUFSIZE]; + int on = 1; + int err; + + /* Ensure we don't leak our stack */ + if (!payload) { + set_buf(buf, sizeof(buf)); + payload = PTR_CAST(uint8_t, buf); + payload_len = sizeof(buf); + } + + /* We want to be able to receive ICMP error responses */ + if (co->dst.ss_family == AF_INET) + err = setsockopt(fd, SOL_IP, IP_RECVERR, PTR_CAST(char, &on), sizeof(on)); + else + err = setsockopt(fd, SOL_IPV6, IPV6_RECVERR, PTR_CAST(char, &on), sizeof(on)); + if (err) + log_message(LOG_INFO, "Error %d setting IP%s_RECVERR for socket %d - %m", errno, co->dst.ss_family == AF_INET ? "" : "V6", fd); + +#ifdef _WITH_SO_MARK_ + if (co->fwmark) { + if (setsockopt (fd, SOL_SOCKET, SO_MARK, &co->fwmark, sizeof (co->fwmark)) < 0) { + log_message(LOG_ERR, "Error setting fwmark %u to socket: %s", co->fwmark, strerror(errno)); + return connect_error; + } + } +#endif + + /* Bind socket */ + if (PTR_CAST_CONST(struct sockaddr, bind_addr)->sa_family != AF_UNSPEC) { + addrlen = sizeof(*bind_addr); + if (bind(fd, PTR_CAST_CONST(struct sockaddr, bind_addr), addrlen) != 0) { + log_message(LOG_INFO, "bind failed. 
errno: %d, error: %s", errno, strerror(errno)); + return connect_error; + } + } + + /* Set remote IP and connect */ + addrlen = sizeof(*addr); + ret = connect(fd, PTR_CAST_CONST(struct sockaddr, addr), addrlen); + + if (ret < 0) { + /* We want to know about the error, but not repeatedly */ + if (errno != co->last_errno) { + co->last_errno = errno; + if (__test_bit(LOG_DETAIL_BIT, &debug)) + log_message(LOG_INFO, "UDP connect error %d - %m", errno); + } + + return connect_error; + } + + /* Send udp packet */ + ret = send(fd, payload, payload_len, 0); + + if (ret == payload_len) + return connect_success; + + if (ret == -1) { + /* We want to know about the error, but not repeatedly */ + if (errno != co->last_errno) { + co->last_errno = errno; + if (__test_bit(LOG_DETAIL_BIT, &debug)) + log_message(LOG_INFO, "UDP send error %d - %m", errno); + } + } + else if (__test_bit(LOG_DETAIL_BIT, &debug)) + log_message(LOG_INFO, "udp_bind_connect send - sent %zd bytes instead of %zu", ret, sizeof(buf)); + + return connect_error; +} + +static enum connect_result +udp_socket_error(int fd) +{ + struct msghdr msg; + char name_buf[128]; + struct iovec iov; + char control[2560] __attribute__((aligned(__alignof__(struct cmsghdr)))); + struct icmphdr icmph; + struct cmsghdr *cmsg; /* Control related data */ + struct sock_extended_err *sock_err; + ssize_t n; + + iov.iov_base = &icmph; + iov.iov_len = sizeof icmph; + msg.msg_name = name_buf; + msg.msg_namelen = sizeof(name_buf); + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_control = control; + msg.msg_controllen = sizeof control; + msg.msg_flags = 0; + + n = recvmsg(fd, &msg, MSG_ERRQUEUE); + + if (n == -1) { + log_message(LOG_INFO, "udp_socket_error recvmsg failed - errno %d", errno); + return connect_success; + } + + for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) { + sock_err = PTR_CAST(struct sock_extended_err, CMSG_DATA(cmsg)); + if (cmsg->cmsg_level == SOL_IP && cmsg->cmsg_type == IP_RECVERR) { + if 
(sock_err) { + /* We are interested in ICMP errors */ + if (sock_err->ee_origin == SO_EE_ORIGIN_ICMP && sock_err->ee_type == ICMP_DEST_UNREACH) { +#ifdef ICMP_DEBUG + /* Handle ICMP errors types */ + switch (sock_err->ee_code) + { + case ICMP_NET_UNREACH: + /* Handle this error */ + log_message(LOG_INFO, "Network Unreachable Error"); + break; + case ICMP_HOST_UNREACH: + /* Handle this error */ + log_message(LOG_INFO, "Host Unreachable Error"); + break; + case ICMP_PORT_UNREACH: + /* Handle this error */ + log_message(LOG_INFO, "Port Unreachable Error"); + break; + default: + log_message(LOG_INFO, "Unreach code %d", sock_err->ee_code); + } +#endif + return connect_error; +#ifndef ICMP_DEBUG + } + } + } +#else + } else + log_message(LOG_INFO, "ee_origin %d, ee_type %d", sock_err->ee_origin, sock_err->ee_type); + } else + log_message(LOG_INFO, "No CMSG_DATA"); + } +#endif + else if (cmsg->cmsg_level == SOL_IPV6 && cmsg->cmsg_type == IPV6_RECVERR) { + if (sock_err) { + /* We are interested in ICMP errors */ + if (sock_err->ee_origin == SO_EE_ORIGIN_ICMP6 && sock_err->ee_type == ICMPV6_DEST_UNREACH) { +#ifdef ICMP_DEBUG + /* Handle ICMP errors types */ + switch (sock_err->ee_code) + { + case ICMPV6_NOROUTE: + /* Handle this error */ + log_message(LOG_INFO, "No Route Error"); + break; + case ICMPV6_ADDR_UNREACH: + /* Handle this error */ + log_message(LOG_INFO, "Address Unreachable Error"); + break; + case ICMPV6_PORT_UNREACH: + /* Handle this error */ + log_message(LOG_INFO, "Port Unreachable Error"); + break; + default: + log_message(LOG_INFO, "Unreach code %d", sock_err->ee_code); + } +#endif + return connect_error; +#ifndef ICMP_DEBUG + } + } + } +#else + } else + log_message(LOG_INFO, "ee_origin %d, ee_type %d", sock_err->ee_origin, sock_err->ee_type); + } else + log_message(LOG_INFO, "No CMSG_DATA"); + } + else + log_message(LOG_INFO, "cmsg_level %d, cmsg->type %d", cmsg->cmsg_level, cmsg->cmsg_type); +#endif + } + + return connect_success; +} + +enum 
connect_result +udp_socket_state(int fd, thread_ref_t thread, uint8_t *recv_buf, size_t *len) +{ + int ret; + + /* Handle Read timeout, we consider it success unless require_reply is set */ + if (thread->type == THREAD_READ_TIMEOUT) + return recv_buf ? connect_error : connect_success; + + if (thread->type == THREAD_READ_ERROR) + return udp_socket_error(fd); + + ret = recv(fd, recv_buf, *len, 0); + + /* Ret less than 0 means the port is unreachable. + * Otherwise, we consider it success. + */ + + if (ret < 0) + return connect_error; + + *len = ret; + return connect_success; +} + +bool +udp_check_state(int fd, enum connect_result status, thread_ref_t thread, + thread_func_t func, unsigned long timeout) +{ + checker_t *checker; + + checker = THREAD_ARG(thread); + + if (status == connect_success) { + thread_add_read(thread->master, func, checker, fd, timeout, true); + return false; + } + + return true; +} + #endif diff --git a/tools/keepalived/keepalived/include/check_udp.h b/tools/keepalived/keepalived/include/check_udp.h new file mode 100644 index 000000000..420a3672e --- /dev/null +++ b/tools/keepalived/keepalived/include/check_udp.h @@ -0,0 +1,48 @@ +/* + * Soft: Keepalived is a failover program for the LVS project + * . It monitor & manipulate + * a loadbalanced server pool using multi-layer checks. + * + * Part: check_udp.c include file. + * + * Author: Jie Liu, + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * See the GNU General Public License for more details. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ * + * Copyright (C) 2019-2019 Alexandre Cassen, + */ + +#ifndef _CHECK_UDP_H +#define _CHECK_UDP_H + +#include "config.h" + +#include + + +typedef struct _udp_check { + uint16_t payload_len; + uint8_t *payload; + bool require_reply; + uint16_t reply_len; + uint8_t *reply_data; + uint8_t *reply_mask; + uint16_t min_reply_len; + uint16_t max_reply_len; +} udp_check_t; + +/* Prototypes defs */ +extern void install_udp_check_keyword(void); +#ifdef THREAD_DUMP +extern void register_check_udp_addresses(void); +#endif + +#endif diff --git a/tools/keepalived/keepalived/include/layer4.h b/tools/keepalived/keepalived/include/layer4.h index 3e66f1d3a..6f47bb8c2 100644 --- a/tools/keepalived/keepalived/include/layer4.h +++ b/tools/keepalived/keepalived/include/layer4.h @@ -27,6 +27,7 @@ #include #include #include +#include /* local includes */ #include "scheduler.h" @@ -49,6 +50,7 @@ typedef struct _conn_opts { #ifdef _WITH_SO_MARK_ unsigned int fwmark; /* to mark packets going out of the socket using SO_MARK */ #endif + int last_errno; /* Errno from last call to connect */ } conn_opts_t; /* Prototypes defs */ @@ -98,6 +100,10 @@ tcp_connection_state(int fd, enum connect_result status, thread_ref_t thread, { return socket_connection_state(fd, status, thread, func, timeout); } + +extern enum connect_result udp_bind_connect(int, conn_opts_t *, uint8_t *, uint16_t); +extern enum connect_result udp_socket_state(int, thread_ref_t, uint8_t *, size_t *); +extern bool udp_check_state(int, enum connect_result, thread_ref_t, thread_func_t, unsigned long); #endif #endif diff --git a/tools/keepalived/lib/align.h b/tools/keepalived/lib/align.h new file mode 100644 index 000000000..aba548b5d --- /dev/null +++ b/tools/keepalived/lib/align.h @@ -0,0 +1,111 @@ +/* + * Soft: Keepalived is a failover program for the LVS project + * . It monitor & manipulate + * a loadbalanced server pool using multi-layer checks. + * + * Part: align.h include file. 
+ * + * Author: Quentin Armitage + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * See the GNU General Public License for more details. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Copyright (C) 2020-2020 Alexandre Cassen, + */ + +#ifndef _ALIGN_H +#define _ALIGN_H + +#include "config.h" + +#ifdef CHECK_CAST_ALIGN +#include "logger.h" +#endif + +/* PTR_CAST and PTR_CAST_CONST should be used for all casts of pointers. + * + * PTR_CAST and PTR_CAST_CONST serve several purposes. + * + * 1) On 32 bit ARM systems which don't support unaligned memory access, configure + * will have defined CAST_VIA_VOID to avoid the compiler spewing out 1000s of + * "cast increases required alignment of target type" warnings which are caused + * due to the char * used in the cast possibly not being aligned for the pointer + * being cast to. CAST_VIA_VOID merely means that the char * is first cast to a + * void * which is then cast to the pointer of the type required. Casting via a + * void * should not alter the code produced by the compiler, since the initial + * pointer (char *) only has 1 byte alignment. + * + * This still leaves the problem that, if the keepalived code is not correct, there + * may be an unaligned pointer being used. See 2) below for how this is dealt with. 
+ * + * On systems which do allow unaligned memory access, the warnings generated by + * not using a void * can be generated by using configure options: + * --enable-strict-cast-align --disable-cast-via-void + * + * 2) As identified in 1) above, there is a need to be able to ensure that there + * are no unaligned casts, both for performance reasons on sytems which do allow + * unaligned casts, and to ensure that there are not alignment traps, or worse + * still incorrect values returned (which happens with ARMv5) from unaligned reads. + * + * For this reason there is a configure option --enable-cast-align-checks which + * defines CHECK_CAST_ALIGN. This causes PTR_CAST and PTR_CAST_CONST to generate + * run-time code to check that casts made via PTR_CAST and PRT_CAST_CONST are + * properly aligned, and logs a message if they are not. The checks work on any + * architecture, whether unaligned memory access works or not, and so can be + * performed on Intel x86_64, aarch64 etc. + * + * Developers should periodically build with this option enabled and then run + * keepalived to check that there are no unaligned casts. 22 such instances of + * unaligned char arrays being cast to structure pointers with greater alignment + * were found when this check was first added. + * + * 3) Other cast checks can be added later by simply adding further definitions for + * PTR_CAST and PTR_CAST_CONST, probably just by adding a further definition of + * PTR_CAST_ALL. 
+ */ + +#ifdef CAST_VIA_VOID +#define __CAST_PTR(__const) (__const void *) +#define PTR_CAST_ASSIGN (void *) +#define PTR_CAST_ASSIGN_CONST (const void *) +#else +#define __CAST_PTR(__const) +#define PTR_CAST_ASSIGN +#define PTR_CAST_ASSIGN_CONST +#endif + +#ifdef CHECK_CAST_ALIGN +#define PTR_CAST_ALL(__type, __ptr, __const) ({ \ + __const void *sav_ptr = __ptr; \ + if ((long)sav_ptr % __alignof__(__type)) \ + log_message(LOG_INFO, "Alignment error - (" #__type " *)(" #__ptr ") - alignment %zu, address %p", __alignof__(__type), sav_ptr); \ + (__const __type *) __CAST_PTR(__const) (sav_ptr); \ + }) + +#define PTR_CAST2_ALL(__type, __type1, __ptr, __field, __const) ({ \ + __const void *sav_ptr1 = __ptr; \ + if ((long)sav_ptr1 % __alignof__(__type1)) \ + printf("Alignment error - (" #__type1 " *)(" #__ptr ") - alignment %zu, address %p", __alignof__(__type1), sav_ptr1); \ + PTR_CAST_ALL(__type, &(((__const __type1 *) __CAST_PTR(__const) (sav_ptr1))->__field), __const);\ + }) +#else +#define PTR_CAST_ALL(__type, __ptr, __const) \ + ({ (__const __type *) __CAST_PTR(__const) (__ptr); }) +#define PTR_CAST2_ALL(__type, __type1, __ptr, __field, __const) \ + ({ (__const __type *) __CAST_PTR(__const) &((__const __type1 *) __CAST_PTR(__const) (__ptr))->__field; }) +#endif + +#define PTR_CAST(__type, __ptr) PTR_CAST_ALL(__type, __ptr,) +#define PTR_CAST_CONST(__type, __ptr) PTR_CAST_ALL(__type, __ptr, const) + +#define PTR_CAST2(__type, __type1, __ptr, __field) PTR_CAST2_ALL(__type, __type1, __ptr, __field,) +#define PTR_CAST2_CONST(__type, __type1, __ptr, __field) PTR_CAST2_ALL(__type, __type1, __ptr, __field, const) + +#endif diff --git a/tools/keepalived/lib/vector.c b/tools/keepalived/lib/vector.c index d5d4a3586..a5e245066 100644 --- a/tools/keepalived/lib/vector.c +++ b/tools/keepalived/lib/vector.c @@ -22,6 +22,8 @@ #include "config.h" +#include + #include "vector.h" #include "memory.h" @@ -309,6 +311,32 @@ vector_dump(FILE *fp, const vector_t *v) #endif /* String 
vector related */ +char * +make_strvec_str(const vector_t *v, unsigned start) +{ + size_t len; + char *str; + unsigned i; + + for (i = start, len = 0; i < v->allocated; i++) { + if (v->slot[i]) + len += strlen(v->slot[i]) + 1; + } + + str = MALLOC(len); + + for (i = start, len = 0; i < v->allocated; i++) { + if (v->slot[i]) { + if (i > start) + str[len++] = ' '; + strcpy(str + len, v->slot[i]); + len += strlen(v->slot[i]); + } + } + + return str; +} + void free_strvec(const vector_t *strvec) { diff --git a/tools/keepalived/lib/vector.h b/tools/keepalived/lib/vector.h index 42e294d5f..87b0767e0 100644 --- a/tools/keepalived/lib/vector.h +++ b/tools/keepalived/lib/vector.h @@ -73,6 +73,7 @@ extern void vector_free_r(const vector_t *); #ifdef _INCLUDE_UNUSED_CODE_ extern void vector_dump(FILE *fp, const vector_t *); #endif +extern char *make_strvec_str(const vector_t *v, unsigned start); extern void free_strvec(const vector_t *); #endif From 1b2af29e30c9e08a8af59765749c36269376f0cf Mon Sep 17 00:00:00 2001 From: ywc689 Date: Thu, 10 Jun 2021 20:24:30 +0800 Subject: [PATCH 03/41] doc: update tutorial doc of section 'Full-NAT with Keepalived (one-arm)' Signed-off-by: ywc689 --- doc/tutorial.md | 43 ++++++++++++++++++++++++++++++++++++++----- 1 file changed, 38 insertions(+), 5 deletions(-) diff --git a/doc/tutorial.md b/doc/tutorial.md index 73ec78b68..5c39850ce 100644 --- a/doc/tutorial.md +++ b/doc/tutorial.md @@ -431,9 +431,20 @@ virtual_server group 192.168.100.254-80 { } ``` -The keepalived config for backup is the same with Master, except the `state` should be 'BACKUP', and `priority` should be lower. +The keepalived config for backup is the same with Master, except + +* local address is not the same with MASTER, +* vrrp_instance `state` should be 'BACKUP', +* vrrp_instance `priority` should be lower. ``` +local_address_group laddr_g1 { + 192.168.100.202 dpdk0 # use DPDK interface + 192.168.100.203 dpdk0 # use DPDK interface +} + +... ... 
+ vrrp_instance VI_1 { state BACKUP priority 80 @@ -447,12 +458,19 @@ Start `keepalived` on both Master and Backup. ./keepalived -f /etc/keepalived/keepalived.conf ``` -For **test only**, add `VIP` and *routes* to DPDK interface manually on Master. Do not set VIP on both master and backup, in practice they should be added to keepalived configure file. +Then, add *routes* to DPDK interface manually on both MASTER and BACKUP. ```bash -./dpip addr add 192.168.100.254/32 dev dpdk0 ./dpip route add 192.168.100.0/24 dev dpdk0 ``` +Lastly, configure dpdk0.kni to make keepalived's vrrp and health-check work properly. + +```bash +ip link set dpdk0.kni up +ip addr add 192.168.100.28/24 dev dpdk0.kni # assign an IP to dpdk0.kni +dpip route add 192.168.100.28/32 scope kni_host dev dpdk0 # route packets target at 192.168.100.28 to dpdk0.kni +``` +Note the dpdk0.kni's IP addresses should be different for MASTER and BACKUP. Check if parameters just set are correct: @@ -465,7 +483,7 @@ TCP 192.168.100.254:80 rr -> 192.168.100.2:80 FullNat 100 0 0 -> 192.168.100.3:80 FullNat 100 0 0 -$ ./dpip addr show +$ ./dpip addr show -s inet 192.168.100.254/32 scope global dpdk0 valid_lft forever preferred_lft forever inet 192.168.100.201/32 scope global dpdk0 @@ -474,8 +492,10 @@ inet 192.168.100.200/32 scope global dpdk0 valid_lft forever preferred_lft forever sa_used 0 sa_free 1032176 sa_miss 0 $ ./dpip route show +inet 192.168.100.28/32 via 0.0.0.0 src 0.0.0.0 dev dpdk0 mtu 1500 tos 0 scope kni_host metric 0 proto auto inet 192.168.100.200/32 via 0.0.0.0 src 0.0.0.0 dev dpdk0 mtu 1500 tos 0 scope host metric 0 proto auto inet 192.168.100.201/32 via 0.0.0.0 src 0.0.0.0 dev dpdk0 mtu 1500 tos 0 scope host metric 0 proto auto +inet 192.168.100.254/32 via 0.0.0.0 src 0.0.0.0 dev dpdk0 mtu 1500 tos 0 scope host metric 0 proto auto inet 192.168.100.0/24 via 0.0.0.0 src 0.0.0.0 dev dpdk0 mtu 1500 tos 0 scope link metric 0 proto auto $ ./ipvsadm -G @@ -492,7 +512,20 @@ client$ curl 192.168.100.254 
Your ip:port : 192.168.100.146:42394 ``` -> We just explain how DPVS works with keepalived, and not verify if the master/backup feature provided by keepalived works. Please refer LVS docs if needed. +> Note: +> 1. We just explain how DPVS works with keepalived, and do not verify if the master/backup feature provided by keepalived works. Please refer to LVS docs if needed. +> 2. Keepalived master/backup failover may fail if the switch has enabled ARP broadcast suppression (unfortunately this is often the case). If you don't want to change the configuration of your switch, decreasing the number of gratuitous ARP packets sent by keepalived (dpvs) on failover may help. + +``` +global_defs { + ... ... + vrrp_garp_master_repeat 1 # repeat counts for master state gratuitous arp + vrrp_garp_master_delay 1 # time to relaunch gratuitous arp after failover for master, in second + vrrp_garp_master_refresh 600 # time interval to refresh gratuitous arp periodically(0 = none), in second + vrrp_garp_master_refresh_repeat 1 # repeat counts to refresh gratuitous arp periodically + ... ... +} +``` From 5579aff4443c43e520581386df285ccdf15d48cf Mon Sep 17 00:00:00 2001 From: ywc689 Date: Tue, 8 Jun 2021 13:05:14 +0800 Subject: [PATCH 04/41] Fix bonding mode 4 problem caused by LACP failure. The problem is discussed in Issue #725 in detail. 
Signed-off-by: ywc689 --- ...link-event-for-multicast-driver-part.patch | 4 +- ...iable-IP-header-len-for-checksum-API.patch | 4 +- ...e-flow_item-type-comparsion-in-flow_.patch | 4 +- ...imental-attribute-of-rte_memseg_walk.patch | 12 +- ...-and-change-dpdk-pdump-tool-for-dpvs.patch | 338 +++++++++--------- .../0006-enable-dpdk-eal-memory-debug.patch | 6 +- ...ode-4-problem-caused-by-LACP-failure.patch | 62 ++++ 7 files changed, 254 insertions(+), 176 deletions(-) create mode 100644 patch/dpdk-stable-18.11.2/0007-Fix-bonding-mode-4-problem-caused-by-LACP-failure.patch diff --git a/patch/dpdk-stable-18.11.2/0001-kni-use-netlink-event-for-multicast-driver-part.patch b/patch/dpdk-stable-18.11.2/0001-kni-use-netlink-event-for-multicast-driver-part.patch index 17aea758a..0bc548181 100644 --- a/patch/dpdk-stable-18.11.2/0001-kni-use-netlink-event-for-multicast-driver-part.patch +++ b/patch/dpdk-stable-18.11.2/0001-kni-use-netlink-event-for-multicast-driver-part.patch @@ -1,7 +1,7 @@ -From 659c6e84e3ae0c5e5b93894aa15dd4983b3ac6c3 Mon Sep 17 00:00:00 2001 +From 76eb58e26ec25be8e0d281085a3c0274d9abfede Mon Sep 17 00:00:00 2001 From: ywc689 Date: Fri, 28 Jun 2019 16:52:24 +0800 -Subject: [PATCH 1/3] kni: use netlink event for multicast (driver part) +Subject: [PATCH 1/7] kni: use netlink event for multicast (driver part) kni driver send netlink event every time hw-multicast list updated by kernel, the user kni app should capture the event and update multicast diff --git a/patch/dpdk-stable-18.11.2/0002-net-support-variable-IP-header-len-for-checksum-API.patch b/patch/dpdk-stable-18.11.2/0002-net-support-variable-IP-header-len-for-checksum-API.patch index 2356e2154..70a2de217 100644 --- a/patch/dpdk-stable-18.11.2/0002-net-support-variable-IP-header-len-for-checksum-API.patch +++ b/patch/dpdk-stable-18.11.2/0002-net-support-variable-IP-header-len-for-checksum-API.patch @@ -1,7 +1,7 @@ -From 86d8695113517403c59497dc2f43a333fa44316b Mon Sep 17 00:00:00 2001 +From 
30a0939aa7c1f2de926b7af1d881144fe8a315bb Mon Sep 17 00:00:00 2001 From: ywc689 Date: Fri, 28 Jun 2019 16:27:08 +0800 -Subject: [PATCH 2/3] net: support variable IP header len for checksum API. +Subject: [PATCH 2/7] net: support variable IP header len for checksum API. IPv4 checksum APIs use fixe IP header length, it will failed if there is any IP option. Now calculating header length by "ihl" field, so that we diff --git a/patch/dpdk-stable-18.11.2/0003-driver-kni-enable-flow_item-type-comparsion-in-flow_.patch b/patch/dpdk-stable-18.11.2/0003-driver-kni-enable-flow_item-type-comparsion-in-flow_.patch index d5634fb53..cfaf95059 100644 --- a/patch/dpdk-stable-18.11.2/0003-driver-kni-enable-flow_item-type-comparsion-in-flow_.patch +++ b/patch/dpdk-stable-18.11.2/0003-driver-kni-enable-flow_item-type-comparsion-in-flow_.patch @@ -1,7 +1,7 @@ -From 2e26428dc4d450e974ceb9bc737f691057cd80b6 Mon Sep 17 00:00:00 2001 +From e3d1e01d57ebca7feb9a602a2949661e4351c5ad Mon Sep 17 00:00:00 2001 From: ywc689 Date: Fri, 28 Jun 2019 17:02:40 +0800 -Subject: [PATCH 3/3] driver:kni: enable flow_item type comparsion in +Subject: [PATCH 3/7] driver:kni: enable flow_item type comparsion in flow_fdir_cmp the existence is checked before adding/deleting a fdir flow, but diff --git a/patch/dpdk-stable-18.11.2/0004-rm-rte_experimental-attribute-of-rte_memseg_walk.patch b/patch/dpdk-stable-18.11.2/0004-rm-rte_experimental-attribute-of-rte_memseg_walk.patch index 65b69b387..b44843c45 100644 --- a/patch/dpdk-stable-18.11.2/0004-rm-rte_experimental-attribute-of-rte_memseg_walk.patch +++ b/patch/dpdk-stable-18.11.2/0004-rm-rte_experimental-attribute-of-rte_memseg_walk.patch @@ -1,7 +1,7 @@ -From f90a349979926bb547583dc6f1b6b1f1ab3b3189 Mon Sep 17 00:00:00 2001 +From 5230e76681ade3da2f25a896887ce31fac5ff397 Mon Sep 17 00:00:00 2001 From: liuchuanqi Date: Fri, 7 Aug 2020 19:20:57 +0800 -Subject: [PATCH] rm rte_experimental attribute of rte_memseg_walk +Subject: [PATCH 4/7] rm rte_experimental 
attribute of rte_memseg_walk there is no __rte_experimental attribute in function rte_mempool_walk and rte_memzone_walk of dpdk 18.11, and there is no __rte_experimental attribute in function rte_memseg_walk of the higher version's dpdk(eg: dpdk 20.05). @@ -12,10 +12,10 @@ so remove it to prevent compilation error when dpdk application calls the functi 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/librte_eal/common/eal_common_memory.c b/lib/librte_eal/common/eal_common_memory.c -index d47ea4938..3d8ce67f0 100644 +index e3ef371..ed442e7 100644 --- a/lib/librte_eal/common/eal_common_memory.c +++ b/lib/librte_eal/common/eal_common_memory.c -@@ -601,7 +601,7 @@ rte_memseg_contig_walk(rte_memseg_contig_walk_t func, void *arg) +@@ -606,7 +606,7 @@ unsigned rte_memory_get_nrank(void) return ret; } @@ -25,7 +25,7 @@ index d47ea4938..3d8ce67f0 100644 { struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; diff --git a/lib/librte_eal/common/include/rte_memory.h b/lib/librte_eal/common/include/rte_memory.h -index d970825df..71bee8b6b 100644 +index d970825..71bee8b 100644 --- a/lib/librte_eal/common/include/rte_memory.h +++ b/lib/librte_eal/common/include/rte_memory.h @@ -227,7 +227,7 @@ typedef int (*rte_memseg_list_walk_t)(const struct rte_memseg_list *msl, @@ -38,5 +38,5 @@ index d970825df..71bee8b6b 100644 /** -- -2.21.1 (Apple Git-122.3) +1.8.3.1 diff --git a/patch/dpdk-stable-18.11.2/0005-enable-pdump-and-change-dpdk-pdump-tool-for-dpvs.patch b/patch/dpdk-stable-18.11.2/0005-enable-pdump-and-change-dpdk-pdump-tool-for-dpvs.patch index d2a68efbc..53c027ed4 100644 --- a/patch/dpdk-stable-18.11.2/0005-enable-pdump-and-change-dpdk-pdump-tool-for-dpvs.patch +++ b/patch/dpdk-stable-18.11.2/0005-enable-pdump-and-change-dpdk-pdump-tool-for-dpvs.patch @@ -1,5 +1,18 @@ +From 769e98ca325dee673ff297d5906e5ef3c9632862 Mon Sep 17 00:00:00 2001 +From: yuwenchao +Date: Tue, 8 Jun 2021 09:52:12 +0800 +Subject: [PATCH 5/7] enable pdump and change dpdk 
pdump tool for dpvs + +Signed-off-by: yuwenchao +--- + app/pdump/main.c | 170 +++++++++++++++++++++++++++++++++++++++++-- + config/common_base | 2 +- + lib/librte_pdump/rte_pdump.c | 146 ++++++++++++++++++++++++++++++++++++- + lib/librte_pdump/rte_pdump.h | 27 +++++++ + 4 files changed, 332 insertions(+), 13 deletions(-) + diff --git a/app/pdump/main.c b/app/pdump/main.c -index ccf2a1d..7e72c16 100644 +index ccf2a1d..3913515 100644 --- a/app/pdump/main.c +++ b/app/pdump/main.c @@ -26,6 +26,8 @@ @@ -66,59 +79,59 @@ index ccf2a1d..7e72c16 100644 static int +parse_host(const char *key __rte_unused, const char *value, void *extra_args) +{ -+ struct pdump_tuples *pt = extra_args; -+ struct in_addr inaddr; -+ struct in6_addr inaddr6; -+ union addr addr; -+ int af = 0; -+ -+ if (inet_pton(AF_INET6, value, &inaddr6) > 0) { -+ af = AF_INET6; -+ addr.in6 = inaddr6; -+ } else if (inet_pton(AF_INET, value, &inaddr) > 0){ -+ af = AF_INET; -+ addr.in = inaddr; -+ } else { -+ printf("IP address invaled\n"); -+ return -EINVAL; -+ } -+ -+ if (pt->filter && pt->filter->af != 0 && af != pt->filter->af) { -+ printf("IPv4 and IPv6 conflict\n"); -+ return -EINVAL; -+ } else { -+ pt->filter->af = af; -+ } -+ -+ if (!strcmp(key, PDUMP_HOST_ARG)) { -+ rte_memcpy(&pt->filter->host_addr, &addr, sizeof(addr)); -+ } else if (!strcmp(key, PDUMP_SRC_ARG)) { -+ rte_memcpy(&pt->filter->s_addr, &addr, sizeof(addr)); -+ } else if (!strcmp(key, PDUMP_DST_ARG)) { -+ rte_memcpy(&pt->filter->d_addr, &addr, sizeof(addr)); -+ } -+ -+ return 0; ++ struct pdump_tuples *pt = extra_args; ++ struct in_addr inaddr; ++ struct in6_addr inaddr6; ++ union addr addr; ++ int af = 0; ++ ++ if (inet_pton(AF_INET6, value, &inaddr6) > 0) { ++ af = AF_INET6; ++ addr.in6 = inaddr6; ++ } else if (inet_pton(AF_INET, value, &inaddr) > 0){ ++ af = AF_INET; ++ addr.in = inaddr; ++ } else { ++ printf("IP address invaled\n"); ++ return -EINVAL; ++ } ++ ++ if (pt->filter && pt->filter->af != 0 && af != pt->filter->af) { ++ 
printf("IPv4 and IPv6 conflict\n"); ++ return -EINVAL; ++ } else { ++ pt->filter->af = af; ++ } ++ ++ if (!strcmp(key, PDUMP_HOST_ARG)) { ++ rte_memcpy(&pt->filter->host_addr, &addr, sizeof(addr)); ++ } else if (!strcmp(key, PDUMP_SRC_ARG)) { ++ rte_memcpy(&pt->filter->s_addr, &addr, sizeof(addr)); ++ } else if (!strcmp(key, PDUMP_DST_ARG)) { ++ rte_memcpy(&pt->filter->d_addr, &addr, sizeof(addr)); ++ } ++ ++ return 0; +} + +static int +parse_proto(const char *key __rte_unused, const char *value, void *extra_args) +{ -+ struct pdump_tuples *pt = extra_args; -+ -+ if (!strcmp(value, "tcp")) { -+ pt->filter->proto = IPPROTO_TCP; -+ } else if (!strcmp(value, "udp")) { -+ pt->filter->proto = IPPROTO_UDP; -+ } else if (!strcmp(value, "icmp")) { -+ pt->filter->proto = IPPROTO_ICMP; -+ } else { -+ printf("invalid value:\"%s\" for key:\"%s\", " -+ "value must be tcp/udp/icmp\n", value, key); -+ return -EINVAL; -+ } -+ -+ return 0; ++ struct pdump_tuples *pt = extra_args; ++ ++ if (!strcmp(value, "tcp")) { ++ pt->filter->proto = IPPROTO_TCP; ++ } else if (!strcmp(value, "udp")) { ++ pt->filter->proto = IPPROTO_UDP; ++ } else if (!strcmp(value, "icmp")) { ++ pt->filter->proto = IPPROTO_ICMP; ++ } else { ++ printf("invalid value:\"%s\" for key:\"%s\", " ++ "value must be tcp/udp/icmp\n", value, key); ++ return -EINVAL; ++ } ++ ++ return 0; +} + + @@ -130,74 +143,74 @@ index ccf2a1d..7e72c16 100644 } else pt->total_num_mbufs = MBUFS_PER_POOL; -+ /* filter parsing and validation */ -+ pt->filter = rte_zmalloc("pdump_filter", -+ sizeof(struct pdump_filter), 0); -+ cnt1 = rte_kvargs_count(kvlist, PDUMP_HOST_ARG); -+ if (cnt1 == 1) { -+ ret = rte_kvargs_process(kvlist, PDUMP_HOST_ARG, -+ &parse_host, pt); -+ if (ret < 0) -+ goto free_kvlist; -+ } -+ -+ cnt1 = rte_kvargs_count(kvlist, PDUMP_SRC_ARG); -+ if (cnt1 == 1) { -+ ret = rte_kvargs_process(kvlist, PDUMP_SRC_ARG, -+ &parse_host, pt); -+ if (ret < 0) -+ goto free_kvlist; -+ } -+ -+ cnt1 = rte_kvargs_count(kvlist, 
PDUMP_DST_ARG); -+ if (cnt1 == 1) { -+ ret = rte_kvargs_process(kvlist, PDUMP_DST_ARG, -+ &parse_host, pt); -+ if (ret < 0) -+ goto free_kvlist; -+ } -+ -+ -+ cnt1 = rte_kvargs_count(kvlist, PDUMP_PROTO_PORT_AGE); -+ if (cnt1 == 1) { -+ v.min = 1; -+ v.max = UINT16_MAX; -+ ret = rte_kvargs_process(kvlist, PDUMP_PROTO_PORT_AGE, -+ &parse_uint_value, &v); -+ if (ret < 0) -+ goto free_kvlist; -+ pt->filter->proto_port = (uint16_t) v.val; -+ } -+ -+ cnt1 = rte_kvargs_count(kvlist, PDUMP_SPORT_ARG); -+ if (cnt1 == 1) { -+ v.min = 1; -+ v.max = UINT16_MAX; -+ ret = rte_kvargs_process(kvlist, PDUMP_SPORT_ARG, -+ &parse_uint_value, &v); -+ if (ret < 0) -+ goto free_kvlist; -+ pt->filter->s_port = (uint16_t) v.val; -+ } -+ -+ cnt1 = rte_kvargs_count(kvlist, PDUMP_DPORT_ARG); -+ if (cnt1 == 1) { -+ v.min = 1; -+ v.max = UINT16_MAX; -+ ret = rte_kvargs_process(kvlist, PDUMP_DPORT_ARG, -+ &parse_uint_value, &v); -+ if (ret < 0) -+ goto free_kvlist; -+ pt->filter->d_port = (uint16_t) v.val; -+ } -+ -+ cnt1 = rte_kvargs_count(kvlist, PDUMP_PROTO_ARG); -+ if (cnt1 == 1) { -+ ret = rte_kvargs_process(kvlist, PDUMP_PROTO_ARG, -+ &parse_proto, pt); -+ if (ret < 0) -+ goto free_kvlist; -+ } ++ /* filter parsing and validation */ ++ pt->filter = rte_zmalloc("pdump_filter", ++ sizeof(struct pdump_filter), 0); ++ cnt1 = rte_kvargs_count(kvlist, PDUMP_HOST_ARG); ++ if (cnt1 == 1) { ++ ret = rte_kvargs_process(kvlist, PDUMP_HOST_ARG, ++ &parse_host, pt); ++ if (ret < 0) ++ goto free_kvlist; ++ } ++ ++ cnt1 = rte_kvargs_count(kvlist, PDUMP_SRC_ARG); ++ if (cnt1 == 1) { ++ ret = rte_kvargs_process(kvlist, PDUMP_SRC_ARG, ++ &parse_host, pt); ++ if (ret < 0) ++ goto free_kvlist; ++ } ++ ++ cnt1 = rte_kvargs_count(kvlist, PDUMP_DST_ARG); ++ if (cnt1 == 1) { ++ ret = rte_kvargs_process(kvlist, PDUMP_DST_ARG, ++ &parse_host, pt); ++ if (ret < 0) ++ goto free_kvlist; ++ } ++ ++ ++ cnt1 = rte_kvargs_count(kvlist, PDUMP_PROTO_PORT_AGE); ++ if (cnt1 == 1) { ++ v.min = 1; ++ v.max = UINT16_MAX; ++ 
ret = rte_kvargs_process(kvlist, PDUMP_PROTO_PORT_AGE, ++ &parse_uint_value, &v); ++ if (ret < 0) ++ goto free_kvlist; ++ pt->filter->proto_port = (uint16_t) v.val; ++ } ++ ++ cnt1 = rte_kvargs_count(kvlist, PDUMP_SPORT_ARG); ++ if (cnt1 == 1) { ++ v.min = 1; ++ v.max = UINT16_MAX; ++ ret = rte_kvargs_process(kvlist, PDUMP_SPORT_ARG, ++ &parse_uint_value, &v); ++ if (ret < 0) ++ goto free_kvlist; ++ pt->filter->s_port = (uint16_t) v.val; ++ } ++ ++ cnt1 = rte_kvargs_count(kvlist, PDUMP_DPORT_ARG); ++ if (cnt1 == 1) { ++ v.min = 1; ++ v.max = UINT16_MAX; ++ ret = rte_kvargs_process(kvlist, PDUMP_DPORT_ARG, ++ &parse_uint_value, &v); ++ if (ret < 0) ++ goto free_kvlist; ++ pt->filter->d_port = (uint16_t) v.val; ++ } ++ ++ cnt1 = rte_kvargs_count(kvlist, PDUMP_PROTO_ARG); ++ if (cnt1 == 1) { ++ ret = rte_kvargs_process(kvlist, PDUMP_PROTO_ARG, ++ &parse_proto, pt); ++ if (ret < 0) ++ goto free_kvlist; ++ } + num_tuples++; @@ -272,7 +285,7 @@ index ccf2a1d..7e72c16 100644 if (ret < 0 || ret1 < 0) { cleanup_pdump_resources(); diff --git a/config/common_base b/config/common_base -index d12ae98..5c15ea0 100644 +index d12ae98..5ac8d11 100644 --- a/config/common_base +++ b/config/common_base @@ -451,7 +451,7 @@ CONFIG_RTE_LIBRTE_PMD_NULL=y @@ -285,7 +298,7 @@ index d12ae98..5c15ea0 100644 # # Compile example software rings based PMD diff --git a/lib/librte_pdump/rte_pdump.c b/lib/librte_pdump/rte_pdump.c -index 6c3a885..d9a3258 100644 +index 6c3a885..971e095 100644 --- a/lib/librte_pdump/rte_pdump.c +++ b/lib/librte_pdump/rte_pdump.c @@ -9,6 +9,10 @@ @@ -303,7 +316,7 @@ index 6c3a885..d9a3258 100644 return m_dup; } -+static int ++static int +inet_addr_equal(int af, const union addr *a1, + const union addr *a2) +{ @@ -328,7 +341,7 @@ index 6c3a885..d9a3258 100644 + default: + return -1; + } -+ ++ + return -1; +} +static int @@ -340,13 +353,13 @@ index 6c3a885..d9a3258 100644 + int prepend = 0; + uint16_t type = 0; + uint16_t iph_len = 0; -+ uint8_t proto = 0; ++ uint8_t 
proto = 0; + + int af; + + if (filter->af == 0 && filter->s_port == 0 && -+ filter->d_port == 0 && filter->proto == 0 && -+ filter->proto_port == 0) ++ filter->d_port == 0 && filter->proto == 0 && ++ filter->proto_port == 0) + return 0; + + eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *); @@ -374,53 +387,53 @@ index 6c3a885..d9a3258 100644 + af = AF_INET; + s_addr.in.s_addr = ip4->src_addr; + d_addr.in.s_addr = ip4->dst_addr; -+ proto = ip4->next_proto_id; -+ iph_len = (ip4->version_ihl & 0xf) << 2; ++ proto = ip4->next_proto_id; ++ iph_len = (ip4->version_ihl & 0xf) << 2; + } else if (type == rte_cpu_to_be_16(ETHER_TYPE_IPv6)) { + struct ipv6_hdr *ip6 = rte_pktmbuf_mtod(m, struct ipv6_hdr *); + af = AF_INET6; + rte_memcpy(&s_addr.in6, &ip6->src_addr, 16); + rte_memcpy(&d_addr.in6, &ip6->dst_addr, 16); -+ proto = ip6->proto; -+ iph_len = sizeof(struct ipv6_hdr); ++ proto = ip6->proto; ++ iph_len = sizeof(struct ipv6_hdr); + } else { + goto prepend; + } + + /*filter*/ -+ if (!inet_is_addr_any(af, &filter->s_addr) && ++ if (!inet_is_addr_any(af, &filter->s_addr) && + !inet_addr_equal(af, &filter->s_addr, &s_addr)) + goto prepend; -+ if (!inet_is_addr_any(af, &filter->d_addr) && ++ if (!inet_is_addr_any(af, &filter->d_addr) && + !inet_addr_equal(af, &filter->d_addr, &d_addr)) + goto prepend; -+ if (!inet_is_addr_any(af, &filter->host_addr) && ++ if (!inet_is_addr_any(af, &filter->host_addr) && + !inet_addr_equal(af, &filter->host_addr, &s_addr) && + !inet_addr_equal(af, &filter->host_addr, &d_addr)) + goto prepend; + + if (filter->proto && filter->proto != proto) -+ goto prepend; -+ -+ if (filter->s_port || filter->d_port || filter->proto_port) { -+ if (proto != IPPROTO_TCP && proto != IPPROTO_UDP) + goto prepend; -+ struct udp_hdr _uh; -+ const struct udp_hdr *uh; -+ uh = rte_pktmbuf_read(m, iph_len, sizeof(_uh), &_uh); -+ if (uh == NULL) -+ goto prepend; -+ if (filter->s_port && filter->s_port != rte_cpu_to_be_16(uh->src_port)) -+ goto prepend; -+ -+ if 
(filter->d_port && filter->d_port != rte_cpu_to_be_16(uh->dst_port)) -+ goto prepend; -+ -+ if (filter->proto_port && -+ filter->proto_port != rte_cpu_to_be_16(uh->src_port) && -+ filter->proto_port != rte_cpu_to_be_16(uh->dst_port)) -+ goto prepend; -+ } ++ ++ if (filter->s_port || filter->d_port || filter->proto_port) { ++ if (proto != IPPROTO_TCP && proto != IPPROTO_UDP) ++ goto prepend; ++ struct udp_hdr _uh; ++ const struct udp_hdr *uh; ++ uh = rte_pktmbuf_read(m, iph_len, sizeof(_uh), &_uh); ++ if (uh == NULL) ++ goto prepend; ++ if (filter->s_port && filter->s_port != rte_cpu_to_be_16(uh->src_port)) ++ goto prepend; ++ ++ if (filter->d_port && filter->d_port != rte_cpu_to_be_16(uh->dst_port)) ++ goto prepend; ++ ++ if (filter->proto_port && ++ filter->proto_port != rte_cpu_to_be_16(uh->src_port) && ++ filter->proto_port != rte_cpu_to_be_16(uh->dst_port)) ++ goto prepend; ++ } + + rte_pktmbuf_prepend(m, prepend); + return 0; @@ -472,7 +485,7 @@ index 6c3a885..d9a3258 100644 } cbs->ring = ring; cbs->mp = mp; -+ cbs->filter = filter; ++ cbs->filter = filter; cbs->cb = rte_eth_add_tx_callback(port, qid, pdump_tx, cbs); if (cbs->cb == NULL) { @@ -480,7 +493,7 @@ index 6c3a885..d9a3258 100644 uint16_t operation; struct rte_ring *ring; struct rte_mempool *mp; -+ struct pdump_filter *filter; ++ struct pdump_filter *filter; flags = p->flags; operation = p->op; @@ -488,7 +501,7 @@ index 6c3a885..d9a3258 100644 queue = p->data.en_v1.queue; ring = p->data.en_v1.ring; mp = p->data.en_v1.mp; -+ filter = p->data.en_v1.filter; ++ filter = p->data.en_v1.filter; } else { ret = rte_eth_dev_get_port_by_name(p->data.dis_v1.device, &port); @@ -496,7 +509,7 @@ index 6c3a885..d9a3258 100644 queue = p->data.dis_v1.queue; ring = p->data.dis_v1.ring; mp = p->data.dis_v1.mp; -+ filter = p->data.dis_v1.filter; ++ filter = p->data.dis_v1.filter; } /* validation if packet capture is for all queues */ @@ -519,7 +532,7 @@ index 6c3a885..d9a3258 100644 return ret; } diff --git 
a/lib/librte_pdump/rte_pdump.h b/lib/librte_pdump/rte_pdump.h -index 673a2b0..633b48f 100644 +index 673a2b0..e9568e0 100644 --- a/lib/librte_pdump/rte_pdump.h +++ b/lib/librte_pdump/rte_pdump.h @@ -15,6 +15,8 @@ @@ -536,18 +549,18 @@ index 673a2b0..633b48f 100644 }; +union addr { -+ struct in_addr in; -+ struct in6_addr in6; ++ struct in_addr in; ++ struct in6_addr in6; +}; + +struct pdump_filter { -+ int af; ++ int af; + union addr s_addr; + union addr d_addr; + union addr host_addr; //s_addr or d_addr + -+ uint8_t proto; -+ uint16_t proto_port; //s_port or d_port ++ uint8_t proto; ++ uint16_t proto_port; //s_port or d_port + uint16_t s_port; + uint16_t d_port; +}; @@ -563,3 +576,6 @@ index 673a2b0..633b48f 100644 /** * Initialize packet capturing handling * +-- +1.8.3.1 + diff --git a/patch/dpdk-stable-18.11.2/0006-enable-dpdk-eal-memory-debug.patch b/patch/dpdk-stable-18.11.2/0006-enable-dpdk-eal-memory-debug.patch index c286c15f3..3b13fd0bd 100644 --- a/patch/dpdk-stable-18.11.2/0006-enable-dpdk-eal-memory-debug.patch +++ b/patch/dpdk-stable-18.11.2/0006-enable-dpdk-eal-memory-debug.patch @@ -1,7 +1,7 @@ -From 19652889ed74b09aba6f22dfa96b19c009a7309a Mon Sep 17 00:00:00 2001 +From 51aa71b8c000a55e9caac0fe12b216d8bde05ac8 Mon Sep 17 00:00:00 2001 From: ywc Date: Mon, 25 Jan 2021 10:27:52 +0800 -Subject: [PATCH] enable dpdk eal memory debug +Subject: [PATCH 6/7] enable dpdk eal memory debug --- config/common_base | 2 +- @@ -10,7 +10,7 @@ Subject: [PATCH] enable dpdk eal memory debug 3 files changed, 20 insertions(+), 1 deletion(-) diff --git a/config/common_base b/config/common_base -index d12ae98..765ae2e 100644 +index 5ac8d11..ef15b0a 100644 --- a/config/common_base +++ b/config/common_base @@ -94,7 +94,7 @@ CONFIG_RTE_EAL_IGB_UIO=n diff --git a/patch/dpdk-stable-18.11.2/0007-Fix-bonding-mode-4-problem-caused-by-LACP-failure.patch b/patch/dpdk-stable-18.11.2/0007-Fix-bonding-mode-4-problem-caused-by-LACP-failure.patch new file mode 100644 index 
000000000..6fe3edfdc --- /dev/null +++ b/patch/dpdk-stable-18.11.2/0007-Fix-bonding-mode-4-problem-caused-by-LACP-failure.patch @@ -0,0 +1,62 @@ +From 9b1bcdc0419328b4a88128922567dacbe5630dd0 Mon Sep 17 00:00:00 2001 +From: yuwenchao +Date: Tue, 8 Jun 2021 11:45:11 +0800 +Subject: [PATCH 7/7] Fix bonding mode 4 problem caused by LACP failure. + +The problem is disscussed in Issue #725 of iqiyi/dpvs in detail. +https://github.com/iqiyi/dpvs/issues/725 + +Signed-off-by: yuwenchao +--- + drivers/net/bonding/rte_eth_bond_8023ad.c | 20 +++++++++++--------- + 1 file changed, 11 insertions(+), 9 deletions(-) + +diff --git a/drivers/net/bonding/rte_eth_bond_8023ad.c b/drivers/net/bonding/rte_eth_bond_8023ad.c +index 1e6a3fc..6d1cca5 100644 +--- a/drivers/net/bonding/rte_eth_bond_8023ad.c ++++ b/drivers/net/bonding/rte_eth_bond_8023ad.c +@@ -811,7 +811,6 @@ + struct port *port; + struct rte_eth_link link_info; + struct ether_addr slave_addr; +- struct rte_mbuf *lacp_pkt = NULL; + uint16_t slave_id; + uint16_t i; + +@@ -876,6 +875,7 @@ + /* Find LACP packet to this port. 
Do not check subtype, + * it is done in function that queued packet + */ ++ struct rte_mbuf *lacp_pkt = NULL; + int retval = rte_ring_dequeue(port->rx_ring, + (void **)&lacp_pkt); + +@@ -884,15 +884,17 @@ + + rx_machine_update(internals, slave_id, lacp_pkt); + } else { +- uint16_t rx_count = rte_eth_rx_burst(slave_id, +- internals->mode4.dedicated_queues.rx_qid, +- &lacp_pkt, 1); +- +- if (rx_count == 1) +- bond_mode_8023ad_handle_slow_pkt(internals, +- slave_id, lacp_pkt); +- else ++ uint16_t rx_count, j; ++ struct rte_mbuf *lacp_pkt[16] = { NULL }; ++ ++ rx_count = rte_eth_rx_burst(slave_id, internals->mode4.dedicated_queues.rx_qid, ++ &lacp_pkt[0], sizeof(lacp_pkt)/sizeof(struct rte_mbuf *)); ++ if (rx_count > 0) { ++ for (j = 0; j < rx_count; j++) ++ bond_mode_8023ad_handle_slow_pkt(internals, slave_id, lacp_pkt[j]); ++ } else { + rx_machine_update(internals, slave_id, NULL); ++ } + } + + periodic_machine(internals, slave_id); +-- +1.8.3.1 + From ed4ffd2f3e9810b0ed951b9700002b1931c0c1fa Mon Sep 17 00:00:00 2001 From: ywc689 Date: Tue, 8 Jun 2021 15:56:22 +0800 Subject: [PATCH 05/41] netif: add config option "dedicated_queues" for bonding mode 4 (802.3ad) It helps avoid the lacp failure problem for some pmd drivers(i.e. mlx5) when enabled dedicated queues in 802.3ad bonding mode. Signed-off-by: ywc689 --- conf/dpvs.conf.items | 4 ++ conf/dpvs.conf.sample | 1 + conf/dpvs.conf.single-bond.sample | 3 +- src/netif.c | 65 +++++++++++++++++++++++++++++-- 4 files changed, 69 insertions(+), 4 deletions(-) diff --git a/conf/dpvs.conf.items b/conf/dpvs.conf.items index df579f7ff..12c0fb0d1 100644 --- a/conf/dpvs.conf.items +++ b/conf/dpvs.conf.items @@ -67,6 +67,10 @@ netif_defs { slave dpdk1 primary dpdk0 kni_name bond0.kni + + ! supported options: + ! dedicated_queues=on|enable|off|disable, default on + options OPT1=VAL1;OPT2=VAL2;... 
} } diff --git a/conf/dpvs.conf.sample b/conf/dpvs.conf.sample index f9baf3f7a..c7c305a46 100644 --- a/conf/dpvs.conf.sample +++ b/conf/dpvs.conf.sample @@ -69,6 +69,7 @@ netif_defs { ! slave dpdk1 ! primary dpdk0 ! kni_name bond0.kni + ! options dedicated_queues=off # for mode 4 only !} } diff --git a/conf/dpvs.conf.single-bond.sample b/conf/dpvs.conf.single-bond.sample index aec33d3a8..8d8ef0d64 100644 --- a/conf/dpvs.conf.single-bond.sample +++ b/conf/dpvs.conf.single-bond.sample @@ -63,11 +63,12 @@ netif_defs { } bonding bond0 { - mode 0 + mode 4 slave dpdk0 slave dpdk2 primary dpdk0 kni_name bond0.kni + options dedicated_queues=off } } diff --git a/src/netif.c b/src/netif.c index b77cb4ef3..3bdda16fe 100644 --- a/src/netif.c +++ b/src/netif.c @@ -108,6 +108,10 @@ struct port_conf_stream { struct list_head port_list_node; }; +struct bond_options { + bool dedicated_queues_enable; +}; + struct bond_conf_stream { int port_id; char name[32]; @@ -115,6 +119,7 @@ struct bond_conf_stream { int mode; char primary[32]; char slaves[NETIF_MAX_BOND_SLAVES][32]; + struct bond_options options; struct list_head bond_list_node; }; @@ -560,6 +565,7 @@ static void bonding_handler(vector_t tokens) RTE_LOG(INFO, NETIF, "netif bonding config: %s\n", str); strncpy(bond_cfg->name, str, sizeof(bond_cfg->name)); bond_cfg->mode = NETIF_BOND_MODE_DEF; + bond_cfg->options.dedicated_queues_enable = true; list_add(&bond_cfg->bond_list_node, &bond_list); } @@ -648,6 +654,59 @@ static void bonding_kni_name_handler(vector_t tokens) FREE_PTR(str); } +static inline char * get_bonding_option_value(char *token) +{ + char *ptr, *saveptr = NULL, *ret = token; + + if (!token) + return NULL; + + for (ptr = token; ret == token; ptr = NULL) + ret = strtok_r(ptr, "=", &saveptr); + + return ret; +} + +static void bonding_options_handler(vector_t tokens) +{ + char *str; + char *opt, *val, *ptr, *saveptr = NULL; + + str = set_value(tokens); + struct bond_conf_stream *current_bond = 
list_entry(bond_list.next, + struct bond_conf_stream, bond_list_node); + + assert(str); + RTE_LOG(INFO, NETIF, "bonding %s options: %s\n", current_bond->name, str); + + for (ptr = str; ;ptr = NULL) { + opt = strtok_r(ptr, ";", &saveptr); + if (opt == NULL) + break; + val = get_bonding_option_value(opt); + + if (!strcmp(opt, "dedicated_queues")) { + if (current_bond->mode != BONDING_MODE_8023AD || !val) { + RTE_LOG(WARNING, NETIF, "invalid bonding %s mode 4 option: %s, value: %s\n", + current_bond->name, opt, val ?: "null"); + continue; + } + if (!strcasecmp(val, "on") || !strcasecmp(val, "enable")) + current_bond->options.dedicated_queues_enable = true; + else if (!strcasecmp(val, "off") || !strcasecmp(val, "disable")) + current_bond->options.dedicated_queues_enable = false; + else + RTE_LOG(WARNING, NETIF, "invalid bonding %s option value: %s=%s\n", + current_bond->name, opt, val); + } else { + RTE_LOG(WARNING, NETIF, "unsupported bonding %s option: %s\n", + current_bond->name, opt); + } + } + + FREE_PTR(str); +} + static void worker_defs_handler(vector_t tokens) { struct worker_conf_stream *worker_cfg, *worker_cfg_next; @@ -919,6 +978,7 @@ void install_netif_keywords(void) install_keyword("slave", bonding_slave_handler, KW_TYPE_INIT); install_keyword("primary", bonding_primary_handler, KW_TYPE_INIT); install_keyword("kni_name", bonding_kni_name_handler, KW_TYPE_INIT); + install_keyword("options", bonding_options_handler, KW_TYPE_INIT); install_sublevel_end(); install_keyword_root("worker_defs", worker_defs_handler); @@ -4254,9 +4314,8 @@ int netif_vdevs_add(void) RTE_LOG(INFO, NETIF, "create bondig device %s: mode=%d, primary=%s, socket=%d\n", bond_cfg->name, bond_cfg->mode, bond_cfg->primary, socket_id); bond_cfg->port_id = pid; /* relate port_id with port_name, used by netif_rte_port_alloc */ - if (bond_cfg->mode == BONDING_MODE_8023AD) { - if (!rte_eth_bond_8023ad_dedicated_queues_enable(bond_cfg->port_id)) - { + if (bond_cfg->mode == BONDING_MODE_8023AD && 
bond_cfg->options.dedicated_queues_enable) { + if (!rte_eth_bond_8023ad_dedicated_queues_enable(bond_cfg->port_id)) { RTE_LOG(INFO, NETIF, "bonding mode4 dedicated queues enable failed!\n"); } } From 3eed60164cd6e286fff5518369273f357216a2a3 Mon Sep 17 00:00:00 2001 From: ywc689 Date: Sat, 19 Sep 2020 18:36:51 +0800 Subject: [PATCH 06/41] dpvs netif_flow module using generic flow(rte_flow) --- include/netif_flow.h | 96 +++++++++++ src/netif_flow.c | 374 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 470 insertions(+) create mode 100644 include/netif_flow.h create mode 100644 src/netif_flow.c diff --git a/include/netif_flow.h b/include/netif_flow.h new file mode 100644 index 000000000..372a40899 --- /dev/null +++ b/include/netif_flow.h @@ -0,0 +1,96 @@ +/* + * DPVS is a software load balancer (Virtual Server) based on DPDK. + * + * Copyright (C) 2020 iQIYI (www.iqiyi.com). + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#ifndef __NETIF_FLOW_H__ +#define __NETIF_FLOW_H__ + +#include "netif.h" + +struct netif_flow_handler { + portid_t pid; + void *handler; +}; + +typedef struct netif_flow_handler_param { + int size; + int flow_num; + struct netif_flow_handler *handlers; // pointing to an netif_flow_handler array from outside +} netif_flow_handler_param_t; + +/* + * Add sapool flow rules (for fullnat and snat). + * + * @param dev [in] + * Target device for the flow rules, supporting bonding/physical ports. 
+ * @param cid [in] + * Lcore id to which to route the target flow. + * @param af [in] + * IP address family. + * @param addr [in] + * IP address of the sapool. + * @param port_base [in] + * TCP/UDP base port of the sapool. + * @param port_mask [in] + * TCP/UDP mask mask of the sapool. + * @param flows [out] + * Containing netif flow handlers if success, undefined otherwise. + * + * @return + * DPVS error code. + */ +int netif_sapool_flow_add(struct netif_port *dev, lcoreid_t cid, + int af, const union inet_addr *addr, + __be16 port_base, __be16 port_mask, + netif_flow_handler_param_t *flows); + +/* + * Delete saflow rules (for fullnat and snat). + * @param dev [in] + * Target device for the flow rules, supporting bonding/physical ports. + * @param cid [in] + * Lcore id to which to route the target flow. + * @param af [in] + * IP address family. + * @param addr [in] + * IP address of the sapool. + * @param port_base [in] + * TCP/UDP base port of the sapool. + * @param port_mask [in] + * TCP/UDP mask mask of the sapool. + * @param flows [in] + * Containing netif flow handlers to delete. + * + * @return + * DPVS error code. + */ +int netif_sapool_flow_del(struct netif_port *dev, lcoreid_t cid, + int af, const union inet_addr *addr, + __be16 port_base, __be16 port_mask, + netif_flow_handler_param_t *flows); + +/* + * Flush all flow rules on a port. * + * @param dev + * Target device, supporting bonding/physical ports. + * + * @return + * DPVS error code. + */ +int netif_flow_flush(struct netif_port *dev); + +#endif diff --git a/src/netif_flow.c b/src/netif_flow.c new file mode 100644 index 000000000..6fcd4a76c --- /dev/null +++ b/src/netif_flow.c @@ -0,0 +1,374 @@ +/* + * DPVS is a software load balancer (Virtual Server) based on DPDK. + * + * Copyright (C) 2020 iQIYI (www.iqiyi.com). + * All Rights Reserved. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include +#include "vlan.h" +#include "netif_flow.h" + +#define RTE_LOGTYPE_FLOW RTE_LOGTYPE_USER1 + +/* sapool pattern stack: ETH | IP | TCP/UDP | END */ +#define SAPOOL_PATTERN_NUM 4 +/* sapool action stack: QUEUE | END */ +#define SAPOOL_ACTION_NUM 2 + +/* dpvs use only one flow group */ +#define NETIF_FLOW_GROUP 0 + +/* DPVS flow type and priority. + * The enum value matters. Lower value denotes higher priority. */ +typedef enum { + NETIF_FLOW_PRIO_SAPOOL = 1, // sapool flow rules + NETIF_FLOW_PRIO_TUNNEL, // TODO, gre tunnel flow rules + // more ... +} netif_flow_type_prio_t; + +/* + * Create a rte_flow on a physical port. 
+ */ +static inline int __netif_flow_create(struct netif_port *dev, + const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct netif_flow_handler *flow) +{ + struct rte_flow_error flow_error; + + if (unlikely(!flow || !dev || (dev->type != PORT_TYPE_GENERAL && + dev->type != PORT_TYPE_BOND_SLAVE))) + return EDPVS_INVAL; + + if (rte_flow_validate(dev->id, attr, pattern, actions, &flow_error)) { + RTE_LOG(WARNING, FLOW, "rte_flow_validate on %s failed -- %d, %s\n", + dev->name, flow_error.type, flow_error.message); + return EDPVS_DPDKAPIFAIL; + } + + flow->handler = rte_flow_create(dev->id, attr, pattern, actions, &flow_error); + if (!flow->handler) { + flow->pid = 0; + RTE_LOG(WARNING, FLOW, "rte_flow_create on %s failed -- %d, %s\n", + dev->name, flow_error.type, flow_error.message); + return EDPVS_DPDKAPIFAIL; + } + flow->pid = dev->id; + + return EDPVS_OK; +} + +/* + * Remove a specified rte_flow. + */ +static int __netif_flow_destroy(struct netif_flow_handler *flow) +{ + struct netif_port *dev; + struct rte_flow_error flow_error; + + if (unlikely(!flow || !flow->handler)) + return EDPVS_INVAL; + + dev = netif_port_get(flow->pid); + if (unlikely(!dev || (dev->type != PORT_TYPE_GENERAL && + dev->type != PORT_TYPE_BOND_SLAVE))) + return EDPVS_INVAL; + + if (rte_flow_destroy(flow->pid, (struct rte_flow *)flow->handler, &flow_error)) { + RTE_LOG(WARNING, FLOW, "rte_flow_destroy on %s failed -- %d, %s\n", + dev->name, flow_error.type, flow_error.message); + return EDPVS_DPDKAPIFAIL; + } + + return EDPVS_OK; +} + +/* + * Create rte_flow on specified device. 
+ */ +static int netif_flow_create(struct netif_port *dev, + const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + netif_flow_handler_param_t *flows) +{ + int err; + + if (unlikely(!dev || !flows)) + return EDPVS_INVAL; + + if (dev->type == PORT_TYPE_VLAN) { + struct vlan_dev_priv *vlan = netif_priv(dev); + if (unlikely(!vlan || !vlan->real_dev)) + return EDPVS_INVAL; + dev = vlan->real_dev; + } + + if (dev->type == PORT_TYPE_GENERAL) { + if (unlikely(flows->size < 1 || !flows->handlers)) + return EDPVS_INVAL; + err = __netif_flow_create(dev, attr, pattern, actions, &flows->handlers[0]); + flows->flow_num = (err == EDPVS_OK) ? 1 : 0; + return err; + } + + if (dev->type == PORT_TYPE_BOND_MASTER) { + int i, slave_nb; + slave_nb = dev->bond->master.slave_nb; + + if (unlikely(flows->size < slave_nb || !flows->handlers)) + return EDPVS_INVAL; + for (i = 0; i < slave_nb; i++) { + err = __netif_flow_create(dev, attr, pattern, actions, &flows->handlers[i]); + if (err != EDPVS_OK) { + while (--i >= 0) + __netif_flow_destroy(&flows->handlers[i]); + return err; + } + } + flows->flow_num = slave_nb; + return EDPVS_OK; + } + + return EDPVS_INVAL; +} + +/* + * Destroy specified rte_flow. + */ +static int netif_flow_destroy(netif_flow_handler_param_t *flows) +{ + int i, err, ret = EDPVS_OK; + + if (unlikely(!flows || flows->flow_num >= flows->size || !flows->handlers)) + return EDPVS_INVAL; + + for (i = 0; i < flows->flow_num; i++) { + err = __netif_flow_destroy(&flows->handlers[i]); + if (err != EDPVS_OK) + ret = err; + } + + return ret; +} + +/* + * Flush rte_flow of a physical port. 
+ */ +static inline int __netif_flow_flush(struct netif_port *dev) +{ + struct rte_flow_error flow_error; + + if (unlikely(!dev || (dev->type != PORT_TYPE_GENERAL && + dev->type != PORT_TYPE_BOND_SLAVE))) + return EDPVS_INVAL; + + if (rte_flow_flush(dev->id, &flow_error)) { + RTE_LOG(WARNING, FLOW, "rte_flow_flush on %s failed -- %d, %s, %s\n", + dev->name, flow_error.type, flow_error.cause, flow_error.message); + return EDPVS_DPDKAPIFAIL; + } + + return EDPVS_OK; +} + +/* + * Flush rte_flow on specified device. + * + * Note: + * It invalidates all rte_flow handlers related to this device. + * If the handlers are saved elsewhere previously, don't use any of them after being flushed. + */ +int netif_flow_flush(struct netif_port *dev) +{ + if (unlikely(!dev)) + return EDPVS_INVAL; + + if (dev->type == PORT_TYPE_GENERAL) { + if (__netif_flow_flush(dev) != EDPVS_OK) + return EDPVS_RESOURCE; + } + + if (dev->type == PORT_TYPE_BOND_MASTER) { + int i, slave_nb, err; + err = EDPVS_OK; + slave_nb = dev->bond->master.slave_nb; + for (i = 0; i < slave_nb; i++) { + if (__netif_flow_flush(dev) != EDPVS_OK) + err = EDPVS_RESOURCE; + } + return err; + } + + return EDPVS_INVAL; +} + +/* + * Set sa_pool flow rules. 
+ * + * Ether | IPv4/IPv6 | TCP/UDP + */ +int netif_sapool_flow_add(struct netif_port *dev, lcoreid_t cid, + int af, const union inet_addr *addr, + __be16 port_base, __be16 port_mask, + netif_flow_handler_param_t *flows) +{ + int err, ret = EDPVS_OK, nflows = 0; + char ipbuf[64]; + struct rte_flow_attr attr = { + .group = NETIF_FLOW_GROUP, + .priority = NETIF_FLOW_PRIO_SAPOOL, + .ingress = 1, + .egress = 0, + .transfer = 0, + }; + struct rte_flow_item pattern[SAPOOL_PATTERN_NUM]; + struct rte_flow_action action[SAPOOL_ACTION_NUM]; + netif_flow_handler_param_t resp; + + struct rte_flow_item_ipv4 ip_spec, ip_mask; + struct rte_flow_item_ipv6 ip6_spec, ip6_mask; + struct rte_flow_item_tcp tcp_spec, tcp_mask; + struct rte_flow_item_udp udp_spec, udp_mask; + + queueid_t queue_id; + struct rte_flow_action_queue queue; + + if (unlikely(!dev || !addr || !flows)) + return EDPVS_INVAL; + if (unlikely(flows->size < 4 || !flows->handlers)) + return EDPVS_INVAL; + + memset(pattern, 0, sizeof(pattern)); + memset(action, 0, sizeof(action)); + + /* create pattern stack */ + pattern[0].type = RTE_FLOW_ITEM_TYPE_ETH; + + /* create action stack */ + err = netif_get_queue(dev, cid, &queue_id); + if (unlikely(err != EDPVS_OK)) + return err; + queue.index = queue_id; + action[0].type = RTE_FLOW_ACTION_TYPE_QUEUE; + action[0].conf = &queue; + action[1].type = RTE_FLOW_ACTION_TYPE_END; + + /* create pattern stack */ + if (af == AF_INET) { + memset(&ip_spec, 0, sizeof(struct rte_flow_item_ipv4)); + memset(&ip_mask, 0, sizeof(struct rte_flow_item_ipv4)); + ip_spec.hdr.dst_addr = addr->in.s_addr; + ip_mask.hdr.dst_addr = htonl(0xffffffff); + pattern[1].type = RTE_FLOW_ITEM_TYPE_IPV4; + pattern[1].spec = &ip_spec; + pattern[1].mask = &ip_mask; + } else if (af == AF_INET6) { + memset(&ip6_spec, 0, sizeof(struct rte_flow_item_ipv6)); + memset(&ip6_mask, 0, sizeof(struct rte_flow_item_ipv6)); + memcpy(&ip6_spec.hdr.dst_addr, &addr->in6, sizeof(ip6_spec.hdr.dst_addr)); + 
memset(&ip6_mask.hdr.dst_addr, 0xff, sizeof(ip6_mask.hdr.dst_addr)); + pattern[1].type = RTE_FLOW_ITEM_TYPE_IPV6; + pattern[1].spec = &ip6_spec; + pattern[1].mask = &ip6_mask; + } else { + return EDPVS_INVAL; + } + memset(&tcp_spec, 0, sizeof(struct rte_flow_item_tcp)); + tcp_spec.hdr.dst_port = port_base; + tcp_mask.hdr.dst_port = port_mask; + pattern[2].type = RTE_FLOW_ITEM_TYPE_TCP; + pattern[2].spec = &tcp_spec; + pattern[2].mask = &tcp_mask; + pattern[3].type = RTE_FLOW_ITEM_TYPE_END; + + /* set tcp flow */ + resp.size = flows->size; + resp.flow_num = 0; + resp.handlers = &flows->handlers[0]; + err = netif_flow_create(dev, &attr, pattern, action, &resp); + if (err) { + ret = EDPVS_RESOURCE; + RTE_LOG(ERR, FLOW, "%s: adding tcp sapool flow failed: %s ip %s port %d(0x%04X) mask 0x%04X," + " queue %d lcore %2d\n", __func__, dev->name, + inet_ntop(af, addr, ipbuf, sizeof(ipbuf)) ? : "::", + ntohs(port_base), ntohs(port_base), ntohs(port_mask), queue_id, cid); + } else { + nflows += resp.flow_num; + RTE_LOG(INFO, FLOW, "%s: adding tcp sapool flow succeed: %s ip %s port %d(0x%04X) mask 0x%04X," + " queue %d lcore %2d\n", __func__, dev->name, + inet_ntop(af, addr, ipbuf, sizeof(ipbuf)) ? : "::", + ntohs(port_base), ntohs(port_base), ntohs(port_mask), queue_id, cid); + } + + memset(&udp_spec, 0, sizeof(struct rte_flow_item_udp)); + udp_spec.hdr.dst_port = port_base; + udp_mask.hdr.dst_port = port_mask; + pattern[2].type = RTE_FLOW_ITEM_TYPE_UDP; + pattern[2].spec = &udp_spec; + pattern[2].mask = &udp_mask; + /* set udp flow */ + resp.size = flows->size - nflows; + resp.flow_num = 0; + resp.handlers = &flows->handlers[nflows]; + err = netif_flow_create(dev, &attr, pattern, action, &resp); + if (err) { + ret = EDPVS_RESOURCE; + RTE_LOG(ERR, FLOW, "%s: adding udp sapool flow failed: %s ip %s port %d(0x%04X) mask 0x%04X," + " queue %d lcore %2d\n", __func__, dev->name, + inet_ntop(af, addr, ipbuf, sizeof(ipbuf)) ? 
: "::", + ntohs(port_base), ntohs(port_base), ntohs(port_mask), queue_id, cid); + } else { + nflows += resp.flow_num; + RTE_LOG(INFO, FLOW, "%s: adding udp sapool flow succeed: %s ip %s port %d(0x%04X) mask 0x%04X," + " queue %d lcore %2d\n", __func__, dev->name, + inet_ntop(af, addr, ipbuf, sizeof(ipbuf)) ? : "::", + ntohs(port_base), ntohs(port_base), ntohs(port_mask), queue_id, cid); + } + + flows->flow_num = nflows; + return ret; +} + +/* + * Delete sa_pool flow rules. + * + * Ether | IPv4/IPv6 | TCP/UDP + */ +int netif_sapool_flow_del(struct netif_port *dev, lcoreid_t cid, + int af, const union inet_addr *addr, + __be16 port_base, __be16 port_mask, + netif_flow_handler_param_t *flows) +{ + int err, ret = EDPVS_OK; + char ipbuf[64]; + + err = netif_flow_destroy(flows); + + if (err) { + err = EDPVS_RESOURCE; + RTE_LOG(ERR, FLOW, "%s: deleting sapool flow failed: %s ip %s port %d(0x%04X) mask 0x%04X\n", + __func__, dev->name, inet_ntop(af, addr, ipbuf, sizeof(ipbuf)) ? : "::", + ntohs(port_base), ntohs(port_base), ntohs(port_mask)); + } else { + flows->flow_num = 0; + RTE_LOG(INFO, FLOW, "%s: deleting sapool flow failed: %s ip %s port %d(0x%04X) mask 0x%04X\n", + __func__, dev->name, inet_ntop(af, addr, ipbuf, sizeof(ipbuf)) ? 
: "::", + ntohs(port_base), ntohs(port_base), ntohs(port_mask)); + } + + return ret; +} From dd24c38f6ed5d7bb415a275f04a12e76adad007c Mon Sep 17 00:00:00 2001 From: ywc689 Date: Mon, 21 Sep 2020 20:52:24 +0800 Subject: [PATCH 07/41] sapool: replace flow director with rte_flow --- conf/dpvs.bond.conf.sample | 23 +- conf/dpvs.conf.items | 7 +- conf/dpvs.conf.sample | 8 +- conf/dpvs.conf.single-bond.sample | 13 +- conf/dpvs.conf.single-nic.sample | 8 +- include/ipvs/conn.h | 1 - include/netif.h | 9 - include/sa_pool.h | 32 +-- src/netif.c | 366 +----------------------------- src/sa_pool.c | 216 ++++++------------ src/vlan.c | 25 -- 11 files changed, 96 insertions(+), 612 deletions(-) diff --git a/conf/dpvs.bond.conf.sample b/conf/dpvs.bond.conf.sample index f6e554c8d..532af4c34 100644 --- a/conf/dpvs.bond.conf.sample +++ b/conf/dpvs.bond.conf.sample @@ -33,11 +33,6 @@ netif_defs { queue_number 8 descriptor_number 1024 } - fdir { - mode perfect - pballoc 64k - status matched - } ! mtu 1500 ! promisc_mode ! kni_name dpdk0.kni @@ -53,11 +48,6 @@ netif_defs { queue_number 8 descriptor_number 1024 } - fdir { - mode perfect - pballoc 64k - status matched - } ! mtu 1500 ! promisc_mode ! kni_name dpdk1.kni @@ -74,11 +64,6 @@ netif_defs { queue_number 8 descriptor_number 1024 } - fdir { - mode perfect - pballoc 64k - status matched - } ! mtu 1500 ! promisc_mode ! kni_name dpdk2.kni @@ -94,11 +79,6 @@ netif_defs { queue_number 8 descriptor_number 1024 } - fdir { - mode perfect - pballoc 64k - status matched - } ! mtu 1500 ! promisc_mode ! kni_name dpdk3.kni @@ -386,5 +366,6 @@ ipvs_defs { ! 
sa_pool config sa_pool { - pool_hash_size 16 + pool_hash_size 16 + flow_enable on } diff --git a/conf/dpvs.conf.items b/conf/dpvs.conf.items index 12c0fb0d1..db0047747 100644 --- a/conf/dpvs.conf.items +++ b/conf/dpvs.conf.items @@ -34,12 +34,6 @@ netif_defs { queue_number 6 <16, 0-16> descriptor_number 512 <512, 16-8192> } - fdir { - filter on - mode perfect - pballoc 64k <64k, 64k|128k|256k> - status matched - } ! mtu 1500 <1500,0-9000> ! promisc_mode ! kni_name dpdk0.kni @@ -266,4 +260,5 @@ ipvs_defs { sa_pool { pool_hash_size 16 <16, 1-128> + flow_enable on } diff --git a/conf/dpvs.conf.sample b/conf/dpvs.conf.sample index c7c305a46..76082c416 100644 --- a/conf/dpvs.conf.sample +++ b/conf/dpvs.conf.sample @@ -33,11 +33,6 @@ netif_defs { queue_number 8 descriptor_number 1024 } - fdir { - mode perfect - pballoc 64k - status matched - } ! mtu 1500 ! promisc_mode kni_name dpdk0.kni @@ -338,5 +333,6 @@ ipvs_defs { ! sa_pool config sa_pool { - pool_hash_size 16 + pool_hash_size 16 + flow_enable on } diff --git a/conf/dpvs.conf.single-bond.sample b/conf/dpvs.conf.single-bond.sample index 8d8ef0d64..7df1a4a78 100644 --- a/conf/dpvs.conf.single-bond.sample +++ b/conf/dpvs.conf.single-bond.sample @@ -32,11 +32,6 @@ netif_defs { queue_number 8 descriptor_number 1024 } - fdir { - mode perfect - pballoc 64k - status matched - } ! mtu 1500 ! promisc_mode ! kni_name dpdk0.kni @@ -52,11 +47,6 @@ netif_defs { queue_number 8 descriptor_number 1024 } - fdir { - mode perfect - pballoc 64k - status matched - } ! mtu 1500 ! promisc_mode ! kni_name dpdk2.kni @@ -286,5 +276,6 @@ ipvs_defs { ! 
sa_pool config sa_pool { - pool_hash_size 16 + pool_hash_size 16 + flow_enable on } diff --git a/conf/dpvs.conf.single-nic.sample b/conf/dpvs.conf.single-nic.sample index 40a34dd99..7fed5b4e9 100644 --- a/conf/dpvs.conf.single-nic.sample +++ b/conf/dpvs.conf.single-nic.sample @@ -32,11 +32,6 @@ netif_defs { queue_number 8 descriptor_number 1024 } - fdir { - mode perfect - pballoc 64k - status matched - } ! mtu 1500 ! promisc_mode kni_name dpdk0.kni @@ -257,5 +252,6 @@ ipvs_defs { ! sa_pool config sa_pool { - pool_hash_size 16 + pool_hash_size 16 + flow_enable on } diff --git a/include/ipvs/conn.h b/include/ipvs/conn.h index c49f535f2..cb64d9707 100644 --- a/include/ipvs/conn.h +++ b/include/ipvs/conn.h @@ -81,7 +81,6 @@ struct dp_vs_conn_stats { rte_atomic64_t outbytes; } __rte_cache_aligned; -struct dp_vs_fdir_filt; struct dp_vs_proto; struct dp_vs_conn { diff --git a/include/netif.h b/include/netif.h index 0226457d8..28c5e0dc1 100644 --- a/include/netif.h +++ b/include/netif.h @@ -193,9 +193,6 @@ struct netif_ops { int (*op_stop)(struct netif_port *dev); int (*op_xmit)(struct rte_mbuf *m, struct netif_port *dev); int (*op_set_mc_list)(struct netif_port *dev); - int (*op_filter_supported)(struct netif_port *dev, enum rte_filter_type fltype); - int (*op_set_fdir_filt)(struct netif_port *dev, enum rte_filter_op op, - const struct rte_eth_fdir_filter *filt); int (*op_get_queue)(struct netif_port *dev, lcoreid_t cid, queueid_t *qid); int (*op_get_link)(struct netif_port *dev, struct rte_eth_link *link); int (*op_get_promisc)(struct netif_port *dev, bool *promisc); @@ -279,10 +276,6 @@ int netif_register_pkt(struct pkt_type *pt); int netif_unregister_pkt(struct pkt_type *pt); /**************************** port API ******************************/ -int netif_fdir_filter_set(struct netif_port *port, enum rte_filter_op opcode, - const struct rte_eth_fdir_filter *fdir_flt); -void netif_mask_fdir_filter(int af, const struct netif_port *port, - struct rte_eth_fdir_filter 
*filt); struct netif_port* netif_port_get(portid_t id); /* port_conf can be NULL for default port configure */ int netif_print_port_conf(const struct rte_eth_conf *port_conf, char *buf, int *len); @@ -339,6 +332,4 @@ static inline uint16_t dpvs_rte_eth_dev_count(void) #endif } -extern bool dp_vs_fdir_filter_enable; - #endif /* __DPVS_NETIF_H__ */ diff --git a/include/sa_pool.h b/include/sa_pool.h index dfcdc4ace..958aa0428 100644 --- a/include/sa_pool.h +++ b/include/sa_pool.h @@ -23,10 +23,10 @@ * ways to achieve the goal. one is to calc RSS the same way of * NIC to select the correct CPU for connect. * - * the way we use is based on Flow-Director (fdir), allocate + * the way we use is based on DPDK Generic Flow(rte_flow), allocate * local source (e.g., ) for each CPU core in advance. - * and redirect the back traffic to that CPU by fdir. it does not - * need two many fdir rules, the number of rules can be equal to + * and redirect the back traffic to that CPU by rte_flow. it does not + * need two many flow rules, the number of rules can be equal to * the number of CPU core. * * LVS use laddr and try to see if is used when @@ -42,9 +42,10 @@ #ifndef __DPVS_SA_POOL__ #define __DPVS_SA_POOL__ -#define MAX_PORT 65536 +#include "netif_flow.h" -#define MAX_FDIR_PROTO 2 +#define MAX_PORT 65536 +#define MAX_SA_FLOW 4 struct sa_pool_stats { uint32_t used_cnt; @@ -58,8 +59,7 @@ struct sa_pool_stats { * 2. use uint8_t flag * 3. remove sa_entry.addr, and get IP from sa_pool->ifa * 4. to __packed__ sa_entry. - * 5. alloc sa_entries[] for 65536/cpu_num only. - * 6. create sa_entry_pool only if pool_hash hit. + * 5. create sa_entry_pool only if pool_hash hit. * since when dest (like RS) num may small. */ @@ -87,21 +87,21 @@ struct sa_entry_pool { /* no lock needed because inet_ifaddr.sa_pool * is per-lcore. 
*/ struct sa_pool { - struct inet_ifaddr *ifa; /* back-pointer */ + struct inet_ifaddr *ifa; /* back-pointer */ - uint16_t low; /* min port */ - uint16_t high; /* max port */ - rte_atomic32_t refcnt; + uint16_t low; /* min port */ + uint16_t high; /* max port */ + rte_atomic32_t refcnt; /* hashed pools by dest's . if no dest provided, * just use first pool. it's not need create/destroy pool * for each dest, that'll be too complicated. */ - struct sa_entry_pool *pool_hash; - uint8_t pool_hash_sz; - uint32_t flags; /* SA_POOL_F_XXX */ + struct sa_entry_pool *pool_hash; + uint8_t pool_hash_sz; + uint32_t flags; /* SA_POOL_F_XXX */ - /* fdir filter ID */ - uint32_t filter_id[MAX_FDIR_PROTO]; + int flow_num; + struct netif_flow_handler flows[MAX_SA_FLOW]; }; int sa_pool_init(void); diff --git a/src/netif.c b/src/netif.c index 3bdda16fe..5a3350a70 100644 --- a/src/netif.c +++ b/src/netif.c @@ -38,6 +38,7 @@ #include "parser/parser.h" #include "neigh.h" #include "scheduler.h" +#include "netif_flow.h" #include #include @@ -99,10 +100,6 @@ struct port_conf_stream { int tx_queue_nb; int tx_desc_nb; - enum rte_fdir_mode fdir_mode; - enum rte_fdir_pballoc_type fdir_pballoc; - enum rte_fdir_status_mode fdir_status; - bool promisc_mode; struct list_head port_list_node; @@ -158,8 +155,6 @@ static struct list_head port_ntab[NETIF_PORT_TABLE_BUCKETS]; /* hashed by name * /* function declarations */ static void kni_lcore_loop(void *dummy); -bool dp_vs_fdir_filter_enable = true; - bool is_lcore_id_valid(lcoreid_t cid) { if (unlikely(cid >= DPVS_MAX_LCORE)) @@ -274,9 +269,6 @@ static void device_handler(vector_t tokens) port_cfg->promisc_mode = false; strncpy(port_cfg->rss, "tcp", sizeof(port_cfg->rss)); - port_cfg->fdir_mode = RTE_FDIR_MODE_PERFECT; - port_cfg->fdir_pballoc = RTE_FDIR_PBALLOC_64K; - port_cfg->fdir_status = RTE_FDIR_REPORT_STATUS; list_add(&port_cfg->port_list_node, &port_list); } @@ -392,120 +384,6 @@ static void tx_desc_nb_handler(vector_t tokens) FREE_PTR(str); } 
-static void fdir_mode_handler(vector_t tokens) -{ - char *mode, *str = set_value(tokens); - struct port_conf_stream *current_device = list_entry(port_list.next, - struct port_conf_stream, port_list_node); - bool use_default = false; - assert(str); - - mode = strlwr(str); - - if (!strncmp(mode, "none", sizeof("none"))) - current_device->fdir_mode = RTE_FDIR_MODE_NONE; - else if (!strncmp(mode, "signature", sizeof("signature"))) - current_device->fdir_mode = RTE_FDIR_MODE_SIGNATURE; - else if (!strncmp(mode, "perfect", sizeof("perfect"))) - current_device->fdir_mode = RTE_FDIR_MODE_PERFECT; - else if (!strncmp(mode, "perfect_mac_vlan", sizeof("perfect_mac_vlan"))) - current_device->fdir_mode = RTE_FDIR_MODE_PERFECT_MAC_VLAN; - else if (!strncmp(mode, "perfect_tunnel", sizeof("perfect_tunnel"))) - current_device->fdir_mode = RTE_FDIR_MODE_PERFECT_TUNNEL; - else { - use_default = true; - current_device->fdir_mode = RTE_FDIR_MODE_PERFECT; - } - - if (use_default) - RTE_LOG(WARNING, NETIF, "invalid %s:fdir_mode '%s', " - "use default 'perfect'\n", current_device->name, mode); - else - RTE_LOG(INFO, NETIF, "%s:fdir_mode = %s\n", current_device->name, mode); - - FREE_PTR(str); -} - -static void fdir_pballoc_handler(vector_t tokens) -{ - char *pballoc, *str = set_value(tokens); - struct port_conf_stream *current_device = list_entry(port_list.next, - struct port_conf_stream, port_list_node); - bool use_default = false; - assert(str); - - pballoc = strlwr(str); - - if (!strncmp(pballoc, "64k", sizeof("64k"))) - current_device->fdir_pballoc = RTE_FDIR_PBALLOC_64K; - else if (!strncmp(pballoc, "128k", sizeof("128k"))) - current_device->fdir_pballoc = RTE_FDIR_PBALLOC_128K; - else if (!strncmp(pballoc, "256k", sizeof("256k"))) - current_device->fdir_pballoc = RTE_FDIR_PBALLOC_256K; - else { - use_default = true; - current_device->fdir_pballoc = RTE_FDIR_PBALLOC_64K; - } - - if (use_default) - RTE_LOG(WARNING, NETIF, "invalid %s:fdir_pballoc '%s', " - "use default '64k'\n", 
current_device->name, pballoc); - else - RTE_LOG(INFO, NETIF, "%s:fdir_pballoc = %s\n", - current_device->name, pballoc); - - FREE_PTR(str); -} - -static void fdir_status_handler(vector_t tokens) -{ - char *status, *str = set_value(tokens); - struct port_conf_stream *current_device = list_entry(port_list.next, - struct port_conf_stream, port_list_node); - bool use_default = false; - assert(str); - - status = strlwr(str); - - if (!strncmp(status, "close", sizeof("close"))) - current_device->fdir_status = RTE_FDIR_NO_REPORT_STATUS; - else if (!strncmp(status, "matched", sizeof("matched"))) - current_device->fdir_status = RTE_FDIR_REPORT_STATUS; - else if (!strncmp(status, "always", sizeof("always"))) - current_device->fdir_status = RTE_FDIR_REPORT_STATUS_ALWAYS; - else { - use_default = true; - current_device->fdir_status = RTE_FDIR_REPORT_STATUS; - } - - if (use_default) - RTE_LOG(WARNING, NETIF, "invalid %s:fdir_status '%s', " - "use default 'matched'\n", current_device->name, status); - else - RTE_LOG(INFO, NETIF, "%s:fdir_status = %s\n", - current_device->name, status); - - FREE_PTR(str); -} - -static void fdir_filter_handler(vector_t tokens) -{ - char *str = set_value(tokens); - - assert(str); - - if (strcasecmp(str, "on") == 0) - dp_vs_fdir_filter_enable = true; - else if (strcasecmp(str, "off") == 0) - dp_vs_fdir_filter_enable = false; - else - RTE_LOG(WARNING, IPVS, "invalid fdir:filter %s\n", str); - - RTE_LOG(INFO, IPVS, "fdir:filter = %s\n", dp_vs_fdir_filter_enable ? 
"on" : "off"); - - FREE_PTR(str); -} - static void promisc_mode_handler(vector_t tokens) { struct port_conf_stream *current_device = list_entry(port_list.next, @@ -961,13 +839,6 @@ void install_netif_keywords(void) install_keyword("queue_number", tx_queue_number_handler, KW_TYPE_INIT); install_keyword("descriptor_number", tx_desc_nb_handler, KW_TYPE_INIT); install_sublevel_end(); - install_keyword("fdir", NULL, KW_TYPE_INIT); - install_sublevel(); - install_keyword("mode", fdir_mode_handler, KW_TYPE_INIT); - install_keyword("pballoc", fdir_pballoc_handler, KW_TYPE_INIT); - install_keyword("status", fdir_status_handler, KW_TYPE_INIT); - install_keyword("filter", fdir_filter_handler, KW_TYPE_INIT); - install_sublevel_end(); install_keyword("promisc_mode", promisc_mode_handler, KW_TYPE_INIT); install_keyword("mtu", custom_mtu_handler,KW_TYPE_INIT); install_keyword("kni_name", kni_name_handler, KW_TYPE_INIT); @@ -3043,46 +2914,6 @@ static int bond_set_mc_list(struct netif_port *dev) return err; } -static int bond_filter_supported(struct netif_port *dev, enum rte_filter_type fltype) -{ - int i, err = EDPVS_NOTSUPP; - struct netif_port *slave; - - if (dev->type != PORT_TYPE_BOND_MASTER) - return EDPVS_INVAL; - - for (i = 0; i < dev->bond->master.slave_nb; i++) { - slave = dev->bond->master.slaves[i]; - err = rte_eth_dev_filter_supported(slave->id, fltype); - if (err < 0) - return err; - } - - return err; -} - -static int bond_set_fdir_filt(struct netif_port *dev, enum rte_filter_op op, - const struct rte_eth_fdir_filter *filt) -{ - int i, err; - struct netif_port *slave; - - if (dev->type != PORT_TYPE_BOND_MASTER) - return EDPVS_INVAL; - - for (i = 0; i < dev->bond->master.slave_nb; i++) { - slave = dev->bond->master.slaves[i]; - err = netif_fdir_filter_set(slave, op, filt); - if (err != EDPVS_OK) { - RTE_LOG(WARNING, NETIF, "%s: fail to set %s's fdir filter - %d\n", - __func__, slave->name, err); - return err; - } - } - - return EDPVS_OK; -} - static int 
dpdk_set_mc_list(struct netif_port *dev) { struct ether_addr addrs[NETIF_MAX_HWADDR]; @@ -3111,92 +2942,12 @@ static int dpdk_set_mc_list(struct netif_port *dev) return EDPVS_OK; } -static int dpdk_filter_supported(struct netif_port *dev, enum rte_filter_type fltype) -{ - return rte_eth_dev_filter_supported(dev->id, fltype); -} - -void netif_mask_fdir_filter(int af, const struct netif_port *port, - struct rte_eth_fdir_filter *filt) -{ - struct rte_eth_fdir_info fdir_info; - const struct rte_eth_fdir_masks *fmask; - union rte_eth_fdir_flow *flow = &filt->input.flow; - - /* There exists a defect here. If the netif_port 'port' is not PORT_TYPE_GENERAL, - mask fdir_filter of the port would fail. The correct way to accomplish the - function is to register this method for all device types. Considering the flow - is not changed after masking, we just skip netif_ports other than physical ones. */ - if (port->type != PORT_TYPE_GENERAL) - return; - - if (rte_eth_dev_filter_ctrl(port->id, RTE_ETH_FILTER_FDIR, - RTE_ETH_FILTER_INFO, &fdir_info) < 0) { - RTE_LOG(DEBUG, NETIF, "%s: Fail to fetch fdir info of %s !\n", - __func__, port->name); - return; - } - fmask = &fdir_info.mask; - - /* ipv4 flow */ - if (af == AF_INET) { - flow->ip4_flow.src_ip &= fmask->ipv4_mask.src_ip; - flow->ip4_flow.dst_ip &= fmask->ipv4_mask.dst_ip; - flow->ip4_flow.tos &= fmask->ipv4_mask.tos; - flow->ip4_flow.ttl &= fmask->ipv4_mask.ttl; - flow->ip4_flow.proto &= fmask->ipv4_mask.proto; - flow->tcp4_flow.src_port &= fmask->src_port_mask; - flow->tcp4_flow.dst_port &= fmask->dst_port_mask; - return; - } - - /* ipv6 flow */ - if (af == AF_INET6) { - flow->ipv6_flow.src_ip[0] &= fmask->ipv6_mask.src_ip[0]; - flow->ipv6_flow.src_ip[1] &= fmask->ipv6_mask.src_ip[1]; - flow->ipv6_flow.src_ip[2] &= fmask->ipv6_mask.src_ip[2]; - flow->ipv6_flow.src_ip[3] &= fmask->ipv6_mask.src_ip[3]; - flow->ipv6_flow.dst_ip[0] &= fmask->ipv6_mask.dst_ip[0]; - flow->ipv6_flow.dst_ip[1] &= fmask->ipv6_mask.dst_ip[1]; - 
flow->ipv6_flow.dst_ip[2] &= fmask->ipv6_mask.dst_ip[2]; - flow->ipv6_flow.dst_ip[3] &= fmask->ipv6_mask.dst_ip[3]; - flow->ipv6_flow.tc &= fmask->ipv6_mask.tc; - flow->ipv6_flow.proto &= fmask->ipv6_mask.proto; - flow->ipv6_flow.hop_limits &= fmask->ipv6_mask.hop_limits; - flow->tcp6_flow.src_port &= fmask->src_port_mask; - flow->tcp6_flow.dst_port &= fmask->dst_port_mask; - return; - } -} - -static int dpdk_set_fdir_filt(struct netif_port *dev, enum rte_filter_op op, - const struct rte_eth_fdir_filter *filt) -{ - int ret; - - rte_rwlock_write_lock(&dev->dev_lock); - ret = rte_eth_dev_filter_ctrl(dev->id, - RTE_ETH_FILTER_FDIR, op, (void *)filt); - rte_rwlock_write_unlock(&dev->dev_lock); - if (ret < 0) { - RTE_LOG(WARNING, NETIF, "%s: fdir filt set failed for %s -- %s(%d)\n!", - __func__, dev->name, rte_strerror(-ret), ret); - return EDPVS_DPDKAPIFAIL; - } - - return EDPVS_OK; -} - static struct netif_ops dpdk_netif_ops = { .op_set_mc_list = dpdk_set_mc_list, - .op_set_fdir_filt = dpdk_set_fdir_filt, - .op_filter_supported = dpdk_filter_supported, }; static struct netif_ops bond_netif_ops = { .op_set_mc_list = bond_set_mc_list, - .op_set_fdir_filt = bond_set_fdir_filt, - .op_filter_supported = bond_filter_supported, }; static inline void setup_dev_of_flags(struct netif_port *port) @@ -3414,17 +3165,6 @@ int netif_get_stats(struct netif_port *dev, struct rte_eth_stats *stats) return EDPVS_OK; } -int netif_fdir_filter_set(struct netif_port *port, enum rte_filter_op opcode, - const struct rte_eth_fdir_filter *fdir_flt) -{ - assert(port && port->netif_ops); - - if (!port->netif_ops->op_set_fdir_filt) - return EDPVS_NOTSUPP; - - return port->netif_ops->op_set_fdir_filt(port, opcode, fdir_flt); -} - int netif_port_conf_get(struct netif_port *port, struct rte_eth_conf *eth_conf) { @@ -3474,33 +3214,6 @@ static inline void port_mtu_set(struct netif_port *port) } -/* - * fdir mask must be set according to configured slave lcore number - * */ -inline static int 
netif_port_fdir_dstport_mask_set(struct netif_port *port) -{ - uint8_t slave_nb; - int shift; - - netif_get_slave_lcores(&slave_nb, NULL); - for (shift = 0; (0x1 << shift) < slave_nb; shift++) - ; - if (shift >= 16) { - RTE_LOG(ERR, NETIF, "%s: %s's fdir dst_port_mask init failed\n", - __func__, port->name); - return EDPVS_NOTSUPP; - } -#if RTE_VERSION >= 0x10040010 - port->dev_conf.fdir_conf.mask.dst_port_mask = htons(~((~0x0) << shift)); -#else - port->dev_conf.fdir_conf.mask.dst_port_mask = ~((~0x0) << shift); -#endif - - RTE_LOG(INFO, NETIF, "%s:dst_port_mask=%0x\n", port->name, - port->dev_conf.fdir_conf.mask.dst_port_mask); - return EDPVS_OK; -} - static int rss_resolve_proc(char *rss) { int rss_value = 0; @@ -3600,11 +3313,7 @@ static void fill_port_config(struct netif_port *port, char *promisc_on) port->dev_conf.rx_adv_conf.rss_conf.rss_hf |= rss_resolve_proc(rss); } - port->dev_conf.fdir_conf.mode = cfg_stream->fdir_mode; - port->dev_conf.fdir_conf.pballoc = cfg_stream->fdir_pballoc; - port->dev_conf.fdir_conf.status = cfg_stream->fdir_status; port->mtu = cfg_stream->mtu; - if (cfg_stream->rx_queue_nb > 0 && port->nrxq > cfg_stream->rx_queue_nb) { RTE_LOG(WARNING, NETIF, "%s: rx-queues(%d) configured in workers != " "rx-queues(%d) configured in device, setup %d rx-queues for %s\n", @@ -3708,17 +3417,6 @@ static int add_bond_slaves(struct netif_port *port) return EDPVS_OK; } -/* flush FDIR filters for all physical dpdk ports */ -static int fdir_filter_flush(const struct netif_port *port) -{ - if (!port || port->type != PORT_TYPE_GENERAL) - return EDPVS_OK; - if (rte_eth_dev_filter_ctrl(port->id, RTE_ETH_FILTER_FDIR, - RTE_ETH_FILTER_FLUSH, NULL) < 0) - return EDPVS_DPDKAPIFAIL; - return EDPVS_OK; -} - /* * Note: Invoke the function after port is allocated and lcores are configured. 
*/ @@ -3751,11 +3449,9 @@ int netif_port_start(struct netif_port *port) } // device configure - if ((ret = netif_port_fdir_dstport_mask_set(port)) != EDPVS_OK) - return ret; if ((ret = rte_eth_dev_set_mtu(port->id,port->mtu)) != EDPVS_OK) return ret; if (port->flag & NETIF_PORT_FLAG_TX_IP_CSUM_OFFLOAD) port->dev_conf.txmode.offloads |= DEV_TX_OFFLOAD_IPV4_CKSUM; if (port->flag & NETIF_PORT_FLAG_TX_UDP_CSUM_OFFLOAD) @@ -3869,10 +3570,10 @@ int netif_port_start(struct netif_port *port) } } - /* flush FDIR filters */ - ret = fdir_filter_flush(port); + /* flush rte_flows */ + ret = netif_flow_flush(port); if (ret != EDPVS_OK) { - RTE_LOG(WARNING, NETIF, "fail to flush FDIR filters for device %s\n", port->name); + RTE_LOG(WARNING, NETIF, "fail to flush rte_flows on device %s\n", port->name); return ret; } @@ -4044,35 +3745,6 @@ static struct rte_eth_conf default_port_conf = { .txmode = { .mq_mode = ETH_MQ_TX_NONE, }, - .fdir_conf = { - .mode = RTE_FDIR_MODE_PERFECT, - .pballoc = RTE_FDIR_PBALLOC_64K, - .status = RTE_FDIR_REPORT_STATUS/*_ALWAYS*/, - .mask = { - .vlan_tci_mask = 0x0, - .ipv4_mask = { - .src_ip = 0x00000000, - .dst_ip = 0xFFFFFFFF, - }, - .ipv6_mask = { - .src_ip = { 0, 0, 0, 0 }, - .dst_ip = { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF }, - }, - .src_port_mask = 0x0000, - - /* to be changed according to slave lcore number in use */ - .dst_port_mask = 0x00F8, - - .mac_addr_byte_mask = 0x00, - .tunnel_type_mask = 0, - .tunnel_id_mask = 0, - }, - .drop_queue = 127, - .flex_conf = { - .nb_payloads = 0, - .nb_flexmasks = 0, - }, - }, }; int netif_print_port_conf(const struct rte_eth_conf *port_conf, char *buf, int *len) @@ -4114,30 +3786,6 @@ int netif_print_port_conf(const struct rte_eth_conf *port_conf, char *buf, int * strncat(buf, tbuf1, *len - strlen(buf) - 1); } - memset(tbuf1, 0, sizeof(tbuf1)); - snprintf(tbuf1, sizeof(tbuf1), - "fdir ipv4 mask: src 0x%08x dst 0x%08x\n" - 
"fdir ipv6 mask: src 0x%08x:%08x:%08x:%08x dst 0x%08x:%08x:%08x:%08x\n" - "fdir port mask: src 0x%04x dst 0x%04x\n", - port_conf->fdir_conf.mask.ipv4_mask.src_ip, - port_conf->fdir_conf.mask.ipv4_mask.dst_ip, - port_conf->fdir_conf.mask.ipv6_mask.src_ip[0], - port_conf->fdir_conf.mask.ipv6_mask.src_ip[1], - port_conf->fdir_conf.mask.ipv6_mask.src_ip[2], - port_conf->fdir_conf.mask.ipv6_mask.src_ip[3], - port_conf->fdir_conf.mask.ipv6_mask.dst_ip[0], - port_conf->fdir_conf.mask.ipv6_mask.dst_ip[1], - port_conf->fdir_conf.mask.ipv6_mask.dst_ip[2], - port_conf->fdir_conf.mask.ipv6_mask.dst_ip[3], - port_conf->fdir_conf.mask.src_port_mask, - port_conf->fdir_conf.mask.dst_port_mask - ); - if (*len - strlen(buf) - 1 < strlen(tbuf1)) { - RTE_LOG(WARNING, NETIF, "[%s] no enough buf\n", __func__); - return EDPVS_INVAL; - } - strncat(buf, tbuf1, *len - strlen(buf) - 1); - *len = strlen(buf); return EDPVS_OK; } diff --git a/src/sa_pool.c b/src/sa_pool.c index e115cc564..a0909339d 100644 --- a/src/sa_pool.c +++ b/src/sa_pool.c @@ -23,10 +23,10 @@ * ways to achieve the goal. one is to calc RSS the same way of * NIC to select the currect CPU for connect. * - * the way we use is based on Flow-Director (fdir), allocate + * the way we use is based on DPDK Generic Flow(rte_flow), allocate * local source (e.g., ) for each CPU core in advance. - * and redirect the back traffic to that CPU by fdir. it does not - * need too many fdir rules, the number of rules can be equal to + * and redirect the back traffic to that CPU by rte_flow. it does not + * need too many flow rules, the number of rules can be equal to * the number of CPU core. 
* * LVS use laddr and try to see if is used when @@ -69,130 +69,25 @@ enum { SA_F_USED = 0x01, }; -struct sa_fdir { +struct sa_flow { /* the ports one lcore can use means - * "(fdir.mask & port) == port_base" */ + * "(sa_flow.mask & port) == port_base" */ uint16_t mask; /* filter's port mask */ lcoreid_t lcore; __be16 port_base; - uint16_t soft_id; /* current unsed soft-id, - increase after use. */ uint16_t shift; }; -static struct sa_fdir sa_fdirs[DPVS_MAX_LCORE]; +static struct sa_flow sa_flows[DPVS_MAX_LCORE]; static uint8_t sa_nlcore; static uint64_t sa_lcore_mask; -static uint8_t sa_pool_hash_size = SAPOOL_DEF_HASH_SZ; - -static int __add_del_filter(int af, struct netif_port *dev, lcoreid_t cid, - const union inet_addr *dip, __be16 dport, - uint32_t filter_id[MAX_FDIR_PROTO], bool add) -{ - queueid_t queue; - int err; - enum rte_filter_op op, rop; - - struct rte_eth_fdir_filter filt[MAX_FDIR_PROTO] = { - { - .action.behavior = RTE_ETH_FDIR_ACCEPT, - .action.report_status = RTE_ETH_FDIR_REPORT_ID, - .soft_id = filter_id[0], - }, - { - .action.behavior = RTE_ETH_FDIR_ACCEPT, - .action.report_status = RTE_ETH_FDIR_REPORT_ID, - .soft_id = filter_id[1], - }, - }; - - if (af == AF_INET) { - filt[0].input.flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_TCP; - filt[0].input.flow.tcp4_flow.ip.dst_ip = dip->in.s_addr; - filt[0].input.flow.tcp4_flow.dst_port = dport; - filt[1].input.flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_UDP; - filt[1].input.flow.udp4_flow.ip.dst_ip = dip->in.s_addr; - filt[1].input.flow.udp4_flow.dst_port = dport; - } else if (af == AF_INET6) { - filt[0].input.flow_type = RTE_ETH_FLOW_NONFRAG_IPV6_TCP; - memcpy(filt[0].input.flow.ipv6_flow.dst_ip, &dip->in6, sizeof(struct in6_addr)); - filt[0].input.flow.tcp6_flow.dst_port = dport; - filt[1].input.flow_type = RTE_ETH_FLOW_NONFRAG_IPV6_UDP; - memcpy(filt[1].input.flow.ipv6_flow.dst_ip, &dip->in6, sizeof(struct in6_addr)); - filt[1].input.flow.udp6_flow.dst_port = dport; - } else { - return EDPVS_NOTSUPP; - } - - if 
(dev->netif_ops && dev->netif_ops->op_filter_supported) { - if (dev->netif_ops->op_filter_supported(dev, RTE_ETH_FILTER_FDIR) < 0) { - if (dev->nrxq <= 1) - return EDPVS_OK; - RTE_LOG(ERR, SAPOOL, "%s: FDIR is not supported by device %s. Only" - " single rxq can be configured.\n", __func__, dev->name); - return EDPVS_NOTSUPP; - } - } else { - RTE_LOG(ERR, SAPOOL, "%s: FDIR support of device %s is not known.\n", - __func__, dev->name); - return EDPVS_INVAL; - } - - err = netif_get_queue(dev, cid, &queue); - if (err != EDPVS_OK) - return err; - - filt[0].action.rx_queue = filt[1].action.rx_queue = queue; - op = add ? RTE_ETH_FILTER_ADD : RTE_ETH_FILTER_DELETE; - - netif_mask_fdir_filter(af, dev, &filt[0]); - netif_mask_fdir_filter(af, dev, &filt[1]); - - err = netif_fdir_filter_set(dev, op, &filt[0]); - if (err != EDPVS_OK) - return err; - - err = netif_fdir_filter_set(dev, op, &filt[1]); - if (err != EDPVS_OK) { - rop = add ? RTE_ETH_FILTER_DELETE : RTE_ETH_FILTER_ADD; - netif_fdir_filter_set(dev, rop, &filt[0]); - return err; - } - -#ifdef CONFIG_DPVS_SAPOOL_DEBUG - { - char ipaddr[64]; - RTE_LOG(DEBUG, SAPOOL, "FDIR: %s %s %s TCP/UDP " - "ip %s port %d (0x%04x) mask 0x%04X queue %d lcore %2d filterID %d/%d\n", - add ? "add" : "del", dev->name, - af == AF_INET ? "IPv4" : "IPv6", - inet_ntop(af, dip, ipaddr, sizeof(ipaddr)) ? 
: "::", - ntohs(dport), ntohs(dport), sa_fdirs[cid].mask, queue, cid, - filter_id[0], filter_id[1]); - } -#endif - - return err; -} - -static inline int sa_add_filter(int af, struct netif_port *dev, lcoreid_t cid, - const union inet_addr *dip, __be16 dport, - uint32_t filter_id[MAX_FDIR_PROTO]) -{ - return __add_del_filter(af, dev, cid, dip, dport, filter_id, true); -} - -static inline int sa_del_filter(int af, struct netif_port *dev, lcoreid_t cid, - const union inet_addr *dip, __be16 dport, - uint32_t filter_id[MAX_FDIR_PROTO]) -{ - return __add_del_filter(af, dev, cid, dip, dport, filter_id, false); -} +static uint8_t sa_pool_hash_size = SAPOOL_DEF_HASH_SZ; +static bool sapool_flow_enable = true; static int sa_pool_alloc_hash(struct sa_pool *ap, uint8_t hash_sz, - const struct sa_fdir *fdir) + const struct sa_flow *flow) { int hash; struct sa_entry_pool *pool; @@ -202,7 +97,7 @@ static int sa_pool_alloc_hash(struct sa_pool *ap, uint8_t hash_sz, uint32_t sa_entry_size; uint32_t sa_entry_num; - sa_entry_num = MAX_PORT >> fdir->shift; + sa_entry_num = MAX_PORT >> flow->shift; sa_entry_pool_size = sizeof(struct sa_entry_pool) * hash_sz; sa_entry_size = sizeof(struct sa_entry) * sa_entry_num * hash_sz; @@ -214,7 +109,7 @@ static int sa_pool_alloc_hash(struct sa_pool *ap, uint8_t hash_sz, ap->pool_hash_sz = hash_sz; sep = (struct sa_entry *)&ap->pool_hash[hash_sz]; - /* the big loop takes about 17ms */ + /* the big loop may take tens of milliseconds */ for (hash = 0; hash < hash_sz; hash++) { pool = &ap->pool_hash[hash]; @@ -223,14 +118,14 @@ static int sa_pool_alloc_hash(struct sa_pool *ap, uint8_t hash_sz, pool->used_cnt = 0; pool->free_cnt = 0; - pool->shift = fdir->shift; + pool->shift = flow->shift; pool->sa_entries = &sep[sa_entry_num * hash]; for (port = ap->low; port <= ap->high; port++) { struct sa_entry *sa; - if (fdir->mask && - ((uint16_t)port & fdir->mask) != ntohs(fdir->port_base)) + if (flow->mask && + ((uint16_t)port & flow->mask) != 
ntohs(flow->port_base)) continue; sa = &pool->sa_entries[(uint16_t)(port >> pool->shift)]; @@ -246,7 +141,7 @@ static int sa_pool_alloc_hash(struct sa_pool *ap, uint8_t hash_sz, static int sa_pool_free_hash(struct sa_pool *ap) { - /* FIXME: it may takes about 3ms to free the huge `sa->pool_hash`, and + /* FIXME: it may take about 3ms to free the huge `sa->pool_hash`, and * @rte_free uses a spinlock to protect its heap. If multiple workers * free their sapools simultaneously, a worker may be stuck up to 3*N ms, * where `N` is the dpvs worker number. @@ -254,7 +149,7 @@ static int sa_pool_free_hash(struct sa_pool *ap) * use mempool for sapool could solve the problem. we still use @rte_free * here considering sapool is not frequently changed. */ - rte_free(ap->pool_hash); /* it may takes up to 3ms */ + rte_free(ap->pool_hash); /* it may take up to 3ms */ ap->pool_hash_sz = 0; return EDPVS_OK; } @@ -262,23 +157,21 @@ static int sa_pool_free_hash(struct sa_pool *ap) static int sa_pool_add_filter(struct inet_ifaddr *ifa, struct sa_pool *ap, lcoreid_t cid) { - int err = EDPVS_OK; - uint32_t filtids[MAX_FDIR_PROTO]; - struct sa_fdir *fdir = &sa_fdirs[cid]; + int err; + struct sa_flow *flow = &sa_flows[cid]; - if (dp_vs_fdir_filter_enable) { - /* if add filter failed, waste some soft-id is acceptable. 
*/ - filtids[0] = fdir->soft_id++; - filtids[1] = fdir->soft_id++; + netif_flow_handler_param_t flow_handlers = { + .size = MAX_SA_FLOW, + .flow_num = 0, + .handlers = ap->flows, + }; - err = sa_add_filter(ifa->af, ifa->idev->dev, cid, &ifa->addr, - fdir->port_base, filtids); + if (!sapool_flow_enable) + return EDPVS_OK; - if (err == EDPVS_OK) { - ap->filter_id[0] = filtids[0]; - ap->filter_id[1] = filtids[1]; - } - } + err = netif_sapool_flow_add(ifa->idev->dev, cid, ifa->af, &ifa->addr, + flow->port_base, flow->mask, &flow_handlers); + ap->flow_num = flow_handlers.flow_num; return err; } @@ -286,21 +179,25 @@ static int sa_pool_add_filter(struct inet_ifaddr *ifa, struct sa_pool *ap, static int sa_pool_del_filter(struct inet_ifaddr *ifa, struct sa_pool *ap, lcoreid_t cid) { - int err = EDPVS_OK; - struct sa_fdir *fdir = &sa_fdirs[cid]; + struct sa_flow *flow = &sa_flows[cid]; - if (dp_vs_fdir_filter_enable) - err = sa_del_filter(ifa->af, ifa->idev->dev, cid, &ifa->addr, - fdir->port_base, ap->filter_id); /* thread-safe ? 
*/ + netif_flow_handler_param_t flow_handlers = { + .size = MAX_SA_FLOW, + .flow_num = ap->flow_num, + .handlers = ap->flows, + }; - return err; + if (!sapool_flow_enable) + return EDPVS_OK; + + return netif_sapool_flow_del(ifa->idev->dev, cid, ifa->af, &ifa->addr, + flow->port_base, flow->mask, &flow_handlers); } int sa_pool_create(struct inet_ifaddr *ifa, uint16_t low, uint16_t high) { int err; struct sa_pool *ap; - struct sa_fdir *fdir; lcoreid_t cid = rte_lcore_id(); if (cid > 64 || !((sa_lcore_mask & (1UL << cid)))) { @@ -317,8 +214,6 @@ int sa_pool_create(struct inet_ifaddr *ifa, uint16_t low, uint16_t high) return EDPVS_INVAL; } - fdir = &sa_fdirs[cid]; - ap = rte_zmalloc(NULL, sizeof(struct sa_pool), 0); if (unlikely(!ap)) return EDPVS_NOMEM; @@ -329,7 +224,7 @@ int sa_pool_create(struct inet_ifaddr *ifa, uint16_t low, uint16_t high) ap->flags = 0; rte_atomic32_set(&ap->refcnt, 1); - err = sa_pool_alloc_hash(ap, sa_pool_hash_size, fdir); + err = sa_pool_alloc_hash(ap, sa_pool_hash_size, &sa_flows[cid]); if (err != EDPVS_OK) { goto free_ap; } @@ -836,11 +731,10 @@ int sa_pool_init(void) continue; assert(rte_lcore_is_enabled(cid) && cid != rte_get_master_lcore()); - sa_fdirs[cid].mask = ~((~0x0) << shift); - sa_fdirs[cid].lcore = cid; - sa_fdirs[cid].port_base = htons(port_base); - sa_fdirs[cid].soft_id = 0; - sa_fdirs[cid].shift = shift; + sa_flows[cid].mask = ~((~0x0) << shift); + sa_flows[cid].lcore = cid; + sa_flows[cid].port_base = htons(port_base); + sa_flows[cid].shift = shift; port_base++; } @@ -856,7 +750,7 @@ int sa_pool_term(void) /* * config file */ -static void sa_pool_hash_size_conf(vector_t tokens) +static void sa_pool_hash_size_handler(vector_t tokens) { char *str = set_value(tokens); int size; @@ -874,8 +768,26 @@ static void sa_pool_hash_size_conf(vector_t tokens) FREE_PTR(str); } +static void sa_pool_flow_enable_handler(vector_t tokens) +{ + char *str = set_value(tokens); + + if (!str) + return; + + if (!strcasecmp(str, "on")) + 
sapool_flow_enable = true; + else if (!strcasecmp(str, "off")) + sapool_flow_enable = false; + else + RTE_LOG(WARNING, SAPOOL, "invalid sa_pool:flow_enable value %s, keeping %s\n", str, sapool_flow_enable ? "on" : "off"); + + FREE_PTR(str); +} + void install_sa_pool_keywords(void) { install_keyword_root("sa_pool", NULL); - install_keyword("pool_hash_size", sa_pool_hash_size_conf, KW_TYPE_INIT); + install_keyword("pool_hash_size", sa_pool_hash_size_handler, KW_TYPE_INIT); + install_keyword("flow_enable", sa_pool_flow_enable_handler, KW_TYPE_INIT); } diff --git a/src/vlan.c b/src/vlan.c index 10b35824b..1f312fab5 100644 --- a/src/vlan.c +++ b/src/vlan.c @@ -130,29 +130,6 @@ static int vlan_set_mc_list(struct netif_port *dev) return err; } -static int vlan_filter_supported(struct netif_port *dev, enum rte_filter_type fltype) -{ - struct netif_port *rdev; - struct vlan_dev_priv *vlan = netif_priv(dev); - assert(vlan && vlan->real_dev); - - rdev = vlan->real_dev; - - if (!rdev->netif_ops || !rdev->netif_ops->op_filter_supported) - return EDPVS_NOTSUPP; - - return rdev->netif_ops->op_filter_supported(rdev, fltype); -} - -static int vlan_set_fdir_filt(struct netif_port *dev, enum rte_filter_op op, - const struct rte_eth_fdir_filter *filt) -{ - struct vlan_dev_priv *vlan = netif_priv(dev); - assert(vlan && vlan->real_dev); - - return netif_fdir_filter_set(vlan->real_dev, op, filt); -} - static int vlan_get_queue(struct netif_port *dev, lcoreid_t cid, queueid_t *qid) { struct vlan_dev_priv *vlan = netif_priv(dev); @@ -188,8 +165,6 @@ static int vlan_get_stats(struct netif_port *dev, struct rte_eth_stats *stats) static struct netif_ops vlan_netif_ops = { .op_xmit = vlan_xmit, .op_set_mc_list = vlan_set_mc_list, - .op_filter_supported = vlan_filter_supported, - .op_set_fdir_filt = vlan_set_fdir_filt, - .op_get_queue = vlan_get_queue, .op_get_link = vlan_get_link, .op_get_promisc = vlan_get_promisc, From e4e548664732f38238ad8decc6542e0c46b263a2 Mon Sep 17 00:00:00 2001 From: ywc689 Date: Tue, 22 Sep 2020 
14:48:29 +0800 Subject: [PATCH 08/41] netif_flow: bugfix --- src/netif_flow.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/src/netif_flow.c b/src/netif_flow.c index 6fcd4a76c..874ff86a6 100644 --- a/src/netif_flow.c +++ b/src/netif_flow.c @@ -132,7 +132,7 @@ static int netif_flow_create(struct netif_port *dev, if (unlikely(flows->size < slave_nb || !flows->handlers)) return EDPVS_INVAL; for (i = 0; i < slave_nb; i++) { - err = __netif_flow_create(dev, attr, pattern, actions, &flows->handlers[i]); + err = __netif_flow_create(dev->bond->master.slaves[i], attr, pattern, actions, &flows->handlers[i]); if (err != EDPVS_OK) { while (--i >= 0) __netif_flow_destroy(&flows->handlers[i]); @@ -153,7 +153,7 @@ static int netif_flow_destroy(netif_flow_handler_param_t *flows) { int i, err, ret = EDPVS_OK; - if (unlikely(!flows || flows->flow_num >= flows->size || !flows->handlers)) + if (unlikely(!flows || flows->flow_num > flows->size || !flows->handlers)) return EDPVS_INVAL; for (i = 0; i < flows->flow_num; i++) { @@ -197,9 +197,17 @@ int netif_flow_flush(struct netif_port *dev) if (unlikely(!dev)) return EDPVS_INVAL; + if (dev->type == PORT_TYPE_VLAN) { + struct vlan_dev_priv *vlan = netif_priv(dev); + if (unlikely(!vlan || !vlan->real_dev)) + return EDPVS_INVAL; + dev = vlan->real_dev; + } + if (dev->type == PORT_TYPE_GENERAL) { if (__netif_flow_flush(dev) != EDPVS_OK) return EDPVS_RESOURCE; + return EDPVS_OK; } if (dev->type == PORT_TYPE_BOND_MASTER) { @@ -207,7 +215,7 @@ int netif_flow_flush(struct netif_port *dev) err = EDPVS_OK; slave_nb = dev->bond->master.slave_nb; for (i = 0; i < slave_nb; i++) { - if (__netif_flow_flush(dev) != EDPVS_OK) + if (__netif_flow_flush(dev->bond->master.slaves[i]) != EDPVS_OK) err = EDPVS_RESOURCE; } return err; @@ -233,7 +241,7 @@ int netif_sapool_flow_add(struct netif_port *dev, lcoreid_t cid, .priority = NETIF_FLOW_PRIO_SAPOOL, .ingress = 1, .egress = 0, - .transfer = 0, + //.transfer = 0, 
}; struct rte_flow_item pattern[SAPOOL_PATTERN_NUM]; struct rte_flow_action action[SAPOOL_ACTION_NUM]; @@ -255,9 +263,6 @@ int netif_sapool_flow_add(struct netif_port *dev, lcoreid_t cid, memset(pattern, 0, sizeof(pattern)); memset(action, 0, sizeof(action)); - /* create pattern stack */ - pattern[0].type = RTE_FLOW_ITEM_TYPE_ETH; - /* create action stack */ err = netif_get_queue(dev, cid, &queue_id); if (unlikely(err != EDPVS_OK)) @@ -268,6 +273,8 @@ int netif_sapool_flow_add(struct netif_port *dev, lcoreid_t cid, action[1].type = RTE_FLOW_ACTION_TYPE_END; /* create pattern stack */ + pattern[0].type = RTE_FLOW_ITEM_TYPE_ETH; + if (af == AF_INET) { memset(&ip_spec, 0, sizeof(struct rte_flow_item_ipv4)); memset(&ip_mask, 0, sizeof(struct rte_flow_item_ipv4)); @@ -288,6 +295,7 @@ int netif_sapool_flow_add(struct netif_port *dev, lcoreid_t cid, return EDPVS_INVAL; } memset(&tcp_spec, 0, sizeof(struct rte_flow_item_tcp)); + memset(&tcp_mask, 0, sizeof(struct rte_flow_item_tcp)); tcp_spec.hdr.dst_port = port_base; tcp_mask.hdr.dst_port = port_mask; pattern[2].type = RTE_FLOW_ITEM_TYPE_TCP; @@ -315,6 +323,7 @@ int netif_sapool_flow_add(struct netif_port *dev, lcoreid_t cid, } memset(&udp_spec, 0, sizeof(struct rte_flow_item_udp)); + memset(&udp_mask, 0, sizeof(struct rte_flow_item_udp)); udp_spec.hdr.dst_port = port_base; udp_mask.hdr.dst_port = port_mask; pattern[2].type = RTE_FLOW_ITEM_TYPE_UDP; @@ -365,7 +374,7 @@ int netif_sapool_flow_del(struct netif_port *dev, lcoreid_t cid, ntohs(port_base), ntohs(port_base), ntohs(port_mask)); } else { flows->flow_num = 0; - RTE_LOG(INFO, FLOW, "%s: deleting sapool flow failed: %s ip %s port %d(0x%04X) mask 0x%04X\n", + RTE_LOG(INFO, FLOW, "%s: deleting sapool flow succeed: %s ip %s port %d(0x%04X) mask 0x%04X\n", __func__, dev->name, inet_ntop(af, addr, ipbuf, sizeof(ipbuf)) ? 
: "::", ntohs(port_base), ntohs(port_base), ntohs(port_mask)); } From 5df6130f422332913c377b54259cec4002bc233d Mon Sep 17 00:00:00 2001 From: ywc689 Date: Sun, 27 Sep 2020 17:55:48 +0800 Subject: [PATCH 09/41] netif_flow: lock rte_flow api if pmd driver implementation is not thread-safe --- src/netif_flow.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/src/netif_flow.c b/src/netif_flow.c index 874ff86a6..ab723fcf2 100644 --- a/src/netif_flow.c +++ b/src/netif_flow.c @@ -22,6 +22,9 @@ #define RTE_LOGTYPE_FLOW RTE_LOGTYPE_USER1 +/* uncomment the macro if rte_flow pmd driver is not thread-safe. */ +// #define CONFIG_DEV_FLOW_LOCK + /* sapool pattern stack: ETH | IP | TCP/UDP | END */ #define SAPOOL_PATTERN_NUM 4 /* sapool action stack: QUEUE | END */ @@ -38,6 +41,20 @@ typedef enum { // more ... } netif_flow_type_prio_t; +static inline void netif_flow_lock(struct netif_port *dev) +{ +#ifdef CONFIG_DEV_FLOW_LOCK + rte_rwlock_write_lock(&dev->dev_lock); +#endif +} + +static inline void netif_flow_unlock(struct netif_port *dev) +{ +#ifdef CONFIG_DEV_FLOW_LOCK + rte_rwlock_write_unlock(&dev->dev_lock); +#endif +} + /* * Create a rte_flow on a physical port. 
*/ @@ -53,13 +70,16 @@ static inline int __netif_flow_create(struct netif_port *dev, dev->type != PORT_TYPE_BOND_SLAVE))) return EDPVS_INVAL; + netif_flow_lock(dev); if (rte_flow_validate(dev->id, attr, pattern, actions, &flow_error)) { + netif_flow_unlock(dev); RTE_LOG(WARNING, FLOW, "rte_flow_validate on %s failed -- %d, %s\n", dev->name, flow_error.type, flow_error.message); return EDPVS_DPDKAPIFAIL; } flow->handler = rte_flow_create(dev->id, attr, pattern, actions, &flow_error); + netif_flow_unlock(dev); if (!flow->handler) { flow->pid = 0; RTE_LOG(WARNING, FLOW, "rte_flow_create on %s failed -- %d, %s\n", @@ -87,11 +107,14 @@ static int __netif_flow_destroy(struct netif_flow_handler *flow) dev->type != PORT_TYPE_BOND_SLAVE))) return EDPVS_INVAL; + netif_flow_lock(dev); if (rte_flow_destroy(flow->pid, (struct rte_flow *)flow->handler, &flow_error)) { RTE_LOG(WARNING, FLOW, "rte_flow_destroy on %s failed -- %d, %s\n", dev->name, flow_error.type, flow_error.message); + netif_flow_unlock(dev); return EDPVS_DPDKAPIFAIL; } + netif_flow_unlock(dev); return EDPVS_OK; } From c7604f4edd263e7b4ac008c66b34c57d7507c0e0 Mon Sep 17 00:00:00 2001 From: ywc689 Date: Tue, 29 Sep 2020 16:59:37 +0800 Subject: [PATCH 10/41] netif_flow: support ixgbe pmd driver --- conf/dpvs.bond.conf.sample | 1 + conf/dpvs.conf.items | 1 + conf/dpvs.conf.sample | 1 + ...add-debug-log-for-ixgbe-fdir-setting.patch | 53 +++++++ ...2-Patch-ixgbe-fdir-rte_flow-for-DPVS.patch | 147 ++++++++++++++++++ src/config.mk | 5 + src/netif.c | 82 +++++++++- 7 files changed, 284 insertions(+), 6 deletions(-) create mode 100644 patch/dpdk-stable-18.11.2/0001-add-debug-log-for-ixgbe-fdir-setting.patch create mode 100644 patch/dpdk-stable-18.11.2/0002-Patch-ixgbe-fdir-rte_flow-for-DPVS.patch diff --git a/conf/dpvs.bond.conf.sample b/conf/dpvs.bond.conf.sample index 532af4c34..a03f2f0e1 100644 --- a/conf/dpvs.bond.conf.sample +++ b/conf/dpvs.bond.conf.sample @@ -22,6 +22,7 @@ global_defs { netif_defs { pktpool_size 
1048575 pktpool_cache 256 + fdir_mode perfect device dpdk0 { rx { diff --git a/conf/dpvs.conf.items b/conf/dpvs.conf.items index db0047747..585f97813 100644 --- a/conf/dpvs.conf.items +++ b/conf/dpvs.conf.items @@ -22,6 +22,7 @@ global_defs { netif_defs { pktpool_size 2097151 <65535, 1023-134217728> pktpool_cache 256 <256, 32-8192> + fdir_mode perfect # only for ixgbe device dpdk0 { rx { diff --git a/conf/dpvs.conf.sample b/conf/dpvs.conf.sample index 76082c416..069fad9a8 100644 --- a/conf/dpvs.conf.sample +++ b/conf/dpvs.conf.sample @@ -22,6 +22,7 @@ global_defs { netif_defs { pktpool_size 1048575 pktpool_cache 256 + fdir_mode perfect device dpdk0 { rx { diff --git a/patch/dpdk-stable-18.11.2/0001-add-debug-log-for-ixgbe-fdir-setting.patch b/patch/dpdk-stable-18.11.2/0001-add-debug-log-for-ixgbe-fdir-setting.patch new file mode 100644 index 000000000..69293f320 --- /dev/null +++ b/patch/dpdk-stable-18.11.2/0001-add-debug-log-for-ixgbe-fdir-setting.patch @@ -0,0 +1,53 @@ +From ecd84bd29fd7eff2b8db4c04e92224929322a51f Mon Sep 17 00:00:00 2001 +From: wencyu +Date: Tue, 29 Sep 2020 14:42:52 +0800 +Subject: [PATCH 1/2] add debug log for ixgbe fdir setting + +--- + drivers/net/ixgbe/ixgbe_fdir.c | 23 +++++++++++++++++++++++ + 1 file changed, 23 insertions(+) + +diff --git a/drivers/net/ixgbe/ixgbe_fdir.c b/drivers/net/ixgbe/ixgbe_fdir.c +index e559f0f..c17642a 100644 +--- a/drivers/net/ixgbe/ixgbe_fdir.c ++++ b/drivers/net/ixgbe/ixgbe_fdir.c +@@ -278,6 +278,14 @@ static void ixgbe_fdir_stats_get(struct rte_eth_dev *dev, + uint32_t fdiripv6m; /* IPv6 source and destination masks. 
*/ + volatile uint32_t *reg; + ++ PMD_INIT_LOG(DEBUG, "%s: tci 0x%04x, ip4src 0x%08x, ip4dst 0x%08x, " ++ "ip6src 0x%04x, ip6dst 0x%04x, src_port 0x%04x, dst_port 0x%04x, " ++ "flex 0x%04x, mac_addr 0x%02x, tunid 0x%08x, tuntype 0x02%x\n", __func__, ++ info->mask.vlan_tci_mask, info->mask.src_ipv4_mask, info->mask.dst_ipv4_mask, ++ info->mask.src_ipv6_mask, info->mask.dst_ipv6_mask, info->mask.src_port_mask, ++ info->mask.dst_port_mask, info->mask.flex_bytes_mask, info->mask.mac_addr_byte_mask, ++ info->mask.tunnel_id_mask, info->mask.tunnel_type_mask); ++ + PMD_INIT_FUNC_TRACE(); + + /* +@@ -1242,6 +1250,21 @@ static void ixgbe_fdir_stats_get(struct rte_eth_dev *dev, + struct ixgbe_fdir_filter *node; + bool add_node = FALSE; + ++ PMD_DRV_LOG(DEBUG, "%s: ixgbe_fdir_rule: b_spec %d, b_mask %d, mode %d, flags 0x%08x, softid %d, " ++ "queue %d, flex_off %d....ixgbe_fdir: vm_pool %d, flow_type %d, vlan_id %d, dst_ip 0x%8x, src_ip " ++ "0x%8x, inner_mac %02x:%02x:%02x:%02x:%02x:%02x, tuntype 0x%4x, tni_vni 0x%08x, src_port 0x%04x, " ++ "dst_port 0x%04x, flexbytes %d, bkt_hash %d\n", __func__, rule->b_spec, rule->b_mask, ++ rule->mode, rule->fdirflags, rule->soft_id, rule->queue, rule->flex_bytes_offset, ++ rule->ixgbe_fdir.formatted.vm_pool, rule->ixgbe_fdir.formatted.flow_type, ++ rule->ixgbe_fdir.formatted.vlan_id, ++ *((uint32_t *)&rule->ixgbe_fdir.formatted.dst_ip[0]), ++ *((uint32_t *)&rule->ixgbe_fdir.formatted.src_ip[0]), ++ rule->ixgbe_fdir.formatted.inner_mac[0], rule->ixgbe_fdir.formatted.inner_mac[1], ++ rule->ixgbe_fdir.formatted.inner_mac[2], rule->ixgbe_fdir.formatted.inner_mac[3], ++ rule->ixgbe_fdir.formatted.inner_mac[4], rule->ixgbe_fdir.formatted.inner_mac[5], ++ rule->ixgbe_fdir.formatted.tunnel_type, rule->ixgbe_fdir.formatted.tni_vni, ++ rule->ixgbe_fdir.formatted.src_port, rule->ixgbe_fdir.formatted.dst_port, ++ rule->ixgbe_fdir.formatted.flex_bytes, rule->ixgbe_fdir.formatted.bkt_hash); + if (fdir_mode == RTE_FDIR_MODE_NONE || + fdir_mode != 
rule->mode) + return -ENOTSUP; +-- +1.8.3.1 + diff --git a/patch/dpdk-stable-18.11.2/0002-Patch-ixgbe-fdir-rte_flow-for-DPVS.patch b/patch/dpdk-stable-18.11.2/0002-Patch-ixgbe-fdir-rte_flow-for-DPVS.patch new file mode 100644 index 000000000..dd5d1c39a --- /dev/null +++ b/patch/dpdk-stable-18.11.2/0002-Patch-ixgbe-fdir-rte_flow-for-DPVS.patch @@ -0,0 +1,147 @@ +From 09a4a420427dda9084669512e7c9c95ebe8586f4 Mon Sep 17 00:00:00 2001 +From: wencyu +Date: Tue, 29 Sep 2020 14:45:08 +0800 +Subject: [PATCH 2/2] Patch ixgbe fdir rte_flow for DPVS. 1. Ignore fdir flow + rule priority attribute. 2. Use different fdir soft-id for flow rules + configured for the same queue. 3. Disable fdir mask settings by rte_flow. 4. + Allow IPv6 to pass flow rule ETH item validation. + +--- + drivers/net/ixgbe/ixgbe_flow.c | 62 ++++++++++++++++++++++++++++++++++-------- + 1 file changed, 51 insertions(+), 11 deletions(-) + +diff --git a/drivers/net/ixgbe/ixgbe_flow.c b/drivers/net/ixgbe/ixgbe_flow.c +index f0fafeb..05dd5df 100644 +--- a/drivers/net/ixgbe/ixgbe_flow.c ++++ b/drivers/net/ixgbe/ixgbe_flow.c +@@ -1428,11 +1428,8 @@ const struct rte_flow_action *next_no_void_action( + + /* not supported */ + if (attr->priority) { +- memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); +- rte_flow_error_set(error, EINVAL, +- RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY, +- attr, "Not support priority."); +- return -rte_errno; ++ PMD_DRV_LOG(WARNING, "Ixgbe fdir not support flow priority %d (only 0 is supported), " ++ "ignore and continue....\n", attr->priority); + } + + /* check if the first not void action is QUEUE or DROP. */ +@@ -1651,7 +1648,7 @@ static inline uint8_t signature_match(const struct rte_flow_item pattern[]) + * value. So, we need not do anything for the not provided fields later. 
+ */ + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); +- memset(&rule->mask, 0xFF, sizeof(struct ixgbe_hw_fdir_mask)); ++ memset(&rule->mask, 0, sizeof(struct ixgbe_hw_fdir_mask)); /* mask default zero */ + rule->mask.vlan_tci_mask = 0; + rule->mask.flex_bytes_mask = 0; + +@@ -1769,6 +1766,8 @@ static inline uint8_t signature_match(const struct rte_flow_item pattern[]) + } + } else { + if (item->type != RTE_FLOW_ITEM_TYPE_IPV4 && ++ /* Signature mode supports IPv6. */ ++ item->type != RTE_FLOW_ITEM_TYPE_IPV6 && + item->type != RTE_FLOW_ITEM_TYPE_VLAN) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, +@@ -1897,6 +1896,9 @@ static inline uint8_t signature_match(const struct rte_flow_item pattern[]) + rule->ixgbe_fdir.formatted.flow_type = + IXGBE_ATR_FLOW_TYPE_IPV6; + ++ /* Update flow rule mode by global param. */ ++ rule->mode = dev->data->dev_conf.fdir_conf.mode; ++ + /** + * 1. must signature match + * 2. not support last +@@ -2757,12 +2759,45 @@ static inline uint8_t signature_match(const struct rte_flow_item pattern[]) + return ixgbe_parse_fdir_act_attr(attr, actions, rule, error); + } + ++static inline int ++ixgbe_fdir_rule_patch(struct rte_eth_dev *dev, struct ixgbe_fdir_rule *rule) ++{ ++ static uint32_t softid[IXGBE_MAX_RX_QUEUE_NUM] = { 0 }; ++ ++ if (!rule) ++ return 0; ++ ++ if (!dev || !dev->data) ++ return -EINVAL; ++ if (rule->queue >= IXGBE_MAX_RX_QUEUE_NUM) ++ return -EINVAL; ++ ++ /* Soft-id for different rx-queue should be different. */ ++ rule->soft_id = softid[rule->queue]++; ++ ++ /* Disable mask config from rte_flow. ++ * FIXME: ++ * Ixgbe only supports one global mask, all the masks should be the same. ++ * Generally, fdir masks should be configured globally before port start. ++ * But the rte_flow configures masks at flow creation. So we disable fdir ++ * mask configs in rte_flow and configure it globally when port start. ++ * Refer to `ixgbe_dev_start/ixgbe_fdir_configure` for details. 
The global ++ * masks are configured into device initially with user specified params. ++ */ ++ rule->b_mask = 0; ++ ++ /* Use user-defined mode. */ ++ rule->mode = dev->data->dev_conf.fdir_conf.mode; ++ ++ return 0; ++} ++ + static int + ixgbe_parse_fdir_filter(struct rte_eth_dev *dev, + const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], +- struct ixgbe_fdir_rule *rule, ++ struct ixgbe_fdir_rule *rule, bool b_patch, + struct rte_flow_error *error) + { + int ret; +@@ -2796,13 +2831,18 @@ static inline uint8_t signature_match(const struct rte_flow_item pattern[]) + rule->ixgbe_fdir.formatted.dst_port != 0)) + return -ENOTSUP; + +- if (fdir_mode == RTE_FDIR_MODE_NONE || +- fdir_mode != rule->mode) ++ if (fdir_mode == RTE_FDIR_MODE_NONE) + return -ENOTSUP; + + if (rule->queue >= dev->data->nb_rx_queues) + return -ENOTSUP; + ++ if (ret) ++ return ret; ++ ++ if (b_patch) ++ return ixgbe_fdir_rule_patch(dev, rule); ++ + return ret; + } + +@@ -3137,7 +3177,7 @@ static inline uint8_t signature_match(const struct rte_flow_item pattern[]) + + memset(&fdir_rule, 0, sizeof(struct ixgbe_fdir_rule)); + ret = ixgbe_parse_fdir_filter(dev, attr, pattern, +- actions, &fdir_rule, error); ++ actions, &fdir_rule, true, error); + if (!ret) { + /* A mask cannot be deleted. 
*/ + if (fdir_rule.b_mask) { +@@ -3307,7 +3347,7 @@ static inline uint8_t signature_match(const struct rte_flow_item pattern[]) + + memset(&fdir_rule, 0, sizeof(struct ixgbe_fdir_rule)); + ret = ixgbe_parse_fdir_filter(dev, attr, pattern, +- actions, &fdir_rule, error); ++ actions, &fdir_rule, false, error); + if (!ret) + return 0; + +-- +1.8.3.1 + diff --git a/src/config.mk b/src/config.mk index c9871fc96..92f3dddb9 100644 --- a/src/config.mk +++ b/src/config.mk @@ -46,6 +46,11 @@ CFLAGS += -D DPVS_MAX_LCORE=64 #CFLAGS += -D CONFIG_DPVS_MP_DEBUG #CFLAGS += -D CONFIG_ICMP_REDIRECT_CORE +# for ixgbe nic +ifneq ($(CONFIG_MLX5), y) +CFLAGS += -D CONFIG_DPVS_FDIR +endif + ifeq ($(CONFIG_PDUMP), y) CFLAGS += -D CONFIG_DPVS_PDUMP endif diff --git a/src/netif.c b/src/netif.c index 5a3350a70..6ca922049 100644 --- a/src/netif.c +++ b/src/netif.c @@ -245,6 +245,31 @@ static void pktpool_cache_handler(vector_t tokens) FREE_PTR(str); } +#ifdef CONFIG_DPVS_FDIR +static enum rte_fdir_mode g_fdir_mode = RTE_FDIR_MODE_PERFECT; + +static void fdir_mode_handler(vector_t tokens) +{ + char *mode, *str = set_value(tokens); + + assert(str); + mode = strlwr(str); + + if (!strncmp(mode, "perfect", sizeof("perfect"))) + g_fdir_mode = RTE_FDIR_MODE_PERFECT; + else if (!strncmp(mode, "signature", sizeof("signature"))) + g_fdir_mode = RTE_FDIR_MODE_SIGNATURE; + else { + RTE_LOG(WARNING, NETIF, "invalid fdir_mode %s, using default %s\n", + mode, "perfect"); + g_fdir_mode = RTE_FDIR_MODE_PERFECT; + } + RTE_LOG(INFO, NETIF, "%s:g_fdir_mode = %s\n", mode); + + FREE_PTR(str); +} +#endif + static void device_handler(vector_t tokens) { assert(VECTOR_SIZE(tokens) >= 1); @@ -817,6 +842,9 @@ void netif_keyword_value_init(void) /* KW_TYPE_INIT keyword */ netif_pktpool_nb_mbuf = NETIF_PKTPOOL_NB_MBUF_DEF; netif_pktpool_mbuf_cache = NETIF_PKTPOOL_MBUF_CACHE_DEF; +#ifdef CONFIG_DPVS_FDIR + g_fdir_mode = RTE_FDIR_MODE_PERFECT; +#endif } /* KW_TYPE_NORMAL keyword */ } @@ -826,6 +854,9 @@ void 
install_netif_keywords(void) install_keyword_root("netif_defs", netif_defs_handler); install_keyword("pktpool_size", pktpool_size_handler, KW_TYPE_INIT); install_keyword("pktpool_cache", pktpool_cache_handler, KW_TYPE_INIT); +#ifdef CONFIG_DPVS_FDIR + install_keyword("fdir_mode", fdir_mode_handler, KW_TYPE_INIT); +#endif install_keyword("device", device_handler, KW_TYPE_INIT); install_sublevel(); install_keyword("rx", NULL, KW_TYPE_INIT); @@ -3417,6 +3448,24 @@ static int add_bond_slaves(struct netif_port *port) return EDPVS_OK; } +#ifdef CONFIG_DPVS_FDIR +static int config_fdir_conf(struct rte_fdir_conf *fdir_conf) +{ + int shift; + + /* how many mask bits needed? */ + for (shift = 0; (0x1<= 16) + return EDPVS_INVAL; + + fdir_conf->mask.dst_port_mask = htons(~((~0x0) << shift)); + fdir_conf->mode = g_fdir_mode; + + return EDPVS_OK; +} +#endif + /* * Note: Invoke the function after port is allocated and lcores are configured. */ @@ -3449,14 +3498,13 @@ int netif_port_start(struct netif_port *port) } // device configure -<<<<<<< HEAD - if ((ret = netif_port_fdir_dstport_mask_set(port)) != EDPVS_OK) - return ret; if ((ret = rte_eth_dev_set_mtu(port->id,port->mtu)) != EDPVS_OK) return ret; - -======= ->>>>>>> sapool: replace flow director with rte_flow +#ifdef CONFIG_DPVS_FDIR + ret = config_fdir_conf(&port->dev_conf.fdir_conf); + if (ret != EDPVS_OK) + return ret; +#endif if (port->flag & NETIF_PORT_FLAG_TX_IP_CSUM_OFFLOAD) port->dev_conf.txmode.offloads |= DEV_TX_OFFLOAD_IPV4_CKSUM; if (port->flag & NETIF_PORT_FLAG_TX_UDP_CSUM_OFFLOAD) @@ -3745,6 +3793,28 @@ static struct rte_eth_conf default_port_conf = { .txmode = { .mq_mode = ETH_MQ_TX_NONE, }, +#ifdef CONFIG_DPVS_FDIR + .fdir_conf = { + .mode = RTE_FDIR_MODE_PERFECT, /* maybe changed by config file */ + .pballoc = RTE_FDIR_PBALLOC_64K, + .status = RTE_FDIR_REPORT_STATUS, + .mask = { + .ipv4_mask = { + .dst_ip = 0xFFFFFFFF, + }, + .ipv6_mask = { + .dst_ip = { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF }, + }, 
+ /* to be changed according to slave lcore number in use */ + .dst_port_mask = 0x0700, + }, + .drop_queue = 127, + .flex_conf = { + .nb_payloads = 0, + .nb_flexmasks = 0, + }, + }, +#endif }; int netif_print_port_conf(const struct rte_eth_conf *port_conf, char *buf, int *len) From 6f7b7122add0f8b5ec22f36ac95945d366599276 Mon Sep 17 00:00:00 2001 From: ywc689 Date: Tue, 29 Sep 2020 17:00:57 +0800 Subject: [PATCH 11/41] sapool: fix flow mask byte order problem --- src/sa_pool.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/sa_pool.c b/src/sa_pool.c index a0909339d..366f8d196 100644 --- a/src/sa_pool.c +++ b/src/sa_pool.c @@ -170,7 +170,7 @@ static int sa_pool_add_filter(struct inet_ifaddr *ifa, struct sa_pool *ap, return EDPVS_OK; err = netif_sapool_flow_add(ifa->idev->dev, cid, ifa->af, &ifa->addr, - flow->port_base, flow->mask, &flow_handlers); + flow->port_base, htons(flow->mask), &flow_handlers); ap->flow_num = flow_handlers.flow_num; return err; @@ -191,7 +191,7 @@ static int sa_pool_del_filter(struct inet_ifaddr *ifa, struct sa_pool *ap, return EDPVS_OK; return netif_sapool_flow_del(ifa->idev->dev, cid, ifa->af, &ifa->addr, - flow->port_base, flow->mask, &flow_handlers); + flow->port_base, htons(flow->mask), &flow_handlers); } int sa_pool_create(struct inet_ifaddr *ifa, uint16_t low, uint16_t high) From f10db1e50b4cbe732ee6d93c7cea7634fcecf2da Mon Sep 17 00:00:00 2001 From: Vipin Varghese Date: Mon, 25 Jan 2021 22:15:21 +0530 Subject: [PATCH 12/41] makefile: update meson build for DPDK dpdk build infrastructure has moved out of Makefile to meson. Adding meson build support for extracting cflags and libs for meson installed pkg config path. Mitigate the error for inline function definition missing when not present in soruce c file. 
Signed-off-by: Vipin Varghese --- include/ipvs/kcompat.h | 4 ++-- src/Makefile | 7 +++++-- src/dpdk.mk | 7 +++++++ 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/include/ipvs/kcompat.h b/include/ipvs/kcompat.h index 2203b9312..2ffd760c9 100644 --- a/include/ipvs/kcompat.h +++ b/include/ipvs/kcompat.h @@ -53,7 +53,7 @@ * * Undefined if no bit exists, so code should check against 0 first. */ -inline unsigned long __ffs(unsigned long word); +unsigned long __ffs(unsigned long word); /** * fls - find last (most-significant) bit set @@ -62,7 +62,7 @@ inline unsigned long __ffs(unsigned long word); * This is defined the same way as ffs. * Note fls(0) = 0, fls(1) = 1, fls(0x80000000) = 32. */ -inline int fls(unsigned int x); +int fls(unsigned int x); /** * taken from definition in include/linux/gcd.h diff --git a/src/Makefile b/src/Makefile index 02f288c31..f2aefdbe1 100644 --- a/src/Makefile +++ b/src/Makefile @@ -34,10 +34,14 @@ DATE_STRING := $(shell date +%Y.%m.%d.%H:%M:%S) # same path of THIS Makefile SRCDIR := $(dir $(realpath $(firstword $(MAKEFILE_LIST)))) +ifeq ($(shell pkg-config --exists libdpdk && echo 0),0) +else ifeq ($(RTE_SDK),) $(error "The variable RTE_SDK is not defined.") endif include $(RTE_SDK)/mk/rte.vars.mk +LIBS += -lpthread -lnuma +endif include $(SRCDIR)/config.mk include $(SRCDIR)/dpdk.mk @@ -62,7 +66,6 @@ else CFLAGS += -rdynamic endif -LIBS += -lpthread -lnuma CFLAGS += $(INCDIRS) $(LIBS) @@ -73,7 +76,7 @@ all: $(TARGET) $(TARGET): $(OBJS) @echo " $(notdir $@)" - $(Q)$(CC) $(CFLAGS) $^ -o $@ + $(Q)$(CC) $(CFLAGS) $^ $(LIBS) -o $@ %.o: %.c @echo " $(notdir $@)" diff --git a/src/dpdk.mk b/src/dpdk.mk index c96e75c7f..816c6c649 100644 --- a/src/dpdk.mk +++ b/src/dpdk.mk @@ -14,6 +14,12 @@ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. 
# +ifeq ($(shell pkg-config --exists libdpdk && echo 0),0) + +CFLAGS += -DALLOW_EXPERIMENTAL_API -static $(shell pkg-config --cflags libdpdk) +LIBS += $(shell pkg-config --libs --static libdpdk) + +else ifeq ($(RTE_SDK),) $(error "The variable RTE_SDK is not defined.") @@ -58,3 +64,4 @@ LIBS += -Wl,--whole-archive -lrte_pmd_mlx5 -Wl,--no-whole-archive LIBS += -libverbs -lmlx5 -lmnl endif +endif From b8da933b957b2a13bbf4f380a133dac37a3ad91e Mon Sep 17 00:00:00 2001 From: Vipin Varghese Date: Mon, 25 Jan 2021 22:29:43 +0530 Subject: [PATCH 13/41] doc: update the README for meson build update the steps for building with meson-ninja for DPDK and install path. Signed-off-by: Vipin Varghese --- README.md | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 8db4504eb..5d7411612 100644 --- a/README.md +++ b/README.md @@ -93,7 +93,18 @@ $ ... ### DPDK build and install -Now build DPDK and export `RTE_SDK` env variable for DPDK app (DPVS). +1. Using meson-ninja for building DPDK libraries. The `dpdk.mk` checks for presence of libdpdk + +```bash +$ cd dpdk-stable-18.11.11 +$ mkdir [user desired install folder - dpdklib] +$ mkdir [user desired build folder - dpdkbuild] +$ meson -Dprefix=[dpdklib] dpdkbuild +$ ninja -C dpdkbuild +$ cd dpdkbuild; ninja install +``` + +2. Using Makefile for build DPDK linraries and export `RTE_SDK` env variable for DPDK app (DPVS). 
```bash $ cd dpdk-stable-18.11.2/ From 6741ce9083e16098ea4f5e3f4fb4cfe5f76cf1e8 Mon Sep 17 00:00:00 2001 From: ywc689 Date: Mon, 19 Apr 2021 20:42:42 +0800 Subject: [PATCH 14/41] fix meson build failure problem Signed-off-by: ywc689 --- README.md | 1 + src/Makefile | 4 ++-- src/config.mk | 4 ++++ src/dpdk.mk | 21 ++++++++++----------- 4 files changed, 17 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index 5d7411612..d255c50ee 100644 --- a/README.md +++ b/README.md @@ -102,6 +102,7 @@ $ mkdir [user desired build folder - dpdkbuild] $ meson -Dprefix=[dpdklib] dpdkbuild $ ninja -C dpdkbuild $ cd dpdkbuild; ninja install +$ export PKG_CONFIG_PATH=${PKG_CONFIG_PATH:+$PKG_CONFIG_PATH:}$(pwd)/../dpdklib/lib64/pkgconfig/libdpdk.pc ``` 2. Using Makefile for build DPDK linraries and export `RTE_SDK` env variable for DPDK app (DPVS). diff --git a/src/Makefile b/src/Makefile index f2aefdbe1..f9abe073a 100644 --- a/src/Makefile +++ b/src/Makefile @@ -40,7 +40,7 @@ ifeq ($(RTE_SDK),) $(error "The variable RTE_SDK is not defined.") endif include $(RTE_SDK)/mk/rte.vars.mk -LIBS += -lpthread -lnuma +LIBS += -lpthread -lnuma -lrt -lm -ldl -lcrypto -lpcap endif include $(SRCDIR)/config.mk @@ -67,7 +67,7 @@ else endif -CFLAGS += $(INCDIRS) $(LIBS) +CFLAGS += $(INCDIRS) OBJS := $(shell find $(SRCDIR) -name '*.c' | sort) OBJS := $(patsubst %.c,%.o,$(OBJS)) diff --git a/src/config.mk b/src/config.mk index 92f3dddb9..5142e00d0 100644 --- a/src/config.mk +++ b/src/config.mk @@ -58,3 +58,7 @@ endif GCC_MAJOR = $(shell echo __GNUC__ | $(CC) -E -x c - | tail -n 1) GCC_MINOR = $(shell echo __GNUC_MINOR__ | $(CC) -E -x c - | tail -n 1) GCC_VERSION = $(GCC_MAJOR)$(GCC_MINOR) + +ifeq ($(CONFIG_MLX5), y) +LIBS += -libverbs -lmlx5 -lmnl +endif diff --git a/src/dpdk.mk b/src/dpdk.mk index 816c6c649..1108be74a 100644 --- a/src/dpdk.mk +++ b/src/dpdk.mk @@ -35,17 +35,17 @@ CFLAGS += -include $(DPDKDIR)/include/rte_config.h LIBS += -L $(DPDKDIR)/lib -LIBS += -Wl,--no-as-needed 
-fvisibility=default \ - -Wl,--whole-archive -lrte_pmd_vmxnet3_uio -lrte_pmd_i40e -lrte_pmd_ixgbe -lrte_pmd_ena \ - -lrte_pmd_e1000 -lrte_pmd_bnxt -lrte_pmd_ring -lrte_pmd_bond -lrte_ethdev -lrte_ip_frag \ - -Wl,--whole-archive -lrte_hash -lrte_kvargs -Wl,-lrte_mbuf -lrte_eal \ - -Wl,-lrte_mempool -lrte_ring -lrte_cmdline -lrte_cfgfile -lrte_kni \ - -lrte_mempool_ring -lrte_timer -lrte_net -Wl,-lrte_pmd_virtio \ +LIBS += -Wl,--no-as-needed -fvisibility=default -Wl,--whole-archive + +LIBS += -lrte_pmd_vmxnet3_uio -lrte_pmd_i40e -lrte_pmd_ixgbe -lrte_pmd_ena \ + -lrte_pmd_e1000 -lrte_pmd_bnxt -lrte_pmd_ring -lrte_pmd_bond \ + -lrte_ethdev -lrte_ip_frag -lrte_hash -lrte_kvargs -lrte_mbuf \ + -lrte_eal -lrte_mempool -lrte_ring -lrte_cmdline -lrte_cfgfile \ + -lrte_kni -lrte_mempool_ring -lrte_timer -lrte_net -lrte_pmd_virtio \ -lrte_pci -lrte_bus_pci -lrte_bus_vdev -lrte_lpm -lrte_pdump \ - -Wl,--no-whole-archive -lrt -lm -ldl -lcrypto ifeq ($(CONFIG_PDUMP), y) -LIBS += -Wl,--whole-archive -lrte_acl -lrte_member -lrte_eventdev -lrte_reorder -lrte_cryptodev \ +LIBS += -lrte_acl -lrte_member -lrte_eventdev -lrte_reorder -lrte_cryptodev \ -lrte_vhost -lrte_pmd_pcap ifneq ("$(wildcard $(RTE_SDK)/$(RTE_TARGET)/lib/librte_bus_vmbus.a)", "") @@ -56,12 +56,11 @@ ifneq ("$(wildcard $(RTE_SDK)/$(RTE_TARGET)/lib/librte_pmd_netvsc.a)", "") LIBS += -lrte_pmd_netvsc endif -LIBS += -Wl,--no-whole-archive -lpcap endif ifeq ($(CONFIG_MLX5), y) -LIBS += -Wl,--whole-archive -lrte_pmd_mlx5 -Wl,--no-whole-archive -LIBS += -libverbs -lmlx5 -lmnl +LIBS += -lrte_pmd_mlx5 endif +LIBS += -Wl,--no-whole-archive endif From ff650eb71800fb253c88dc0ca67d85643610e362 Mon Sep 17 00:00:00 2001 From: ywc689 Date: Mon, 26 Apr 2021 13:08:04 +0800 Subject: [PATCH 15/41] merge dpdk-stable-20.11.x (abandon dpdk-stable-18.11.x) Signed-off-by: ywc689 --- include/conf/eal_mem.h | 4 +- include/conf/neigh.h | 20 +- include/dpdk.h | 1 + include/ipv4.h | 16 +- include/ipvs/conn.h | 8 +- include/ipvs/nat64.h | 2 +- 
include/ipvs/proto_tcp.h | 4 +- include/ipvs/proto_udp.h | 4 +- include/mbuf.h | 34 ++++ include/neigh.h | 8 +- include/netif.h | 17 +- include/netif_addr.h | 18 +- src/ctrl.c | 2 +- src/dpdk.mk | 7 +- src/eal_mem.c | 15 +- src/icmp.c | 30 +-- src/iftraf.c | 2 +- src/inetaddr.c | 36 ++-- src/ip_gre.c | 2 +- src/ip_tunnel.c | 10 +- src/ipip.c | 2 +- src/ipset.c | 8 +- src/ipv4.c | 63 +++---- src/ipv4_frag.c | 18 +- src/ipv6/icmp6.c | 8 +- src/ipv6/ipv6.c | 48 ++--- src/ipv6/ipv6_exthdrs.c | 2 +- src/ipv6/ndisc.c | 22 +-- src/ipv6/route6.c | 12 +- src/ipv6/route6_lpm.c | 2 +- src/ipvs/ip_vs_blklst.c | 12 +- src/ipvs/ip_vs_conhash.c | 4 +- src/ipvs/ip_vs_conn.c | 12 +- src/ipvs/ip_vs_core.c | 48 ++--- src/ipvs/ip_vs_dest.c | 2 +- src/ipvs/ip_vs_laddr.c | 4 +- src/ipvs/ip_vs_nat64.c | 12 +- src/ipvs/ip_vs_proto_tcp.c | 38 ++-- src/ipvs/ip_vs_proto_udp.c | 69 +++---- src/ipvs/ip_vs_service.c | 18 +- src/ipvs/ip_vs_synproxy.c | 64 +++---- src/ipvs/ip_vs_whtlst.c | 12 +- src/ipvs/ip_vs_xmit.c | 364 ++++++++++++++++++------------------- src/kni.c | 18 +- src/log.c | 11 +- src/main.c | 2 + src/mbuf.c | 49 ++++- src/mempool.c | 4 +- src/neigh.c | 104 +++++------ src/netif.c | 112 ++++++------ src/netif_addr.c | 22 +-- src/pdump.c | 2 +- src/route.c | 10 +- src/sa_pool.c | 6 +- src/scheduler.c | 2 +- src/tc/cls_match.c | 2 +- src/tc/sch_pfifo_fast.c | 2 +- src/tc/tc.c | 2 +- src/timer.c | 12 +- src/vlan.c | 12 +- tools/dpip/eal_mem.c | 8 +- 61 files changed, 781 insertions(+), 683 deletions(-) diff --git a/include/conf/eal_mem.h b/include/conf/eal_mem.h index 6506ca9a3..bab2410f5 100644 --- a/include/conf/eal_mem.h +++ b/include/conf/eal_mem.h @@ -35,7 +35,7 @@ enum { }; typedef struct eal_mem_seg_ret_s { - uint64_t phys_addr; + uint64_t iova; uint64_t virt_addr; uint64_t len; uint64_t hugepage_sz; @@ -52,7 +52,7 @@ typedef struct eal_all_mem_seg_ret_s { typedef struct eal_mem_zone_ret_s { char name[EAL_MEM_NAME_LEN]; - uint64_t phys_addr; + uint64_t iova; uint64_t virt_addr; 
uint64_t len; uint64_t hugepage_sz; diff --git a/include/conf/neigh.h b/include/conf/neigh.h index d4881030d..afd874d06 100644 --- a/include/conf/neigh.h +++ b/include/conf/neigh.h @@ -33,14 +33,18 @@ enum { }; struct dp_vs_neigh_conf { - int af; - uint8_t flag; - uint32_t state; - union inet_addr ip_addr; - struct ether_addr eth_addr; - uint32_t que_num; - char ifname[IFNAMSIZ]; - uint8_t cid; + int af; + uint8_t flag; + uint32_t state; + union inet_addr ip_addr; +#ifdef __DPVS__ + struct rte_ether_addr eth_addr; +#else + struct ether_addr eth_addr; +#endif + uint32_t que_num; + char ifname[IFNAMSIZ]; + uint8_t cid; }__attribute__((__packed__)); struct dp_vs_neigh_conf_array { diff --git a/include/dpdk.h b/include/dpdk.h index 81d6465b3..2fdcd418d 100644 --- a/include/dpdk.h +++ b/include/dpdk.h @@ -57,6 +57,7 @@ #include #include #include +#include #include "mbuf.h" #ifdef CONFIG_DPVS_PDUMP #include diff --git a/include/ipv4.h b/include/ipv4.h index ce3fb3b63..cf95882f2 100644 --- a/include/ipv4.h +++ b/include/ipv4.h @@ -45,8 +45,8 @@ int ipv4_output(struct rte_mbuf *mbuf); * Transport Protocols */ struct inet_protocol { - /* mbuf->userdata can be used to get IPv4 header, - * save it if protocols need ->userdata for other purpose. */ + /* mbuf userdata (MBUF_FIELD_PROTO) can be used to get IPv4 header, + * save it if protocols need mbuf userdata (MBUF_FIELD_PROTO) for other purpose. 
*/ int (*handler)(struct rte_mbuf *mbuf); }; @@ -117,15 +117,15 @@ struct ip4_stats; int ipv4_get_stats(struct ip4_stats *stats); int ip4_defrag(struct rte_mbuf *mbuf, int user); -uint32_t ip4_select_id(struct ipv4_hdr *iph); +uint32_t ip4_select_id(struct rte_ipv4_hdr *iph); int ipv4_local_out(struct rte_mbuf *mbuf); int ipv4_rcv_fin(struct rte_mbuf *mbuf); /* helper functions */ -static inline struct ipv4_hdr *ip4_hdr(const struct rte_mbuf *mbuf) +static inline struct rte_ipv4_hdr *ip4_hdr(const struct rte_mbuf *mbuf) { /* can only invoked at L3 */ - return rte_pktmbuf_mtod(mbuf, struct ipv4_hdr *); + return rte_pktmbuf_mtod(mbuf, struct rte_ipv4_hdr *); } static inline int ip4_hdrlen(const struct rte_mbuf *mbuf) @@ -133,16 +133,16 @@ static inline int ip4_hdrlen(const struct rte_mbuf *mbuf) return (ip4_hdr(mbuf)->version_ihl & 0xf) << 2; } -static inline void ip4_send_csum(struct ipv4_hdr *iph) +static inline void ip4_send_csum(struct rte_ipv4_hdr *iph) { iph->hdr_checksum = 0; iph->hdr_checksum = rte_ipv4_cksum(iph); } -static inline bool ip4_is_frag(struct ipv4_hdr *iph) +static inline bool ip4_is_frag(struct rte_ipv4_hdr *iph) { return (iph->fragment_offset - & htons(IPV4_HDR_MF_FLAG | IPV4_HDR_OFFSET_MASK)) != 0; + & htons(RTE_IPV4_HDR_MF_FLAG | RTE_IPV4_HDR_OFFSET_MASK)) != 0; } #endif /* __DPVS_IPV4_H__ */ diff --git a/include/ipvs/conn.h b/include/ipvs/conn.h index cb64d9707..b80acabef 100644 --- a/include/ipvs/conn.h +++ b/include/ipvs/conn.h @@ -120,10 +120,10 @@ struct dp_vs_conn { struct rte_mbuf *mbuf); /* L2 fast xmit */ - struct ether_addr in_smac; - struct ether_addr in_dmac; - struct ether_addr out_smac; - struct ether_addr out_dmac; + struct rte_ether_addr in_smac; + struct rte_ether_addr in_dmac; + struct rte_ether_addr out_smac; + struct rte_ether_addr out_dmac; /* route for neigbour */ struct netif_port *in_dev; /* inside to rs*/ diff --git a/include/ipvs/nat64.h b/include/ipvs/nat64.h index 4f397fe63..eb1017171 100644 --- 
a/include/ipvs/nat64.h +++ b/include/ipvs/nat64.h @@ -28,7 +28,7 @@ static inline int mbuf_nat6to4_len(struct rte_mbuf *mbuf) int len; offset = ip6_skip_exthdr(mbuf, offset, &nexthdr); - len = mbuf->pkt_len - offset + sizeof(struct ipv4_hdr); + len = mbuf->pkt_len - offset + sizeof(struct rte_ipv4_hdr); return len; } diff --git a/include/ipvs/proto_tcp.h b/include/ipvs/proto_tcp.h index 21ee6ef48..9f5162a85 100644 --- a/include/ipvs/proto_tcp.h +++ b/include/ipvs/proto_tcp.h @@ -100,8 +100,8 @@ struct tcp_state { #define sSA DPVS_TCP_S_SYNACK struct tcphdr *tcp_hdr(const struct rte_mbuf *mbuf); -void tcp4_send_csum(struct ipv4_hdr *iph, struct tcphdr *th); -void tcp6_send_csum(struct ipv6_hdr *iph, struct tcphdr *th); +void tcp4_send_csum(struct rte_ipv4_hdr *iph, struct tcphdr *th); +void tcp6_send_csum(struct rte_ipv6_hdr *iph, struct tcphdr *th); struct rte_mempool *get_mbuf_pool(const struct dp_vs_conn *conn, int dir); void install_proto_tcp_keywords(void); void tcp_keyword_value_init(void); diff --git a/include/ipvs/proto_udp.h b/include/ipvs/proto_udp.h index 3e379352e..66881f0ce 100644 --- a/include/ipvs/proto_udp.h +++ b/include/ipvs/proto_udp.h @@ -30,7 +30,7 @@ extern int g_defence_udp_drop; void install_proto_udp_keywords(void); void udp_keyword_value_init(void); -void udp4_send_csum(struct ipv4_hdr *iph, struct udp_hdr *uh); -void udp6_send_csum(struct ipv6_hdr *iph, struct udp_hdr *uh); +void udp4_send_csum(struct rte_ipv4_hdr *iph, struct rte_udp_hdr *uh); +void udp6_send_csum(struct rte_ipv6_hdr *iph, struct rte_udp_hdr *uh); #endif diff --git a/include/mbuf.h b/include/mbuf.h index aac4651a4..577de0ff2 100644 --- a/include/mbuf.h +++ b/include/mbuf.h @@ -39,6 +39,30 @@ s != NULL; \ s = n, n = s ? 
s->next : NULL) +#define MBUF_USERDATA(m, type, field) \ + (*((type *)(mbuf_userdata((m), (field))))) + +#define MBUF_USERDATA_CONST(m, type, field) \ + (*((type *)(mbuf_userdata_const((m), (field))))) + +typedef union { + void *hdr; + struct { + uint64_t l2_len:RTE_MBUF_L2_LEN_BITS; /* L2 Header Length */ + uint64_t l3_len:RTE_MBUF_L3_LEN_BITS; /* L3 Header Length */ + uint64_t l4_len:RTE_MBUF_L4_LEN_BITS; /* L4 Header Length */ + uint64_t outer_l2_len:RTE_MBUF_OUTL2_LEN_BITS; /* Outer L2 Header Length */ + uint64_t outer_l3_len:RTE_MBUF_OUTL3_LEN_BITS; /* Outer L3 Header Length */ + }; +} mbuf_userdata_field_proto_t; + +typedef void * mbuf_userdata_field_route_t; + +typedef enum { + MBUF_FIELD_PROTO = 0, + MBUF_FIELD_ROUTE, +} mbuf_usedata_field_t; + /** * mbuf_copy_bits - copy bits from mbuf to buffer. * see skb_copy_bits(). @@ -123,4 +147,14 @@ void mbuf_copy_metadata(struct rte_mbuf *mi, struct rte_mbuf *m); inline void dp_vs_mbuf_dump(const char *msg, int af, const struct rte_mbuf *mbuf); #endif +void *mbuf_userdata(struct rte_mbuf *, mbuf_usedata_field_t); +void *mbuf_userdata_const(const struct rte_mbuf *, mbuf_usedata_field_t); + +static inline void mbuf_userdata_reset(struct rte_mbuf *m) +{ + memset(m, 0, sizeof(m->dynfield1)); +} + +int mbuf_init(void); + #endif /* __DP_VS_MBUF_H__ */ diff --git a/include/neigh.h b/include/neigh.h index 3590fc641..f29f6f30d 100644 --- a/include/neigh.h +++ b/include/neigh.h @@ -54,7 +54,7 @@ struct neighbour_entry { int af; struct list_head neigh_list; union inet_addr ip_addr; - struct ether_addr eth_addr; + struct rte_ether_addr eth_addr; struct netif_port *port; struct dpvs_timer timer; struct list_head queue_list; @@ -89,7 +89,7 @@ struct neighbour_entry *neigh_lookup_entry(int af, const union inet_addr *key, void neigh_send_mbuf_cach(struct neighbour_entry *neighbour); int neigh_edit(struct neighbour_entry *neighbour, - struct ether_addr *eth_addr); + struct rte_ether_addr *eth_addr); int neigh_init(void); @@ -105,7 
+105,7 @@ int neigh_output(int af, struct netif_port *port); struct neighbour_entry *neigh_add_table(int af, const union inet_addr *ipaddr, - const struct ether_addr *eth_addr, + const struct rte_ether_addr *eth_addr, struct netif_port *port, unsigned int hashkey, int flag); @@ -118,7 +118,7 @@ void neigh_confirm(int af, union inet_addr *nexthop, struct netif_port *port); int neigh_sync_core(const void *param, bool add_del, enum param_kind kind); static inline void ipv6_mac_mult(const struct in6_addr *mult_target, - struct ether_addr *mult_eth) + struct rte_ether_addr *mult_eth) { uint8_t *w = (uint8_t *)mult_eth; w[0] = 0x33; diff --git a/include/netif.h b/include/netif.h index 28c5e0dc1..8338f0ab6 100644 --- a/include/netif.h +++ b/include/netif.h @@ -18,6 +18,7 @@ #ifndef __DPVS_NETIF_H__ #define __DPVS_NETIF_H__ #include +#include #include "list.h" #include "dpdk.h" #include "inetaddr.h" @@ -166,12 +167,12 @@ typedef enum { } port_type_t; struct netif_kni { - char name[IFNAMSIZ]; - struct rte_kni *kni; - struct ether_addr addr; - struct dpvs_timer kni_rtnl_timer; - int kni_rtnl_fd; - struct rte_ring *rx_ring; + char name[IFNAMSIZ]; + struct rte_kni * kni; + struct rte_ether_addr addr; + struct dpvs_timer kni_rtnl_timer; + int kni_rtnl_fd; + struct rte_ring * rx_ring; } __rte_cache_aligned; union netif_bond { @@ -201,7 +202,7 @@ struct netif_ops { struct netif_hw_addr { struct list_head list; - struct ether_addr addr; + struct rte_ether_addr addr; rte_atomic32_t refcnt; /* * - sync only once! 
@@ -233,7 +234,7 @@ struct netif_port { int ntxq; /* tx queue numbe */ uint16_t rxq_desc_nb; /* rx queue descriptor number */ uint16_t txq_desc_nb; /* tx queue descriptor number */ - struct ether_addr addr; /* MAC address */ + struct rte_ether_addr addr; /* MAC address */ struct netif_hw_addr_list mc; /* HW multicast list */ int socket; /* socket id */ int hw_header_len; /* HW header length */ diff --git a/include/netif_addr.h b/include/netif_addr.h index 929395ec3..1a6b97d71 100644 --- a/include/netif_addr.h +++ b/include/netif_addr.h @@ -25,16 +25,16 @@ #define __DPVS_NETIF_ADDR_H__ #include "netif.h" -int __netif_mc_add(struct netif_port *dev, const struct ether_addr *addr); -int __netif_mc_del(struct netif_port *dev, const struct ether_addr *addr); -int netif_mc_add(struct netif_port *dev, const struct ether_addr *addr); -int netif_mc_del(struct netif_port *dev, const struct ether_addr *addr); +int __netif_mc_add(struct netif_port *dev, const struct rte_ether_addr *addr); +int __netif_mc_del(struct netif_port *dev, const struct rte_ether_addr *addr); +int netif_mc_add(struct netif_port *dev, const struct rte_ether_addr *addr); +int netif_mc_del(struct netif_port *dev, const struct rte_ether_addr *addr); void netif_mc_flush(struct netif_port *dev); void netif_mc_init(struct netif_port *dev); int __netif_mc_dump(struct netif_port *dev, - struct ether_addr *addrs, size_t *naddr); + struct rte_ether_addr *addrs, size_t *naddr); int netif_mc_dump(struct netif_port *dev, - struct ether_addr *addrs, size_t *naddr); + struct rte_ether_addr *addrs, size_t *naddr); int __netif_mc_print(struct netif_port *dev, char *buf, int *len, int *pnaddr); int netif_mc_print(struct netif_port *dev, @@ -50,8 +50,8 @@ int netif_mc_sync_multiple(struct netif_port *to, struct netif_port *from); int __netif_mc_unsync_multiple(struct netif_port *to, struct netif_port *from); int netif_mc_unsync_multiple(struct netif_port *to, struct netif_port *from); -static inline int 
eth_addr_equal(const struct ether_addr *addr1, - const struct ether_addr *addr2) +static inline int eth_addr_equal(const struct rte_ether_addr *addr1, + const struct rte_ether_addr *addr2) { const uint16_t *a = (const uint16_t *)addr1; const uint16_t *b = (const uint16_t *)addr2; @@ -59,7 +59,7 @@ static inline int eth_addr_equal(const struct ether_addr *addr1, return ((a[0]^b[0]) | (a[1]^b[1]) | (a[2]^b[2])) == 0; } -static inline char *eth_addr_dump(const struct ether_addr *ea, +static inline char *eth_addr_dump(const struct rte_ether_addr *ea, char *buf, size_t size) { snprintf(buf, size, "%02x:%02x:%02x:%02x:%02x:%02x", diff --git a/src/ctrl.c b/src/ctrl.c index c3a61b894..9487e70c8 100644 --- a/src/ctrl.c +++ b/src/ctrl.c @@ -1058,7 +1058,7 @@ static inline int msg_init(void) /* lcore mask init */ slave_lcore_mask = 0; slave_lcore_nb = 0; - master_lcore = rte_get_master_lcore(); + master_lcore = rte_get_main_lcore(); netif_get_slave_lcores(&slave_lcore_nb, &slave_lcore_mask); if (slave_lcore_nb > MSG_MAX_LCORE_SUPPORTED) { diff --git a/src/dpdk.mk b/src/dpdk.mk index 1108be74a..cd9d995a2 100644 --- a/src/dpdk.mk +++ b/src/dpdk.mk @@ -16,8 +16,11 @@ # ifeq ($(shell pkg-config --exists libdpdk && echo 0),0) -CFLAGS += -DALLOW_EXPERIMENTAL_API -static $(shell pkg-config --cflags libdpdk) -LIBS += $(shell pkg-config --libs --static libdpdk) +CFLAGS += -DALLOW_EXPERIMENTAL_API $(shell pkg-config --cflags libdpdk) +LIBS += $(shell pkg-config --static --libs libdpdk) + +# FIXME: DPVS would link if not specified the following PMD libraries. 
+LIBS += -l:librte_bus_vdev.a -l:librte_net_bond.a else diff --git a/src/eal_mem.c b/src/eal_mem.c index 44dd468fc..2574d0e36 100644 --- a/src/eal_mem.c +++ b/src/eal_mem.c @@ -20,6 +20,7 @@ #include #include #include +#include #include "conf/eal_mem.h" #include "eal_mem.h" #include "ctrl.h" @@ -60,7 +61,7 @@ static int dp_vs_fill_mem_seg_info(const struct rte_memseg_list *msl, const stru seg_ret = &eal_mem_segs->seg_info[eal_mem_segs->seg_num]; eal_mem_segs->seg_num++; - seg_ret->phys_addr = ms->phys_addr; + seg_ret->iova = ms->iova; seg_ret->virt_addr = ms->addr_64; seg_ret->len = ms->len; seg_ret->hugepage_sz = ms->hugepage_sz; @@ -84,7 +85,7 @@ static void dp_vs_fill_mem_zone_info(const struct rte_memzone *mz, void *arg) eal_mem_zones->zone_num++; memcpy(zone_ret->name, mz->name, EAL_MEM_NAME_LEN); - zone_ret->phys_addr = mz->phys_addr; + zone_ret->iova = mz->iova; zone_ret->virt_addr = mz->addr_64; zone_ret->len = mz->len; zone_ret->hugepage_sz = mz->hugepage_sz; @@ -110,7 +111,7 @@ static int dp_vs_get_eal_mem_seg(eal_all_mem_seg_ret_t *eal_mem_segs) } seg_ret = &eal_mem_segs->seg_info[eal_mem_segs->seg_num]; eal_mem_segs->seg_num++; - seg_ret->phys_addr = mcfg->memseg[i].phys_addr; + seg_ret->iova = mcfg->memseg[i].iova; seg_ret->virt_addr = mcfg->memseg[i].addr_64; seg_ret->len = mcfg->memseg[i].len; seg_ret->hugepage_sz = mcfg->memseg[i].hugepage_sz; @@ -171,7 +172,7 @@ static int dp_vs_get_eal_mem_pool(eal_all_mem_pool_ret_t *eal_mem_pools) if (NULL == mempool_list) return -1; - rte_rwlock_read_lock(RTE_EAL_MEMPOOL_RWLOCK); + rte_mcfg_mempool_read_lock(); eal_mem_pools->mempool_num = 0; TAILQ_FOREACH(te, mempool_list, next) { mp = (struct rte_mempool *) te->data; @@ -186,7 +187,7 @@ static int dp_vs_get_eal_mem_pool(eal_all_mem_pool_ret_t *eal_mem_pools) mempool_ret->trailer_size = mp->trailer_size; mempool_ret->private_data_size = mp->private_data_size; } - rte_rwlock_read_unlock(RTE_EAL_MEMPOOL_RWLOCK); + rte_mcfg_mempool_read_unlock(); return 0; } @@ 
-201,7 +202,7 @@ static int dp_vs_get_eal_mem_ring(eal_all_mem_ring_ret_t *eal_mem_rings) ring_list = RTE_TAILQ_LOOKUP("RTE_RING", rte_ring_list); - rte_rwlock_read_lock(RTE_EAL_TAILQ_RWLOCK); + rte_mcfg_tailq_read_lock(); eal_mem_rings->ring_num = 0; TAILQ_FOREACH(te, ring_list, next) { r = (struct rte_ring *)te->data; @@ -217,7 +218,7 @@ static int dp_vs_get_eal_mem_ring(eal_all_mem_ring_ret_t *eal_mem_rings) ring_ret->used = rte_ring_count(r); ring_ret->avail = rte_ring_free_count(r); } - rte_rwlock_read_unlock(RTE_EAL_TAILQ_RWLOCK); + rte_mcfg_tailq_read_unlock(); return 0; } diff --git a/src/icmp.c b/src/icmp.c index 240311fc8..81d7598d4 100644 --- a/src/icmp.c +++ b/src/icmp.c @@ -39,7 +39,7 @@ struct icmp_ctrl { #ifdef CONFIG_DPVS_ICMP_DEBUG static void icmp_dump_hdr(const struct rte_mbuf *mbuf) { - struct icmp_hdr *ich = rte_pktmbuf_mtod(mbuf, struct icmp_hdr *); + struct rte_icmp_hdr *ich = rte_pktmbuf_mtod(mbuf, struct rte_icmp_hdr *); lcoreid_t lcore = rte_lcore_id(); fprintf(stderr, "lcore %d port %d icmp type %u code %u id %u seq %u\n", @@ -52,12 +52,12 @@ static void icmp_dump_hdr(const struct rte_mbuf *mbuf) static int icmp_echo(struct rte_mbuf *mbuf) { - struct ipv4_hdr *iph = mbuf->userdata; - struct icmp_hdr *ich = rte_pktmbuf_mtod(mbuf, struct icmp_hdr *); + struct rte_ipv4_hdr *iph = MBUF_USERDATA(mbuf, struct rte_ipv4_hdr *, MBUF_FIELD_PROTO); + struct rte_icmp_hdr *ich = rte_pktmbuf_mtod(mbuf, struct rte_icmp_hdr *); uint16_t csum; struct flow4 fl4; - if (ich->icmp_type != IP_ICMP_ECHO_REQUEST || ich->icmp_code != 0) { + if (ich->icmp_type != RTE_IP_ICMP_ECHO_REQUEST || ich->icmp_code != 0) { RTE_LOG(WARNING, ICMP, "%s: not echo-request\n", __func__); goto errout; } @@ -79,7 +79,7 @@ static int icmp_echo(struct rte_mbuf *mbuf) goto errout; } - ich->icmp_type = IP_ICMP_ECHO_REPLY; + ich->icmp_type = RTE_IP_ICMP_ECHO_REPLY; /* recalc the checksum */ ich->icmp_cksum = 0; csum = rte_raw_cksum(ich, mbuf->pkt_len); @@ -164,8 +164,8 @@ static struct 
icmp_ctrl icmp_ctrls[MAX_ICMP_CTRL] = { /* @imbuf is input (original) IP packet to trigger ICMP. */ void icmp_send(struct rte_mbuf *imbuf, int type, int code, uint32_t info) { - struct route_entry *rt = imbuf->userdata; - struct ipv4_hdr *iph = ip4_hdr(imbuf); + struct route_entry *rt = MBUF_USERDATA(imbuf, struct route_entry *, MBUF_FIELD_ROUTE); + struct rte_ipv4_hdr *iph = ip4_hdr(imbuf); eth_type_t etype = imbuf->packet_type; /* FIXME: use other field ? */ struct in_addr saddr; uint8_t tos; @@ -196,7 +196,7 @@ void icmp_send(struct rte_mbuf *imbuf, int type, int code, uint32_t info) } /* reply only first fragment. */ - if (iph->fragment_offset & htons(IPV4_HDR_OFFSET_MASK)) + if (iph->fragment_offset & htons(RTE_IPV4_HDR_OFFSET_MASK)) return; if (type > NR_ICMP_TYPES) @@ -249,7 +249,7 @@ void icmp_send(struct rte_mbuf *imbuf, int type, int code, uint32_t info) RTE_LOG(DEBUG, ICMP, "%s: no memory.\n", __func__); return; } - mbuf->userdata = NULL; + mbuf_userdata_reset(mbuf); assert(rte_pktmbuf_headroom(mbuf) >= 128); /* for L2/L3 */ /* prepare ICMP message */ @@ -265,7 +265,7 @@ void icmp_send(struct rte_mbuf *imbuf, int type, int code, uint32_t info) /* copy as much as we can without exceeding 576 (min-MTU) */ room = fl4.fl4_oif->mtu > 576 ? 
576 : fl4.fl4_oif->mtu; - room -= sizeof(struct ipv4_hdr); + room -= sizeof(struct rte_ipv4_hdr); room -= sizeof(struct icmphdr); /* we support only linear mbuf now, use m.data_len @@ -291,13 +291,13 @@ void icmp_send(struct rte_mbuf *imbuf, int type, int code, uint32_t info) static int icmp_rcv(struct rte_mbuf *mbuf) { - struct ipv4_hdr *iph = mbuf->userdata; - struct icmp_hdr *ich; + struct rte_ipv4_hdr *iph = MBUF_USERDATA(mbuf, struct rte_ipv4_hdr *, MBUF_FIELD_PROTO); + struct rte_icmp_hdr *ich; struct icmp_ctrl *ctrl; - if (mbuf_may_pull(mbuf, sizeof(struct icmp_hdr)) != 0) + if (mbuf_may_pull(mbuf, sizeof(struct rte_icmp_hdr)) != 0) goto invpkt; - ich = rte_pktmbuf_mtod(mbuf, struct icmp_hdr *); + ich = rte_pktmbuf_mtod(mbuf, struct rte_icmp_hdr *); if (unlikely(!iph)) { RTE_LOG(WARNING, ICMP, "%s: no ipv4 header\n", __func__); @@ -395,7 +395,7 @@ void icmp_redirect_proc(void *args) /* Remove ether_hdr at the beginning of an mbuf */ data_off = mbuf->data_off; - if (unlikely(NULL == rte_pktmbuf_adj(mbuf, sizeof(struct ether_hdr)))) { + if (unlikely(NULL == rte_pktmbuf_adj(mbuf, sizeof(struct rte_ether_hdr)))) { rte_pktmbuf_free(mbuf); return; } diff --git a/src/iftraf.c b/src/iftraf.c index d0a8c829c..a03277402 100644 --- a/src/iftraf.c +++ b/src/iftraf.c @@ -675,7 +675,7 @@ static int iftraf_pkt_deliver(int af, struct rte_mbuf *mbuf, struct netif_port * portid_t devid; if (af == AF_INET) { - struct ipv4_hdr *ip4h = ip4_hdr(mbuf); + struct rte_ipv4_hdr *ip4h = ip4_hdr(mbuf); if (unlikely(ip4h->next_proto_id != IPPROTO_TCP && ip4h->next_proto_id != IPPROTO_UDP)) { diff --git a/src/inetaddr.c b/src/inetaddr.c index 9e32c99fb..13081e4e4 100644 --- a/src/inetaddr.c +++ b/src/inetaddr.c @@ -179,7 +179,7 @@ static int ifa_add_del_mcast(struct inet_ifaddr *ifa, bool add) { int err; union inet_addr iaddr; - struct ether_addr eaddr; + struct rte_ether_addr eaddr; /* for ipv6 only */ if (ifa->af != AF_INET6) @@ -219,7 +219,7 @@ int idev_add_mcast_init(void *args) int 
err; struct inet_device *idev; union inet_addr all_nodes, all_routers; - struct ether_addr eaddr_nodes, eaddr_routers; + struct rte_ether_addr eaddr_nodes, eaddr_routers; struct netif_port *dev = (struct netif_port *) args; @@ -600,7 +600,7 @@ static int ifa_add_route6(struct inet_ifaddr *ifa) static int ifa_add_route(struct inet_ifaddr *ifa) { /* set route from master */ - if (unlikely(rte_lcore_id() != rte_get_master_lcore())) + if (unlikely(rte_lcore_id() != rte_get_main_lcore())) return EDPVS_OK; switch (ifa->af) { @@ -666,7 +666,7 @@ static int ifa_del_route6(struct inet_ifaddr *ifa) static int ifa_del_route(struct inet_ifaddr *ifa) { /* set route from master */ - if (unlikely(rte_lcore_id() != rte_get_master_lcore())) + if (unlikely(rte_lcore_id() != rte_get_main_lcore())) return EDPVS_OK; switch (ifa->af) { @@ -686,7 +686,7 @@ static int inet_ifaddr_dad_completed(void *arg) struct inet_ifaddr *ifa = arg; /* only master's ifa scheduled ifa->dad_timer */ - assert(rte_lcore_id() == rte_get_master_lcore()); + assert(rte_lcore_id() == rte_get_main_lcore()); dpvs_timer_cancel_nolock(&ifa->dad_timer, true); ifa->flags &= ~(IFA_F_TENTATIVE | IFA_F_OPTIMISTIC | IFA_F_DADFAILED); @@ -719,7 +719,7 @@ static void inet_ifaddr_dad_start(struct inet_ifaddr *ifa) ifa->flags |= IFA_F_TENTATIVE | IFA_F_OPTIMISTIC; /* timing and sending dad on master only */ - if (cid != rte_get_master_lcore()) + if (cid != rte_get_main_lcore()) return; dpvs_time_rand_delay(&tv, 1000000); @@ -765,7 +765,7 @@ static int ifa_expire(void *arg) struct inet_ifaddr *ifa = (struct inet_ifaddr *)arg; /* only master's ifa scheduled ifa->timer */ - assert(cid == rte_get_master_lcore()); + assert(cid == rte_get_main_lcore()); err = inet_addr_del(ifa->af, ifa->idev->dev, &ifa->addr, ifa->plen); if (err != EDPVS_OK) { @@ -783,7 +783,7 @@ static int ifa_entry_add(const struct ifaddr_action *param) struct inet_device *idev; struct inet_ifaddr *ifa; struct timeval timeo = { 0 }; - bool is_master = 
(rte_lcore_id() == rte_get_master_lcore()); + bool is_master = (rte_lcore_id() == rte_get_main_lcore()); if (!param || !param->dev || !ifa_prefix_check(param->af, ¶m->addr, param->plen)) @@ -900,7 +900,7 @@ static int ifa_entry_mod(const struct ifaddr_action *param) struct inet_device *idev; struct inet_ifaddr *ifa; struct timeval timeo = { 0 }; - bool is_master = (rte_lcore_id() == rte_get_master_lcore()); + bool is_master = (rte_lcore_id() == rte_get_main_lcore()); if (!param || !param->dev || !ifa_prefix_check(param->af, ¶m->addr, param->plen)) @@ -1021,7 +1021,7 @@ static int ifa_entry_sync(const struct ifaddr_action *param) /* only support snyc flags now */ ifa->flags = param->flags; if ((ifa->flags & IFA_F_DADFAILED) && - (rte_lcore_id() == rte_get_master_lcore())) + (rte_lcore_id() == rte_get_main_lcore())) dpvs_timer_cancel(&ifa->dad_timer, true); ifa_put(ifa); @@ -1041,7 +1041,7 @@ static void ifa_free(struct inet_ifaddr **ifa_p) /* remove @ifa from @ifa_expired_list */ list_del_init(&ifa->h_list); - if (cid == rte_get_master_lcore()) { + if (cid == rte_get_main_lcore()) { /* it's safe to cancel timer not pending but zeroed */ dpvs_timer_cancel(&ifa->dad_timer, true); dpvs_timer_cancel(&ifa->timer, true); @@ -1105,7 +1105,7 @@ static void fill_ifaddr_entry(lcoreid_t cid, const struct inet_ifaddr *ifa, stru entry->ifa_entry.prefered_lft = 0; } else { struct timeval now, diff; - dpvs_time_now(&now, rte_lcore_id() == rte_get_master_lcore()); + dpvs_time_now(&now, rte_lcore_id() == rte_get_main_lcore()); timersub(&now, &ifa->tstemp, &diff); entry->ifa_entry.valid_lft = ifa->valid_lft - diff.tv_sec; entry->ifa_entry.prefered_lft = ifa->prefered_lft - diff.tv_sec; @@ -1227,7 +1227,7 @@ static int ifa_msg_sync_cb(struct dpvs_msg *msg) struct ifaddr_action *param; /* sync from master lcore only */ - assert(rte_lcore_id() == rte_get_master_lcore()); + assert(rte_lcore_id() == rte_get_main_lcore()); if (!msg || msg->len != sizeof(*param)) return EDPVS_INVAL; @@ 
-1459,7 +1459,7 @@ static int inet_addr_sync(const struct ifaddr_action *param) struct dpvs_msg *msg; cid = rte_lcore_id(); - mid = rte_get_master_lcore(); + mid = rte_get_main_lcore(); /* call from master */ if (cid == mid) @@ -1473,7 +1473,7 @@ static int inet_addr_sync(const struct ifaddr_action *param) return EDPVS_NOMEM; } - err = msg_send(msg, rte_get_master_lcore(), DPVS_MSG_F_ASYNC, NULL); + err = msg_send(msg, rte_get_main_lcore(), DPVS_MSG_F_ASYNC, NULL); if (err != EDPVS_OK) RTE_LOG(WARNING, IFA, "[%02d] %s: msg_send failed\n", cid, __func__); @@ -1488,7 +1488,7 @@ static int ifaddr_get_basic(struct inet_device *idev, struct inet_addr_data_arra /* convey ifa data on master lcore */ cid = rte_lcore_id(); - assert(cid == rte_get_master_lcore()); + assert(cid == rte_get_main_lcore()); if (idev) ifa_cnt = idev->ifa_cnt[cid]; @@ -1785,7 +1785,7 @@ static struct dpvs_msg_type ifa_msg_types[] = { .type = MSG_TYPE_IFA_SYNC, .prio = MSG_PRIO_NORM, .mode = DPVS_MSG_UNICAST, - //.cid = rte_get_master_lcore(), + //.cid = rte_get_main_lcore(), .unicast_msg_cb = ifa_msg_sync_cb, .multicast_msg_cb = NULL } @@ -1815,7 +1815,7 @@ int inet_addr_init(void) INIT_LIST_HEAD(&ifa_expired_list[cid]); } - ifa_msg_types[2].cid = rte_get_master_lcore(); + ifa_msg_types[2].cid = rte_get_main_lcore(); if ((err = sockopt_register(&ifa_sockopts)) != EDPVS_OK) { RTE_LOG(ERR, IFA, "%s: fail to register ifa_sockopts -- %s\n", diff --git a/src/ip_gre.c b/src/ip_gre.c index e8412b635..a0219846c 100644 --- a/src/ip_gre.c +++ b/src/ip_gre.c @@ -271,7 +271,7 @@ static int gre_rcv(struct rte_mbuf *mbuf) if (hlen < 0) goto drop; - iph = mbuf->userdata; /* see ipv4_local_in_fin */ + iph = MBUF_USERDATA(mbuf, struct iphdr *, MBUF_FIELD_PROTO); /* see ipv4_local_in_fin */ assert(iph->version == 4 && iph->protocol == IPPROTO_GRE); tnl = ip_tunnel_lookup(&gre_tunnel_tab, mbuf->port, tpi.flags, diff --git a/src/ip_tunnel.c b/src/ip_tunnel.c index e7d85f3a4..62acbcd6c 100644 --- a/src/ip_tunnel.c +++ 
b/src/ip_tunnel.c @@ -197,7 +197,7 @@ static struct netif_port *tunnel_create(struct ip_tunnel_tab *tab, set before tunnel_bind_dev */ if (tnl->link) { dev->flag |= tnl->link->flag; - ether_addr_copy(&tnl->link->addr, &dev->addr); + rte_ether_addr_copy(&tnl->link->addr, &dev->addr); } dev->flag |= NETIF_PORT_FLAG_RUNNING; /* XXX */ dev->flag |= NETIF_PORT_FLAG_NO_ARP; @@ -333,7 +333,7 @@ static int tunnel_update_pmtu(struct netif_port *dev, struct rte_mbuf *mbuf, else mtu = rt->mtu ? : dev->mtu; - if (mbuf->packet_type == ETHER_TYPE_IPv4) { + if (mbuf->packet_type == RTE_ETHER_TYPE_IPV4) { if ((iiph->frag_off & htons(IP_DF)) && mtu < pkt_size) { icmp_send(mbuf, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); return EDPVS_FRAG; @@ -362,7 +362,7 @@ static int tunnel_xmit(struct rte_mbuf *mbuf, __be32 src, __be32 dst, oiph->daddr = dst; oiph->saddr = src; oiph->ttl = ttl; - oiph->id = ip4_select_id((struct ipv4_hdr *)oiph); + oiph->id = ip4_select_id((struct rte_ipv4_hdr *)oiph); return ipv4_local_out(mbuf); } @@ -805,7 +805,7 @@ int ip_tunnel_xmit(struct rte_mbuf *mbuf, struct netif_port *dev, assert(mbuf && dev && tiph); - if (mbuf->packet_type == ETHER_TYPE_IPv4) + if (mbuf->packet_type == RTE_ETHER_TYPE_IPV4) iiph = rte_pktmbuf_mtod_offset(mbuf, struct iphdr *, tnl->hlen); connected = tiph->daddr != 0; @@ -852,7 +852,7 @@ int ip_tunnel_xmit(struct rte_mbuf *mbuf, struct netif_port *dev, /* refer route in mbuf and this reference will be put later. 
*/ route4_get(rt); - mbuf->userdata = (void *)rt; + MBUF_USERDATA(mbuf, struct route_entry *, MBUF_FIELD_ROUTE) = rt; err = tunnel_update_pmtu(dev, mbuf, rt, tiph->frag_off, iiph); if (err != EDPVS_OK) diff --git a/src/ipip.c b/src/ipip.c index d8b978b35..2b4e0dcbd 100644 --- a/src/ipip.c +++ b/src/ipip.c @@ -72,7 +72,7 @@ static int ipip_rcv(struct rte_mbuf *mbuf) /* IPv4's upper layer can use @userdata for IP header, * see ipv4_local_in_fin() */ - iph = mbuf->userdata; + iph = MBUF_USERDATA(mbuf, struct iphdr *, MBUF_FIELD_PROTO); assert(iph->version == 4 && iph->protocol == IPPROTO_IPIP); tnl = ip_tunnel_lookup(&ipip_tunnel_tab, mbuf->port, TUNNEL_F_NO_KEY, diff --git a/src/ipset.c b/src/ipset.c index c94624cc6..762d6a185 100644 --- a/src/ipset.c +++ b/src/ipset.c @@ -509,8 +509,8 @@ int ipset_init(void) for (i = 0; i < IPSET_TAB_SIZE; i++) INIT_LIST_HEAD(&this_ipset_table_lcore[i]); - rte_eal_mp_remote_launch(ipset_lcore_init, NULL, CALL_MASTER); - RTE_LCORE_FOREACH_SLAVE(cid) { + rte_eal_mp_remote_launch(ipset_lcore_init, NULL, CALL_MAIN); + RTE_LCORE_FOREACH_WORKER(cid) { if ((err = rte_eal_wait_lcore(cid)) < 0) { RTE_LOG(WARNING, IPSET, "%s: lcore %d: %s.\n", __func__, cid, dpvs_strerror(err)); @@ -542,8 +542,8 @@ int ipset_term(void) if ((err = sockopt_unregister(&ipset_sockopts)) != EDPVS_OK) return err; - rte_eal_mp_remote_launch(ipset_flush_lcore, NULL, CALL_MASTER); - RTE_LCORE_FOREACH_SLAVE(cid) { + rte_eal_mp_remote_launch(ipset_flush_lcore, NULL, CALL_MAIN); + RTE_LCORE_FOREACH_WORKER(cid) { if ((err = rte_eal_wait_lcore(cid)) < 0) { RTE_LOG(WARNING, IPSET, "%s: lcore %d: %s.\n", __func__, cid, dpvs_strerror(err)); diff --git a/src/ipv4.c b/src/ipv4.c index e3fb35849..652a351c3 100644 --- a/src/ipv4.c +++ b/src/ipv4.c @@ -107,7 +107,7 @@ static void ip4_show_hdr(const char *func, const struct rte_mbuf *mbuf) { portid_t port; lcoreid_t lcore; - struct ipv4_hdr *iph; + struct rte_ipv4_hdr *iph; char saddr[16], daddr[16]; port = mbuf->port; @@ -122,7 
+122,7 @@ static void ip4_show_hdr(const char *func, const struct rte_mbuf *mbuf) RTE_LOG(DEBUG, IPV4, "%s: [%d] port %u ipv4 hl %u tos %u tot %u " "id %u ttl %u prot %u src %s dst %s\n", - func, lcore, port, IPV4_HDR_IHL_MASK & iph->version_ihl, + func, lcore, port, RTE_IPV4_HDR_IHL_MASK & iph->version_ihl, iph->type_of_service, ntohs(iph->total_length), ntohs(iph->packet_id), iph->time_to_live, iph->next_proto_id, saddr, daddr); @@ -155,8 +155,8 @@ static int ipv4_local_in_fin(struct rte_mbuf *mbuf) { int err, hlen; const struct inet_protocol *prot; - struct ipv4_hdr *iph = ip4_hdr(mbuf); - struct route_entry *rt = mbuf->userdata; + struct rte_ipv4_hdr *iph = ip4_hdr(mbuf); + struct route_entry *rt = MBUF_USERDATA(mbuf, struct route_entry *, MBUF_FIELD_ROUTE); int (*handler)(struct rte_mbuf *mbuf) = NULL; /* remove network header */ @@ -165,7 +165,7 @@ static int ipv4_local_in_fin(struct rte_mbuf *mbuf) if (rt) { route4_put(rt); - mbuf->userdata = NULL; + MBUF_USERDATA(mbuf, struct route_entry *, MBUF_FIELD_ROUTE) = NULL; } /* @@ -174,13 +174,11 @@ static int ipv4_local_in_fin(struct rte_mbuf *mbuf) * but mbuf do not. Consider the length of header is variable * (e.g., IPv4 options), it's not make sence for every layer * to parse lower layer's headers. - * note if mbuf->userdata is not suitable, we can use 'extened' - * mbuf to save offsets like skb. * * BTW, if netif_port_get() called too many times we can also * use 'extend' mbuf to save 'netif_port *dev'. 
*/ - mbuf->userdata = iph; + MBUF_USERDATA(mbuf, struct rte_ipv4_hdr *, MBUF_FIELD_PROTO) = iph; /* deliver to upper layer */ rte_spinlock_lock(&inet_prot_lock); @@ -203,8 +201,9 @@ static int ipv4_local_in_fin(struct rte_mbuf *mbuf) static int ipv4_local_in(struct rte_mbuf *mbuf) { int err; - struct route_entry *rt = mbuf->userdata; + struct route_entry *rt; + rt = MBUF_USERDATA(mbuf, struct route_entry *, MBUF_FIELD_ROUTE); if (ip4_is_frag(ip4_hdr(mbuf))) { if ((err = ip4_defrag(mbuf, IP_DEFRAG_LOCAL_IN)) != EDPVS_OK) { route4_put(rt); @@ -218,10 +217,11 @@ static int ipv4_local_in(struct rte_mbuf *mbuf) static int ipv4_output_fin2(struct rte_mbuf *mbuf) { - struct route_entry *rt = mbuf->userdata; + struct route_entry *rt; int err; struct in_addr nexthop; + rt = MBUF_USERDATA(mbuf, struct route_entry *, MBUF_FIELD_ROUTE); if (rt->gw.s_addr == htonl(INADDR_ANY)) nexthop.s_addr = ip4_hdr(mbuf)->dst_addr; else @@ -236,7 +236,7 @@ static int ipv4_output_fin2(struct rte_mbuf *mbuf) * note it was used in RX path for eth_type_t. * really confusing. 
*/ - mbuf->packet_type = ETHER_TYPE_IPv4; + mbuf->packet_type = RTE_ETHER_TYPE_IPV4; mbuf->l3_len = ip4_hdrlen(mbuf); err = neigh_output(AF_INET, (union inet_addr *)&nexthop, mbuf, rt->port); @@ -246,7 +246,7 @@ static int ipv4_output_fin2(struct rte_mbuf *mbuf) static int ipv4_output_fin(struct rte_mbuf *mbuf) { - struct route_entry *rt = mbuf->userdata; + struct route_entry *rt = MBUF_USERDATA(mbuf, struct route_entry *, MBUF_FIELD_ROUTE); if (mbuf->pkt_len > rt->mtu) return ipv4_fragment(mbuf, rt->mtu, ipv4_output_fin2); @@ -256,7 +256,7 @@ static int ipv4_output_fin(struct rte_mbuf *mbuf) int ipv4_output(struct rte_mbuf *mbuf) { - struct route_entry *rt = mbuf->userdata; + struct route_entry *rt = MBUF_USERDATA(mbuf, struct route_entry *, MBUF_FIELD_ROUTE); assert(rt); IP4_UPD_PO_STATS(out, mbuf->pkt_len); @@ -277,8 +277,8 @@ static int ipv4_forward_fin(struct rte_mbuf *mbuf) static int ipv4_forward(struct rte_mbuf *mbuf) { - struct ipv4_hdr *iph = ip4_hdr(mbuf); - struct route_entry *rt = mbuf->userdata; + struct rte_ipv4_hdr *iph = ip4_hdr(mbuf); + struct route_entry *rt = MBUF_USERDATA(mbuf, struct route_entry *, MBUF_FIELD_ROUTE); uint32_t mtu, csum; assert(rt && rt->port); @@ -291,7 +291,7 @@ static int ipv4_forward(struct rte_mbuf *mbuf) mtu = rt->mtu; if (mbuf->pkt_len > mtu - && (iph->fragment_offset & htons(IPV4_HDR_DF_FLAG))) { + && (iph->fragment_offset & htons(RTE_IPV4_HDR_DF_FLAG))) { IP4_INC_STATS(fragfails); icmp_send(mbuf, ICMP_DEST_UNREACH, ICMP_UNREACH_NEEDFRAG, htonl(mtu)); goto drop; @@ -327,7 +327,7 @@ int ipv4_rcv_fin(struct rte_mbuf *mbuf) { int err; struct route_entry *rt = NULL; - struct ipv4_hdr *iph = ip4_hdr(mbuf); + struct rte_ipv4_hdr *iph = ip4_hdr(mbuf); eth_type_t etype = mbuf->packet_type; /* FIXME: use other field ? 
*/ /* input route decision */ @@ -346,7 +346,7 @@ int ipv4_rcv_fin(struct rte_mbuf *mbuf) } /* use extended mbuf if have more data then @rt */ - mbuf->userdata = (void *)rt; + MBUF_USERDATA(mbuf, struct route_entry *, MBUF_FIELD_ROUTE) = rt; if (rt->flag & RTF_LOCALIN) { return ipv4_local_in(mbuf); @@ -378,7 +378,7 @@ static int ipv4_rcv(struct rte_mbuf *mbuf, struct netif_port *port) #ifdef CONFIG_ICMP_REDIRECT_CORE struct icmphdr *ich, _icmph; #endif - struct ipv4_hdr *iph; + struct rte_ipv4_hdr *iph; uint16_t hlen, len; eth_type_t etype = mbuf->packet_type; /* FIXME: use other field ? */ assert(mbuf); @@ -390,13 +390,13 @@ static int ipv4_rcv(struct rte_mbuf *mbuf, struct netif_port *port) IP4_UPD_PO_STATS(in, mbuf->pkt_len); iftraf_pkt_in(AF_INET, mbuf, port); - if (mbuf_may_pull(mbuf, sizeof(struct ipv4_hdr)) != 0) + if (mbuf_may_pull(mbuf, sizeof(struct rte_ipv4_hdr)) != 0) goto inhdr_error; iph = ip4_hdr(mbuf); hlen = ip4_hdrlen(mbuf); - if (((iph->version_ihl) >> 4) != 4 || hlen < sizeof(struct ipv4_hdr)) + if (((iph->version_ihl) >> 4) != 4 || hlen < sizeof(struct rte_ipv4_hdr)) goto inhdr_error; if (mbuf_may_pull(mbuf, hlen) != 0) @@ -421,7 +421,7 @@ static int ipv4_rcv(struct rte_mbuf *mbuf, struct netif_port *port) goto drop; } } - mbuf->userdata = NULL; + mbuf_userdata_reset(mbuf); mbuf->l3_len = hlen; #ifdef CONFIG_DPVS_IP_HEADER_DEBUG @@ -436,7 +436,7 @@ static int ipv4_rcv(struct rte_mbuf *mbuf, struct netif_port *port) if (unlikely(!ich)) goto drop; if (ich->type == ICMP_ECHOREPLY || ich->type == ICMP_ECHO) { - rte_pktmbuf_prepend(mbuf, (uint16_t)sizeof(struct ether_hdr)); + rte_pktmbuf_prepend(mbuf, (uint16_t)sizeof(struct rte_ether_hdr)); icmp_recv_proc(mbuf); return EDPVS_OK; } @@ -456,7 +456,7 @@ static int ipv4_rcv(struct rte_mbuf *mbuf, struct netif_port *port) } static struct pkt_type ip4_pkt_type = { - //.type = rte_cpu_to_be_16(ETHER_TYPE_IPv4), + //.type = rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4), .func = ipv4_rcv, .port = NULL, }; @@ -486,7 
+486,7 @@ int ipv4_init(void) if ((err = ipv4_frag_init()) != EDPVS_OK) return err; - ip4_pkt_type.type = htons(ETHER_TYPE_IPv4); + ip4_pkt_type.type = htons(RTE_ETHER_TYPE_IPV4); if ((err = netif_register_pkt(&ip4_pkt_type)) != EDPVS_OK) { ipv4_frag_term(); return err; @@ -509,7 +509,7 @@ int ipv4_term(void) return EDPVS_OK; } -uint32_t ip4_select_id(struct ipv4_hdr *iph) +uint32_t ip4_select_id(struct rte_ipv4_hdr *iph) { uint32_t hash, id; rte_atomic32_t *p_id; @@ -526,8 +526,9 @@ uint32_t ip4_select_id(struct ipv4_hdr *iph) int ipv4_local_out(struct rte_mbuf *mbuf) { - struct ipv4_hdr *iph = ip4_hdr(mbuf); - struct route_entry *rt = mbuf->userdata; + struct rte_ipv4_hdr *iph = ip4_hdr(mbuf); + struct route_entry *rt = MBUF_USERDATA(mbuf, + struct route_entry *, MBUF_FIELD_ROUTE); iph->total_length = htons(mbuf->pkt_len); @@ -543,7 +544,7 @@ int ipv4_local_out(struct rte_mbuf *mbuf) int ipv4_xmit(struct rte_mbuf *mbuf, const struct flow4 *fl4) { struct route_entry *rt; - struct ipv4_hdr *iph; + struct rte_ipv4_hdr *iph; if (!mbuf || !fl4) { if (mbuf) @@ -559,9 +560,9 @@ int ipv4_xmit(struct rte_mbuf *mbuf, const struct flow4 *fl4) IP4_INC_STATS(outnoroutes); return EDPVS_NOROUTE; } - mbuf->userdata = (void *)rt; + MBUF_USERDATA(mbuf, struct route_entry *, MBUF_FIELD_ROUTE) = (void *)rt; - iph = (struct ipv4_hdr *)rte_pktmbuf_prepend(mbuf, sizeof(struct ipv4_hdr)); + iph = (struct rte_ipv4_hdr *)rte_pktmbuf_prepend(mbuf, sizeof(struct rte_ipv4_hdr)); if (!iph) { rte_pktmbuf_free(mbuf); route4_put(rt); diff --git a/src/ipv4_frag.c b/src/ipv4_frag.c index d08a7fa88..ce9f06078 100644 --- a/src/ipv4_frag.c +++ b/src/ipv4_frag.c @@ -172,7 +172,7 @@ static struct ipv4_frag ip4_frags[DPVS_MAX_LCORE]; int ipv4_reassamble(struct rte_mbuf *mbuf) { struct rte_mbuf *asm_mbuf, *next, *seg, *prev; - struct ipv4_hdr *iph = ip4_hdr(mbuf); + struct rte_ipv4_hdr *iph = ip4_hdr(mbuf); assert(mbuf->l3_len > 0); @@ -199,7 +199,7 @@ int ipv4_reassamble(struct rte_mbuf *mbuf) 
rte_pktmbuf_free(asm_mbuf); return EDPVS_NOMEM; } - seg->userdata = NULL; + mbuf_userdata_reset(seg); for (prev = asm_mbuf; prev; prev = prev->next) if (prev->next == mbuf) break; @@ -259,15 +259,16 @@ int ipv4_reassamble(struct rte_mbuf *mbuf) int ipv4_fragment(struct rte_mbuf *mbuf, unsigned int mtu, int (*output)(struct rte_mbuf *)) { - struct ipv4_hdr *iph = ip4_hdr(mbuf); - struct route_entry *rt = mbuf->userdata; + struct rte_ipv4_hdr *iph = ip4_hdr(mbuf); + struct route_entry *rt = MBUF_USERDATA(mbuf, + struct route_entry *, MBUF_FIELD_ROUTE); struct rte_mbuf *frag; unsigned int left, len, hlen; int offset, err, from; void *to; assert(rt); - if (iph->fragment_offset & IPV4_HDR_DF_FLAG) { + if (iph->fragment_offset & RTE_IPV4_HDR_DF_FLAG) { icmp_send(mbuf, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); err = EDPVS_FRAG; @@ -295,11 +296,12 @@ int ipv4_fragment(struct rte_mbuf *mbuf, unsigned int mtu, err = EDPVS_NOMEM; goto out; } - frag->userdata = NULL; + mbuf_userdata_reset(frag); /* copy metadata from orig pkt */ route4_get(rt); - frag->userdata = rt; /* no need to hold before consume mbuf */ + /* no need to hold before consume mbuf */ + MBUF_USERDATA(mbuf, struct route_entry *, MBUF_FIELD_ROUTE) = rt; frag->port = mbuf->port; frag->ol_flags = 0; /* do not offload csum for frag */ frag->l2_len = mbuf->l2_len; @@ -330,7 +332,7 @@ int ipv4_fragment(struct rte_mbuf *mbuf, unsigned int mtu, /* TODO: if (offset == 0) ip_fragment_options(frag); */ if (left > 0) - iph->fragment_offset |= htons(IPV4_HDR_MF_FLAG); + iph->fragment_offset |= htons(RTE_IPV4_HDR_MF_FLAG); offset += len; from += len; diff --git a/src/ipv6/icmp6.c b/src/ipv6/icmp6.c index da894a2a5..20e4a2c4d 100644 --- a/src/ipv6/icmp6.c +++ b/src/ipv6/icmp6.c @@ -56,7 +56,7 @@ uint16_t icmp6_csum(struct ip6_hdr *iph, struct icmp6_hdr *ich) hdr.ip6_dst = iph->ip6_dst; csum = rte_raw_cksum(ich, l4_len); - csum += rte_ipv6_phdr_cksum((struct ipv6_hdr *)&hdr, 0); + csum += rte_ipv6_phdr_cksum((struct 
rte_ipv6_hdr *)&hdr, 0); csum = ((csum & 0xffff0000) >> 16) + (csum & 0xffff); csum = (~csum) & 0xffff; @@ -75,7 +75,7 @@ void icmp6_send_csum(struct ip6_hdr *shdr, struct icmp6_hdr *ich) l4_len = ntohs(shdr->ip6_plen); csum = rte_raw_cksum(ich, l4_len); - csum += rte_ipv6_phdr_cksum((struct ipv6_hdr *)shdr, 0); + csum += rte_ipv6_phdr_cksum((struct rte_ipv6_hdr *)shdr, 0); csum = ((csum & 0xffff0000) >> 16) + (csum & 0xffff); csum = (~csum) & 0xffff; @@ -221,7 +221,7 @@ void icmp6_send(struct rte_mbuf *imbuf, int type, int code, uint32_t info) RTE_LOG(DEBUG, ICMP6, "%s: no memory.\n", __func__); return; } - mbuf->userdata = NULL; + mbuf_userdata_reset(mbuf); assert(rte_pktmbuf_headroom(mbuf) >= 128); /* for L2/L3 */ ich = (struct icmp6_hdr*)rte_pktmbuf_append(mbuf, sizeof(struct icmp6_hdr));; if (!ich) { @@ -298,7 +298,7 @@ static int icmp6_echo_reply(struct rte_mbuf *mbuf, struct ip6_hdr *iph, static int icmp6_rcv(struct rte_mbuf *mbuf) { - struct ip6_hdr *iph = mbuf->userdata; + struct ip6_hdr *iph = MBUF_USERDATA(mbuf, struct ip6_hdr *, MBUF_FIELD_PROTO); struct icmp6_hdr *ich; assert(iph); diff --git a/src/ipv6/ipv6.c b/src/ipv6/ipv6.c index b9323363c..f7d3a0f11 100644 --- a/src/ipv6/ipv6.c +++ b/src/ipv6/ipv6.c @@ -159,14 +159,14 @@ static int ip6_local_in_fin(struct rte_mbuf *mbuf) * and set it to IPv6 fixed header for upper layer. 
*/ if (!ipv6_addr_is_multicast(&hdr->ip6_dst)) { - struct route6 *rt = mbuf->userdata; + struct route6 *rt = MBUF_USERDATA(mbuf, struct route6 *, MBUF_FIELD_ROUTE); if (rt) { route6_put(rt); - mbuf->userdata = NULL; + MBUF_USERDATA(mbuf, struct route6 *, MBUF_FIELD_ROUTE) = NULL; } } - mbuf->userdata = (void *)hdr; + MBUF_USERDATA(mbuf, struct ip6_hdr *, MBUF_FIELD_PROTO) = hdr; nexthdr = hdr->ip6_nxt; /* parse extension headers */ @@ -292,7 +292,7 @@ static inline unsigned int ip6_mtu_forward(struct route6 *rt) static int ip6_fragment(struct rte_mbuf *mbuf, uint32_t mtu, int (*out)(struct rte_mbuf *)) { - struct route6 *rt = mbuf->userdata; + struct route6 *rt = MBUF_USERDATA(mbuf, struct route6 *, MBUF_FIELD_ROUTE); /* TODO: */ @@ -319,16 +319,16 @@ static int ip6_output_fin2(struct rte_mbuf *mbuf) return EDPVS_INVAL; } - dev = mbuf->userdata; + dev = MBUF_USERDATA(mbuf, struct netif_port *, MBUF_FIELD_ROUTE); /* only support linklocal! */ nexthop = &hdr->ip6_dst; } else { - rt = mbuf->userdata; + rt = MBUF_USERDATA(mbuf, struct route6 *, MBUF_FIELD_ROUTE); dev = rt->rt6_dev; nexthop = ip6_rt_nexthop(rt, &hdr->ip6_dst); } - mbuf->packet_type = ETHER_TYPE_IPv6; + mbuf->packet_type = RTE_ETHER_TYPE_IPV6; err = neigh_output(AF_INET6, (union inet_addr *)nexthop, mbuf, dev); @@ -344,9 +344,9 @@ static int ip6_output_fin(struct rte_mbuf *mbuf) struct ip6_hdr *hdr = ip6_hdr(mbuf); if (ipv6_addr_is_multicast(&hdr->ip6_dst)) - mtu = ((struct netif_port *)mbuf->userdata)->mtu; + mtu = MBUF_USERDATA(mbuf, struct netif_port *, MBUF_FIELD_ROUTE)->mtu; else - mtu = ((struct route6 *)mbuf->userdata)->rt6_mtu; + mtu = MBUF_USERDATA(mbuf, struct route6 *, MBUF_FIELD_ROUTE)->rt6_mtu; if (mbuf->pkt_len > mtu) return ip6_fragment(mbuf, mtu, ip6_output_fin2); @@ -361,9 +361,9 @@ int ip6_output(struct rte_mbuf *mbuf) struct ip6_hdr *hdr = ip6_hdr(mbuf); if (ipv6_addr_is_multicast(&hdr->ip6_dst)) { - dev = mbuf->userdata; + dev = MBUF_USERDATA(mbuf, struct netif_port *, 
MBUF_FIELD_ROUTE); } else { - rt = mbuf->userdata; + rt = MBUF_USERDATA(mbuf, struct route6 *, MBUF_FIELD_ROUTE); dev = rt->rt6_dev; } @@ -389,12 +389,11 @@ int ip6_local_out(struct rte_mbuf *mbuf) struct ip6_hdr *hdr = ip6_hdr(mbuf); if (ipv6_addr_is_multicast(&hdr->ip6_dst)) - dev = mbuf->userdata; + dev = MBUF_USERDATA(mbuf, struct netif_port *, MBUF_FIELD_ROUTE); else - dev = ((struct route6 *)mbuf->userdata)->rt6_dev; + dev = MBUF_USERDATA(mbuf, struct route6 *, MBUF_FIELD_ROUTE)->rt6_dev; - return INET_HOOK(AF_INET6, INET_HOOK_LOCAL_OUT, mbuf, NULL, - dev, ip6_output); + return INET_HOOK(AF_INET6, INET_HOOK_LOCAL_OUT, mbuf, NULL, dev, ip6_output); } static int ip6_forward_fin(struct rte_mbuf *mbuf) @@ -408,7 +407,7 @@ static int ip6_forward_fin(struct rte_mbuf *mbuf) static int ip6_forward(struct rte_mbuf *mbuf) { struct ip6_hdr *hdr = ip6_hdr(mbuf); - struct route6 *rt = mbuf->userdata; + struct route6 *rt = MBUF_USERDATA(mbuf, struct route6 *, MBUF_FIELD_ROUTE); int addrtype; uint32_t mtu; @@ -502,7 +501,7 @@ static int ip6_rcv_fin(struct rte_mbuf *mbuf) * someday, we may use extended mbuf if have more L3 info * then route need to be saved into mbuf. */ - mbuf->userdata = (void *)rt; + MBUF_USERDATA(mbuf, struct route6 *, MBUF_FIELD_ROUTE) = rt; if (rt->rt6_flags & RTF_LOCALIN) { return ip6_local_in(mbuf); @@ -521,7 +520,7 @@ static int ip6_rcv_fin(struct rte_mbuf *mbuf) kni: if (rt) { route6_put(rt); - mbuf->userdata = NULL; + MBUF_USERDATA(mbuf, struct route6 *, MBUF_FIELD_ROUTE) = NULL; } return EDPVS_KNICONTINUE; } @@ -613,7 +612,7 @@ static int ip6_rcv(struct rte_mbuf *mbuf, struct netif_port *dev) * @userdata is used to save route info in L3. 
*/ mbuf->l3_len = sizeof(*hdr); - mbuf->userdata = NULL; + MBUF_USERDATA(mbuf, struct route6 *, MBUF_FIELD_ROUTE) = NULL; /* hop-by-hop option header */ if (hdr->ip6_nxt == NEXTHDR_HOP) { @@ -655,7 +654,7 @@ int ipv6_init(void) return err; /* htons, cpu_to_be16 not work when struct initialization :( */ - ip6_pkt_type.type = htons(ETHER_TYPE_IPv6); + ip6_pkt_type.type = htons(RTE_ETHER_TYPE_IPV6); err = netif_register_pkt(&ip6_pkt_type); if (err) @@ -720,7 +719,8 @@ int ipv6_xmit(struct rte_mbuf *mbuf, struct flow6 *fl6) return EDPVS_NOTSUPP; } assert(fl6->fl6_oif); - mbuf->userdata = (void *)fl6->fl6_oif; + /* use mbuf userdata type MBUF_FIELD_ROUTE for saving spaces */ + MBUF_USERDATA(mbuf, struct netif_port *, MBUF_FIELD_ROUTE) = fl6->fl6_oif; dev = fl6->fl6_oif; } else { @@ -731,7 +731,7 @@ int ipv6_xmit(struct rte_mbuf *mbuf, struct flow6 *fl6) rte_pktmbuf_free(mbuf); return EDPVS_NOROUTE; } - mbuf->userdata = (void *)rt; + MBUF_USERDATA(mbuf, struct route6 *, MBUF_FIELD_ROUTE) = rt; dev = rt->rt6_dev; } @@ -868,7 +868,7 @@ uint16_t ip6_phdr_cksum(struct ip6_hdr *ip6h, uint64_t ol_flags, } /*FIXME: what if NEXTHDR_ROUTING is not the first exthdr? */ - csum = rte_ipv6_phdr_cksum((struct ipv6_hdr *)ip6h, ol_flags); + csum = rte_ipv6_phdr_cksum((struct rte_ipv6_hdr *)ip6h, ol_flags); /* restore original ip6h header */ ip6h->ip6_nxt = ip6nxt; @@ -905,7 +905,7 @@ uint16_t ip6_udptcp_cksum(struct ip6_hdr *ip6h, const void *l4_hdr, } /*FIXME: what if NEXTHDR_ROUTING is not the first exthdr? 
*/ - csum = rte_ipv6_udptcp_cksum((struct ipv6_hdr *)ip6h, l4_hdr); + csum = rte_ipv6_udptcp_cksum((struct rte_ipv6_hdr *)ip6h, l4_hdr); /* restore original ip6h header */ ip6h->ip6_nxt = ip6nxt; diff --git a/src/ipv6/ipv6_exthdrs.c b/src/ipv6/ipv6_exthdrs.c index ac33d3231..6f35cc3f2 100644 --- a/src/ipv6/ipv6_exthdrs.c +++ b/src/ipv6/ipv6_exthdrs.c @@ -93,7 +93,7 @@ int ip6_skip_exthdr(const struct rte_mbuf *imbuf, int start, __u8 *nexthdrp) */ static int ip6_dummy_hdr_rcv(struct rte_mbuf *mbuf) { - struct ip6_hdr *hdr = mbuf->userdata; + struct ip6_hdr *hdr = MBUF_USERDATA(mbuf, struct ip6_hdr *, MBUF_FIELD_PROTO); struct ip6_ext *exthdr; if (mbuf_may_pull(mbuf, 8) != 0) diff --git a/src/ipv6/ndisc.c b/src/ipv6/ndisc.c index d5b6b4a35..41eff7e71 100644 --- a/src/ipv6/ndisc.c +++ b/src/ipv6/ndisc.c @@ -197,7 +197,7 @@ static struct rte_mbuf *ndisc_build_mbuf(struct netif_port *dev, { struct rte_mbuf *mbuf; struct icmp6_hdr *icmp6hdr; - struct ipv6_hdr iph; + struct rte_ipv6_hdr iph; int len; uint8_t *opt; @@ -211,7 +211,7 @@ static struct rte_mbuf *ndisc_build_mbuf(struct netif_port *dev, RTE_LOG(ERR, NEIGHBOUR, "mbuf_pool alloc failed\n"); return NULL; } - mbuf->userdata = NULL; + mbuf_userdata_reset(mbuf); icmp6hdr = (struct icmp6_hdr *)rte_pktmbuf_append(mbuf, sizeof(*icmp6h)); rte_memcpy(icmp6hdr, icmp6h, sizeof(*icmp6h)); @@ -356,8 +356,8 @@ static int ndisc_recv_ns(struct rte_mbuf *mbuf, struct netif_port *dev) int hashkey = 0; uint32_t ndoptlen = 0; - struct in6_addr *saddr = &((struct ip6_hdr *)mbuf->userdata)->ip6_src; - struct in6_addr *daddr = &((struct ip6_hdr *)mbuf->userdata)->ip6_dst; + struct in6_addr *saddr = &MBUF_USERDATA(mbuf, struct ip6_hdr *, MBUF_FIELD_PROTO)->ip6_src; + struct in6_addr *daddr = &MBUF_USERDATA(mbuf, struct ip6_hdr *, MBUF_FIELD_PROTO)->ip6_dst; struct nd_msg *msg = rte_pktmbuf_mtod(mbuf, struct nd_msg *); int dad = ipv6_addr_any(saddr); @@ -440,12 +440,12 @@ static int ndisc_recv_ns(struct rte_mbuf *mbuf, struct netif_port 
*dev) hashkey = neigh_hashkey(AF_INET6, (union inet_addr *)saddr, dev); neigh = neigh_lookup_entry(AF_INET6, (union inet_addr *)saddr, dev, hashkey); if (neigh && !(neigh->flag & NEIGHBOUR_STATIC)) { - neigh_edit(neigh, (struct ether_addr *)lladdr); + neigh_edit(neigh, (struct rte_ether_addr *)lladdr); neigh_entry_state_trans(neigh, 1); neigh_sync_core(neigh, 1, NEIGH_ENTRY); } else { neigh = neigh_add_table(AF_INET6, (union inet_addr *)saddr, - (struct ether_addr *)lladdr, dev, hashkey, 0); + (struct rte_ether_addr *)lladdr, dev, hashkey, 0); if (!neigh){ RTE_LOG(ERR, NEIGHBOUR, "[%s] add neighbour wrong\n", __func__); return EDPVS_NOMEM; @@ -468,12 +468,12 @@ static int ndisc_recv_na(struct rte_mbuf *mbuf, struct netif_port *dev) struct neighbour_entry *neigh; struct inet_ifaddr *ifa; int hashkey; - struct in6_addr *daddr = &((struct ip6_hdr *)mbuf->userdata)->ip6_dst; + struct in6_addr *daddr = &MBUF_USERDATA(mbuf, struct ip6_hdr *, MBUF_FIELD_PROTO)->ip6_dst; struct nd_msg *msg = rte_pktmbuf_mtod(mbuf, struct nd_msg *); uint32_t ndoptlen = mbuf->data_len - offsetof(struct nd_msg, opt); #ifdef CONFIG_NDISC_DEBUG - struct in6_addr *saddr = &((struct ip6_hdr *)mbuf->userdata)->ip6_src; + struct in6_addr *saddr = &MBUF_USERDATA(mbuf, struct ip6_hdr *, MBUF_FIELD_PROTO)->ip6_src; ndisc_show_addr(__func__, saddr, daddr); #endif @@ -526,12 +526,12 @@ static int ndisc_recv_na(struct rte_mbuf *mbuf, struct netif_port *dev) hashkey = neigh_hashkey(AF_INET6, (union inet_addr *)&msg->target, dev); neigh = neigh_lookup_entry(AF_INET6, (union inet_addr *)&msg->target, dev, hashkey); if (neigh && !(neigh->flag & NEIGHBOUR_STATIC)) { - neigh_edit(neigh, (struct ether_addr *)lladdr); + neigh_edit(neigh, (struct rte_ether_addr *)lladdr); neigh_entry_state_trans(neigh, 1); neigh_sync_core(neigh, 1, NEIGH_ENTRY); } else { neigh = neigh_add_table(AF_INET6, (union inet_addr *)&msg->target, - (struct ether_addr *)lladdr, dev, hashkey, 0); + (struct rte_ether_addr *)lladdr, dev, 
hashkey, 0); if (!neigh) { RTE_LOG(ERR, NEIGHBOUR, "[%s] add neighbour wrong\n", __func__); return EDPVS_NOMEM; @@ -548,7 +548,7 @@ int ndisc_rcv(struct rte_mbuf *mbuf, struct netif_port *dev) { struct nd_msg *msg; int ret; - struct ip6_hdr *ipv6_hdr = mbuf->userdata; + struct ip6_hdr *ipv6_hdr = MBUF_USERDATA(mbuf, struct ip6_hdr *, MBUF_FIELD_PROTO); if (mbuf_may_pull(mbuf, sizeof(struct icmp6_hdr)) != 0) { ret = EDPVS_NOMEM; diff --git a/src/ipv6/route6.c b/src/ipv6/route6.c index a5a7a1f11..d0ac2461e 100644 --- a/src/ipv6/route6.c +++ b/src/ipv6/route6.c @@ -137,7 +137,7 @@ static int rt6_setup_lcore(void *arg) tv.tv_sec = g_rt6_recycle_time, tv.tv_usec = 0, - global = (rte_lcore_id() == rte_get_master_lcore()); + global = (rte_lcore_id() == rte_get_main_lcore()); INIT_LIST_HEAD(&this_rt6_dustbin.routes); err = dpvs_timer_sched_period(&this_rt6_dustbin.tm, &tv, rt6_recycle, NULL, global); @@ -210,7 +210,7 @@ static int rt6_add_del(const struct dp_vs_route6_conf *cf) lcoreid_t cid; cid = rte_lcore_id(); - assert(cid == rte_get_master_lcore()); + assert(cid == rte_get_main_lcore()); /* for master */ switch (cf->ops) { @@ -412,8 +412,8 @@ int route6_init(void) return EDPVS_NOTEXIST; } - rte_eal_mp_remote_launch(rt6_setup_lcore, NULL, CALL_MASTER); - RTE_LCORE_FOREACH_SLAVE(cid) { + rte_eal_mp_remote_launch(rt6_setup_lcore, NULL, CALL_MAIN); + RTE_LCORE_FOREACH_WORKER(cid) { if ((err = rte_eal_wait_lcore(cid)) < 0) { RTE_LOG(ERR, RT6, "%s: fail to setup rt6 on lcore%d -- %s\n", __func__, cid, dpvs_strerror(err)); @@ -462,8 +462,8 @@ int route6_term(void) if (err != EDPVS_OK) RTE_LOG(WARNING, RT6, "%s:fail to unregister route6 msg!\n", __func__); - rte_eal_mp_remote_launch(rt6_destroy_lcore, NULL, CALL_MASTER); - RTE_LCORE_FOREACH_SLAVE(cid) { + rte_eal_mp_remote_launch(rt6_destroy_lcore, NULL, CALL_MAIN); + RTE_LCORE_FOREACH_WORKER(cid) { if ((err = rte_eal_wait_lcore(cid)) < 0) { RTE_LOG(WARNING, RT6, "%s: fail to destroy rt6 on lcore%d -- %s\n", __func__, cid, 
dpvs_strerror(err)); diff --git a/src/ipv6/route6_lpm.c b/src/ipv6/route6_lpm.c index c146d224f..7c0ce488b 100644 --- a/src/ipv6/route6_lpm.c +++ b/src/ipv6/route6_lpm.c @@ -118,7 +118,7 @@ static int rt6_lpm_setup_lcore(void *arg) .flags = 0, }; - if ((!(g_lcore_mask & (1<userdata = NULL; + MBUF_USERDATA(cloned_syn_mbuf, void *, MBUF_FIELD_ROUTE) = NULL; conn->packet_xmit(pp, conn, cloned_syn_mbuf); } } @@ -1476,7 +1476,7 @@ static int sockopt_conn_get_all(const struct ip_vs_conn_req *conn_req, } if ((conn_req->flag & GET_IPVS_CONN_FLAG_TEMPLATE) - && (cid == rte_get_master_lcore())) { /* persist conns */ + && (cid == rte_get_main_lcore())) { /* persist conns */ rte_spinlock_lock(&dp_vs_ct_lock); res = __lcore_conn_table_dump(dp_vs_ct_tbl); rte_spinlock_unlock(&dp_vs_ct_lock); @@ -1787,8 +1787,8 @@ int dp_vs_conn_init(void) * RTE_PER_LCORE() can only access own instances. * it make codes looks strange. */ - rte_eal_mp_remote_launch(conn_init_lcore, NULL, SKIP_MASTER); - RTE_LCORE_FOREACH_SLAVE(lcore) { + rte_eal_mp_remote_launch(conn_init_lcore, NULL, SKIP_MAIN); + RTE_LCORE_FOREACH_WORKER(lcore) { if ((err = rte_eal_wait_lcore(lcore)) < 0) { RTE_LOG(WARNING, IPVS, "%s: lcore %d: %s.\n", __func__, lcore, dpvs_strerror(err)); @@ -1827,8 +1827,8 @@ int dp_vs_conn_term(void) /* no API opposite to rte_mempool_create() */ - rte_eal_mp_remote_launch(conn_term_lcore, NULL, SKIP_MASTER); - RTE_LCORE_FOREACH_SLAVE(lcore) { + rte_eal_mp_remote_launch(conn_term_lcore, NULL, SKIP_MAIN); + RTE_LCORE_FOREACH_WORKER(lcore) { rte_eal_wait_lcore(lcore); } diff --git a/src/ipvs/ip_vs_core.c b/src/ipvs/ip_vs_core.c index 4a75a8de9..63b09ab9a 100644 --- a/src/ipvs/ip_vs_core.c +++ b/src/ipvs/ip_vs_core.c @@ -43,7 +43,7 @@ static inline int dp_vs_fill_iphdr(int af, struct rte_mbuf *mbuf, struct dp_vs_iphdr *iph) { if (af == AF_INET) { - struct ipv4_hdr *ip4h = ip4_hdr(mbuf); + struct rte_ipv4_hdr *ip4h = ip4_hdr(mbuf); iph->af = AF_INET; iph->len = ip4_hdrlen(mbuf); iph->proto = 
ip4h->next_proto_id; @@ -425,7 +425,7 @@ static int __xmit_outbound_icmp4(struct rte_mbuf *mbuf, { struct flow4 fl4; struct route_entry *rt = NULL; - struct ipv4_hdr *iph = ip4_hdr(mbuf); + struct rte_ipv4_hdr *iph = ip4_hdr(mbuf); /* no translation needed for DR/TUN. */ if (conn->dest->fwdmode != DPVS_FWD_MODE_FNAT && @@ -451,7 +451,7 @@ static int __xmit_outbound_icmp4(struct rte_mbuf *mbuf, } if ((mbuf->pkt_len > rt->mtu) - && (ip4_hdr(mbuf)->fragment_offset & IPV4_HDR_DF_FLAG)) { + && (ip4_hdr(mbuf)->fragment_offset & RTE_IPV4_HDR_DF_FLAG)) { route4_put(rt); icmp_send(mbuf, ICMP_DEST_UNREACH, ICMP_UNREACH_NEEDFRAG, htonl(rt->mtu)); @@ -459,9 +459,9 @@ static int __xmit_outbound_icmp4(struct rte_mbuf *mbuf, return EDPVS_FRAG; } - if (unlikely(mbuf->userdata != NULL)) - route4_put((struct route_entry *)mbuf->userdata); - mbuf->userdata = rt; + if (unlikely(MBUF_USERDATA(mbuf, struct route_entry *, MBUF_FIELD_ROUTE) != NULL)) + route4_put(MBUF_USERDATA(mbuf, struct route_entry *, MBUF_FIELD_ROUTE)); + MBUF_USERDATA(mbuf, struct route_entry *, MBUF_FIELD_ROUTE) = rt; /* translation for outer L3, ICMP, and inner L3 and L4 */ dp_vs_xmit_icmp(mbuf, prot, conn, DPVS_CONN_DIR_OUTBOUND); @@ -507,9 +507,9 @@ static int __xmit_outbound_icmp6(struct rte_mbuf *mbuf, return EDPVS_FRAG; } - if (unlikely(mbuf->userdata != NULL)) - route6_put((struct route6 *)mbuf->userdata); - mbuf->userdata = rt6; + if (unlikely(MBUF_USERDATA(mbuf, struct route6 *, MBUF_FIELD_ROUTE) != NULL)) + route6_put(MBUF_USERDATA(mbuf, struct route6 *, MBUF_FIELD_ROUTE)); + MBUF_USERDATA(mbuf, struct route6 *, MBUF_FIELD_ROUTE) = rt6; /* translation for outer L3, ICMP, and inner L3 and L4 */ dp_vs_xmit_icmp(mbuf, prot, conn, DPVS_CONN_DIR_OUTBOUND); @@ -538,7 +538,7 @@ static int __xmit_inbound_icmp4(struct rte_mbuf *mbuf, { struct flow4 fl4; struct route_entry *rt = NULL; - struct ipv4_hdr *iph = ip4_hdr(mbuf); + struct rte_ipv4_hdr *iph = ip4_hdr(mbuf); /* no translation needed for DR/TUN. 
*/ if (conn->dest->fwdmode != DPVS_FWD_MODE_NAT && @@ -564,7 +564,7 @@ static int __xmit_inbound_icmp4(struct rte_mbuf *mbuf, } if ((mbuf->pkt_len > rt->mtu) - && (ip4_hdr(mbuf)->fragment_offset & IPV4_HDR_DF_FLAG)) { + && (ip4_hdr(mbuf)->fragment_offset & RTE_IPV4_HDR_DF_FLAG)) { route4_put(rt); icmp_send(mbuf, ICMP_DEST_UNREACH, ICMP_UNREACH_NEEDFRAG, htonl(rt->mtu)); @@ -572,9 +572,9 @@ static int __xmit_inbound_icmp4(struct rte_mbuf *mbuf, return EDPVS_FRAG; } - if (unlikely(mbuf->userdata != NULL)) - route4_put((struct route_entry *)mbuf->userdata); - mbuf->userdata = rt; + if (unlikely(MBUF_USERDATA(mbuf, struct route_entry *, MBUF_FIELD_ROUTE) != NULL)) + route4_put(MBUF_USERDATA(mbuf, struct route_entry *, MBUF_FIELD_ROUTE)); + MBUF_USERDATA(mbuf, struct route_entry *, MBUF_FIELD_ROUTE) = rt; /* translation for outer L3, ICMP, and inner L3 and L4 */ dp_vs_xmit_icmp(mbuf, prot, conn, DPVS_CONN_DIR_INBOUND); @@ -621,9 +621,9 @@ static int __xmit_inbound_icmp6(struct rte_mbuf *mbuf, return EDPVS_FRAG; } - if (unlikely(mbuf->userdata != NULL)) - route6_put((struct route6 *)mbuf->userdata); - mbuf->userdata = rt6; + if (unlikely(MBUF_USERDATA(mbuf, struct route6 *, MBUF_FIELD_ROUTE) != NULL)) + route6_put(MBUF_USERDATA(mbuf, struct route6 *, MBUF_FIELD_ROUTE)); + MBUF_USERDATA(mbuf, struct route6 *, MBUF_FIELD_ROUTE) = rt6; /* translation for outer L3, ICMP, and inner L3 and L4 */ dp_vs_xmit_icmp(mbuf, prot, conn, DPVS_CONN_DIR_INBOUND); @@ -650,8 +650,8 @@ static int xmit_inbound_icmp(struct rte_mbuf *mbuf, static int __dp_vs_in_icmp4(struct rte_mbuf *mbuf, int *related) { struct icmphdr *ich, _icmph; - struct ipv4_hdr *iph = ip4_hdr(mbuf); - struct ipv4_hdr *ciph, _ciph; + struct rte_ipv4_hdr *iph = ip4_hdr(mbuf); + struct rte_ipv4_hdr *ciph, _ciph; struct dp_vs_iphdr dciph; struct dp_vs_proto *prot; struct dp_vs_conn *conn; @@ -696,7 +696,7 @@ static int __dp_vs_in_icmp4(struct rte_mbuf *mbuf, int *related) if (!prot) return INET_ACCEPT; - if 
(unlikely((ciph->fragment_offset & htons(IPV4_HDR_OFFSET_MASK)))) { + if (unlikely((ciph->fragment_offset & htons(RTE_IPV4_HDR_OFFSET_MASK)))) { RTE_LOG(WARNING, IPVS, "%s: frag needed.\n", __func__); return INET_DROP; } @@ -707,7 +707,7 @@ static int __dp_vs_in_icmp4(struct rte_mbuf *mbuf, int *related) * and restore it later. although it looks strange. */ rte_pktmbuf_adj(mbuf, off); - if (mbuf_may_pull(mbuf, sizeof(struct ipv4_hdr)) != 0) + if (mbuf_may_pull(mbuf, sizeof(struct rte_ipv4_hdr)) != 0) return INET_DROP; dp_vs_fill_iphdr(AF_INET, mbuf, &dciph); @@ -719,7 +719,7 @@ static int __dp_vs_in_icmp4(struct rte_mbuf *mbuf, int *related) */ if (cid != peer_cid) { /* recover mbuf.data_off to outer Ether header */ - rte_pktmbuf_prepend(mbuf, (uint16_t)sizeof(struct ether_hdr) + off); + rte_pktmbuf_prepend(mbuf, (uint16_t)sizeof(struct rte_ether_hdr) + off); return dp_vs_redirect_pkt(mbuf, peer_cid); } @@ -862,7 +862,7 @@ static int __dp_vs_in_icmp6(struct rte_mbuf *mbuf, int *related) */ if (cid != peer_cid) { /* recover mbuf.data_off to outer Ether header */ - rte_pktmbuf_prepend(mbuf, (uint16_t)sizeof(struct ether_hdr) + off); + rte_pktmbuf_prepend(mbuf, (uint16_t)sizeof(struct rte_ether_hdr) + off); return dp_vs_redirect_pkt(mbuf, peer_cid); } @@ -995,7 +995,7 @@ static int __dp_vs_in(void *priv, struct rte_mbuf *mbuf, */ if (cid != peer_cid) { /* recover mbuf.data_off to outer Ether header */ - rte_pktmbuf_prepend(mbuf, (uint16_t)sizeof(struct ether_hdr)); + rte_pktmbuf_prepend(mbuf, (uint16_t)sizeof(struct rte_ether_hdr)); return dp_vs_redirect_pkt(mbuf, peer_cid); } diff --git a/src/ipvs/ip_vs_dest.c b/src/ipvs/ip_vs_dest.c index e75a3c3b7..98d24be59 100644 --- a/src/ipvs/ip_vs_dest.c +++ b/src/ipvs/ip_vs_dest.c @@ -63,7 +63,7 @@ static void __dp_vs_dest_update(struct dp_vs_service *svc, if (udest->max_conn == 0 || udest->max_conn > dest->max_conn) dest->flags &= ~DPVS_DEST_F_OVERLOAD; - if (rte_lcore_id() != rte_get_master_lcore()) { + if (rte_lcore_id() 
!= rte_get_main_lcore()) { dest->max_conn = udest->max_conn / num_lcores; dest->min_conn = udest->min_conn / num_lcores; } else { diff --git a/src/ipvs/ip_vs_laddr.c b/src/ipvs/ip_vs_laddr.c index 543bce257..9d4cad725 100644 --- a/src/ipvs/ip_vs_laddr.c +++ b/src/ipvs/ip_vs_laddr.c @@ -439,7 +439,7 @@ static int laddr_sockopt_set(sockoptid_t opt, const void *conf, size_t size) lcoreid_t cid = rte_lcore_id(); // send to slave core - if (cid == rte_get_master_lcore()) { + if (cid == rte_get_main_lcore()) { struct dpvs_msg *msg; msg = msg_make(set_opt_so2msg(opt), laddr_msg_seq(), DPVS_MSG_MULTICAST, cid, size, conf); @@ -608,7 +608,7 @@ static int laddr_sockopt_get(sockoptid_t opt, const void *conf, size_t size, return EDPVS_MSG_FAIL; } - if (cid == rte_get_master_lcore()) { + if (cid == rte_get_main_lcore()) { if (dp_vs_match_parse(laddr_conf->srange, laddr_conf->drange, laddr_conf->iifname, laddr_conf->oifname, laddr_conf->af_s, &match) != EDPVS_OK) { diff --git a/src/ipvs/ip_vs_nat64.c b/src/ipvs/ip_vs_nat64.c index 7ea4d637e..e9a827b60 100644 --- a/src/ipvs/ip_vs_nat64.c +++ b/src/ipvs/ip_vs_nat64.c @@ -25,7 +25,7 @@ int mbuf_6to4(struct rte_mbuf *mbuf, const struct in_addr *daddr) { struct ip6_hdr *ip6h = ip6_hdr(mbuf); - struct ipv4_hdr *ip4h; + struct rte_ipv4_hdr *ip4h; uint8_t next_prot; uint8_t ttl; @@ -43,14 +43,14 @@ int mbuf_6to4(struct rte_mbuf *mbuf, next_prot = ip6h->ip6_nxt; ttl = ip6h->ip6_hlim; - ip4h = (struct ipv4_hdr *)rte_pktmbuf_prepend(mbuf, sizeof(struct ipv4_hdr)); + ip4h = (struct rte_ipv4_hdr *)rte_pktmbuf_prepend(mbuf, sizeof(struct rte_ipv4_hdr)); if (!ip4h) return EDPVS_NOROOM; ip4h->version_ihl = ((4 << 4) | 5); ip4h->type_of_service = 0; ip4h->total_length = htons(mbuf->pkt_len); - ip4h->fragment_offset = htons(IPV4_HDR_DF_FLAG); + ip4h->fragment_offset = htons(RTE_IPV4_HDR_DF_FLAG); ip4h->time_to_live = ttl; ip4h->next_proto_id = next_prot; ip4h->hdr_checksum = 0; @@ -58,7 +58,7 @@ int mbuf_6to4(struct rte_mbuf *mbuf, ip4h->dst_addr 
= daddr->s_addr; ip4h->packet_id = 0; // NO FRAG, so 0 is OK? - mbuf->l3_len = sizeof(struct ipv4_hdr); + mbuf->l3_len = sizeof(struct rte_ipv4_hdr); return EDPVS_OK; } @@ -67,13 +67,13 @@ int mbuf_4to6(struct rte_mbuf *mbuf, const struct in6_addr *saddr, const struct in6_addr *daddr) { - struct ipv4_hdr *ip4h = ip4_hdr(mbuf); + struct rte_ipv4_hdr *ip4h = ip4_hdr(mbuf); struct ip6_hdr *ip6h; uint16_t plen; uint8_t hops; uint8_t next_prot; - if (mbuf->l3_len != sizeof(struct ipv4_hdr)) { + if (mbuf->l3_len != sizeof(struct rte_ipv4_hdr)) { return EDPVS_NOTSUPP; } if (rte_pktmbuf_adj(mbuf, mbuf->l3_len) == NULL) diff --git a/src/ipvs/ip_vs_proto_tcp.c b/src/ipvs/ip_vs_proto_tcp.c index 361d435b0..863746c67 100644 --- a/src/ipvs/ip_vs_proto_tcp.c +++ b/src/ipvs/ip_vs_proto_tcp.c @@ -139,7 +139,7 @@ inline struct tcphdr *tcp_hdr(const struct rte_mbuf *mbuf) * @th: pointer to the beginning of the L4 header * @return void */ -inline void tcp4_send_csum(struct ipv4_hdr *iph, struct tcphdr *th) +inline void tcp4_send_csum(struct rte_ipv4_hdr *iph, struct tcphdr *th) { th->check = 0; th->check = rte_ipv4_udptcp_cksum(iph, th); @@ -151,7 +151,7 @@ inline void tcp4_send_csum(struct ipv4_hdr *iph, struct tcphdr *th) * @th: pointer to the beginning of the L4 header * @return void */ -inline void tcp6_send_csum(struct ipv6_hdr *iph, struct tcphdr *th) { +inline void tcp6_send_csum(struct rte_ipv6_hdr *iph, struct tcphdr *th) { th->check = 0; th->check = ip6_udptcp_cksum((struct ip6_hdr *)iph, th, (void *)th - (void *)iph, IPPROTO_TCP); @@ -165,7 +165,7 @@ static inline int tcp_send_csum(int af, int iphdrlen, struct tcphdr *th, struct netif_port *dev = NULL; if (AF_INET6 == af) { - struct route6 *rt6 = mbuf->userdata; + struct route6 *rt6 = MBUF_USERDATA(mbuf, struct route6 *, MBUF_FIELD_ROUTE); struct ip6_hdr *ip6h = ip6_hdr(mbuf); if (rt6 && rt6->rt6_dev) dev = rt6->rt6_dev; @@ -179,11 +179,11 @@ static inline int tcp_send_csum(int af, int iphdrlen, struct tcphdr *th, } else { 
if (mbuf_may_pull(mbuf, mbuf->pkt_len) != 0) return EDPVS_INVPKT; - tcp6_send_csum((struct ipv6_hdr *)ip6h, th); + tcp6_send_csum((struct rte_ipv6_hdr *)ip6h, th); } } else { /* AF_INET */ - struct route_entry *rt = mbuf->userdata; - struct ipv4_hdr *iph = ip4_hdr(mbuf); + struct route_entry *rt = MBUF_USERDATA(mbuf, struct route_entry *, MBUF_FIELD_ROUTE); + struct rte_ipv4_hdr *iph = ip4_hdr(mbuf); if (rt && rt->port) dev = rt->port; else if (conn->out_dev) @@ -319,9 +319,11 @@ static inline int tcp_in_add_toa(struct dp_vs_conn *conn, struct rte_mbuf *mbuf, * check if we can add the new option */ /* skb length and tcp option length checking */ - if (tuplehash_out(conn).af == AF_INET && (rt = mbuf->userdata) != NULL) { + if (tuplehash_out(conn).af == AF_INET && (rt = MBUF_USERDATA(mbuf, + struct route_entry *, MBUF_FIELD_ROUTE)) != NULL) { mtu = rt->mtu; - } else if (tuplehash_out(conn).af == AF_INET6 && (rt6 = mbuf->userdata) != NULL) { + } else if (tuplehash_out(conn).af == AF_INET6 && (rt6 = MBUF_USERDATA(mbuf, + struct route6 *, MBUF_FIELD_ROUTE)) != NULL) { mtu = rt6->rt6_mtu; } else if (conn->in_dev) { /* no route for fast-xmit */ mtu = conn->in_dev->mtu; @@ -990,7 +992,7 @@ static int tcp_send_rst(struct dp_vs_proto *proto, struct rte_mempool *pool; struct rte_mbuf *mbuf = NULL; struct tcphdr *th; - struct ipv4_hdr *ip4h; + struct rte_ipv4_hdr *ip4h; struct ip6_hdr *ip6h; if (conn->state != DPVS_TCP_S_ESTABLISHED) { @@ -1005,7 +1007,7 @@ static int tcp_send_rst(struct dp_vs_proto *proto, mbuf = rte_pktmbuf_alloc(pool); if (!mbuf) return EDPVS_NOMEM; - mbuf->userdata = NULL; /* make sure "no route info" */ + mbuf_userdata_reset(mbuf); /* make sure "no route info" */ /* * reserve head room ? 
@@ -1041,8 +1043,8 @@ static int tcp_send_rst(struct dp_vs_proto *proto, /* IP header (before translation) */ if (dir == DPVS_CONN_DIR_INBOUND) { if (tuplehash_in(conn).af == AF_INET) { - ip4h = (struct ipv4_hdr *)rte_pktmbuf_prepend(mbuf, - sizeof(struct ipv4_hdr)); + ip4h = (struct rte_ipv4_hdr *)rte_pktmbuf_prepend(mbuf, + sizeof(struct rte_ipv4_hdr)); if (!ip4h) { rte_pktmbuf_free(mbuf); return EDPVS_NOROOM; @@ -1050,7 +1052,7 @@ static int tcp_send_rst(struct dp_vs_proto *proto, ip4h->version_ihl = 0x45; ip4h->total_length = htons(mbuf->pkt_len); ip4h->packet_id = 0; - ip4h->fragment_offset = htons(IPV4_HDR_DF_FLAG); + ip4h->fragment_offset = htons(RTE_IPV4_HDR_DF_FLAG); ip4h->time_to_live = 64; ip4h->next_proto_id = IPPROTO_TCP; ip4h->src_addr = conn->caddr.in.s_addr; @@ -1079,15 +1081,15 @@ static int tcp_send_rst(struct dp_vs_proto *proto, mbuf->l3_len = sizeof(*ip6h); - tcp6_send_csum((struct ipv6_hdr *)ip6h, th); + tcp6_send_csum((struct rte_ipv6_hdr *)ip6h, th); } conn->packet_xmit(proto, conn, mbuf); } else { if (tuplehash_out(conn).af == AF_INET) { - ip4h = (struct ipv4_hdr *)rte_pktmbuf_prepend(mbuf, - sizeof(struct ipv4_hdr)); + ip4h = (struct rte_ipv4_hdr *)rte_pktmbuf_prepend(mbuf, + sizeof(struct rte_ipv4_hdr)); if (!ip4h) { rte_pktmbuf_free(mbuf); return EDPVS_NOROOM; @@ -1095,7 +1097,7 @@ static int tcp_send_rst(struct dp_vs_proto *proto, ip4h->version_ihl = 0x45; ip4h->total_length = htons(mbuf->pkt_len); ip4h->packet_id = 0; - ip4h->fragment_offset = htons(IPV4_HDR_DF_FLAG); + ip4h->fragment_offset = htons(RTE_IPV4_HDR_DF_FLAG); ip4h->time_to_live = 64; ip4h->next_proto_id = IPPROTO_TCP; ip4h->src_addr = conn->daddr.in.s_addr; @@ -1124,7 +1126,7 @@ static int tcp_send_rst(struct dp_vs_proto *proto, mbuf->l3_len = sizeof(*ip6h); - tcp6_send_csum((struct ipv6_hdr *)ip6h, th); + tcp6_send_csum((struct rte_ipv6_hdr *)ip6h, th); } conn->packet_out_xmit(proto, conn, mbuf); diff --git a/src/ipvs/ip_vs_proto_udp.c b/src/ipvs/ip_vs_proto_udp.c index 
e098d2774..b868d47e9 100644 --- a/src/ipvs/ip_vs_proto_udp.c +++ b/src/ipvs/ip_vs_proto_udp.c @@ -63,20 +63,20 @@ static int udp_timeouts[DPVS_UDP_S_LAST + 1] = { [DPVS_UDP_S_LAST] = 2, }; -inline void udp4_send_csum(struct ipv4_hdr *iph, struct udp_hdr *uh) +inline void udp4_send_csum(struct rte_ipv4_hdr *iph, struct rte_udp_hdr *uh) { uh->dgram_cksum = 0; uh->dgram_cksum = rte_ipv4_udptcp_cksum(iph, uh); } -inline void udp6_send_csum(struct ipv6_hdr *iph, struct udp_hdr *uh) +inline void udp6_send_csum(struct rte_ipv6_hdr *iph, struct rte_udp_hdr *uh) { uh->dgram_cksum = 0; uh->dgram_cksum = ip6_udptcp_cksum((struct ip6_hdr *)iph, (struct udphdr *)uh, (void *)uh - (void *)iph, IPPROTO_UDP); } -static inline int udp_send_csum(int af, int iphdrlen, struct udp_hdr *uh, +static inline int udp_send_csum(int af, int iphdrlen, struct rte_udp_hdr *uh, const struct dp_vs_conn *conn, struct rte_mbuf *mbuf, const struct opphdr *opp) { @@ -88,28 +88,28 @@ static inline int udp_send_csum(int af, int iphdrlen, struct udp_hdr *uh, /* UDP checksum is mandatory for IPv6.[RFC 2460] */ struct ip6_hdr *ip6h = ip6_hdr(mbuf); if (unlikely(opp != NULL)) { - udp6_send_csum((struct ipv6_hdr*)ip6h, uh); + udp6_send_csum((struct rte_ipv6_hdr*)ip6h, uh); } else { - struct route6 *rt6 = mbuf->userdata; + struct route6 *rt6 = MBUF_USERDATA(mbuf, struct route6 *, MBUF_FIELD_ROUTE); if (rt6 && rt6->rt6_dev) dev = rt6->rt6_dev; else if (conn->out_dev) dev = conn->out_dev; if (likely(dev && (dev->flag & NETIF_PORT_FLAG_TX_UDP_CSUM_OFFLOAD))) { mbuf->l3_len = iphdrlen; - mbuf->l4_len = sizeof(struct udp_hdr); + mbuf->l4_len = sizeof(struct rte_udp_hdr); mbuf->ol_flags |= (PKT_TX_UDP_CKSUM | PKT_TX_IPV6); uh->dgram_cksum = ip6_phdr_cksum(ip6h, mbuf->ol_flags, iphdrlen, IPPROTO_UDP); } else { if (mbuf_may_pull(mbuf, mbuf->pkt_len) != 0) return EDPVS_INVPKT; - udp6_send_csum((struct ipv6_hdr*)ip6h, uh); + udp6_send_csum((struct rte_ipv6_hdr*)ip6h, uh); } } } else { /* AF_INET */ /* UDP checksum is 
not mandatory for IPv4. */ - struct ipv4_hdr *iph = ip4_hdr(mbuf); + struct rte_ipv4_hdr *iph = ip4_hdr(mbuf); if (unlikely(opp != NULL)) { /* * XXX: UDP pseudo header need UDP length, but the common helper function @@ -123,14 +123,14 @@ static inline int udp_send_csum(int af, int iphdrlen, struct udp_hdr *uh, */ uh->dgram_cksum = 0; } else { - struct route_entry *rt = mbuf->userdata; + struct route_entry *rt = MBUF_USERDATA(mbuf, struct route_entry *, MBUF_FIELD_ROUTE); if (rt && rt->port) dev = rt->port; else if (conn->out_dev) dev = conn->out_dev; if (likely(dev && (dev->flag & NETIF_PORT_FLAG_TX_UDP_CSUM_OFFLOAD))) { mbuf->l3_len = iphdrlen; - mbuf->l4_len = sizeof(struct udp_hdr); + mbuf->l4_len = sizeof(struct rte_udp_hdr); mbuf->ol_flags |= (PKT_TX_UDP_CKSUM | PKT_TX_IP_CKSUM | PKT_TX_IPV4); uh->dgram_cksum = rte_ipv4_phdr_cksum(iph, mbuf->ol_flags); } else { @@ -149,7 +149,7 @@ static int udp_conn_sched(struct dp_vs_proto *proto, struct dp_vs_conn **conn, int *verdict) { - struct udp_hdr *uh, _udph; + struct rte_udp_hdr *uh, _udph; struct dp_vs_service *svc; bool outwall = false; assert(proto && iph && mbuf && conn && verdict); @@ -199,7 +199,7 @@ udp_conn_lookup(struct dp_vs_proto *proto, struct rte_mbuf *mbuf, int *direct, bool reverse, bool *drop, lcoreid_t *peer_cid) { - struct udp_hdr *uh, _udph; + struct rte_udp_hdr *uh, _udph; struct dp_vs_conn *conn; assert(proto && iph && mbuf); @@ -285,7 +285,8 @@ static int send_standalone_uoa(const struct dp_vs_conn *conn, int iaf = tuplehash_in(conn).af; int oaf = tuplehash_out(conn).af; - assert(conn && ombuf && oiph && ouh && ombuf->userdata); + assert(conn && ombuf && oiph && ouh && + MBUF_USERDATA_CONST(ombuf, void *, MBUF_FIELD_ROUTE)); /* just in case */ if (unlikely(conn->dest->fwdmode != DPVS_FWD_MODE_FNAT)) @@ -294,7 +295,7 @@ static int send_standalone_uoa(const struct dp_vs_conn *conn, mbuf = rte_pktmbuf_alloc(ombuf->pool); if (unlikely(!mbuf)) return EDPVS_NOMEM; - mbuf->userdata = NULL; + 
MBUF_USERDATA(mbuf, void *, MBUF_FIELD_ROUTE) = NULL; int ipolen_uoa = (AF_INET6 == iaf) ? IPOLEN_UOA_IPV6 : IPOLEN_UOA_IPV4; @@ -315,7 +316,7 @@ static int send_standalone_uoa(const struct dp_vs_conn *conn, goto no_room; ((struct iphdr *)iph)->version = 4; ((struct iphdr *)iph)->tos = ((struct iphdr *)oiph)->tos; - ((struct iphdr *)iph)->id = ip4_select_id((struct ipv4_hdr *)iph); + ((struct iphdr *)iph)->id = ip4_select_id((struct rte_ipv4_hdr *)iph); ((struct iphdr *)iph)->frag_off = 0; ((struct iphdr *)iph)->ttl = ((struct iphdr *)oiph)->ttl; ((struct iphdr *)iph)->saddr = conn->laddr.in.s_addr; @@ -397,14 +398,16 @@ static int send_standalone_uoa(const struct dp_vs_conn *conn, * if udp checksum error here, may cause tcpdump & uoa moudule parse packets * correctly, however socket can not receive L4 data. */ - udp6_send_csum((struct ipv6_hdr *)iph, (struct udp_hdr*)uh); - mbuf->userdata = rt6 = (struct route6*)ombuf->userdata; + udp6_send_csum((struct rte_ipv6_hdr *)iph, (struct rte_udp_hdr*)uh); + rt6 = MBUF_USERDATA_CONST(ombuf, struct route6 *, MBUF_FIELD_ROUTE); + MBUF_USERDATA(mbuf, struct route6 *, MBUF_FIELD_ROUTE) = rt6; route6_get(rt6); return ip6_local_out(mbuf); } else { /* IPv4 */ struct route_entry *rt; uh->check = 0; /* rte_ipv4_udptcp_cksum fails if opp inserted. 
*/ - mbuf->userdata = rt = (struct route_entry *)ombuf->userdata; + rt = MBUF_USERDATA_CONST(ombuf, struct route_entry *, MBUF_FIELD_ROUTE); + MBUF_USERDATA(mbuf, struct route_entry *, MBUF_FIELD_ROUTE) = rt; route4_get(rt); return ipv4_local_out(mbuf); } @@ -501,7 +504,7 @@ static int insert_opp_uoa(struct dp_vs_conn *conn, struct rte_mbuf *mbuf, * basic header length (40 B) + payload length(including ext header) */ iphdrlen = ip6_hdrlen(mbuf); - if (iphdrlen != sizeof(struct ipv6_hdr)) + if (iphdrlen != sizeof(struct rte_ipv6_hdr)) goto standalone_uoa; iptot_len = sizeof(struct ip6_hdr) + ntohs(((struct ip6_hdr *)iph)->ip6_plen); @@ -618,7 +621,7 @@ static int udp_insert_uoa(struct dp_vs_conn *conn, struct rte_mbuf *mbuf, return EDPVS_OK; } - rt = mbuf->userdata; + rt = MBUF_USERDATA(mbuf, void *, MBUF_FIELD_ROUTE); if (!rt) { RTE_LOG(ERR, IPVS, "%s: no route\n", __func__); return EDPVS_INVPKT; @@ -677,7 +680,7 @@ static int udp_fnat_in_handler(struct dp_vs_proto *proto, struct dp_vs_conn *conn, struct rte_mbuf *mbuf) { - struct udp_hdr *uh = NULL; + struct rte_udp_hdr *uh = NULL; struct opphdr *opp = NULL; void *iph = NULL; /* af/mbuf may be changed for nat64 which in af is ipv6 and out is ipv4 */ @@ -699,14 +702,14 @@ static int udp_fnat_in_handler(struct dp_vs_proto *proto, } /* cannot use mbuf_header_pointer() */ - if (unlikely(mbuf->data_len < iphdrlen + sizeof(struct udp_hdr))) + if (unlikely(mbuf->data_len < iphdrlen + sizeof(struct rte_udp_hdr))) return EDPVS_INVPKT; if (nxt_proto == IPPROTO_UDP) { - uh = (struct udp_hdr *)(iph + iphdrlen); + uh = (struct rte_udp_hdr *)(iph + iphdrlen); } else if (nxt_proto == IPPROTO_OPT) { opp = (struct opphdr *)(iph + iphdrlen); - uh = (struct udp_hdr *)((void *)opp + ntohs(opp->length)); + uh = (struct rte_udp_hdr *)((void *)opp + ntohs(opp->length)); } if (unlikely(!uh)) @@ -722,15 +725,15 @@ static int udp_fnat_out_handler(struct dp_vs_proto *proto, struct dp_vs_conn *conn, struct rte_mbuf *mbuf) { - struct udp_hdr 
*uh; + struct rte_udp_hdr *uh; /* af/mbuf may be changed for nat64 which in af is ipv6 and out is ipv4 */ int af = tuplehash_in(conn).af; int iphdrlen = ((AF_INET6 == af) ? ip6_hdrlen(mbuf): ip4_hdrlen(mbuf)); /* cannot use mbuf_header_pointer() */ - if (unlikely(mbuf->data_len < iphdrlen + sizeof(struct udp_hdr))) + if (unlikely(mbuf->data_len < iphdrlen + sizeof(struct rte_udp_hdr))) return EDPVS_INVPKT; - uh = rte_pktmbuf_mtod_offset(mbuf, struct udp_hdr *, iphdrlen); + uh = rte_pktmbuf_mtod_offset(mbuf, struct rte_udp_hdr *, iphdrlen); if (unlikely(!uh)) return EDPVS_INVPKT; @@ -756,14 +759,14 @@ static int udp_snat_in_handler(struct dp_vs_proto *proto, struct dp_vs_conn *conn, struct rte_mbuf *mbuf) { - struct udp_hdr *uh; + struct rte_udp_hdr *uh; int af = conn->af; int iphdrlen = ((AF_INET6 == af) ? ip6_hdrlen(mbuf): ip4_hdrlen(mbuf)); /* cannot use mbuf_header_pointer() */ - if (unlikely(mbuf->data_len < iphdrlen + sizeof(struct udp_hdr))) + if (unlikely(mbuf->data_len < iphdrlen + sizeof(struct rte_udp_hdr))) return EDPVS_INVPKT; - uh = rte_pktmbuf_mtod_offset(mbuf, struct udp_hdr *, iphdrlen); + uh = rte_pktmbuf_mtod_offset(mbuf, struct rte_udp_hdr *, iphdrlen); if (unlikely(!uh)) return EDPVS_INVPKT; @@ -776,14 +779,14 @@ static int udp_snat_out_handler(struct dp_vs_proto *proto, struct dp_vs_conn *conn, struct rte_mbuf *mbuf) { - struct udp_hdr *uh; + struct rte_udp_hdr *uh; int af = conn->af; int iphdrlen = ((AF_INET6 == af) ? 
ip6_hdrlen(mbuf): ip4_hdrlen(mbuf)); /* cannot use mbuf_header_pointer() */ - if (unlikely(mbuf->data_len < iphdrlen + sizeof(struct udp_hdr))) + if (unlikely(mbuf->data_len < iphdrlen + sizeof(struct rte_udp_hdr))) return EDPVS_INVPKT; - uh = rte_pktmbuf_mtod_offset(mbuf, struct udp_hdr *, iphdrlen); + uh = rte_pktmbuf_mtod_offset(mbuf, struct rte_udp_hdr *, iphdrlen); if (unlikely(!uh)) return EDPVS_INVPKT; diff --git a/src/ipvs/ip_vs_service.c b/src/ipvs/ip_vs_service.c index 536f4091c..424f0eab5 100644 --- a/src/ipvs/ip_vs_service.c +++ b/src/ipvs/ip_vs_service.c @@ -199,8 +199,8 @@ static inline bool __service_in_range(int af, static struct dp_vs_service * __dp_vs_service_match_get4(const struct rte_mbuf *mbuf, bool *outwall, lcoreid_t cid) { - struct route_entry *rt = mbuf->userdata; - struct ipv4_hdr *iph = ip4_hdr(mbuf); /* ipv4 only */ + struct route_entry *rt = MBUF_USERDATA_CONST(mbuf, struct route_entry *, MBUF_FIELD_ROUTE); + struct rte_ipv4_hdr *iph = ip4_hdr(mbuf); /* ipv4 only */ struct dp_vs_service *svc; union inet_addr saddr, daddr; __be16 _ports[2], *ports; @@ -267,7 +267,7 @@ __dp_vs_service_match_get4(const struct rte_mbuf *mbuf, bool *outwall, lcoreid_t static struct dp_vs_service * __dp_vs_service_match_get6(const struct rte_mbuf *mbuf, lcoreid_t cid) { - struct route6 *rt = mbuf->userdata; + struct route6 *rt = MBUF_USERDATA_CONST(mbuf, struct route6 *, MBUF_FIELD_ROUTE); struct ip6_hdr *iph = ip6_hdr(mbuf); uint8_t ip6nxt = iph->ip6_nxt; struct dp_vs_service *svc; @@ -299,7 +299,7 @@ __dp_vs_service_match_get6(const struct rte_mbuf *mbuf, lcoreid_t cid) if (!rt) return NULL; - /* set mbuf->userdata to @rt as side-effect is not good! + /* set mbuf userdata(MBUF_FIELD_ROUTE) to @rt as side-effect is not good! * although route will done again when out-xmit. 
*/ if ((rt->rt6_flags & RTF_KNI) || (rt->rt6_flags & RTF_LOCALIN)) { route6_put(rt); @@ -914,13 +914,13 @@ static int dp_vs_service_set(sockoptid_t opt, const void *user, size_t len) struct in_addr *vip; lcoreid_t cid = rte_lcore_id(); - if (opt == DPVS_SO_SET_GRATARP && cid == rte_get_master_lcore()){ + if (opt == DPVS_SO_SET_GRATARP && cid == rte_get_main_lcore()){ vip = (struct in_addr *)user; return gratuitous_arp_send_vip(vip); } // send to slave core - if (cid == rte_get_master_lcore()) { + if (cid == rte_get_main_lcore()) { struct dpvs_msg *msg; msg = msg_make(set_opt_so2msg(opt), svc_msg_seq(), DPVS_MSG_MULTICAST, cid, len, user); @@ -1262,7 +1262,7 @@ static int dp_vs_service_get(sockoptid_t opt, const void *user, size_t len, void return EDPVS_MSG_FAIL; } - if (cid == rte_get_master_lcore()) { + if (cid == rte_get_main_lcore()) { output = rte_zmalloc("get_services", size, 0); if (unlikely(NULL == output)) { msg_destroy(&msg); @@ -1331,7 +1331,7 @@ static int dp_vs_service_get(sockoptid_t opt, const void *user, size_t len, void return EDPVS_MSG_FAIL; } - if (cid == rte_get_master_lcore()) { + if (cid == rte_get_main_lcore()) { svc = dp_vs_service_get_lcore(entry, cid); if (!svc) { msg_destroy(&msg); @@ -1426,7 +1426,7 @@ static int dp_vs_service_get(sockoptid_t opt, const void *user, size_t len, void return EDPVS_MSG_FAIL; } - if (cid == rte_get_master_lcore()) { + if (cid == rte_get_main_lcore()) { svc = dp_vs_service_get_lcore(&entry, cid); if (!svc) { msg_destroy(&msg); diff --git a/src/ipvs/ip_vs_synproxy.c b/src/ipvs/ip_vs_synproxy.c index 17efe7502..19e3003f5 100644 --- a/src/ipvs/ip_vs_synproxy.c +++ b/src/ipvs/ip_vs_synproxy.c @@ -633,7 +633,7 @@ static void syn_proxy_reuse_mbuf(int af, struct rte_mbuf *mbuf, } else { if (mbuf_may_pull(mbuf, mbuf->pkt_len) != 0) return; - tcp6_send_csum((struct ipv6_hdr*)ip6h, th); + tcp6_send_csum((struct rte_ipv6_hdr*)ip6h, th); } } else { uint32_t tmpaddr; @@ -649,17 +649,17 @@ static void 
syn_proxy_reuse_mbuf(int af, struct rte_mbuf *mbuf, if (likely(mbuf->ol_flags & PKT_TX_TCP_CKSUM)) { mbuf->l3_len = iphlen; mbuf->l4_len = (th->doff << 2); - th->check = rte_ipv4_phdr_cksum((struct ipv4_hdr*)iph, mbuf->ol_flags); + th->check = rte_ipv4_phdr_cksum((struct rte_ipv4_hdr*)iph, mbuf->ol_flags); } else { if (mbuf_may_pull(mbuf, mbuf->pkt_len) != 0) return; - tcp4_send_csum((struct ipv4_hdr*)iph, th); + tcp4_send_csum((struct rte_ipv4_hdr*)iph, th); } if (likely(mbuf->ol_flags & PKT_TX_IP_CKSUM)) iph->check = 0; else - ip4_send_csum((struct ipv4_hdr*)iph); + ip4_send_csum((struct rte_ipv4_hdr*)iph); } } @@ -682,8 +682,8 @@ int dp_vs_synproxy_syn_rcv(int af, struct rte_mbuf *mbuf, struct tcphdr *th, _tcph; struct dp_vs_synproxy_opt tcp_opt; struct netif_port *dev; - struct ether_hdr *eth; - struct ether_addr ethaddr; + struct rte_ether_hdr *eth; + struct rte_ether_addr ethaddr; th = mbuf_header_pointer(mbuf, iph->len, sizeof(_tcph), &_tcph); if (unlikely(NULL == th)) @@ -716,7 +716,7 @@ int dp_vs_synproxy_syn_rcv(int af, struct rte_mbuf *mbuf, /* mbuf will be reused and ether header will be set. * FIXME: to support non-ether packets. */ - if (mbuf->l2_len != sizeof(struct ether_hdr)) + if (mbuf->l2_len != sizeof(struct rte_ether_hdr)) goto syn_rcv_out; /* update statistics */ @@ -743,14 +743,14 @@ int dp_vs_synproxy_syn_rcv(int af, struct rte_mbuf *mbuf, /* set L2 header and send the packet out * It is noted that "ipv4_xmit" should not used here, * because mbuf is reused. 
*/ - eth = (struct ether_hdr *)rte_pktmbuf_prepend(mbuf, mbuf->l2_len); + eth = (struct rte_ether_hdr *)rte_pktmbuf_prepend(mbuf, mbuf->l2_len); if (unlikely(!eth)) { RTE_LOG(ERR, IPVS, "%s: no memory\n", __func__); goto syn_rcv_out; } - memcpy(ðaddr, ð->s_addr, sizeof(struct ether_addr)); - memcpy(ð->s_addr, ð->d_addr, sizeof(struct ether_addr)); - memcpy(ð->d_addr, ðaddr, sizeof(struct ether_addr)); + memcpy(ðaddr, ð->s_addr, sizeof(struct rte_ether_addr)); + memcpy(ð->s_addr, ð->d_addr, sizeof(struct rte_ether_addr)); + memcpy(ð->d_addr, ðaddr, sizeof(struct rte_ether_addr)); if (unlikely(EDPVS_OK != (ret = netif_xmit(mbuf, dev)))) { RTE_LOG(ERR, IPVS, "%s: netif_xmit failed -- %s\n", @@ -842,7 +842,7 @@ static int syn_proxy_send_rs_syn(int af, const struct tcphdr *th, //RTE_LOG(WARNING, IPVS, "%s: %s\n", __func__, dpvs_strerror(EDPVS_NOMEM)); return EDPVS_NOMEM; } - syn_mbuf->userdata = NULL; /* make sure "no route info" */ + mbuf_userdata_reset(syn_mbuf); /* make sure "no route info" */ /* Reserve space for tcp header */ tcp_hdr_size = (sizeof(struct tcphdr) + TCPOLEN_MAXSEG @@ -899,7 +899,7 @@ static int syn_proxy_send_rs_syn(int af, const struct tcphdr *th, struct iphdr *syn_iph; /* Reserve space for ipv4 header */ - syn_iph = (struct iphdr *)rte_pktmbuf_prepend(syn_mbuf, sizeof(struct ipv4_hdr)); + syn_iph = (struct iphdr *)rte_pktmbuf_prepend(syn_mbuf, sizeof(struct rte_ipv4_hdr)); if (!syn_iph) { rte_pktmbuf_free(syn_mbuf); //RTE_LOG(WARNING, IPVS, "%s:%s\n", __func__, dpvs_strerror(EDPVS_NOROOM)); @@ -909,7 +909,7 @@ static int syn_proxy_send_rs_syn(int af, const struct tcphdr *th, ack_iph = (struct iphdr *)ip4_hdr(mbuf); *((uint16_t *) syn_iph) = htons((4 << 12) | (5 << 8) | (ack_iph->tos & 0x1E)); syn_iph->tot_len = htons(syn_mbuf->pkt_len); - syn_iph->frag_off = htons(IPV4_HDR_DF_FLAG); + syn_iph->frag_off = htons(RTE_IPV4_HDR_DF_FLAG); syn_iph->ttl = 64; syn_iph->protocol = IPPROTO_TCP; syn_iph->saddr = ack_iph->saddr; @@ -930,7 +930,7 @@ static int 
syn_proxy_send_rs_syn(int af, const struct tcphdr *th, return EDPVS_NOMEM; } - syn_mbuf_cloned->userdata = NULL; + mbuf_userdata_reset(syn_mbuf_cloned); cp->syn_mbuf = syn_mbuf_cloned; sp_dbg_stats32_inc(sp_syn_saved); rte_atomic32_set(&cp->syn_retry_max, dp_vs_synproxy_ctrl_syn_retry); @@ -1029,7 +1029,7 @@ static int syn_proxy_build_tcp_rst(int af, struct rte_mbuf *mbuf, } else { if (mbuf_may_pull(mbuf, mbuf->pkt_len) != 0) return EDPVS_INVPKT; - tcp6_send_csum((struct ipv6_hdr*)ip6h, th); + tcp6_send_csum((struct rte_ipv6_hdr*)ip6h, th); } } else { uint32_t tmpaddr; @@ -1046,17 +1046,17 @@ static int syn_proxy_build_tcp_rst(int af, struct rte_mbuf *mbuf, if (likely(mbuf->ol_flags & PKT_TX_TCP_CKSUM)) { mbuf->l3_len = l3_len; mbuf->l4_len = l4_len; - th->check = rte_ipv4_phdr_cksum((struct ipv4_hdr*)ip4h, mbuf->ol_flags); + th->check = rte_ipv4_phdr_cksum((struct rte_ipv4_hdr*)ip4h, mbuf->ol_flags); } else { if (mbuf_may_pull(mbuf, mbuf->pkt_len) != 0) return EDPVS_INVPKT; - tcp4_send_csum((struct ipv4_hdr*)ip4h, th); + tcp4_send_csum((struct rte_ipv4_hdr*)ip4h, th); } if (likely(mbuf->ol_flags & PKT_TX_IP_CKSUM)) ip4h->check = 0; else - ip4_send_csum((struct ipv4_hdr*)ip4h); + ip4_send_csum((struct rte_ipv4_hdr*)ip4h); } return EDPVS_OK; @@ -1068,8 +1068,8 @@ static int syn_proxy_send_tcp_rst(int af, struct rte_mbuf *mbuf) { struct tcphdr *th; struct netif_port *dev; - struct ether_hdr *eth; - struct ether_addr ethaddr; + struct rte_ether_hdr *eth; + struct rte_ether_addr ethaddr; uint32_t l3_len, l4_len; void *l3_hdr; @@ -1096,19 +1096,19 @@ static int syn_proxy_send_tcp_rst(int af, struct rte_mbuf *mbuf) th, l3_len, l4_len)) return EDPVS_INVPKT; - if (mbuf->l2_len < sizeof(struct ether_hdr)) + if (mbuf->l2_len < sizeof(struct rte_ether_hdr)) return EDPVS_INVPKT; /* set L2 header and send the packet out * It is noted that "ipv4_xmit" should not used here, * because mbuf is reused. 
*/ - eth = (struct ether_hdr *)rte_pktmbuf_prepend(mbuf, mbuf->l2_len); + eth = (struct rte_ether_hdr *)rte_pktmbuf_prepend(mbuf, mbuf->l2_len); if (unlikely(!eth)) { RTE_LOG(ERR, IPVS, "%s: no memory\n", __func__); return EDPVS_NOMEM; } - memcpy(ðaddr, ð->s_addr, sizeof(struct ether_addr)); - memcpy(ð->s_addr, ð->d_addr, sizeof(struct ether_addr)); - memcpy(ð->d_addr, ðaddr, sizeof(struct ether_addr)); + memcpy(ðaddr, ð->s_addr, sizeof(struct rte_ether_addr)); + memcpy(ð->s_addr, ð->d_addr, sizeof(struct rte_ether_addr)); + memcpy(ð->d_addr, ðaddr, sizeof(struct rte_ether_addr)); dev = netif_port_get(mbuf->port); if (unlikely(!dev)) { @@ -1316,7 +1316,7 @@ static int syn_proxy_send_window_update(int af, struct rte_mbuf *mbuf, struct dp RTE_LOG(WARNING, IPVS, "%s: %s\n", __func__, dpvs_strerror(EDPVS_NOMEM)); return EDPVS_NOMEM; } - ack_mbuf->userdata = NULL; + mbuf_userdata_reset(ack_mbuf); ack_th = (struct tcphdr *)rte_pktmbuf_prepend(ack_mbuf, sizeof(struct tcphdr)); if (!ack_th) { @@ -1351,22 +1351,22 @@ static int syn_proxy_send_window_update(int af, struct rte_mbuf *mbuf, struct dp ack_ip6h->ip6_nxt = NEXTHDR_TCP; ack_mbuf->l3_len = sizeof(*ack_ip6h); } else { - struct ipv4_hdr *ack_iph; - struct ipv4_hdr *reuse_iph = ip4_hdr(mbuf); + struct rte_ipv4_hdr *ack_iph; + struct rte_ipv4_hdr *reuse_iph = ip4_hdr(mbuf); int pkt_ack_len = sizeof(struct tcphdr) + sizeof(struct iphdr); /* Reserve space for ipv4 header */ - ack_iph = (struct ipv4_hdr *)rte_pktmbuf_prepend(ack_mbuf, sizeof(struct ipv4_hdr)); + ack_iph = (struct rte_ipv4_hdr *)rte_pktmbuf_prepend(ack_mbuf, sizeof(struct rte_ipv4_hdr)); if (!ack_iph) { rte_pktmbuf_free(ack_mbuf); RTE_LOG(WARNING, IPVS, "%s:%s\n", __func__, dpvs_strerror(EDPVS_NOROOM)); return EDPVS_NOROOM; } - memcpy(ack_iph, reuse_iph, sizeof(struct ipv4_hdr)); + memcpy(ack_iph, reuse_iph, sizeof(struct rte_ipv4_hdr)); /* version and ip header length */ ack_iph->version_ihl = 0x45; ack_iph->type_of_service = 0; - ack_iph->fragment_offset 
= htons(IPV4_HDR_DF_FLAG); + ack_iph->fragment_offset = htons(RTE_IPV4_HDR_DF_FLAG); ack_iph->total_length = htons(pkt_ack_len); ack_mbuf->l3_len = sizeof(*ack_iph); } diff --git a/src/ipvs/ip_vs_whtlst.c b/src/ipvs/ip_vs_whtlst.c index e0be714bf..b017af0f3 100644 --- a/src/ipvs/ip_vs_whtlst.c +++ b/src/ipvs/ip_vs_whtlst.c @@ -151,7 +151,7 @@ static int dp_vs_whtlst_add(int af, uint8_t proto, const union inet_addr *vaddr, struct dpvs_msg *msg; struct dp_vs_whtlst_conf cf; - if (cid != rte_get_master_lcore()) { + if (cid != rte_get_main_lcore()) { RTE_LOG(INFO, SERVICE, "[%s] must set from master lcore\n", __func__); return EDPVS_NOTSUPP; } @@ -194,7 +194,7 @@ static int dp_vs_whtlst_del(int af, uint8_t proto, const union inet_addr *vaddr, struct dpvs_msg *msg; struct dp_vs_whtlst_conf cf; - if (cid != rte_get_master_lcore()) { + if (cid != rte_get_main_lcore()) { RTE_LOG(INFO, SERVICE, "[%s] must set from master lcore\n", __func__); return EDPVS_NOTSUPP; } @@ -444,8 +444,8 @@ int dp_vs_whtlst_init(void) rte_atomic32_set(&this_num_whtlsts, 0); - rte_eal_mp_remote_launch(whtlst_lcore_init, NULL, CALL_MASTER); - RTE_LCORE_FOREACH_SLAVE(cid) { + rte_eal_mp_remote_launch(whtlst_lcore_init, NULL, CALL_MAIN); + RTE_LCORE_FOREACH_WORKER(cid) { if ((err = rte_eal_wait_lcore(cid)) < 0) { RTE_LOG(WARNING, SERVICE, "%s: lcore %d: %s.\n", __func__, cid, dpvs_strerror(err)); @@ -497,8 +497,8 @@ int dp_vs_whtlst_term(void) if ((err = sockopt_unregister(&whtlst_sockopts)) != EDPVS_OK) return err; - rte_eal_mp_remote_launch(whtlst_lcore_term, NULL, CALL_MASTER); - RTE_LCORE_FOREACH_SLAVE(cid) { + rte_eal_mp_remote_launch(whtlst_lcore_term, NULL, CALL_MAIN); + RTE_LCORE_FOREACH_WORKER(cid) { if ((err = rte_eal_wait_lcore(cid)) < 0) { RTE_LOG(WARNING, SERVICE, "%s: lcore %d: %s.\n", __func__, cid, dpvs_strerror(err)); diff --git a/src/ipvs/ip_vs_xmit.c b/src/ipvs/ip_vs_xmit.c index 375313f60..3208eeb1e 100644 --- a/src/ipvs/ip_vs_xmit.c +++ b/src/ipvs/ip_vs_xmit.c @@ -37,16 +37,16 @@ 
static int __dp_vs_fast_xmit_fnat4(struct dp_vs_proto *proto, struct dp_vs_conn *conn, struct rte_mbuf *mbuf) { - struct ipv4_hdr *ip4h = ip4_hdr(mbuf); - struct ether_hdr *eth; - uint16_t packet_type = ETHER_TYPE_IPv4; + struct rte_ipv4_hdr *ip4h = ip4_hdr(mbuf); + struct rte_ether_hdr *eth; + uint16_t packet_type = RTE_ETHER_TYPE_IPV4; int err; if (unlikely(conn->in_dev == NULL)) return EDPVS_NOROUTE; - if (unlikely(is_zero_ether_addr(&conn->in_dmac) || - is_zero_ether_addr(&conn->in_smac))) + if (unlikely(rte_is_zero_ether_addr(&conn->in_dmac) || + rte_is_zero_ether_addr(&conn->in_smac))) return EDPVS_NOTSUPP; /* pre-handler before translation */ @@ -78,10 +78,10 @@ static int __dp_vs_fast_xmit_fnat4(struct dp_vs_proto *proto, ip4_send_csum(ip4h); } - eth = (struct ether_hdr *)rte_pktmbuf_prepend(mbuf, - (uint16_t)sizeof(struct ether_hdr)); - ether_addr_copy(&conn->in_dmac, ð->d_addr); - ether_addr_copy(&conn->in_smac, ð->s_addr); + eth = (struct rte_ether_hdr *)rte_pktmbuf_prepend(mbuf, + (uint16_t)sizeof(struct rte_ether_hdr)); + rte_ether_addr_copy(&conn->in_dmac, ð->d_addr); + rte_ether_addr_copy(&conn->in_smac, ð->s_addr); eth->ether_type = rte_cpu_to_be_16(packet_type); mbuf->packet_type = packet_type; @@ -98,15 +98,15 @@ static int __dp_vs_fast_xmit_fnat6(struct dp_vs_proto *proto, struct rte_mbuf *mbuf) { struct ip6_hdr *ip6h = ip6_hdr(mbuf); - struct ether_hdr *eth; - uint16_t packet_type = ETHER_TYPE_IPv6; + struct rte_ether_hdr *eth; + uint16_t packet_type = RTE_ETHER_TYPE_IPV6; int err; if (unlikely(conn->in_dev == NULL)) return EDPVS_NOROUTE; - if (unlikely(is_zero_ether_addr(&conn->in_dmac) || - is_zero_ether_addr(&conn->in_smac))) + if (unlikely(rte_is_zero_ether_addr(&conn->in_dmac) || + rte_is_zero_ether_addr(&conn->in_smac))) return EDPVS_NOTSUPP; /* pre-handler before translation */ @@ -131,10 +131,10 @@ static int __dp_vs_fast_xmit_fnat6(struct dp_vs_proto *proto, return err; } - eth = (struct ether_hdr *)rte_pktmbuf_prepend(mbuf, - 
(uint16_t)sizeof(struct ether_hdr)); - ether_addr_copy(&conn->in_dmac, ð->d_addr); - ether_addr_copy(&conn->in_smac, ð->s_addr); + eth = (struct rte_ether_hdr *)rte_pktmbuf_prepend(mbuf, + (uint16_t)sizeof(struct rte_ether_hdr)); + rte_ether_addr_copy(&conn->in_dmac, ð->d_addr); + rte_ether_addr_copy(&conn->in_smac, ð->s_addr); eth->ether_type = rte_cpu_to_be_16(packet_type); mbuf->packet_type = packet_type; @@ -159,16 +159,16 @@ static int __dp_vs_fast_outxmit_fnat4(struct dp_vs_proto *proto, struct dp_vs_conn *conn, struct rte_mbuf *mbuf) { - struct ipv4_hdr *ip4h = ip4_hdr(mbuf); - struct ether_hdr *eth; - uint16_t packet_type = ETHER_TYPE_IPv4; + struct rte_ipv4_hdr *ip4h = ip4_hdr(mbuf); + struct rte_ether_hdr *eth; + uint16_t packet_type = RTE_ETHER_TYPE_IPV4; int err; if (unlikely(conn->out_dev == NULL)) return EDPVS_NOROUTE; - if (unlikely(is_zero_ether_addr(&conn->out_dmac) || - is_zero_ether_addr(&conn->out_smac))) + if (unlikely(rte_is_zero_ether_addr(&conn->out_dmac) || + rte_is_zero_ether_addr(&conn->out_smac))) return EDPVS_NOTSUPP; /* pre-handler before translation */ @@ -200,10 +200,10 @@ static int __dp_vs_fast_outxmit_fnat4(struct dp_vs_proto *proto, ip4_send_csum(ip4h); } - eth = (struct ether_hdr *)rte_pktmbuf_prepend(mbuf, - (uint16_t)sizeof(struct ether_hdr)); - ether_addr_copy(&conn->out_dmac, ð->d_addr); - ether_addr_copy(&conn->out_smac, ð->s_addr); + eth = (struct rte_ether_hdr *)rte_pktmbuf_prepend(mbuf, + (uint16_t)sizeof(struct rte_ether_hdr)); + rte_ether_addr_copy(&conn->out_dmac, ð->d_addr); + rte_ether_addr_copy(&conn->out_smac, ð->s_addr); eth->ether_type = rte_cpu_to_be_16(packet_type); mbuf->packet_type = packet_type; @@ -220,15 +220,15 @@ static int __dp_vs_fast_outxmit_fnat6(struct dp_vs_proto *proto, struct rte_mbuf *mbuf) { struct ip6_hdr *ip6h = ip6_hdr(mbuf); - struct ether_hdr *eth; - uint16_t packet_type = ETHER_TYPE_IPv6; + struct rte_ether_hdr *eth; + uint16_t packet_type = RTE_ETHER_TYPE_IPV6; int err; if 
(unlikely(conn->out_dev == NULL)) return EDPVS_NOROUTE; - if (unlikely(is_zero_ether_addr(&conn->out_dmac) || - is_zero_ether_addr(&conn->out_smac))) + if (unlikely(rte_is_zero_ether_addr(&conn->out_dmac) || + rte_is_zero_ether_addr(&conn->out_smac))) return EDPVS_NOTSUPP; /* pre-handler before translation */ @@ -253,10 +253,10 @@ static int __dp_vs_fast_outxmit_fnat6(struct dp_vs_proto *proto, return err; } - eth = (struct ether_hdr *)rte_pktmbuf_prepend(mbuf, - (uint16_t)sizeof(struct ether_hdr)); - ether_addr_copy(&conn->out_dmac, ð->d_addr); - ether_addr_copy(&conn->out_smac, ð->s_addr); + eth = (struct rte_ether_hdr *)rte_pktmbuf_prepend(mbuf, + (uint16_t)sizeof(struct rte_ether_hdr)); + rte_ether_addr_copy(&conn->out_dmac, ð->d_addr); + rte_ether_addr_copy(&conn->out_smac, ð->s_addr); eth->ether_type = rte_cpu_to_be_16(packet_type); mbuf->packet_type = packet_type; @@ -285,26 +285,26 @@ static void dp_vs_save_xmit_info(struct rte_mbuf *mbuf, struct dp_vs_proto *proto, struct dp_vs_conn *conn) { - struct ether_hdr *eth = NULL; + struct rte_ether_hdr *eth = NULL; struct netif_port *port = NULL; - if (!is_zero_ether_addr(&conn->out_dmac) && - !is_zero_ether_addr(&conn->out_smac)) + if (!rte_is_zero_ether_addr(&conn->out_dmac) && + !rte_is_zero_ether_addr(&conn->out_smac)) return; - if (unlikely(mbuf->l2_len != sizeof(struct ether_hdr))) + if (unlikely(mbuf->l2_len != sizeof(struct rte_ether_hdr))) return; port = netif_port_get(mbuf->port); if (port) conn->out_dev = port; - eth = (struct ether_hdr *)rte_pktmbuf_prepend(mbuf, mbuf->l2_len); + eth = (struct rte_ether_hdr *)rte_pktmbuf_prepend(mbuf, mbuf->l2_len); - ether_addr_copy(ð->s_addr, &conn->out_dmac); - ether_addr_copy(ð->d_addr, &conn->out_smac); + rte_ether_addr_copy(ð->s_addr, &conn->out_dmac); + rte_ether_addr_copy(ð->d_addr, &conn->out_smac); - rte_pktmbuf_adj(mbuf, sizeof(struct ether_hdr)); + rte_pktmbuf_adj(mbuf, sizeof(struct rte_ether_hdr)); } /* @@ -314,26 +314,26 @@ static void 
dp_vs_save_outxmit_info(struct rte_mbuf *mbuf, struct dp_vs_proto *proto, struct dp_vs_conn *conn) { - struct ether_hdr *eth = NULL; + struct rte_ether_hdr *eth = NULL; struct netif_port *port = NULL; - if (!is_zero_ether_addr(&conn->in_dmac) && - !is_zero_ether_addr(&conn->in_smac)) + if (!rte_is_zero_ether_addr(&conn->in_dmac) && + !rte_is_zero_ether_addr(&conn->in_smac)) return; - if (mbuf->l2_len != sizeof(struct ether_hdr)) + if (mbuf->l2_len != sizeof(struct rte_ether_hdr)) return; port = netif_port_get(mbuf->port); if (port) conn->in_dev = port; - eth = (struct ether_hdr *)rte_pktmbuf_prepend(mbuf, mbuf->l2_len); + eth = (struct rte_ether_hdr *)rte_pktmbuf_prepend(mbuf, mbuf->l2_len); - ether_addr_copy(ð->s_addr, &conn->in_dmac); - ether_addr_copy(ð->d_addr, &conn->in_smac); + rte_ether_addr_copy(ð->s_addr, &conn->in_dmac); + rte_ether_addr_copy(ð->d_addr, &conn->in_smac); - rte_pktmbuf_adj(mbuf, sizeof(struct ether_hdr)); + rte_pktmbuf_adj(mbuf, sizeof(struct rte_ether_hdr)); } /* @@ -393,7 +393,7 @@ static int __dp_vs_xmit_fnat4(struct dp_vs_proto *proto, struct rte_mbuf *mbuf) { struct flow4 fl4; - struct ipv4_hdr *iph = ip4_hdr(mbuf); + struct rte_ipv4_hdr *iph = ip4_hdr(mbuf); struct route_entry *rt; int err, mtu; @@ -408,10 +408,10 @@ static int __dp_vs_xmit_fnat4(struct dp_vs_proto *proto, * drop old route. just for safe, because * FNAT is PRE_ROUTING, should not have route. 
*/ - if (unlikely(mbuf->userdata != NULL)) { - RTE_LOG(WARNING, IPVS, "%s: FNAT have route %p ?\n", - __func__, mbuf->userdata); - route4_put((struct route_entry *)mbuf->userdata); + if (unlikely(MBUF_USERDATA(mbuf, struct route_entry *, MBUF_FIELD_ROUTE) != NULL)) { + RTE_LOG(WARNING, IPVS, "%s: FNAT have route %p ?\n", __func__, + MBUF_USERDATA(mbuf, struct route_entry *, MBUF_FIELD_ROUTE)); + route4_put(MBUF_USERDATA(mbuf, struct route_entry *, MBUF_FIELD_ROUTE)); } memset(&fl4, 0, sizeof(struct flow4)); @@ -433,14 +433,14 @@ static int __dp_vs_xmit_fnat4(struct dp_vs_proto *proto, mtu = rt->mtu; if (mbuf->pkt_len > mtu - && (iph->fragment_offset & htons(IPV4_HDR_DF_FLAG))) { + && (iph->fragment_offset & htons(RTE_IPV4_HDR_DF_FLAG))) { RTE_LOG(DEBUG, IPVS, "%s: frag needed.\n", __func__); icmp_send(mbuf, ICMP_DEST_UNREACH, ICMP_UNREACH_NEEDFRAG, htonl(mtu)); err = EDPVS_FRAG; goto errout; } - mbuf->userdata = rt; + MBUF_USERDATA(mbuf, struct route_entry *, MBUF_FIELD_ROUTE) = rt; /* after route lookup and before translation */ if (xmit_ttl) { @@ -514,10 +514,10 @@ static int __dp_vs_xmit_fnat6(struct dp_vs_proto *proto, * drop old route. just for safe, because * FNAT is PRE_ROUTING, should not have route. 
*/ - if (unlikely(mbuf->userdata != NULL)) { + if (unlikely(MBUF_USERDATA(mbuf, struct route6 *, MBUF_FIELD_ROUTE) != NULL)) { RTE_LOG(WARNING, IPVS, "%s: FNAT have route %p ?\n", - __func__, mbuf->userdata); - route6_put((struct route6 *)mbuf->userdata); + __func__, MBUF_USERDATA(mbuf, struct route6 *, MBUF_FIELD_ROUTE)); + route6_put(MBUF_USERDATA(mbuf, struct route6 *, MBUF_FIELD_ROUTE)); } memset(&fl6, 0, sizeof(struct flow6)); @@ -546,7 +546,7 @@ static int __dp_vs_xmit_fnat6(struct dp_vs_proto *proto, goto errout; } - mbuf->userdata = rt6; + MBUF_USERDATA(mbuf, struct route6 *, MBUF_FIELD_ROUTE) = rt6; /* after route lookup and before translation */ if (xmit_ttl) { @@ -599,7 +599,7 @@ static int __dp_vs_xmit_fnat64(struct dp_vs_proto *proto, { struct flow4 fl4; struct ip6_hdr *ip6h = ip6_hdr(mbuf); - struct ipv4_hdr *ip4h; + struct rte_ipv4_hdr *ip4h; uint32_t pkt_len; struct route_entry *rt; int err, mtu; @@ -608,10 +608,10 @@ static int __dp_vs_xmit_fnat64(struct dp_vs_proto *proto, * drop old route. just for safe, because * FNAT is PRE_ROUTING, should not have route. 
*/ - if (unlikely(mbuf->userdata != NULL)) { + if (unlikely(MBUF_USERDATA(mbuf, struct route6 *, MBUF_FIELD_ROUTE) != NULL)) { RTE_LOG(WARNING, IPVS, "%s: FNAT have route %p ?\n", - __func__, mbuf->userdata); - route6_put((struct route6 *)mbuf->userdata); + __func__, MBUF_USERDATA(mbuf, struct route6 *, MBUF_FIELD_ROUTE)); + route6_put(MBUF_USERDATA(mbuf, struct route6 *, MBUF_FIELD_ROUTE)); } memset(&fl4, 0, sizeof(struct flow4)); @@ -644,7 +644,7 @@ static int __dp_vs_xmit_fnat64(struct dp_vs_proto *proto, goto errout; } - mbuf->userdata = rt; + MBUF_USERDATA(mbuf, struct route_entry *, MBUF_FIELD_ROUTE) = rt; /* after route lookup and before translation */ if (xmit_ttl) { if (unlikely(ip6h->ip6_hops <= 1)) { @@ -718,7 +718,7 @@ static int __dp_vs_out_xmit_fnat4(struct dp_vs_proto *proto, struct rte_mbuf *mbuf) { struct flow4 fl4; - struct ipv4_hdr *iph = ip4_hdr(mbuf); + struct rte_ipv4_hdr *iph = ip4_hdr(mbuf); struct route_entry *rt; int err, mtu; @@ -733,8 +733,8 @@ static int __dp_vs_out_xmit_fnat4(struct dp_vs_proto *proto, * drop old route. just for safe, because * FNAT is PRE_ROUTING, should not have route. 
*/ - if (unlikely(mbuf->userdata != NULL)) - route4_put((struct route_entry *)mbuf->userdata); + if (MBUF_USERDATA(mbuf, struct route_entry *, MBUF_FIELD_ROUTE) != NULL) + route4_put(MBUF_USERDATA(mbuf, struct route_entry *, MBUF_FIELD_ROUTE)); memset(&fl4, 0, sizeof(struct flow4)); fl4.fl4_daddr = conn->caddr.in; @@ -755,14 +755,14 @@ static int __dp_vs_out_xmit_fnat4(struct dp_vs_proto *proto, mtu = rt->mtu; if (mbuf->pkt_len > mtu - && (iph->fragment_offset & htons(IPV4_HDR_DF_FLAG))) { + && (iph->fragment_offset & htons(RTE_IPV4_HDR_DF_FLAG))) { RTE_LOG(DEBUG, IPVS, "%s: frag needed.\n", __func__); icmp_send(mbuf, ICMP_DEST_UNREACH, ICMP_UNREACH_NEEDFRAG, htonl(mtu)); err = EDPVS_FRAG; goto errout; } - mbuf->userdata = rt; + MBUF_USERDATA(mbuf, struct route_entry *, MBUF_FIELD_ROUTE) = rt; /* after route lookup and before translation */ if (xmit_ttl) { @@ -836,8 +836,8 @@ static int __dp_vs_out_xmit_fnat6(struct dp_vs_proto *proto, * drop old route. just for safe, because * FNAT is PRE_ROUTING, should not have route. */ - if (unlikely(mbuf->userdata != NULL)) - route6_put((struct route6 *)mbuf->userdata); + if (unlikely(MBUF_USERDATA(mbuf, struct route6 *, MBUF_FIELD_ROUTE) != NULL)) + route6_put(MBUF_USERDATA(mbuf, struct route6 *, MBUF_FIELD_ROUTE)); memset(&fl6, 0, sizeof(struct flow6)); fl6.fl6_daddr = conn->caddr.in6; @@ -863,7 +863,7 @@ static int __dp_vs_out_xmit_fnat6(struct dp_vs_proto *proto, goto errout; } - mbuf->userdata = rt6; + MBUF_USERDATA(mbuf, struct route6 *, MBUF_FIELD_ROUTE) = rt6; /* after route lookup and before translation */ if (xmit_ttl) { @@ -915,7 +915,7 @@ static int __dp_vs_out_xmit_fnat46(struct dp_vs_proto *proto, struct rte_mbuf *mbuf) { struct flow6 fl6; - struct ipv4_hdr *ip4h = ip4_hdr(mbuf); + struct rte_ipv4_hdr *ip4h = ip4_hdr(mbuf); uint32_t pkt_len; struct route6 *rt6; int err, mtu; @@ -924,10 +924,10 @@ static int __dp_vs_out_xmit_fnat46(struct dp_vs_proto *proto, * drop old route. 
just for safe, because * FNAT is PRE_ROUTING, should not have route. */ - if (unlikely(mbuf->userdata != NULL)) { - RTE_LOG(WARNING, IPVS, "%s: FNAT have route %p ?\n", - __func__, mbuf->userdata); - route4_put((struct route_entry *)mbuf->userdata); + if (unlikely(MBUF_USERDATA(mbuf, struct route_entry *, MBUF_FIELD_ROUTE) != NULL)) { + RTE_LOG(WARNING, IPVS, "%s: FNAT have route %p ?\n", __func__, + MBUF_USERDATA(mbuf, struct route_entry *, MBUF_FIELD_ROUTE)); + route4_put(MBUF_USERDATA(mbuf, struct route_entry *, MBUF_FIELD_ROUTE)); } memset(&fl6, 0, sizeof(struct flow6)); @@ -953,14 +953,14 @@ static int __dp_vs_out_xmit_fnat46(struct dp_vs_proto *proto, mtu = rt6->rt6_mtu; pkt_len = mbuf_nat4to6_len(mbuf); if (pkt_len > mtu - && (ip4h->fragment_offset & htons(IPV4_HDR_DF_FLAG))) { + && (ip4h->fragment_offset & htons(RTE_IPV4_HDR_DF_FLAG))) { RTE_LOG(DEBUG, IPVS, "%s: frag needed.\n", __func__); icmp_send(mbuf, ICMP_DEST_UNREACH, ICMP_UNREACH_NEEDFRAG, htonl(mtu)); err = EDPVS_FRAG; goto errout; } - mbuf->userdata = rt6; + MBUF_USERDATA(mbuf, struct route6 *, MBUF_FIELD_ROUTE) = rt6; /* after route lookup and before translation */ if (xmit_ttl) { if (unlikely(ip4h->time_to_live <= 1)) { @@ -1026,10 +1026,10 @@ static void __dp_vs_xmit_icmp4(struct rte_mbuf *mbuf, struct dp_vs_proto *prot, struct dp_vs_conn *conn, int dir) { - struct ipv4_hdr *iph = ip4_hdr(mbuf); + struct rte_ipv4_hdr *iph = ip4_hdr(mbuf); struct icmphdr *icmph = (struct icmphdr *) ((unsigned char *)ip4_hdr(mbuf) + ip4_hdrlen(mbuf)); - struct ipv4_hdr *ciph = (struct ipv4_hdr *)(icmph + 1); + struct rte_ipv4_hdr *ciph = (struct rte_ipv4_hdr *)(icmph + 1); int fullnat = (conn->dest->fwdmode == DPVS_FWD_MODE_FNAT); uint16_t csum; @@ -1067,7 +1067,7 @@ static void __dp_vs_xmit_icmp4(struct rte_mbuf *mbuf, if (ciph->next_proto_id == IPPROTO_TCP || ciph->next_proto_id == IPPROTO_UDP) { uint16_t *ports = (void *)ciph + \ - ((ciph->version_ihl & IPV4_HDR_IHL_MASK)<<2); + ((ciph->version_ihl & 
RTE_IPV4_HDR_IHL_MASK)<<2); if (fullnat) { if (dir == DPVS_CONN_DIR_INBOUND) { @@ -1200,7 +1200,7 @@ static void __dp_vs_xmit_icmp6(struct rte_mbuf *mbuf, icmp6h->icmp6_cksum = 0; l4_len = ntohs(ip6h->ip6_plen); csum = rte_raw_cksum(icmp6h, l4_len); - csum += rte_ipv6_phdr_cksum((struct ipv6_hdr *)ip6h, 0); + csum += rte_ipv6_phdr_cksum((struct rte_ipv6_hdr *)ip6h, 0); csum = ((csum & 0xffff0000) >> 16) + (csum & 0xffff); csum = (~csum) & 0xffff; @@ -1230,14 +1230,14 @@ static int __dp_vs_xmit_dr4(struct dp_vs_proto *proto, struct rte_mbuf *mbuf) { struct flow4 fl4; - struct ipv4_hdr *iph = ip4_hdr(mbuf); + struct rte_ipv4_hdr *iph = ip4_hdr(mbuf); struct route_entry *rt; int err, mtu; - if (unlikely(mbuf->userdata != NULL)) { - RTE_LOG(WARNING, IPVS, "%s: Already have route %p ?\n", - __func__, mbuf->userdata); - route4_put((struct route_entry *)mbuf->userdata); + if (unlikely(MBUF_USERDATA(mbuf, struct route_entry *, MBUF_FIELD_ROUTE) != NULL)) { + RTE_LOG(WARNING, IPVS, "%s: Already have route %p ?\n", __func__, + MBUF_USERDATA(mbuf, struct route_entry *, MBUF_FIELD_ROUTE)); + route4_put(MBUF_USERDATA(mbuf, struct route_entry *, MBUF_FIELD_ROUTE)); } memset(&fl4, 0, sizeof(struct flow4)); @@ -1255,14 +1255,14 @@ static int __dp_vs_xmit_dr4(struct dp_vs_proto *proto, mtu = rt->mtu; if (mbuf->pkt_len > mtu - && (iph->fragment_offset & htons(IPV4_HDR_DF_FLAG))) { + && (iph->fragment_offset & htons(RTE_IPV4_HDR_DF_FLAG))) { RTE_LOG(DEBUG, IPVS, "%s: frag needed.\n", __func__); icmp_send(mbuf, ICMP_DEST_UNREACH, ICMP_UNREACH_NEEDFRAG, htonl(mtu)); err = EDPVS_FRAG; goto errout; } - mbuf->packet_type = ETHER_TYPE_IPv4; + mbuf->packet_type = RTE_ETHER_TYPE_IPV4; err = neigh_output(AF_INET, (union inet_addr *)&conn->daddr.in, mbuf, rt->port); route4_put(rt); return err; @@ -1283,10 +1283,10 @@ static int __dp_vs_xmit_dr6(struct dp_vs_proto *proto, struct route6 *rt6; int err, mtu; - if (unlikely(mbuf->userdata != NULL)) { - RTE_LOG(WARNING, IPVS, "%s: Already have route 
%p ?\n", - __func__, mbuf->userdata); - route6_put((struct route6 *)mbuf->userdata); + if (unlikely(MBUF_USERDATA(mbuf, struct route6 *, MBUF_FIELD_ROUTE) != NULL)) { + RTE_LOG(WARNING, IPVS, "%s: Already have route %p ?\n", __func__, + MBUF_USERDATA(mbuf, struct route6 *, MBUF_FIELD_ROUTE)); + route6_put(MBUF_USERDATA(mbuf, struct route6 *, MBUF_FIELD_ROUTE)); } memset(&fl6, 0, sizeof(struct flow6)); @@ -1309,7 +1309,7 @@ static int __dp_vs_xmit_dr6(struct dp_vs_proto *proto, goto errout; } - mbuf->packet_type = ETHER_TYPE_IPv6; + mbuf->packet_type = RTE_ETHER_TYPE_IPV6; err = neigh_output(AF_INET6, (union inet_addr *)&conn->daddr.in6, mbuf, rt6->rt6_dev); route6_put(rt6); return err; @@ -1338,7 +1338,7 @@ static int __dp_vs_xmit_snat4(struct dp_vs_proto *proto, struct rte_mbuf *mbuf) { struct flow4 fl4; - struct ipv4_hdr *iph = ip4_hdr(mbuf); + struct rte_ipv4_hdr *iph = ip4_hdr(mbuf); struct route_entry *rt; int err, mtu; @@ -1347,10 +1347,10 @@ static int __dp_vs_xmit_snat4(struct dp_vs_proto *proto, * inbound SNAT traffic is hooked at PRE_ROUTING, * should not have route. 
*/ - if (unlikely(mbuf->userdata != NULL)) { - RTE_LOG(WARNING, IPVS, "%s: SNAT have route %p ?\n", - __func__, mbuf->userdata); - route4_put((struct route_entry *)mbuf->userdata); + if (unlikely(MBUF_USERDATA(mbuf, struct route_entry *, MBUF_FIELD_ROUTE) != NULL)) { + RTE_LOG(WARNING, IPVS, "%s: SNAT have route %p ?\n", __func__, + MBUF_USERDATA(mbuf, struct route_entry *, MBUF_FIELD_ROUTE)); + route4_put(MBUF_USERDATA(mbuf, struct route_entry *, MBUF_FIELD_ROUTE)); } /* @@ -1371,14 +1371,14 @@ static int __dp_vs_xmit_snat4(struct dp_vs_proto *proto, mtu = rt->mtu; if (mbuf->pkt_len > mtu - && (iph->fragment_offset & htons(IPV4_HDR_DF_FLAG))) { + && (iph->fragment_offset & htons(RTE_IPV4_HDR_DF_FLAG))) { RTE_LOG(DEBUG, IPVS, "%s: frag needed.\n", __func__); icmp_send(mbuf, ICMP_DEST_UNREACH, ICMP_UNREACH_NEEDFRAG, htonl(mtu)); err = EDPVS_FRAG; goto errout; } - mbuf->userdata = rt; + MBUF_USERDATA(mbuf, struct route_entry *, MBUF_FIELD_ROUTE) = rt; /* after route lookup and before translation */ if (xmit_ttl) { @@ -1432,10 +1432,10 @@ static int __dp_vs_xmit_snat6(struct dp_vs_proto *proto, * inbound SNAT traffic is hooked at PRE_ROUTING, * should not have route. 
*/ - if (unlikely(mbuf->userdata != NULL)) { - RTE_LOG(WARNING, IPVS, "%s: SNAT have route %p ?\n", - __func__, mbuf->userdata); - route6_put((struct route6 *)mbuf->userdata); + if (unlikely(MBUF_USERDATA(mbuf, struct route6 *, MBUF_FIELD_ROUTE) != NULL)) { + RTE_LOG(WARNING, IPVS, "%s: SNAT have route %p ?\n", __func__, + MBUF_USERDATA(mbuf, struct route6*, MBUF_FIELD_ROUTE)); + route6_put(MBUF_USERDATA(mbuf, struct route6*, MBUF_FIELD_ROUTE)); } /* @@ -1461,7 +1461,7 @@ static int __dp_vs_xmit_snat6(struct dp_vs_proto *proto, goto errout; } - mbuf->userdata = rt6; + MBUF_USERDATA(mbuf, struct route6 *, MBUF_FIELD_ROUTE) = rt6; /* after route lookup and before translation */ if (xmit_ttl) { @@ -1512,8 +1512,8 @@ static int __dp_vs_out_xmit_snat4(struct dp_vs_proto *proto, { int err; struct flow4 fl4; - struct route_entry *rt = mbuf->userdata; - struct ipv4_hdr *iph = ip4_hdr(mbuf); + struct route_entry *rt = MBUF_USERDATA(mbuf, struct route_entry *, MBUF_FIELD_ROUTE); + struct rte_ipv4_hdr *iph = ip4_hdr(mbuf); if (!rt) { memset(&fl4, 0, sizeof(struct flow4)); @@ -1535,13 +1535,13 @@ static int __dp_vs_out_xmit_snat4(struct dp_vs_proto *proto, goto errout; } } - mbuf->userdata = rt; + MBUF_USERDATA(mbuf, struct route_entry *, MBUF_FIELD_ROUTE) = rt; dp_vs_conn_cache_rt(conn, rt, false); } if (mbuf->pkt_len > rt->mtu && - (iph->fragment_offset & htons(IPV4_HDR_DF_FLAG))) { + (iph->fragment_offset & htons(RTE_IPV4_HDR_DF_FLAG))) { RTE_LOG(DEBUG, IPVS, "%s: frag needed.\n", __func__); icmp_send(mbuf, ICMP_DEST_UNREACH, ICMP_UNREACH_NEEDFRAG, htonl(rt->mtu)); @@ -1591,15 +1591,15 @@ static int dp_vs_fast_xmit_nat(struct dp_vs_proto *proto, struct dp_vs_conn *conn, struct rte_mbuf *mbuf) { - struct ipv4_hdr *iph = ip4_hdr(mbuf); - struct ether_hdr *eth; + struct rte_ipv4_hdr *iph = ip4_hdr(mbuf); + struct rte_ether_hdr *eth; int err; if (unlikely(conn->in_dev == NULL)) return EDPVS_NOROUTE; - if (unlikely(is_zero_ether_addr(&conn->in_dmac) || - 
is_zero_ether_addr(&conn->in_smac))) + if (unlikely(rte_is_zero_ether_addr(&conn->in_dmac) || + rte_is_zero_ether_addr(&conn->in_smac))) return EDPVS_NOTSUPP; iph->hdr_checksum = 0; @@ -1617,12 +1617,12 @@ static int dp_vs_fast_xmit_nat(struct dp_vs_proto *proto, ip4_send_csum(iph); } - eth = (struct ether_hdr *)rte_pktmbuf_prepend(mbuf, - (uint16_t)sizeof(struct ether_hdr)); - ether_addr_copy(&conn->in_dmac, ð->d_addr); - ether_addr_copy(&conn->in_smac, ð->s_addr); - eth->ether_type = rte_cpu_to_be_16(ETHER_TYPE_IPv4); - mbuf->packet_type = ETHER_TYPE_IPv4; + eth = (struct rte_ether_hdr *)rte_pktmbuf_prepend(mbuf, + (uint16_t)sizeof(struct rte_ether_hdr)); + rte_ether_addr_copy(&conn->in_dmac, ð->d_addr); + rte_ether_addr_copy(&conn->in_smac, ð->s_addr); + eth->ether_type = rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4); + mbuf->packet_type = RTE_ETHER_TYPE_IPV4; err = netif_xmit(mbuf, conn->in_dev); if (err != EDPVS_OK) @@ -1636,15 +1636,15 @@ static int dp_vs_fast_outxmit_nat(struct dp_vs_proto *proto, struct dp_vs_conn *conn, struct rte_mbuf *mbuf) { - struct ipv4_hdr *iph = ip4_hdr(mbuf); - struct ether_hdr *eth; + struct rte_ipv4_hdr *iph = ip4_hdr(mbuf); + struct rte_ether_hdr *eth; int err; if (unlikely(conn->out_dev == NULL)) return EDPVS_NOROUTE; - if (unlikely(is_zero_ether_addr(&conn->out_dmac) || - is_zero_ether_addr(&conn->out_smac))) + if (unlikely(rte_is_zero_ether_addr(&conn->out_dmac) || + rte_is_zero_ether_addr(&conn->out_smac))) return EDPVS_NOTSUPP; iph->hdr_checksum = 0; @@ -1662,12 +1662,12 @@ static int dp_vs_fast_outxmit_nat(struct dp_vs_proto *proto, ip4_send_csum(iph); } - eth = (struct ether_hdr *)rte_pktmbuf_prepend(mbuf, - (uint16_t)sizeof(struct ether_hdr)); - ether_addr_copy(&conn->out_dmac, ð->d_addr); - ether_addr_copy(&conn->out_smac, ð->s_addr); - eth->ether_type = rte_cpu_to_be_16(ETHER_TYPE_IPv4); - mbuf->packet_type = ETHER_TYPE_IPv4; + eth = (struct rte_ether_hdr *)rte_pktmbuf_prepend(mbuf, + (uint16_t)sizeof(struct rte_ether_hdr)); + 
rte_ether_addr_copy(&conn->out_dmac, ð->d_addr); + rte_ether_addr_copy(&conn->out_smac, ð->s_addr); + eth->ether_type = rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4); + mbuf->packet_type = RTE_ETHER_TYPE_IPV4; err = netif_xmit(mbuf, conn->out_dev); if (err != EDPVS_OK) @@ -1683,7 +1683,7 @@ static int __dp_vs_out_xmit_snat6(struct dp_vs_proto *proto, { int err; struct flow6 fl6; - struct route6 *rt6 = mbuf->userdata; + struct route6 *rt6 = MBUF_USERDATA(mbuf, struct route6 *, MBUF_FIELD_ROUTE); struct ip6_hdr *ip6h = ip6_hdr(mbuf); if (!rt6) { @@ -1696,7 +1696,7 @@ static int __dp_vs_out_xmit_snat6(struct dp_vs_proto *proto, goto errout; } - mbuf->userdata = rt6; + MBUF_USERDATA(mbuf, struct route6 *, MBUF_FIELD_ROUTE) = rt6; dp_vs_conn_cache_rt6(conn, rt6, false); } @@ -1756,7 +1756,7 @@ static int __dp_vs_xmit_nat4(struct dp_vs_proto *proto, struct rte_mbuf *mbuf) { struct flow4 fl4; - struct ipv4_hdr *iph = ip4_hdr(mbuf); + struct rte_ipv4_hdr *iph = ip4_hdr(mbuf); struct route_entry *rt; int err, mtu; @@ -1771,10 +1771,10 @@ static int __dp_vs_xmit_nat4(struct dp_vs_proto *proto, * drop old route. just for safe, because * NAT is PREROUTING, should not have route. 
*/ - if (unlikely(mbuf->userdata != NULL)) { - RTE_LOG(WARNING, IPVS, "%s: NAT have route %p ?\n", - __func__, mbuf->userdata); - route4_put((struct route_entry*)mbuf->userdata); + if (unlikely(MBUF_USERDATA(mbuf, struct route_entry *, MBUF_FIELD_ROUTE) != NULL)) { + RTE_LOG(WARNING, IPVS, "%s: NAT have route %p ?\n", __func__, + MBUF_USERDATA(mbuf, struct route_entry *, MBUF_FIELD_ROUTE)); + route4_put(MBUF_USERDATA(mbuf, struct route_entry *, MBUF_FIELD_ROUTE)); } memset(&fl4, 0, sizeof(struct flow4)); @@ -1791,7 +1791,7 @@ static int __dp_vs_xmit_nat4(struct dp_vs_proto *proto, mtu = rt->mtu; if (mbuf->pkt_len > mtu - && (iph->fragment_offset & htons(IPV4_HDR_DF_FLAG))) { + && (iph->fragment_offset & htons(RTE_IPV4_HDR_DF_FLAG))) { RTE_LOG(DEBUG, IPVS, "%s: frag needed.\n", __func__); icmp_send(mbuf, ICMP_DEST_UNREACH, ICMP_UNREACH_NEEDFRAG, htonl(rt->mtu)); @@ -1799,7 +1799,7 @@ static int __dp_vs_xmit_nat4(struct dp_vs_proto *proto, goto errout; } - mbuf->userdata = rt; + MBUF_USERDATA(mbuf, struct route_entry *, MBUF_FIELD_ROUTE) = rt; /* after route lookup and before translation */ if (xmit_ttl) { @@ -1852,10 +1852,10 @@ static int __dp_vs_xmit_nat6(struct dp_vs_proto *proto, * drop old route. just for safe, because * NAT is PREROUTING, should not have route. 
*/ - if (unlikely(mbuf->userdata != NULL)) { - RTE_LOG(WARNING, IPVS, "%s: NAT have route %p ?\n", - __func__, mbuf->userdata); - route6_put((struct route6*)mbuf->userdata); + if (unlikely(MBUF_USERDATA(mbuf, struct route6 *, MBUF_FIELD_ROUTE) != NULL)) { + RTE_LOG(WARNING, IPVS, "%s: NAT have route %p ?\n", __func__, + MBUF_USERDATA(mbuf, struct route6 *, MBUF_FIELD_ROUTE)); + route6_put(MBUF_USERDATA(mbuf, struct route6 *, MBUF_FIELD_ROUTE)); } memset(&fl6, 0, sizeof(struct flow6)); @@ -1877,7 +1877,7 @@ static int __dp_vs_xmit_nat6(struct dp_vs_proto *proto, goto errout; } - mbuf->userdata = rt6; + MBUF_USERDATA(mbuf, struct route6 *, MBUF_FIELD_ROUTE) = rt6; /* after route lookup and before translation */ if (xmit_ttl) { @@ -1927,7 +1927,7 @@ static int __dp_vs_out_xmit_nat4(struct dp_vs_proto *proto, struct rte_mbuf *mbuf) { struct flow4 fl4; - struct ipv4_hdr *iph = ip4_hdr(mbuf); + struct rte_ipv4_hdr *iph = ip4_hdr(mbuf); struct route_entry *rt; int err, mtu; @@ -1942,10 +1942,10 @@ static int __dp_vs_out_xmit_nat4(struct dp_vs_proto *proto, * drop old route. just for safe, because * NAT is PREROUTING, should not have route. 
*/ - if (unlikely(mbuf->userdata != NULL)) { - RTE_LOG(WARNING, IPVS, "%s: NAT have route %p ?\n", - __func__, mbuf->userdata); - route4_put((struct route_entry*)mbuf->userdata); + if (unlikely(MBUF_USERDATA(mbuf, struct route_entry *, MBUF_FIELD_ROUTE) != NULL)) { + RTE_LOG(WARNING, IPVS, "%s: NAT have route %p ?\n", __func__, + MBUF_USERDATA(mbuf, struct route_entry *, MBUF_FIELD_ROUTE)); + route4_put(MBUF_USERDATA(mbuf, struct route_entry *, MBUF_FIELD_ROUTE)); } memset(&fl4, 0, sizeof(struct flow4)); @@ -1962,7 +1962,7 @@ static int __dp_vs_out_xmit_nat4(struct dp_vs_proto *proto, mtu = rt->mtu; if (mbuf->pkt_len > mtu - && (iph->fragment_offset & htons(IPV4_HDR_DF_FLAG))) { + && (iph->fragment_offset & htons(RTE_IPV4_HDR_DF_FLAG))) { RTE_LOG(DEBUG, IPVS, "%s: frag needed.\n", __func__); icmp_send(mbuf, ICMP_DEST_UNREACH, ICMP_UNREACH_NEEDFRAG, htonl(rt->mtu)); @@ -1971,7 +1971,7 @@ static int __dp_vs_out_xmit_nat4(struct dp_vs_proto *proto, goto errout; } - mbuf->userdata = rt; + MBUF_USERDATA(mbuf, struct route_entry *, MBUF_FIELD_ROUTE) = rt; /* after route lookup and before translation */ if (xmit_ttl) { @@ -2024,10 +2024,10 @@ static int __dp_vs_out_xmit_nat6(struct dp_vs_proto *proto, * drop old route. just for safe, because * NAT is PREROUTING, should not have route. 
*/ - if (unlikely(mbuf->userdata != NULL)) { - RTE_LOG(WARNING, IPVS, "%s: NAT have route %p ?\n", - __func__, mbuf->userdata); - route6_put((struct route6*)mbuf->userdata); + if (unlikely(MBUF_USERDATA(mbuf, struct route6 *, MBUF_FIELD_ROUTE) != NULL)) { + RTE_LOG(WARNING, IPVS, "%s: NAT have route %p ?\n", __func__, + MBUF_USERDATA(mbuf, struct route6 *, MBUF_FIELD_ROUTE)); + route6_put(MBUF_USERDATA(mbuf, struct route6 *, MBUF_FIELD_ROUTE)); } memset(&fl6, 0, sizeof(struct flow6)); @@ -2049,7 +2049,7 @@ static int __dp_vs_out_xmit_nat6(struct dp_vs_proto *proto, goto errout; } - mbuf->userdata = rt6; + MBUF_USERDATA(mbuf, struct route6 *, MBUF_FIELD_ROUTE) = rt6; /* after route lookup and before translation */ if (xmit_ttl) { @@ -2103,20 +2103,20 @@ static int __dp_vs_xmit_tunnel4(struct dp_vs_proto *proto, struct rte_mbuf *mbuf) { struct flow4 fl4; - struct ipv4_hdr *new_iph, *old_iph = ip4_hdr(mbuf); + struct rte_ipv4_hdr *new_iph, *old_iph = ip4_hdr(mbuf); struct route_entry *rt; uint8_t tos = old_iph->type_of_service; - uint16_t df = old_iph->fragment_offset & htons(IPV4_HDR_DF_FLAG); + uint16_t df = old_iph->fragment_offset & htons(RTE_IPV4_HDR_DF_FLAG); int err, mtu; /* * drop old route. just for safe, because * TUNNEL is PREROUTING, should not have route. 
*/ - if (unlikely(mbuf->userdata != NULL)) { - RTE_LOG(WARNING, IPVS, "%s: TUNNEL have route %p ?\n", - __func__, mbuf->userdata); - route4_put((struct route_entry*)mbuf->userdata); + if (unlikely(MBUF_USERDATA(mbuf, struct route_entry *, MBUF_FIELD_ROUTE) != NULL)) { + RTE_LOG(WARNING, IPVS, "%s: TUNNEL have route %p ?\n", __func__, + MBUF_USERDATA(mbuf, struct route_entry *, MBUF_FIELD_ROUTE)); + route4_put(MBUF_USERDATA(mbuf, struct route_entry *, MBUF_FIELD_ROUTE)); } memset(&fl4, 0, sizeof(struct flow4)); @@ -2131,9 +2131,9 @@ static int __dp_vs_xmit_tunnel4(struct dp_vs_proto *proto, dp_vs_conn_cache_rt(conn, rt, true); mtu = rt->mtu; - mbuf->userdata = rt; + MBUF_USERDATA(mbuf, struct route_entry *, MBUF_FIELD_ROUTE) = rt; - new_iph = (struct ipv4_hdr*)rte_pktmbuf_prepend(mbuf, sizeof(struct ipv4_hdr)); + new_iph = (struct rte_ipv4_hdr*)rte_pktmbuf_prepend(mbuf, sizeof(struct rte_ipv4_hdr)); if (!new_iph) { RTE_LOG(WARNING, IPVS, "%s: mbuf has not enough headroom" " space for ipvs tunnel\n", __func__); @@ -2149,7 +2149,7 @@ static int __dp_vs_xmit_tunnel4(struct dp_vs_proto *proto, goto errout; } - memset(new_iph, 0, sizeof(struct ipv4_hdr)); + memset(new_iph, 0, sizeof(struct rte_ipv4_hdr)); new_iph->version_ihl = 0x45; new_iph->type_of_service = tos; new_iph->total_length = htons(mbuf->pkt_len); @@ -2194,10 +2194,10 @@ static int __dp_vs_xmit_tunnel6(struct dp_vs_proto *proto, * drop old route. just for safe, because * TUNNEL is PREROUTING, should not have route. 
*/ - if (unlikely(mbuf->userdata != NULL)) { - RTE_LOG(WARNING, IPVS, "%s: TUNNEL have route %p ?\n", - __func__, mbuf->userdata); - route6_put((struct route6*)mbuf->userdata); + if (unlikely(MBUF_USERDATA(mbuf, struct route6 *, MBUF_FIELD_ROUTE) != NULL)) { + RTE_LOG(WARNING, IPVS, "%s: TUNNEL have route %p ?\n", __func__, + MBUF_USERDATA(mbuf, struct route6 *, MBUF_FIELD_ROUTE)); + route6_put(MBUF_USERDATA(mbuf, struct route6 *, MBUF_FIELD_ROUTE)); } memset(&fl6, 0, sizeof(struct flow6)); @@ -2211,7 +2211,7 @@ static int __dp_vs_xmit_tunnel6(struct dp_vs_proto *proto, dp_vs_conn_cache_rt6(conn, rt6, true); mtu = rt6->rt6_mtu; - mbuf->userdata = rt6; + MBUF_USERDATA(mbuf, struct route6 *, MBUF_FIELD_ROUTE) = rt6; new_ip6h = (struct ip6_hdr*)rte_pktmbuf_prepend(mbuf, sizeof(struct ip6_hdr)); if (!new_ip6h) { @@ -2262,17 +2262,17 @@ static int __dp_vs_xmit_tunnel_6o4(struct dp_vs_proto *proto, int err, mtu; struct flow4 fl4; struct route_entry *rt; - struct ipv4_hdr *new_iph; + struct rte_ipv4_hdr *new_iph; struct ip6_hdr *old_ip6h = ip6_hdr(mbuf); /* * drop old route. just for safe, because * TUNNEL is PREROUTING, should not have route. 
*/ - if (unlikely(mbuf->userdata != NULL)) { - RTE_LOG(WARNING, IPVS, "%s: TUNNEL have route %p ?\n", - __func__, mbuf->userdata); - route6_put((struct route6*)mbuf->userdata); + if (unlikely(MBUF_USERDATA(mbuf, struct route6 *, MBUF_FIELD_ROUTE) != NULL)) { + RTE_LOG(WARNING, IPVS, "%s: TUNNEL have route %p ?\n", __func__, + MBUF_USERDATA(mbuf, struct route6 *, MBUF_FIELD_ROUTE)); + route6_put(MBUF_USERDATA(mbuf, struct route6 *, MBUF_FIELD_ROUTE)); } memset(&fl4, 0, sizeof(struct flow4)); @@ -2287,9 +2287,9 @@ static int __dp_vs_xmit_tunnel_6o4(struct dp_vs_proto *proto, dp_vs_conn_cache_rt(conn, rt, true); mtu = rt->mtu; - mbuf->userdata = rt; + MBUF_USERDATA(mbuf, struct route_entry *, MBUF_FIELD_ROUTE) = rt; - new_iph = (struct ipv4_hdr*)rte_pktmbuf_prepend(mbuf, sizeof(struct ipv4_hdr)); + new_iph = (struct rte_ipv4_hdr*)rte_pktmbuf_prepend(mbuf, sizeof(struct rte_ipv4_hdr)); if (!new_iph) { RTE_LOG(WARNING, IPVS, "%s: mbuf has not enough headroom" " space for ipvs tunnel\n", __func__); @@ -2304,11 +2304,11 @@ static int __dp_vs_xmit_tunnel_6o4(struct dp_vs_proto *proto, goto errout; } - memset(new_iph, 0, sizeof(struct ipv4_hdr)); + memset(new_iph, 0, sizeof(struct rte_ipv4_hdr)); new_iph->version_ihl = 0x45; new_iph->type_of_service = 0; new_iph->total_length = htons(mbuf->pkt_len); - new_iph->fragment_offset = htons(IPV4_HDR_DF_FLAG); + new_iph->fragment_offset = htons(RTE_IPV4_HDR_DF_FLAG); new_iph->time_to_live = old_ip6h->ip6_hlim; new_iph->next_proto_id = IPPROTO_IPV6; new_iph->src_addr = rt->src.s_addr; diff --git a/src/kni.c b/src/kni.c index 15b618904..cca74ac81 100644 --- a/src/kni.c +++ b/src/kni.c @@ -83,17 +83,17 @@ static void kni_fill_conf(const struct netif_port *dev, const char *ifname, } static int kni_mc_list_cmp_set(struct netif_port *dev, - struct ether_addr *addrs, size_t naddr) + struct rte_ether_addr *addrs, size_t naddr) { int err = EDPVS_INVAL, i, j; - struct ether_addr addrs_old[NETIF_MAX_HWADDR]; + struct rte_ether_addr 
addrs_old[NETIF_MAX_HWADDR]; size_t naddr_old; char mac[64]; struct mc_change_list { - size_t naddr; - struct ether_addr addrs[NETIF_MAX_HWADDR*2]; + size_t naddr; + struct rte_ether_addr addrs[NETIF_MAX_HWADDR*2]; /* state: 0 - unchanged, 1 - added, 2 deleted. */ - int states[NETIF_MAX_HWADDR*2]; + int states[NETIF_MAX_HWADDR*2]; } chg_lst = {0}; rte_rwlock_write_lock(&dev->dev_lock); @@ -116,7 +116,7 @@ static int kni_mc_list_cmp_set(struct netif_port *dev, /* add all addrs from netlink(linux) to change-list and * assume they're all new added by default. */ for (i = 0; i < naddr; i++) { - ether_addr_copy(&addrs[i], &chg_lst.addrs[i]); + rte_ether_addr_copy(&addrs[i], &chg_lst.addrs[i]); chg_lst.states[i] = 1; RTE_LOG(DEBUG, Kni, " new [%02d] %s\n", i, @@ -140,7 +140,7 @@ static int kni_mc_list_cmp_set(struct netif_port *dev, /* deleted */ assert(chg_lst.naddr < NETIF_MAX_HWADDR * 2); - ether_addr_copy(&addrs_old[i], &chg_lst.addrs[chg_lst.naddr]); + rte_ether_addr_copy(&addrs_old[i], &chg_lst.addrs[chg_lst.naddr]); chg_lst.states[chg_lst.naddr] = 2; chg_lst.naddr++; } @@ -188,7 +188,7 @@ static int kni_update_maddr(struct netif_port *dev) char line[1024]; int ifindex, users, st; /* @st for static */ char ifname[IFNAMSIZ], hexa[256]; /* hex address */ - struct ether_addr ma_list[NETIF_MAX_HWADDR]; + struct rte_ether_addr ma_list[NETIF_MAX_HWADDR]; int n_ma; fp = fopen("/proc/net/dev_mcast", "r"); @@ -370,7 +370,7 @@ int kni_add_dev(struct netif_port *dev, const char *kniname) err = linux_set_if_mac(conf.name, (unsigned char *)&dev->addr); if (err != EDPVS_OK) { char mac[18]; - ether_format_addr(mac, sizeof(mac), &dev->addr); + rte_ether_format_addr(mac, sizeof(mac), &dev->addr); RTE_LOG(WARNING, Kni, "%s: fail to set mac %s for %s: %s\n", __func__, mac, conf.name, strerror(errno)); } diff --git a/src/log.c b/src/log.c index 6e45964e7..e0b9dd2d7 100644 --- a/src/log.c +++ b/src/log.c @@ -34,7 +34,6 @@ lcoreid_t g_dpvs_log_core = 0; log_stats_t 
log_stats_info[DPVS_MAX_LCORE]; struct rte_ring *log_ring; bool g_dpvs_log_async_mode = 0; -extern struct rte_logs rte_logs; static struct rte_mempool *dp_vs_log_pool; static int log_pool_size = DPVS_LOG_POOL_SIZE_DEF; static int log_pool_cache = DPVS_LOG_CACHE_SIZE_DEF; @@ -194,7 +193,7 @@ int dpvs_log(uint32_t level, uint32_t logtype, const char *func, int line, const int len = 0; int off = g_dpvs_log_time_off; - if (level > rte_logs.level) + if (level > rte_log_get_global_level()) return -1; va_start(ap, format); @@ -261,7 +260,7 @@ static int log_slave_process(void) { struct dpvs_log *msg_log; int ret = EDPVS_OK; - FILE *f = rte_logs.file; + FILE *f = rte_log_get_stream(); /* dequeue LOG from ring, no lock for ring and w_buf */ while (0 == rte_ring_dequeue(log_ring, (void **)&msg_log)) { @@ -297,7 +296,7 @@ static void log_signal_handler(int signum) signum); } log_slave_process(); - log_buf_flush(rte_logs.file); + log_buf_flush(rte_log_get_stream()); signal(signum, SIG_DFL); kill(getpid(), signum); } @@ -306,14 +305,14 @@ static int __log_slave_init(void) { char ring_name[16]; int lcore_id; - FILE *f = rte_logs.file; + FILE *f = rte_log_get_stream(); char log_pool_name[32]; if (f != NULL) { g_dpvs_log_time_off = LOG_SYS_TIME_LEN; } - RTE_LCORE_FOREACH_SLAVE(lcore_id) { + RTE_LCORE_FOREACH_WORKER(lcore_id) { if (rte_eal_get_lcore_state(lcore_id) == FINISHED) { rte_eal_wait_lcore(lcore_id); dpvs_log_thread_lcore_set(lcore_id); diff --git a/src/main.c b/src/main.c index bff5caca7..627c4fe30 100644 --- a/src/main.c +++ b/src/main.c @@ -68,6 +68,8 @@ extern int log_slave_init(void); dpvs_scheduler_init, dpvs_scheduler_term), \ DPVS_MODULE(MODULE_GLOBAL_DATA, "global data", \ global_data_init, global_data_term), \ + DPVS_MODULE(MODULE_MBUF, "mbuf", \ + mbuf_init, NULL), \ DPVS_MODULE(MODULE_CFG, "config file", \ cfgfile_init, cfgfile_term), \ DPVS_MODULE(MODULE_PDUMP, "pdump", \ diff --git a/src/mbuf.c b/src/mbuf.c index a2f1b8e3a..99a911571 100644 --- a/src/mbuf.c 
+++ b/src/mbuf.c @@ -21,6 +21,7 @@ * it includes some mbuf related functions beyond dpdk mbuf API. */ #include <assert.h> +#include <rte_mbuf_dyn.h> #include "mbuf.h" #include "inet.h" #include "ipv4.h" @@ -29,6 +30,19 @@ #define EMBUF #define RTE_LOGTYPE_EMBUF RTE_LOGTYPE_USER1 +#define MBUF_DYNFIELDS_MAX 8 +static int mbuf_dynfields_offset[MBUF_DYNFIELDS_MAX]; + +void *mbuf_userdata(struct rte_mbuf *mbuf, mbuf_usedata_field_t field) +{ + return (void *)mbuf + mbuf_dynfields_offset[field]; +} + +void *mbuf_userdata_const(const struct rte_mbuf *mbuf, mbuf_usedata_field_t field) +{ + return (void *)mbuf + mbuf_dynfields_offset[field]; +} + /** * mbuf_may_pull - pull bits from segments to heading mbuf if needed. * see pskb_may_pull() && __pskb_pull_tail(). @@ -107,7 +121,7 @@ void mbuf_copy_metadata(struct rte_mbuf *mi, struct rte_mbuf *m) mi->nb_segs = 1; mi->ol_flags = m->ol_flags & (~IND_ATTACHED_MBUF); mi->packet_type = m->packet_type; - mi->userdata = NULL; + mbuf_userdata_reset(mi); __rte_mbuf_sanity_check(mi, 1); __rte_mbuf_sanity_check(m, 0); @@ -153,7 +167,7 @@ inline void dp_vs_mbuf_dump(const char *msg, int af, const struct rte_mbuf *mbuf { char stime[SYS_TIME_STR_LEN]; char sbuf[64], dbuf[64]; - struct ipv4_hdr *iph; + struct rte_ipv4_hdr *iph; union inet_addr saddr, daddr; __be16 _ports[2], *ports; @@ -176,3 +190,34 @@ inline void dp_vs_mbuf_dump(const char *msg, int af, const struct rte_mbuf *mbuf ntohs(ports[1])); } #endif + +int mbuf_init(void) +{ + int i, offset; + + const struct rte_mbuf_dynfield rte_mbuf_userdata_fields[] = { + [ MBUF_FIELD_PROTO ] = { + .name = "protocol", + .size = sizeof(mbuf_userdata_field_proto_t), + .align = 8, + }, + [ MBUF_FIELD_ROUTE ] = { + .name = "route", + .size = sizeof(mbuf_userdata_field_route_t), + .align = 8, + }, + }; + + for (i = 0; i < NELEMS(rte_mbuf_userdata_fields); i++) { + if (rte_mbuf_userdata_fields[i].size == 0) + continue; + offset = rte_mbuf_dynfield_register(&rte_mbuf_userdata_fields[i]); + 
if (offset < 0) { + RTE_LOG(ERR, MBUF, "fail to register dynfield[%d] in mbuf!\n", i); + return EDPVS_NOROOM; + } + mbuf_dynfields_offset[i] = offset; + } + + return EDPVS_OK; +} diff --git a/src/mempool.c b/src/mempool.c index e3d5bdac5..ca852b4bc 100644 --- a/src/mempool.c +++ b/src/mempool.c @@ -88,7 +88,7 @@ struct dpvs_mempool *dpvs_mempool_create(char *name, uint32_t obj_num; struct dpvs_mempool *mp; - if (rte_lcore_id() != rte_get_master_lcore()) { + if (rte_lcore_id() != rte_get_main_lcore()) { RTE_LOG(WARNING, DPVS_MPOOL, "%s could be called on master lcore only!", __func__); return NULL; } @@ -152,7 +152,7 @@ void dpvs_mempool_destroy(struct dpvs_mempool *mp) if (unlikely(!mp)) return; - if (rte_lcore_id() != rte_get_master_lcore()) { + if (rte_lcore_id() != rte_get_main_lcore()) { RTE_LOG(WARNING, DPVS_MPOOL, "%s could be called on master lcore only!", __func__); return; } diff --git a/src/neigh.c b/src/neigh.c index 35130e8bb..acd0287f4 100644 --- a/src/neigh.c +++ b/src/neigh.c @@ -51,12 +51,12 @@ struct neighbour_mbuf_entry { } __rte_cache_aligned; struct raw_neigh { - int af; - union inet_addr ip_addr; - struct ether_addr eth_addr; - struct netif_port *port; - bool add; - uint8_t flag; + int af; + union inet_addr ip_addr; + struct rte_ether_addr eth_addr; + struct netif_port * port; + bool add; + uint8_t flag; } __rte_cache_aligned; struct nud_state { @@ -152,7 +152,7 @@ static struct raw_neigh* neigh_ring_clone_entry(const struct neighbour_entry* ne static int neigh_send_arp(struct netif_port *port, uint32_t src_ip, uint32_t dst_ip); -static inline char *eth_addr_itoa(const struct ether_addr *src, char *dst, size_t size) +static inline char *eth_addr_itoa(const struct rte_ether_addr *src, char *dst, size_t size) { snprintf(dst, size, "%02x:%02x:%02x:%02x:%02x:%02x", src->addr_bytes[0], @@ -165,18 +165,18 @@ static inline char *eth_addr_itoa(const struct ether_addr *src, char *dst, size_ } #ifdef CONFIG_DPVS_NEIGH_DEBUG -static void 
dump_arp_hdr(const char *msg, const struct arp_hdr *ah, portid_t port) +static void dump_arp_hdr(const char *msg, const struct rte_arp_hdr *ah, portid_t port) { - const struct arp_ipv4 *aip4; + const struct rte_arp_ipv4 *aip4; char sha[18], tha[18]; char sip[16], tip[16]; lcoreid_t lcore; lcore = rte_lcore_id(); fprintf(stderr, "%s lcore %d port%d arp hlen %u plen %u op %u", - msg ? msg : "", lcore, port, ah->arp_hln, ah->arp_pln, ntohs(ah->arp_op)); + msg ? msg : "", lcore, port, ah->arp_hlen, ah->arp_plen, ntohs(ah->arp_opcode)); - if (ah->arp_pro == htons(ETHER_TYPE_IPv4)) { + if (ah->arp_protocol == htons(RTE_ETHER_TYPE_IPV4)) { aip4 = &ah->arp_data; eth_addr_itoa(&aip4->arp_sha, sha, sizeof(sha)); eth_addr_itoa(&aip4->arp_tha, tha, sizeof(tha)); @@ -347,7 +347,7 @@ struct neighbour_entry *neigh_lookup_entry(int af, const union inet_addr *key, return NULL; } -int neigh_edit(struct neighbour_entry *neighbour, struct ether_addr *eth_addr) +int neigh_edit(struct neighbour_entry *neighbour, struct rte_ether_addr *eth_addr) { rte_memcpy(&neighbour->eth_addr, eth_addr, 6); @@ -355,7 +355,7 @@ int neigh_edit(struct neighbour_entry *neighbour, struct ether_addr *eth_addr) } struct neighbour_entry *neigh_add_table(int af, const union inet_addr *ipaddr, - const struct ether_addr *eth_addr, + const struct rte_ether_addr *eth_addr, struct netif_port *port, unsigned int hashkey, int flag) { @@ -415,21 +415,21 @@ static void neigh_fill_mac(struct neighbour_entry *neighbour, const struct in6_addr *target, struct netif_port *port) { - struct ether_hdr *eth; - struct ether_addr mult_eth; + struct rte_ether_hdr *eth; + struct rte_ether_addr mult_eth; uint16_t pkt_type; - m->l2_len = sizeof(struct ether_hdr); - eth = (struct ether_hdr *)rte_pktmbuf_prepend(m, (uint16_t)sizeof(struct ether_hdr)); + m->l2_len = sizeof(struct rte_ether_hdr); + eth = (struct rte_ether_hdr *)rte_pktmbuf_prepend(m, (uint16_t)sizeof(struct rte_ether_hdr)); if (!neighbour && target) { 
ipv6_mac_mult(target, &mult_eth); - ether_addr_copy(&mult_eth, ð->d_addr); + rte_ether_addr_copy(&mult_eth, ð->d_addr); } else { - ether_addr_copy(&neighbour->eth_addr, ð->d_addr); + rte_ether_addr_copy(&neighbour->eth_addr, ð->d_addr); } - ether_addr_copy(&port->addr, ð->s_addr); + rte_ether_addr_copy(&port->addr, ð->s_addr); pkt_type = (uint16_t)m->packet_type; eth->ether_type = rte_cpu_to_be_16(pkt_type); } @@ -494,8 +494,8 @@ static void neigh_state_confirm(struct neighbour_entry *neighbour) int neigh_resolve_input(struct rte_mbuf *m, struct netif_port *port) { - struct arp_hdr *arp = rte_pktmbuf_mtod(m, struct arp_hdr *); - struct ether_hdr *eth; + struct rte_arp_hdr *arp = rte_pktmbuf_mtod(m, struct rte_arp_hdr *); + struct rte_ether_hdr *eth; uint32_t ipaddr; struct neighbour_entry *neighbour = NULL; unsigned int hashkey; @@ -506,27 +506,27 @@ int neigh_resolve_input(struct rte_mbuf *m, struct netif_port *port) return EDPVS_KNICONTINUE; inet_addr_ifa_put(ifa); - eth = (struct ether_hdr *)rte_pktmbuf_prepend(m, - (uint16_t)sizeof(struct ether_hdr)); + eth = (struct rte_ether_hdr *)rte_pktmbuf_prepend(m, + (uint16_t)sizeof(struct rte_ether_hdr)); - if (rte_be_to_cpu_16(arp->arp_op) == ARP_OP_REQUEST) { - ether_addr_copy(ð->s_addr, ð->d_addr); + if (rte_be_to_cpu_16(arp->arp_opcode) == RTE_ARP_OP_REQUEST) { + rte_ether_addr_copy(ð->s_addr, ð->d_addr); rte_memcpy(ð->s_addr, &port->addr, 6); - arp->arp_op = rte_cpu_to_be_16(ARP_OP_REPLY); + arp->arp_opcode = rte_cpu_to_be_16(RTE_ARP_OP_REPLY); - ether_addr_copy(&arp->arp_data.arp_sha, &arp->arp_data.arp_tha); - ether_addr_copy(ð->s_addr, &arp->arp_data.arp_sha); + rte_ether_addr_copy(&arp->arp_data.arp_sha, &arp->arp_data.arp_tha); + rte_ether_addr_copy(ð->s_addr, &arp->arp_data.arp_sha); ipaddr = arp->arp_data.arp_sip; arp->arp_data.arp_sip = arp->arp_data.arp_tip; arp->arp_data.arp_tip = ipaddr; - m->l2_len = sizeof(struct ether_hdr); - m->l3_len = sizeof(struct arp_hdr); + m->l2_len = sizeof(struct 
rte_ether_hdr); + m->l3_len = sizeof(struct rte_arp_hdr); netif_xmit(m, port); return EDPVS_OK; - } else if (arp->arp_op == htons(ARP_OP_REPLY)) { + } else if (arp->arp_opcode == htons(RTE_ARP_OP_REPLY)) { ipaddr = arp->arp_data.arp_sip; hashkey = neigh_hashkey(AF_INET, (union inet_addr *)&ipaddr, port); neighbour = neigh_lookup_entry(AF_INET, (union inet_addr *)&ipaddr, @@ -554,8 +554,8 @@ int neigh_resolve_input(struct rte_mbuf *m, struct netif_port *port) static int neigh_send_arp(struct netif_port *port, uint32_t src_ip, uint32_t dst_ip) { struct rte_mbuf *m; - struct ether_hdr *eth; - struct arp_hdr *arp; + struct rte_ether_hdr *eth; + struct rte_arp_hdr *arp; uint32_t addr; @@ -563,16 +563,16 @@ static int neigh_send_arp(struct netif_port *port, uint32_t src_ip, uint32_t dst if (unlikely(m == NULL)) { return EDPVS_NOMEM; } - m->userdata = NULL; + mbuf_userdata_reset(m); - eth = rte_pktmbuf_mtod(m, struct ether_hdr *); - arp = (struct arp_hdr *)ð[1]; + eth = rte_pktmbuf_mtod(m, struct rte_ether_hdr *); + arp = (struct rte_arp_hdr *)ð[1]; memset(ð->d_addr, 0xFF, 6); - ether_addr_copy(&port->addr, ð->s_addr); - eth->ether_type = htons(ETHER_TYPE_ARP); + rte_ether_addr_copy(&port->addr, ð->s_addr); + eth->ether_type = htons(RTE_ETHER_TYPE_ARP); - memset(arp, 0, sizeof(struct arp_hdr)); + memset(arp, 0, sizeof(struct rte_arp_hdr)); rte_memcpy(&arp->arp_data.arp_sha, &port->addr, 6); addr = src_ip; inetAddrCopy(&arp->arp_data.arp_sip, &addr); @@ -581,15 +581,15 @@ static int neigh_send_arp(struct netif_port *port, uint32_t src_ip, uint32_t dst addr = dst_ip; inetAddrCopy(&arp->arp_data.arp_tip, &addr); - arp->arp_hrd = htons(ARP_HRD_ETHER); - arp->arp_pro = htons(ETHER_TYPE_IPv4); - arp->arp_hln = 6; - arp->arp_pln = 4; - arp->arp_op = htons(ARP_OP_REQUEST); - m->pkt_len = 60; - m->data_len = 60; - m->l2_len = sizeof(struct ether_hdr); - m->l3_len = sizeof(struct arp_hdr); + arp->arp_hardware = htons(RTE_ARP_HRD_ETHER); + arp->arp_protocol = 
htons(RTE_ETHER_TYPE_IPV4); + arp->arp_hlen = 6; + arp->arp_plen = 4; + arp->arp_opcode = htons(RTE_ARP_OP_REQUEST); + m->pkt_len = 60; + m->data_len = 60; + m->l2_len = sizeof(struct rte_ether_hdr); + m->l3_len = sizeof(struct rte_arp_hdr); memset(&arp[1], 0, 18); @@ -735,7 +735,7 @@ int neigh_gratuitous_arp(struct in_addr *src_ip, struct netif_port *port) } static struct pkt_type arp_pkt_type = { - //.type = rte_cpu_to_be_16(ETHER_TYPE_ARP), + //.type = rte_cpu_to_be_16(RTE_ETHER_TYPE_ARP), .func = neigh_resolve_input, .port = NULL, }; @@ -874,7 +874,7 @@ static void neigh_fill_param(struct dp_vs_neigh_conf *param, param->af = entry->af; param->ip_addr = entry->ip_addr; param->flag = entry->flag; - ether_addr_copy(&entry->eth_addr, ¶m->eth_addr); + rte_ether_addr_copy(&entry->eth_addr, ¶m->eth_addr); param->que_num = entry->que_num; param->state = entry->state; param->cid = cid; @@ -1132,7 +1132,7 @@ static int arp_init(void) master_cid = rte_lcore_id(); - arp_pkt_type.type = rte_cpu_to_be_16(ETHER_TYPE_ARP); + arp_pkt_type.type = rte_cpu_to_be_16(RTE_ETHER_TYPE_ARP); if ((err = netif_register_pkt(&arp_pkt_type)) != EDPVS_OK) return err; if ((err = sockopt_register(&neigh_sockopts)) != EDPVS_OK) diff --git a/src/netif.c b/src/netif.c index 6ca922049..0e06f51e4 100644 --- a/src/netif.c +++ b/src/netif.c @@ -160,7 +160,7 @@ bool is_lcore_id_valid(lcoreid_t cid) if (unlikely(cid >= DPVS_MAX_LCORE)) return false; - return ((cid == rte_get_master_lcore()) || + return ((cid == rte_get_main_lcore()) || (cid == g_kni_lcore_id) || (g_slave_lcore_mask & (1L << cid)) || (g_isol_rx_lcore_mask & (1L << cid))); @@ -171,7 +171,7 @@ static bool is_lcore_id_fwd(lcoreid_t cid) if (unlikely(cid >= DPVS_MAX_LCORE)) return false; - return ((cid == rte_get_master_lcore()) || + return ((cid == rte_get_main_lcore()) || (g_slave_lcore_mask & (1L << cid))); } @@ -943,24 +943,24 @@ static void netif_cfgfile_term(void) #include static inline int parse_ether_hdr(struct rte_mbuf *mbuf, 
uint16_t port, uint16_t queue) { - struct ether_hdr *eth_hdr; + struct rte_ether_hdr *eth_hdr; char saddr[18], daddr[18]; - eth_hdr = rte_pktmbuf_mtod(mbuf, struct ether_hdr *); - ether_format_addr(saddr, sizeof(saddr), ð_hdr->s_addr); - ether_format_addr(daddr, sizeof(daddr), ð_hdr->d_addr); + eth_hdr = rte_pktmbuf_mtod(mbuf, struct rte_ether_hdr *); + rte_ether_format_addr(saddr, sizeof(saddr), ð_hdr->s_addr); + rte_ether_format_addr(daddr, sizeof(daddr), ð_hdr->d_addr); RTE_LOG(INFO, NETIF, "[%s] lcore=%u port=%u queue=%u ethtype=%0x saddr=%s daddr=%s\n", __func__, rte_lcore_id(), port, queue, rte_be_to_cpu_16(eth_hdr->ether_type), saddr, daddr); return EDPVS_OK; } -static inline int is_ipv4_pkt_valid(struct ipv4_hdr *iph, uint32_t link_len) +static inline int is_ipv4_pkt_valid(struct rte_ipv4_hdr *iph, uint32_t link_len) { if (((iph->version_ihl) >> 4) != 4) return EDPVS_INVAL; if ((iph->version_ihl & 0xf) < 5) return EDPVS_INVAL; - if (rte_cpu_to_be_16(iph->total_length) < sizeof(struct ipv4_hdr)) + if (rte_cpu_to_be_16(iph->total_length) < sizeof(struct rte_ipv4_hdr)) return EDPVS_INVAL; return EDPVS_OK; } @@ -969,14 +969,14 @@ static void parse_ipv4_hdr(struct rte_mbuf *mbuf, uint16_t port, uint16_t queue) { char saddr[16], daddr[16]; uint16_t lcore; - struct ipv4_hdr *iph; - struct udp_hdr *uh; + struct rte_ipv4_hdr *iph; + struct rte_udp_hdr *uh; - iph = rte_pktmbuf_mtod_offset(mbuf, struct ipv4_hdr *, sizeof(struct ether_hdr)); + iph = rte_pktmbuf_mtod_offset(mbuf, struct rte_ipv4_hdr *, sizeof(struct rte_ether_hdr)); if (is_ipv4_pkt_valid(iph, mbuf->pkt_len) < 0) return; - uh = rte_pktmbuf_mtod_offset(mbuf, struct udp_hdr *, sizeof(struct ether_hdr) + - (IPV4_HDR_IHL_MASK & iph->version_ihl) * sizeof(uint32_t)); + uh = rte_pktmbuf_mtod_offset(mbuf, struct rte_udp_hdr *, sizeof(struct rte_ether_hdr) + + (RTE_IPV4_HDR_IHL_MASK & iph->version_ihl) * sizeof(uint32_t)); lcore = rte_lcore_id(); if (!inet_ntop(AF_INET, &iph->src_addr, saddr, sizeof(saddr))) @@ 
-986,7 +986,7 @@ static void parse_ipv4_hdr(struct rte_mbuf *mbuf, uint16_t port, uint16_t queue) RTE_LOG(INFO, NETIF, "[%s] lcore=%u port=%u queue=%u ipv4_hl=%u tos=%u tot=%u " "id=%u ttl=%u prot=%u src=%s dst=%s sport=%04x|%u dport=%04x|%u\n", - __func__, lcore, port, queue, IPV4_HDR_IHL_MASK & iph->version_ihl, + __func__, lcore, port, queue, RTE_IPV4_HDR_IHL_MASK & iph->version_ihl, iph->type_of_service, ntohs(iph->total_length), ntohs(iph->packet_id), iph->time_to_live, iph->next_proto_id, saddr, daddr, uh->src_port, ntohs(uh->src_port), uh->dst_port, ntohs(uh->dst_port)); @@ -995,12 +995,12 @@ static void parse_ipv4_hdr(struct rte_mbuf *mbuf, uint16_t port, uint16_t queue) __rte_unused static void pkt_send_back(struct rte_mbuf *mbuf, struct netif_port *port) { - struct ether_hdr *ehdr; - struct ether_addr eaddr; - ehdr = rte_pktmbuf_mtod(mbuf, struct ether_hdr*); - ether_addr_copy(&ehdr->s_addr, &eaddr); - ether_addr_copy(&ehdr->d_addr, &ehdr->s_addr); - ether_addr_copy(&eaddr, &ehdr->d_addr); + struct rte_ether_hdr *ehdr; + struct rte_ether_addr eaddr; + ehdr = rte_pktmbuf_mtod(mbuf, struct rte_ether_hdr*); + rte_ether_addr_copy(&ehdr->s_addr, &eaddr); + rte_ether_addr_copy(&ehdr->d_addr, &ehdr->s_addr); + rte_ether_addr_copy(&eaddr, &ehdr->d_addr); netif_xmit(mbuf, port); } #endif @@ -1307,7 +1307,7 @@ static void build_lcore_index(void) { int i, idx = 0; - g_lcore_index[idx++] = rte_get_master_lcore(); + g_lcore_index[idx++] = rte_get_main_lcore(); for (i = 0; i < DPVS_MAX_LCORE; i++) if (g_lcore_role[i] == LCORE_ROLE_FWD_WORKER) @@ -1329,7 +1329,7 @@ static void lcore_role_init(void) /* invalidate the disabled cores */ g_lcore_role[cid] = LCORE_ROLE_MAX; - cid = rte_get_master_lcore(); + cid = rte_get_main_lcore(); assert(g_lcore_role[cid] == LCORE_ROLE_IDLE); g_lcore_role[cid] = LCORE_ROLE_MASTER; @@ -1717,7 +1717,7 @@ static int build_port_queue_lcore_map(void) dev = netif_port_get(pid); if (dev) { - ether_format_addr(pql_map[pid].mac_addr, + 
rte_ether_format_addr(pql_map[pid].mac_addr, sizeof(pql_map[pid].mac_addr), &dev->addr); } } @@ -1877,7 +1877,7 @@ int netif_print_lcore_queue_conf(lcoreid_t cid, char *buf, int *len, bool has_ti if (unlikely(!buf || !len || *len <= 0)) return EDPVS_INVAL; - if (unlikely(rte_get_master_lcore() == cid)) { + if (unlikely(rte_get_main_lcore() == cid)) { buf[0] = '\0'; *len = 0; return EDPVS_OK; @@ -2127,9 +2127,9 @@ int netif_hard_xmit(struct rte_mbuf *mbuf, struct netif_port *dev) cid = rte_lcore_id(); if (likely(mbuf->ol_flags & PKT_TX_IP_CKSUM)) - mbuf->l2_len = sizeof(struct ether_hdr); + mbuf->l2_len = sizeof(struct rte_ether_hdr); - if (rte_get_master_lcore() == cid) { // master thread + if (rte_get_main_lcore() == cid) { // master thread struct dpvs_msg *msg; struct master_xmit_msg_data msg_data; @@ -2140,7 +2140,7 @@ int netif_hard_xmit(struct rte_mbuf *mbuf, struct netif_port *dev) msg_data.mbuf = mbuf; msg_data.dev = dev; - msg = msg_make(MSG_TYPE_MASTER_XMIT, 0, DPVS_MSG_UNICAST, rte_get_master_lcore(), + msg = msg_make(MSG_TYPE_MASTER_XMIT, 0, DPVS_MSG_UNICAST, rte_get_main_lcore(), sizeof(struct master_xmit_msg_data), &msg_data); if (unlikely(NULL == msg)) { rte_pktmbuf_free(mbuf); @@ -2165,9 +2165,9 @@ int netif_hard_xmit(struct rte_mbuf *mbuf, struct netif_port *dev) /* port id is determined by routing */ pid = dev->id; /* qindex is hashed by physical address of mbuf */ - qindex = (((uint32_t) mbuf->buf_physaddr) >> 8) % + qindex = (((uint32_t) mbuf->buf_iova) >> 8) % (lcore_conf[lcore2index[cid]].pqs[port2index[cid][pid]].ntxq); - //RTE_LOG(DEBUG, NETIF, "tx-queue hash(%x) = %d\n", ((uint32_t)mbuf->buf_physaddr) >> 8, qindex); + //RTE_LOG(DEBUG, NETIF, "tx-queue hash(%x) = %d\n", ((uint32_t)mbuf->buf_iova) >> 8, qindex); txq = &lcore_conf[lcore2index[cid]].pqs[port2index[cid][pid]].txqs[qindex]; /* No space left in txq mbufs, transmit cached mbufs immediately */ @@ -2212,14 +2212,14 @@ int netif_xmit(struct rte_mbuf *mbuf, struct netif_port *dev) 
return netif_hard_xmit(mbuf, dev); } -static inline eth_type_t eth_type_parse(const struct ether_hdr *eth_hdr, +static inline eth_type_t eth_type_parse(const struct rte_ether_hdr *eth_hdr, const struct netif_port *dev) { if (eth_addr_equal(&dev->addr, ð_hdr->d_addr)) return ETH_PKT_HOST; - if (is_multicast_ether_addr(ð_hdr->d_addr)) { - if (is_broadcast_ether_addr(ð_hdr->d_addr)) + if (rte_is_multicast_ether_addr(ð_hdr->d_addr)) { + if (rte_is_broadcast_ether_addr(ð_hdr->d_addr)) return ETH_PKT_BROADCAST; else return ETH_PKT_MULTICAST; @@ -2246,12 +2246,12 @@ static int netif_deliver_mbuf(struct netif_port *dev, lcoreid_t cid, struct rte_mbuf *mbuf, bool pkts_from_ring) { int ret = EDPVS_OK; - struct ether_hdr *eth_hdr; + struct rte_ether_hdr *eth_hdr; assert(mbuf->port <= NETIF_MAX_PORTS); assert(dev != NULL); - eth_hdr = rte_pktmbuf_mtod(mbuf, struct ether_hdr *); + eth_hdr = rte_pktmbuf_mtod(mbuf, struct rte_ether_hdr *); /* reuse mbuf.packet_type, it was RTE_PTYPE_XXX */ mbuf->packet_type = eth_type_parse(eth_hdr, dev); @@ -2281,13 +2281,13 @@ static int netif_deliver_mbuf(struct netif_port *dev, lcoreid_t cid, int netif_rcv_mbuf(struct netif_port *dev, lcoreid_t cid, struct rte_mbuf *mbuf, bool pkts_from_ring) { - struct ether_hdr *eth_hdr; + struct rte_ether_hdr *eth_hdr; struct pkt_type *pt; int err; uint16_t data_off; bool forward2kni; - eth_hdr = rte_pktmbuf_mtod(mbuf, struct ether_hdr *); + eth_hdr = rte_pktmbuf_mtod(mbuf, struct rte_ether_hdr *); /* * do not drop pkt to other hosts (ETH_PKT_OTHERHOST) * since virtual devices may have different MAC with @@ -2306,7 +2306,7 @@ int netif_rcv_mbuf(struct netif_port *dev, lcoreid_t cid, struct rte_mbuf *mbuf, dev = netif_port_get(mbuf->port); if (unlikely(!dev)) goto drop; - eth_hdr = rte_pktmbuf_mtod(mbuf, struct ether_hdr *); + eth_hdr = rte_pktmbuf_mtod(mbuf, struct rte_ether_hdr *); } forward2kni = (dev->flag & NETIF_PORT_FLAG_FORWARD2KNI) ? 
true : false; @@ -2320,16 +2320,16 @@ int netif_rcv_mbuf(struct netif_port *dev, lcoreid_t cid, struct rte_mbuf *mbuf, } /* clone arp pkt to every queue */ - if (unlikely(pt->type == rte_cpu_to_be_16(ETHER_TYPE_ARP) && !pkts_from_ring)) { + if (unlikely(pt->type == rte_cpu_to_be_16(RTE_ETHER_TYPE_ARP) && !pkts_from_ring)) { uint8_t i; - struct arp_hdr *arp; + struct rte_arp_hdr *arp; struct rte_mbuf *mbuf_clone; - arp = rte_pktmbuf_mtod_offset(mbuf, struct arp_hdr *, sizeof(struct ether_hdr)); - if (rte_be_to_cpu_16(arp->arp_op) == ARP_OP_REPLY) { + arp = rte_pktmbuf_mtod_offset(mbuf, struct rte_arp_hdr *, sizeof(struct rte_ether_hdr)); + if (rte_be_to_cpu_16(arp->arp_opcode) == RTE_ARP_OP_REPLY) { for (i = 0; i < DPVS_MAX_LCORE; i++) { if ((i == cid) || (!is_lcore_id_fwd(i)) - || (i == rte_get_master_lcore())) + || (i == rte_get_main_lcore())) continue; /* rte_pktmbuf_clone will not clone pkt.data, just copy pointer! */ mbuf_clone = rte_pktmbuf_clone(mbuf, pktmbuf_pool[rte_socket_id()]); @@ -2351,11 +2351,11 @@ int netif_rcv_mbuf(struct netif_port *dev, lcoreid_t cid, struct rte_mbuf *mbuf, } } - mbuf->l2_len = sizeof(struct ether_hdr); + mbuf->l2_len = sizeof(struct rte_ether_hdr); /* Remove ether_hdr at the beginning of an mbuf */ data_off = mbuf->data_off; - if (unlikely(NULL == rte_pktmbuf_adj(mbuf, sizeof(struct ether_hdr)))) + if (unlikely(NULL == rte_pktmbuf_adj(mbuf, sizeof(struct rte_ether_hdr)))) goto drop; err = pt->func(mbuf, dev); @@ -2632,7 +2632,7 @@ static int update_bond_macaddr(struct netif_port *port) if (kni_dev_exist(port)) { ret = linux_set_if_mac(port->kni.name, (unsigned char *)&port->addr); if (ret == EDPVS_OK) - ether_addr_copy(&port->addr, &port->kni.addr); + rte_ether_addr_copy(&port->addr, &port->kni.addr); } return ret; @@ -2868,7 +2868,7 @@ struct netif_port *netif_alloc(size_t priv_size, const char *namefmt, snprintf(dev->name, sizeof(dev->name), "%s", namefmt); dev->socket = SOCKET_ID_ANY; - dev->hw_header_len = sizeof(struct 
ether_hdr); /* default */ + dev->hw_header_len = sizeof(struct rte_ether_hdr); /* default */ if (setup) setup(dev); @@ -2947,7 +2947,7 @@ static int bond_set_mc_list(struct netif_port *dev) static int dpdk_set_mc_list(struct netif_port *dev) { - struct ether_addr addrs[NETIF_MAX_HWADDR]; + struct rte_ether_addr addrs[NETIF_MAX_HWADDR]; int err; int ret; size_t naddr = NELEMS(addrs); @@ -3065,7 +3065,7 @@ static struct netif_port* netif_rte_port_alloc(portid_t id, int nrxq, port->nrxq = nrxq; // update after port_rx_queues_get(); port->ntxq = ntxq; // update after port_tx_queues_get(); port->socket = rte_eth_dev_socket_id(id); - port->hw_header_len = sizeof(struct ether_hdr); + port->hw_header_len = sizeof(struct rte_ether_hdr); if (port->socket == SOCKET_ID_ANY) port->socket = rte_socket_id(); port->mbuf_pool = pktmbuf_pool[port->socket]; @@ -3609,8 +3609,8 @@ int netif_port_start(struct netif_port *port) /* add in6_addr multicast address */ int cid = 0; - rte_eal_mp_remote_launch(idev_add_mcast_init, port, CALL_MASTER); - RTE_LCORE_FOREACH_SLAVE(cid) { + rte_eal_mp_remote_launch(idev_add_mcast_init, port, CALL_MAIN); + RTE_LCORE_FOREACH_WORKER(cid) { if ((ret = rte_eal_wait_lcore(cid)) < 0) { RTE_LOG(WARNING, NETIF, "%s: lcore %d: multicast address add failed for device %s\n", __func__, cid, port->name); @@ -4058,7 +4058,7 @@ int netif_init(void) netif_port_init(); netif_lcore_init(); - g_master_lcore_id = rte_get_master_lcore(); + g_master_lcore_id = rte_get_main_lcore(); netif_get_slave_lcores(&g_slave_lcore_num, &g_slave_lcore_mask); netif_get_isol_rx_lcores(&g_isol_rx_lcore_num, &g_isol_rx_lcore_mask); @@ -4333,7 +4333,7 @@ static int get_port_basic(struct netif_port *port, void **out, size_t *out_len) strncpy(get->name, port->name, sizeof(get->name)); get->nrxq = port->nrxq; get->ntxq = port->ntxq; - ether_format_addr(get->addr, sizeof(get->addr), &port->addr); + rte_ether_format_addr(get->addr, sizeof(get->addr), &port->addr); get->socket_id = port->socket; 
get->mtu = port->mtu; @@ -4606,10 +4606,10 @@ static int get_bond_status(struct netif_port *port, void **out, size_t *out_len) get->slaves[i].is_active = 1; if (slaves[i] == primary) get->slaves[i].is_primary = 1; - ether_format_addr(&get->slaves[i].macaddr[0], sizeof(get->slaves[i].macaddr) - 1, &sport->addr); + rte_ether_format_addr(&get->slaves[i].macaddr[0], sizeof(get->slaves[i].macaddr) - 1, &sport->addr); } - ether_format_addr(get->macaddr, sizeof(get->macaddr), &mport->addr); + rte_ether_format_addr(get->macaddr, sizeof(get->macaddr), &mport->addr); xmit_policy = rte_eth_bond_xmit_policy_get(port->id); switch (xmit_policy) { @@ -4730,7 +4730,7 @@ static int set_lcore(const netif_lcore_set_t *lcore_cfg) static int set_port(struct netif_port *port, const netif_nic_set_t *port_cfg) { - struct ether_addr ea; + struct rte_ether_addr ea; assert(port_cfg); if (port_cfg->promisc_on) { @@ -4793,7 +4793,7 @@ static int set_port(struct netif_port *port, const netif_nic_set_t *port_cfg) (unsigned *)&ea.addr_bytes[3], (unsigned *)&ea.addr_bytes[4], (unsigned *)&ea.addr_bytes[5]); - if (is_valid_assigned_ether_addr(&ea)) { + if (rte_is_valid_assigned_ether_addr(&ea)) { if (port->type == PORT_TYPE_BOND_MASTER) { if (rte_eth_bond_mac_address_set(port->id, &ea) < 0) { RTE_LOG(WARNING, NETIF, "fail to set %s's macaddr to be %s\n", diff --git a/src/netif_addr.c b/src/netif_addr.c index 4bc6c66ed..da93f2e7e 100644 --- a/src/netif_addr.c +++ b/src/netif_addr.c @@ -26,7 +26,7 @@ #include "kni.h" static int __netif_hw_addr_add(struct netif_hw_addr_list *list, - const struct ether_addr *addr) + const struct rte_ether_addr *addr) { struct netif_hw_addr *ha; @@ -41,7 +41,7 @@ static int __netif_hw_addr_add(struct netif_hw_addr_list *list, if (!ha) return EDPVS_NOMEM; - ether_addr_copy(addr, &ha->addr); + rte_ether_addr_copy(addr, &ha->addr); rte_atomic32_set(&ha->refcnt, 1); ha->sync_cnt = 0; list_add_tail(&ha->list, &list->addrs); @@ -51,7 +51,7 @@ static int 
__netif_hw_addr_add(struct netif_hw_addr_list *list, } static int __netif_hw_addr_del(struct netif_hw_addr_list *list, - const struct ether_addr *addr) + const struct rte_ether_addr *addr) { struct netif_hw_addr *ha, *n; @@ -210,17 +210,17 @@ static int __netif_hw_addr_unsync_multiple(struct netif_hw_addr_list *to, return EDPVS_INVAL; } -int __netif_mc_add(struct netif_port *dev, const struct ether_addr *addr) +int __netif_mc_add(struct netif_port *dev, const struct rte_ether_addr *addr) { return __netif_hw_addr_add(&dev->mc, addr); } -int __netif_mc_del(struct netif_port *dev, const struct ether_addr *addr) +int __netif_mc_del(struct netif_port *dev, const struct rte_ether_addr *addr) { return __netif_hw_addr_del(&dev->mc, addr); } -int netif_mc_add(struct netif_port *dev, const struct ether_addr *addr) +int netif_mc_add(struct netif_port *dev, const struct rte_ether_addr *addr) { int err; @@ -233,7 +233,7 @@ int netif_mc_add(struct netif_port *dev, const struct ether_addr *addr) return err; } -int netif_mc_del(struct netif_port *dev, const struct ether_addr *addr) +int netif_mc_del(struct netif_port *dev, const struct rte_ether_addr *addr) { int err; @@ -272,7 +272,7 @@ void netif_mc_init(struct netif_port *dev) } int __netif_mc_dump(struct netif_port *dev, - struct ether_addr *addrs, size_t *naddr) + struct rte_ether_addr *addrs, size_t *naddr) { struct netif_hw_addr *ha; int off = 0; @@ -281,14 +281,14 @@ int __netif_mc_dump(struct netif_port *dev, return EDPVS_NOROOM; list_for_each_entry(ha, &dev->mc.addrs, list) - ether_addr_copy(&ha->addr, &addrs[off++]); + rte_ether_addr_copy(&ha->addr, &addrs[off++]); *naddr = off; return EDPVS_OK; } int netif_mc_dump(struct netif_port *dev, - struct ether_addr *addrs, size_t *naddr) + struct rte_ether_addr *addrs, size_t *naddr) { int err; @@ -302,7 +302,7 @@ int netif_mc_dump(struct netif_port *dev, int __netif_mc_print(struct netif_port *dev, char *buf, int *len, int *pnaddr) { - struct ether_addr 
addrs[NETIF_MAX_HWADDR]; + struct rte_ether_addr addrs[NETIF_MAX_HWADDR]; size_t naddr = NELEMS(addrs); int err, i; int strlen = 0; diff --git a/src/pdump.c b/src/pdump.c index 9fecdec33..4587c8eac 100644 --- a/src/pdump.c +++ b/src/pdump.c @@ -33,7 +33,7 @@ int pdump_init(void) #ifdef CONFIG_DPVS_PDUMP if (g_dpvs_pdump) { /* initialize packet capture framework */ - err = rte_pdump_init(NULL); + err = rte_pdump_init(); } #endif diff --git a/src/route.c b/src/route.c index 81d704792..ed0321466 100644 --- a/src/route.c +++ b/src/route.c @@ -345,7 +345,7 @@ static int route_add_del(bool add, struct in_addr* dest, struct dpvs_msg *msg; struct dp_vs_route_conf cf; - if (cid != rte_get_master_lcore()) { + if (cid != rte_get_main_lcore()) { RTE_LOG(INFO, ROUTE, "[%s] must set from master lcore\n", __func__); return EDPVS_NOTSUPP; } @@ -759,8 +759,8 @@ int route_init(void) rte_atomic32_set(&this_num_routes, 0); rte_atomic32_set(&this_num_out_routes, 0); /* master core also need routes */ - rte_eal_mp_remote_launch(route_lcore_init, NULL, CALL_MASTER); - RTE_LCORE_FOREACH_SLAVE(cid) { + rte_eal_mp_remote_launch(route_lcore_init, NULL, CALL_MAIN); + RTE_LCORE_FOREACH_WORKER(cid) { if ((err = rte_eal_wait_lcore(cid)) < 0) { RTE_LOG(WARNING, ROUTE, "%s: lcore %d: %s.\n", __func__, cid, dpvs_strerror(err)); @@ -806,8 +806,8 @@ int route_term(void) if ((err = sockopt_unregister(&route_sockopts)) != EDPVS_OK) return err; - rte_eal_mp_remote_launch(route_lcore_term, NULL, CALL_MASTER); - RTE_LCORE_FOREACH_SLAVE(cid) { + rte_eal_mp_remote_launch(route_lcore_term, NULL, CALL_MAIN); + RTE_LCORE_FOREACH_WORKER(cid) { if ((err = rte_eal_wait_lcore(cid)) < 0) { RTE_LOG(WARNING, ROUTE, "%s: lcore %d: %s.\n", __func__, cid, dpvs_strerror(err)); diff --git a/src/sa_pool.c b/src/sa_pool.c index 366f8d196..1495c2f56 100644 --- a/src/sa_pool.c +++ b/src/sa_pool.c @@ -201,7 +201,7 @@ int sa_pool_create(struct inet_ifaddr *ifa, uint16_t low, uint16_t high) lcoreid_t cid = rte_lcore_id(); if 
(cid > 64 || !((sa_lcore_mask & (1UL << cid)))) { - if (cid == rte_get_master_lcore()) + if (cid == rte_get_main_lcore()) return EDPVS_OK; /* no sapool on master */ return EDPVS_INVAL; } @@ -268,7 +268,7 @@ int sa_pool_destroy(struct inet_ifaddr *ifa) lcoreid_t cid = rte_lcore_id(); if (cid > 64 || !((sa_lcore_mask & (1UL << cid)))) { - if (cid == rte_get_master_lcore()) + if (cid == rte_get_main_lcore()) return EDPVS_OK; return EDPVS_INVAL; } @@ -729,7 +729,7 @@ int sa_pool_init(void) for (cid = 0; cid < DPVS_MAX_LCORE; cid++) { if (cid >= 64 || !(sa_lcore_mask & (1L << cid))) continue; - assert(rte_lcore_is_enabled(cid) && cid != rte_get_master_lcore()); + assert(rte_lcore_is_enabled(cid) && cid != rte_get_main_lcore()); sa_flows[cid].mask = ~((~0x0) << shift); sa_flows[cid].lcore = cid; diff --git a/src/scheduler.c b/src/scheduler.c index 1e8cb7444..d446f378b 100644 --- a/src/scheduler.c +++ b/src/scheduler.c @@ -243,5 +243,5 @@ int dpvs_lcore_start(int is_master) { if (is_master) return dpvs_job_loop(NULL); - return rte_eal_mp_remote_launch(dpvs_job_loop, NULL, SKIP_MASTER); + return rte_eal_mp_remote_launch(dpvs_job_loop, NULL, SKIP_MAIN); } diff --git a/src/tc/cls_match.c b/src/tc/cls_match.c index 0a2772769..267f063f9 100644 --- a/src/tc/cls_match.c +++ b/src/tc/cls_match.c @@ -49,7 +49,7 @@ static int match_classify(struct tc_cls *cls, struct rte_mbuf *mbuf, { struct match_cls_priv *priv = tc_cls_priv(cls); struct dp_vs_match *m = &priv->match; - struct ether_hdr *eh = rte_pktmbuf_mtod(mbuf, struct ether_hdr *); + struct rte_ether_hdr *eh = rte_pktmbuf_mtod(mbuf, struct rte_ether_hdr *); struct iphdr *iph = NULL; struct ip6_hdr *ip6h = NULL; struct tcphdr *th; diff --git a/src/tc/sch_pfifo_fast.c b/src/tc/sch_pfifo_fast.c index 855bbfebd..26759aba4 100644 --- a/src/tc/sch_pfifo_fast.c +++ b/src/tc/sch_pfifo_fast.c @@ -60,7 +60,7 @@ static int pfifo_fast_enqueue(struct Qsch *sch, struct rte_mbuf *mbuf) struct pfifo_fast_priv *priv; struct tc_mbuf_head *qh; - 
struct ether_hdr *eh = rte_pktmbuf_mtod(mbuf, struct ether_hdr *); + struct rte_ether_hdr *eh = rte_pktmbuf_mtod(mbuf, struct rte_ether_hdr *); struct iphdr *iph = NULL; struct ip6_hdr *ip6h = NULL; struct vlan_ethhdr *veh; diff --git a/src/tc/tc.c b/src/tc/tc.c index 3d295bee5..3fd61e0bb 100644 --- a/src/tc/tc.c +++ b/src/tc/tc.c @@ -128,7 +128,7 @@ struct rte_mbuf *tc_hook(struct netif_tc *tc, struct rte_mbuf *mbuf, if (flags & QSCH_F_INGRESS) { sch = tc->qsch_ingress; /* mbuf->packet_type was not set by DPVS for ingress */ - pkt_type = rte_pktmbuf_mtod(mbuf, struct ether_hdr *)->ether_type; + pkt_type = rte_pktmbuf_mtod(mbuf, struct rte_ether_hdr *)->ether_type; } else { sch = tc->qsch; pkt_type = rte_cpu_to_be_16(mbuf->packet_type); diff --git a/src/timer.c b/src/timer.c index a7a195db4..26d744f93 100644 --- a/src/timer.c +++ b/src/timer.c @@ -427,8 +427,8 @@ int dpvs_timer_init(void) int err; /* per-lcore timer */ - rte_eal_mp_remote_launch(timer_lcore_init, NULL, SKIP_MASTER); - RTE_LCORE_FOREACH_SLAVE(cid) { + rte_eal_mp_remote_launch(timer_lcore_init, NULL, SKIP_MAIN); + RTE_LCORE_FOREACH_WORKER(cid) { err = rte_eal_wait_lcore(cid); if (err < 0) { RTE_LOG(ERR, DTIMER, "%s: lcore %d: %s.\n", @@ -438,7 +438,7 @@ int dpvs_timer_init(void) } /* global timer */ - return timer_init_schedler(&g_timer_sched, rte_get_master_lcore()); + return timer_init_schedler(&g_timer_sched, rte_get_main_lcore()); } int dpvs_timer_term(void) @@ -447,8 +447,8 @@ int dpvs_timer_term(void) int err; /* per-lcore timer */ - rte_eal_mp_remote_launch(timer_lcore_term, NULL, SKIP_MASTER); - RTE_LCORE_FOREACH_SLAVE(cid) { + rte_eal_mp_remote_launch(timer_lcore_term, NULL, SKIP_MAIN); + RTE_LCORE_FOREACH_WORKER(cid) { err = rte_eal_wait_lcore(cid); if (err < 0) { RTE_LOG(WARNING, DTIMER, "%s: lcore %d: %s.\n", @@ -464,7 +464,7 @@ static inline struct timer_scheduler *this_lcore_sched(bool global) { /* any lcore (including master and slaves) can use global timer, * but only slave lcores can 
use per-lcore timer. */ - if (!global && rte_lcore_id() == rte_get_master_lcore()) { + if (!global && rte_lcore_id() == rte_get_main_lcore()) { RTE_LOG(ERR, DTIMER, "try get per-lcore timer from master\n"); return NULL; } diff --git a/src/vlan.c b/src/vlan.c index 1f312fab5..46b44679c 100644 --- a/src/vlan.c +++ b/src/vlan.c @@ -78,7 +78,7 @@ static int alloc_vlan_info(struct netif_port *dev) static int vlan_xmit(struct rte_mbuf *mbuf, struct netif_port *dev) { struct vlan_dev_priv *vlan = netif_priv(dev); - struct ether_hdr *ethhdr = rte_pktmbuf_mtod(mbuf, struct ether_hdr *); + struct rte_ether_hdr *ethhdr = rte_pktmbuf_mtod(mbuf, struct rte_ether_hdr *); unsigned int len; int err; @@ -175,7 +175,7 @@ static void vlan_setup(struct netif_port *dev) { dev->netif_ops = &vlan_netif_ops; dev->mtu = VLAN_ETH_DATA_LEN; - dev->hw_header_len = sizeof(struct ether_hdr) + VLAN_HLEN; + dev->hw_header_len = sizeof(struct rte_ether_hdr) + VLAN_HLEN; } /* @ifname is optional or vlan dev name will be auto generated. */ @@ -233,7 +233,7 @@ int vlan_add_dev(struct netif_port *real_dev, const char *ifname, dev->flag &= ~NETIF_PORT_FLAG_TX_TCP_CSUM_OFFLOAD; dev->flag &= ~NETIF_PORT_FLAG_TX_UDP_CSUM_OFFLOAD; dev->type = PORT_TYPE_VLAN; - ether_addr_copy(&real_dev->addr, &dev->addr); + rte_ether_addr_copy(&real_dev->addr, &dev->addr); vlan = netif_priv(dev); memset(vlan, 0, sizeof(*vlan)); @@ -358,8 +358,8 @@ static inline int vlan_untag_mbuf(struct rte_mbuf *mbuf) if (mbuf->ol_flags & PKT_RX_VLAN_STRIPPED) return EDPVS_OK; - if (unlikely(mbuf_may_pull(mbuf, sizeof(struct ether_hdr) + \ - sizeof(struct vlan_hdr)) != 0)) + if (unlikely(mbuf_may_pull(mbuf, sizeof(struct rte_ether_hdr) + \ + sizeof(struct rte_vlan_hdr)) != 0)) return EDPVS_INVPKT; /* the data_off of mbuf is still at ethernet header. 
*/ @@ -384,7 +384,7 @@ int vlan_rcv(struct rte_mbuf *mbuf, struct netif_port *real_dev) { struct netif_port *dev; struct vlan_dev_priv *vlan; - struct ether_hdr *ehdr = rte_pktmbuf_mtod(mbuf, struct ether_hdr *); + struct rte_ether_hdr *ehdr = rte_pktmbuf_mtod(mbuf, struct rte_ether_hdr *); int err; err = vlan_untag_mbuf(mbuf); diff --git a/tools/dpip/eal_mem.c b/tools/dpip/eal_mem.c index 3331eef92..e861a02a7 100644 --- a/tools/dpip/eal_mem.c +++ b/tools/dpip/eal_mem.c @@ -37,13 +37,13 @@ static void list_eal_mem_seg_info(eal_all_mem_seg_ret_t *all_eal_mem_seg_ret) int i = 0; printf("%-10s %16s %16s %20s %20s %10s %10s %20s\n", - "socket_id", "phys_addr(Hex)", "virt_addr(Hex)", "len(KB)", + "socket_id", "iova(Hex)", "virt_addr(Hex)", "len(KB)", "hugepage_size(KB)","nchannel", "nrank", "free_len(KB)"); for (i = 0; i < all_eal_mem_seg_ret->seg_num; i++) { seg_ret = &all_eal_mem_seg_ret->seg_info[i]; printf("%-10d %16lx %16lx %20lu %20lu %10u %10u %20lu\n", - seg_ret->socket_id, seg_ret->phys_addr, seg_ret->virt_addr, + seg_ret->socket_id, seg_ret->iova, seg_ret->virt_addr, seg_ret->len / 1024, seg_ret->hugepage_sz / 1024, seg_ret->nchannel, seg_ret->nrank, seg_ret->free_seg_len / 1024); @@ -77,13 +77,13 @@ static void list_eal_mem_zone_info(eal_all_mem_zone_ret_t *all_eal_mem_zone_ret) int i = 0; printf("%-8s %32s %16s %16s %20s %20s %10s\n", "zone_id", - "zone_name", "phys_addr(Hex)", "virt_addr(Hex)", "len(KB)", "hugepage_size(KB)", + "zone_name", "iova(Hex)", "virt_addr(Hex)", "len(KB)", "hugepage_size(KB)", "socket_id"); for (i = 0; i < all_eal_mem_zone_ret->zone_num; i++) { zone_ret = &all_eal_mem_zone_ret->zone_info[i]; printf("%-8d %32s %16lx %16lx %20lu %20lu %10d\n", i, - zone_ret->name, zone_ret->phys_addr, zone_ret->virt_addr, + zone_ret->name, zone_ret->iova, zone_ret->virt_addr, zone_ret->len / 1024, zone_ret->hugepage_sz / 1024, zone_ret->socket_id); } } From fa7bc4bc410cee00845ac10805bdbba303f36f3f Mon Sep 17 00:00:00 2001 From: ywc689 Date: Sat, 8 
May 2021 17:41:51 +0800 Subject: [PATCH 16/41] refactor Makefile and fix some bugs after merging dpdk 20.11 Signed-off-by: ywc689 --- README.md | 2 +- include/mbuf.h | 2 +- src/Makefile | 10 ++----- src/dpdk.mk | 74 ++++++++++++++++++-------------------------------- src/mbuf.c | 4 +-- 5 files changed, 33 insertions(+), 59 deletions(-) diff --git a/README.md b/README.md index d255c50ee..2f1f71aa5 100644 --- a/README.md +++ b/README.md @@ -162,7 +162,7 @@ $ make # or "make -j40" to speed up. $ make install ``` -> Build dependencies may be needed, such as `automake`, `libnl3`, `libnl-genl-3.0`, `openssl`, `popt` and `numactl`. You can install the missing dependencies by using the package manager of the system, e.g., `yum install popt-devel` (CentOS). +> Build dependencies may be needed, such as `pkg-config`(latest version),`automake`, `libnl3`, `libnl-genl-3.0`, `openssl`, `popt` and `numactl`. You can install the missing dependencies by using the package manager of the system, e.g., `yum install popt-devel` (CentOS). Output files are installed to `dpvs/bin`. 
diff --git a/include/mbuf.h b/include/mbuf.h index 577de0ff2..a08cf1bdc 100644 --- a/include/mbuf.h +++ b/include/mbuf.h @@ -152,7 +152,7 @@ void *mbuf_userdata_const(const struct rte_mbuf *, mbuf_usedata_field_t); static inline void mbuf_userdata_reset(struct rte_mbuf *m) { - memset(m, 0, sizeof(m->dynfield1)); + memset((void *)m->dynfield1, 0, sizeof(m->dynfield1)); } int mbuf_init(void); diff --git a/src/Makefile b/src/Makefile index f9abe073a..63d5e0646 100644 --- a/src/Makefile +++ b/src/Makefile @@ -34,14 +34,8 @@ DATE_STRING := $(shell date +%Y.%m.%d.%H:%M:%S) # same path of THIS Makefile SRCDIR := $(dir $(realpath $(firstword $(MAKEFILE_LIST)))) -ifeq ($(shell pkg-config --exists libdpdk && echo 0),0) -else -ifeq ($(RTE_SDK),) - $(error "The variable RTE_SDK is not defined.") -endif -include $(RTE_SDK)/mk/rte.vars.mk -LIBS += -lpthread -lnuma -lrt -lm -ldl -lcrypto -lpcap -endif +# Addtional libs below are needed when using dynamic link. +# LIBS += -lpthread -lnuma -lrt -lm -ldl -lcrypto -lpcap include $(SRCDIR)/config.mk include $(SRCDIR)/dpdk.mk diff --git a/src/dpdk.mk b/src/dpdk.mk index cd9d995a2..e3c4c2759 100644 --- a/src/dpdk.mk +++ b/src/dpdk.mk @@ -14,56 +14,36 @@ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # -ifeq ($(shell pkg-config --exists libdpdk && echo 0),0) - -CFLAGS += -DALLOW_EXPERIMENTAL_API $(shell pkg-config --cflags libdpdk) -LIBS += $(shell pkg-config --static --libs libdpdk) - -# FIXME: DPVS would link if not specified the following PMD libraries. -LIBS += -l:librte_bus_vdev.a -l:librte_net_bond.a - -else - -ifeq ($(RTE_SDK),) -$(error "The variable RTE_SDK is not defined.") -endif -# default target, may be overriden. 
-RTE_TARGET ?= build - -DPDKDIR := $(RTE_SDK)/$(RTE_TARGET) - -INCDIRS += -I $(DPDKDIR)/include - -CFLAGS += -include $(DPDKDIR)/include/rte_config.h - -LIBS += -L $(DPDKDIR)/lib -LIBS += -Wl,--no-as-needed -fvisibility=default -Wl,--whole-archive - -LIBS += -lrte_pmd_vmxnet3_uio -lrte_pmd_i40e -lrte_pmd_ixgbe -lrte_pmd_ena \ - -lrte_pmd_e1000 -lrte_pmd_bnxt -lrte_pmd_ring -lrte_pmd_bond \ - -lrte_ethdev -lrte_ip_frag -lrte_hash -lrte_kvargs -lrte_mbuf \ - -lrte_eal -lrte_mempool -lrte_ring -lrte_cmdline -lrte_cfgfile \ - -lrte_kni -lrte_mempool_ring -lrte_timer -lrte_net -lrte_pmd_virtio \ - -lrte_pci -lrte_bus_pci -lrte_bus_vdev -lrte_lpm -lrte_pdump \ - -ifeq ($(CONFIG_PDUMP), y) -LIBS += -lrte_acl -lrte_member -lrte_eventdev -lrte_reorder -lrte_cryptodev \ - -lrte_vhost -lrte_pmd_pcap - -ifneq ("$(wildcard $(RTE_SDK)/$(RTE_TARGET)/lib/librte_bus_vmbus.a)", "") - LIBS += -lrte_bus_vmbus -endif - -ifneq ("$(wildcard $(RTE_SDK)/$(RTE_TARGET)/lib/librte_pmd_netvsc.a)", "") - LIBS += -lrte_pmd_netvsc +# If the dpdklib isn't installed to the default location on your system, +# please specify PKG_CONFIG_PATH explicitly as below. +# +# LIBDPDKPC_PATH := /path/to/dpdk/build/lib/pkgconfig + +define PKG_CONFIG_ERR_MSG +DPDK library was not found. +If dpdk has installed already, please ensure the libdpdk.pc file could be found by `pkg-config`. +You may fix the problem by setting LIBDPDKPC_PATH (in file src/dpdk.mk) to the path of libdpdk.pc file explicitly +endef + +# It's noted that pkg-config version 0.29.2 is recommended, +# pkg-config 0.27.1 would mess up the ld flags when linking dpvs. 
+PKGCONFIG_VERSION=$(shell pkg-config pkg-config --version) +ifneq "v$(PKGCONFIG_VERSION)" "v0.29.2" +$(warning "The pkg-config version is $(PKGCONFIG_VERSION) but 0.29.2 is recommended.") +ifeq "v$(PKGCONFIG_VERSION)" "v0.27.1" +$(error "pkg-config version $(PKGCONFIG_VERSION) isn't supported by dpvs, please use 0.29.2 instead.") endif - endif -ifeq ($(CONFIG_MLX5), y) -LIBS += -lrte_pmd_mlx5 +ifeq ($(shell pkg-config --exists libdpdk && echo 0),0) +CFLAGS += -DALLOW_EXPERIMENTAL_API $(shell pkg-config --cflags libdpdk) +LIBS += $(shell pkg-config --static --libs libdpdk) +else +ifneq ($(wildcard $(LIBDPDKPC_PATH)),) +CFLAGS += -DALLOW_EXPERIMENTAL_API $(shell PKG_CONFIG_PATH=$(LIBDPDKPC_PATH) pkg-config --cflags libdpdk) +LIBS += $(shell PKG_CONFIG_PATH=$(LIBDPDKPC_PATH) pkg-config --static --libs libdpdk) +else +$(error $(PKG_CONFIG_ERR_MSG)) endif - -LIBS += -Wl,--no-whole-archive endif diff --git a/src/mbuf.c b/src/mbuf.c index 99a911571..ab3d930ac 100644 --- a/src/mbuf.c +++ b/src/mbuf.c @@ -199,12 +199,12 @@ int mbuf_init(void) [ MBUF_FIELD_PROTO ] = { .name = "route", .size = sizeof(mbuf_userdata_field_proto_t), - .align = RTE_CACHE_LINE_SIZE, + .align = 8, }, [ MBUF_FIELD_ROUTE ] = { .name = "protocol", .size = sizeof(mbuf_userdata_field_route_t), - .align = RTE_CACHE_LINE_SIZE, + .align = 8, }, }; From 0e37d91aa6f02a6bc8cee272ea6c9e9c3608bfe9 Mon Sep 17 00:00:00 2001 From: ywc689 Date: Fri, 18 Jun 2021 18:47:33 +0800 Subject: [PATCH 17/41] patch: add patches for dpdk-stable-20.11.1 Signed-off-by: ywc689 --- ...link-event-for-multicast-driver-part.patch | 136 +++++ ...dump-change-dpdk-pdump-tool-for-dpvs.patch | 556 ++++++++++++++++++ ...ug-only-enable-dpdk-eal-memory-debug.patch | 61 ++ ...w-patch-ixgbe-fdir-rte_flow-for-dpvs.patch | 150 +++++ 4 files changed, 903 insertions(+) create mode 100644 patch/dpdk-stable-20.11.1/0001-kni-use-netlink-event-for-multicast-driver-part.patch create mode 100644 
patch/dpdk-stable-20.11.1/0002-pdump-change-dpdk-pdump-tool-for-dpvs.patch create mode 100644 patch/dpdk-stable-20.11.1/0003-for-debug-only-enable-dpdk-eal-memory-debug.patch create mode 100644 patch/dpdk-stable-20.11.1/0004-ixgbe_flow-patch-ixgbe-fdir-rte_flow-for-dpvs.patch diff --git a/patch/dpdk-stable-20.11.1/0001-kni-use-netlink-event-for-multicast-driver-part.patch b/patch/dpdk-stable-20.11.1/0001-kni-use-netlink-event-for-multicast-driver-part.patch new file mode 100644 index 000000000..fce646ee4 --- /dev/null +++ b/patch/dpdk-stable-20.11.1/0001-kni-use-netlink-event-for-multicast-driver-part.patch @@ -0,0 +1,136 @@ +From fc25cda5bab943feac5455779fb6a6f00ee2a87d Mon Sep 17 00:00:00 2001 +From: wencyu +Date: Thu, 17 Jun 2021 20:39:55 +0800 +Subject: [PATCH 1/4] kni: use netlink event for multicast (driver part) + +Kni driver sends netlink event every time hw-multicast list updated by +kernel, the user kni app should capture the event and update multicast +to kni device. + +Original way is using rte_kni_request to pass hw-multicast to user kni +module. That method works but finally memory corruption found, which is +not easy to address. That's why we use netlink event instead. + +Signed-off-by: wencyu +--- + kernel/linux/kni/kni_net.c | 76 ++++++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 76 insertions(+) + +diff --git a/kernel/linux/kni/kni_net.c b/kernel/linux/kni/kni_net.c +index 4b75208..cde565e 100644 +--- a/kernel/linux/kni/kni_net.c ++++ b/kernel/linux/kni/kni_net.c +@@ -17,6 +17,8 @@ + #include + #include + #include ++#include ++#include + + #include + #include +@@ -128,6 +130,7 @@ + ret_val = wait_event_interruptible_timeout(kni->wq, + kni_fifo_count(kni->resp_q), 3 * HZ); + if (signal_pending(current) || ret_val <= 0) { ++ pr_err("%s: wait_event_interruptible timeout\n", __func__); + ret = -ETIME; + goto fail; + } +@@ -657,6 +660,77 @@ void kni_net_release_fifo_phy(struct kni_dev *kni) + return (ret == 0) ? 
req.result : ret; + } + ++static size_t ++kni_nlmsg_size(void) ++{ ++ return NLMSG_ALIGN(sizeof(struct ifaddrmsg)) ++ + nla_total_size(4) /* IFA_ADDRESS */ ++ + nla_total_size(4) /* IFA_LOCAL */ ++ + nla_total_size(4) /* IFA_BROADCAST */ ++ + nla_total_size(IFNAMSIZ) /* IFA_LABEL */ ++ + nla_total_size(4) /* IFA_FLAGS */ ++ + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */ ++} ++ ++static void ++kni_net_set_rx_mode(struct net_device *dev) ++{ ++ /* ++ * send event to notify user (DPDK KNI app) that multicast list changed, ++ * so that it can monitor multicast join/leave and set HW mc-addrs to ++ * kni dev accordinglly. ++ * ++ * this event is just an notification, we do not save any mc-addr here ++ * (so attribute space for us). user kni app should get maddrs after ++ * receive this notification. ++ * ++ * I was expecting kernel send some rtnl event for multicast join/leave, ++ * but it doesn't. By checking the call-chain of SIOCADDMULTI (ip maddr, ++ * manages only hardware multicast) and IP_ADD_MEMBERSHIP (ip_mc_join_group, ++ * used to for IPv4 multicast), no rtnl event sent. ++ * ++ * so as workaround, modify kni driver here to send RTM_NEWADDR. ++ * it may not suitalbe to use this event for mcast, but that should works. ++ * hope that won't affect other listener to this event. ++ * ++ * previous solution was using rte_kni_request to pass hw-maddr list to user. ++ * it "works" for times but finally memory corruption found, which is ++ * not easy to address (lock was added and reviewed). That's why we use ++ * netlink event instead. ++ */ ++ struct sk_buff *skb; ++ struct net *net = dev_net(dev); ++ struct nlmsghdr *nlh; ++ struct ifaddrmsg *ifm; ++ ++ skb = nlmsg_new(kni_nlmsg_size(), GFP_ATOMIC); ++ if (!skb) ++ return; ++ ++ /* no other event for us ? 
*/ ++ nlh = nlmsg_put(skb, 0, 0, RTM_NEWADDR, sizeof(*ifm), 0); ++ if (!nlh) { ++ kfree_skb(skb); ++ return; ++ } ++ ++ /* just send an notification so no other info */ ++ ifm = nlmsg_data(nlh); ++ memset(ifm, 0, sizeof(*ifm)); ++ ifm->ifa_family = AF_UNSPEC; ++ ifm->ifa_prefixlen = 0; ++ ifm->ifa_flags = 0; ++ ifm->ifa_scope = RT_SCOPE_NOWHERE; ++ ifm->ifa_index = 0; ++ ++ nlmsg_end(skb, nlh); ++ ++ /* other group ? */ ++ pr_debug("%s: rx-mode/multicast-list changed\n", __func__); ++ rtnl_notify(skb, net, 0, RTNLGRP_NOTIFY, NULL, GFP_ATOMIC); ++ return; ++} ++ + static void + kni_net_change_rx_flags(struct net_device *netdev, int flags) + { +@@ -757,6 +831,7 @@ void kni_net_release_fifo_phy(struct kni_dev *kni) + kni = netdev_priv(netdev); + ret = kni_net_process_request(kni, &req); + ++ pr_info("%s request returns %d!\n", __func__, ret); + return (ret == 0 ? req.result : ret); + } + +@@ -788,6 +863,7 @@ void kni_net_release_fifo_phy(struct kni_dev *kni) + .ndo_change_rx_flags = kni_net_change_rx_flags, + .ndo_start_xmit = kni_net_tx, + .ndo_change_mtu = kni_net_change_mtu, ++ .ndo_set_rx_mode = kni_net_set_rx_mode, + .ndo_tx_timeout = kni_net_tx_timeout, + .ndo_set_mac_address = kni_net_set_mac, + #ifdef HAVE_CHANGE_CARRIER_CB +-- +1.8.3.1 + diff --git a/patch/dpdk-stable-20.11.1/0002-pdump-change-dpdk-pdump-tool-for-dpvs.patch b/patch/dpdk-stable-20.11.1/0002-pdump-change-dpdk-pdump-tool-for-dpvs.patch new file mode 100644 index 000000000..ea3dd924c --- /dev/null +++ b/patch/dpdk-stable-20.11.1/0002-pdump-change-dpdk-pdump-tool-for-dpvs.patch @@ -0,0 +1,556 @@ +From 6a99af8a3f9067a18211d15b4a65bcafa9430875 Mon Sep 17 00:00:00 2001 +From: wencyu +Date: Fri, 18 Jun 2021 10:20:45 +0800 +Subject: [PATCH 2/4] pdump: change dpdk-pdump tool for dpvs + +Signed-off-by: wencyu +--- + app/pdump/main.c | 167 ++++++++++++++++++++++++++++++++++++++++--- + lib/librte_pdump/rte_pdump.c | 145 +++++++++++++++++++++++++++++++++++-- + lib/librte_pdump/rte_pdump.h | 27 +++++++ + 3 
files changed, 327 insertions(+), 12 deletions(-) + +diff --git a/app/pdump/main.c b/app/pdump/main.c +index b34bf33..9d14474 100644 +--- a/app/pdump/main.c ++++ b/app/pdump/main.c +@@ -27,6 +27,7 @@ + #include + #include + #include ++#include + + #define CMD_LINE_OPT_PDUMP "pdump" + #define CMD_LINE_OPT_PDUMP_NUM 256 +@@ -42,6 +43,14 @@ + #define PDUMP_MSIZE_ARG "mbuf-size" + #define PDUMP_NUM_MBUFS_ARG "total-num-mbufs" + ++#define PDUMP_HOST_ARG "host" ++#define PDUMP_SRC_ARG "src-host" ++#define PDUMP_DST_ARG "dst-host" ++#define PDUMP_PROTO_PORT_AGE "proto-port" ++#define PDUMP_SPORT_ARG "src-port" ++#define PDUMP_DPORT_ARG "dst-port" ++#define PDUMP_PROTO_ARG "proto" ++ + #define VDEV_NAME_FMT "net_pcap_%s_%d" + #define VDEV_PCAP_ARGS_FMT "tx_pcap=%s" + #define VDEV_IFACE_ARGS_FMT "tx_iface=%s" +@@ -97,6 +106,13 @@ enum pdump_by { + PDUMP_RING_SIZE_ARG, + PDUMP_MSIZE_ARG, + PDUMP_NUM_MBUFS_ARG, ++ PDUMP_HOST_ARG, ++ PDUMP_SRC_ARG, ++ PDUMP_DST_ARG, ++ PDUMP_PROTO_PORT_AGE, ++ PDUMP_SPORT_ARG, ++ PDUMP_DPORT_ARG, ++ PDUMP_PROTO_ARG, + NULL + }; + +@@ -130,6 +146,7 @@ struct pdump_tuples { + enum pcap_stream rx_vdev_stream_type; + enum pcap_stream tx_vdev_stream_type; + bool single_pdump_dev; ++ struct pdump_filter *filter; + + /* stats */ + struct pdump_stats stats; +@@ -158,6 +175,11 @@ struct parse_val { + "(queue=)," + "(rx-dev= |" + " tx-dev=," ++ "[host= | src-host= |" ++ "dst-host=]," ++ "[proto=support:tcp/udp/icmp]," ++ "[proto-port= |src-port= |" ++ "dst-port=]," + "[ring-size=default:16384]," + "[mbuf-size=default:2176]," + "[total-num-mbufs=default:65535]'\n", +@@ -244,6 +266,64 @@ struct parse_val { + } + + static int ++parse_host(const char *key __rte_unused, const char *value, void *extra_args) ++{ ++ struct pdump_tuples *pt = extra_args; ++ struct in_addr inaddr; ++ struct in6_addr inaddr6; ++ union addr addr; ++ int af = 0; ++ ++ if (inet_pton(AF_INET6, value, &inaddr6) > 0) { ++ af = AF_INET6; ++ addr.in6 = inaddr6; ++ } else if 
(inet_pton(AF_INET, value, &inaddr) > 0){ ++ af = AF_INET; ++ addr.in = inaddr; ++ } else { ++ printf("IP address invaled\n"); ++ return -EINVAL; ++ } ++ ++ if (pt->filter && pt->filter->af != 0 && af != pt->filter->af) { ++ printf("IPv4 and IPv6 conflict\n"); ++ return -EINVAL; ++ } else { ++ pt->filter->af = af; ++ } ++ ++ if (!strcmp(key, PDUMP_HOST_ARG)) { ++ rte_memcpy(&pt->filter->host_addr, &addr, sizeof(addr)); ++ } else if (!strcmp(key, PDUMP_SRC_ARG)) { ++ rte_memcpy(&pt->filter->s_addr, &addr, sizeof(addr)); ++ } else if (!strcmp(key, PDUMP_DST_ARG)) { ++ rte_memcpy(&pt->filter->d_addr, &addr, sizeof(addr)); ++ } ++ ++ return 0; ++} ++ ++static int ++parse_proto(const char *key __rte_unused, const char *value, void *extra_args) ++{ ++ struct pdump_tuples *pt = extra_args; ++ ++ if (!strcmp(value, "tcp")) { ++ pt->filter->proto = IPPROTO_TCP; ++ } else if (!strcmp(value, "udp")) { ++ pt->filter->proto = IPPROTO_UDP; ++ } else if (!strcmp(value, "icmp")) { ++ pt->filter->proto = IPPROTO_ICMP; ++ } else { ++ printf("invalid value:\"%s\" for key:\"%s\", " ++ "value must be tcp/udp/icmp\n", value, key); ++ return -EINVAL; ++ } ++ ++ return 0; ++} ++ ++static int + parse_pdump(const char *optarg) + { + struct rte_kvargs *kvlist; +@@ -370,6 +450,75 @@ struct parse_val { + } else + pt->total_num_mbufs = MBUFS_PER_POOL; + ++ /* filter parsing and validation */ ++ pt->filter = rte_zmalloc("pdump_filter", ++ sizeof(struct pdump_filter), 0); ++ cnt1 = rte_kvargs_count(kvlist, PDUMP_HOST_ARG); ++ if (cnt1 == 1) { ++ ret = rte_kvargs_process(kvlist, PDUMP_HOST_ARG, ++ &parse_host, pt); ++ if (ret < 0) ++ goto free_kvlist; ++ } ++ ++ cnt1 = rte_kvargs_count(kvlist, PDUMP_SRC_ARG); ++ if (cnt1 == 1) { ++ ret = rte_kvargs_process(kvlist, PDUMP_SRC_ARG, ++ &parse_host, pt); ++ if (ret < 0) ++ goto free_kvlist; ++ } ++ ++ cnt1 = rte_kvargs_count(kvlist, PDUMP_DST_ARG); ++ if (cnt1 == 1) { ++ ret = rte_kvargs_process(kvlist, PDUMP_DST_ARG, ++ &parse_host, pt); ++ if (ret < 
0) ++ goto free_kvlist; ++ } ++ ++ ++ cnt1 = rte_kvargs_count(kvlist, PDUMP_PROTO_PORT_AGE); ++ if (cnt1 == 1) { ++ v.min = 1; ++ v.max = UINT16_MAX; ++ ret = rte_kvargs_process(kvlist, PDUMP_PROTO_PORT_AGE, ++ &parse_uint_value, &v); ++ if (ret < 0) ++ goto free_kvlist; ++ pt->filter->proto_port = (uint16_t) v.val; ++ } ++ ++ cnt1 = rte_kvargs_count(kvlist, PDUMP_SPORT_ARG); ++ if (cnt1 == 1) { ++ v.min = 1; ++ v.max = UINT16_MAX; ++ ret = rte_kvargs_process(kvlist, PDUMP_SPORT_ARG, ++ &parse_uint_value, &v); ++ if (ret < 0) ++ goto free_kvlist; ++ pt->filter->s_port = (uint16_t) v.val; ++ } ++ ++ cnt1 = rte_kvargs_count(kvlist, PDUMP_DPORT_ARG); ++ if (cnt1 == 1) { ++ v.min = 1; ++ v.max = UINT16_MAX; ++ ret = rte_kvargs_process(kvlist, PDUMP_DPORT_ARG, ++ &parse_uint_value, &v); ++ if (ret < 0) ++ goto free_kvlist; ++ pt->filter->d_port = (uint16_t) v.val; ++ } ++ ++ cnt1 = rte_kvargs_count(kvlist, PDUMP_PROTO_ARG); ++ if (cnt1 == 1) { ++ ret = rte_kvargs_process(kvlist, PDUMP_PROTO_ARG, ++ &parse_proto, pt); ++ if (ret < 0) ++ goto free_kvlist; ++ } ++ + num_tuples++; + + free_kvlist: +@@ -510,6 +659,8 @@ struct parse_val { + rte_ring_free(pt->rx_ring); + if (pt->tx_ring) + rte_ring_free(pt->tx_ring); ++ if (pt->filter) ++ rte_free(pt->filter); + } + } + +@@ -837,20 +988,20 @@ struct parse_val { + pt->queue, + RTE_PDUMP_FLAG_RX, + pt->rx_ring, +- pt->mp, NULL); ++ pt->mp, pt->filter); + ret1 = rte_pdump_enable_by_deviceid( + pt->device_id, + pt->queue, + RTE_PDUMP_FLAG_TX, + pt->tx_ring, +- pt->mp, NULL); ++ pt->mp, pt->filter); + } else if (pt->dump_by_type == PORT_ID) { + ret = rte_pdump_enable(pt->port, pt->queue, + RTE_PDUMP_FLAG_RX, +- pt->rx_ring, pt->mp, NULL); ++ pt->rx_ring, pt->mp, pt->filter); + ret1 = rte_pdump_enable(pt->port, pt->queue, + RTE_PDUMP_FLAG_TX, +- pt->tx_ring, pt->mp, NULL); ++ pt->tx_ring, pt->mp, pt->filter); + } + } else if (pt->dir == RTE_PDUMP_FLAG_RX) { + if (pt->dump_by_type == DEVICE_ID) +@@ -858,22 +1009,22 @@ struct 
parse_val { + pt->device_id, + pt->queue, + pt->dir, pt->rx_ring, +- pt->mp, NULL); ++ pt->mp, pt->filter); + else if (pt->dump_by_type == PORT_ID) + ret = rte_pdump_enable(pt->port, pt->queue, + pt->dir, +- pt->rx_ring, pt->mp, NULL); ++ pt->rx_ring, pt->mp, pt->filter); + } else if (pt->dir == RTE_PDUMP_FLAG_TX) { + if (pt->dump_by_type == DEVICE_ID) + ret = rte_pdump_enable_by_deviceid( + pt->device_id, + pt->queue, + pt->dir, +- pt->tx_ring, pt->mp, NULL); ++ pt->tx_ring, pt->mp, pt->filter); + else if (pt->dump_by_type == PORT_ID) + ret = rte_pdump_enable(pt->port, pt->queue, + pt->dir, +- pt->tx_ring, pt->mp, NULL); ++ pt->tx_ring, pt->mp, pt->filter); + } + if (ret < 0 || ret1 < 0) { + cleanup_pdump_resources(); +diff --git a/lib/librte_pdump/rte_pdump.c b/lib/librte_pdump/rte_pdump.c +index b3c8d5c..b73fb8f 100644 +--- a/lib/librte_pdump/rte_pdump.c ++++ b/lib/librte_pdump/rte_pdump.c +@@ -9,6 +9,10 @@ + #include + #include + #include ++#include ++#include ++#include ++#include + + #include "rte_pdump.h" + +@@ -69,6 +73,132 @@ struct pdump_response { + } rx_cbs[RTE_MAX_ETHPORTS][RTE_MAX_QUEUES_PER_PORT], + tx_cbs[RTE_MAX_ETHPORTS][RTE_MAX_QUEUES_PER_PORT]; + ++static int ++inet_addr_equal(int af, const union addr *a1, ++ const union addr *a2) ++{ ++ switch (af) { ++ case AF_INET: ++ return a1->in.s_addr == a2->in.s_addr; ++ case AF_INET6: ++ return memcmp(a1->in6.s6_addr, a2->in6.s6_addr, 16) == 0; ++ default: ++ return memcmp(a1, a2, sizeof(union addr)) == 0; ++ } ++} ++ ++static int ++inet_is_addr_any(int af, const union addr *addr) ++{ ++ switch (af) { ++ case AF_INET: ++ return addr->in.s_addr == htonl(INADDR_ANY); ++ case AF_INET6: ++ return IN6_ARE_ADDR_EQUAL(&addr->in6, &in6addr_any); ++ default: ++ return -1; ++ } ++ ++ return -1; ++} ++static int ++pdump_filter(struct rte_mbuf *m, struct pdump_filter *filter) ++{ ++ struct rte_ether_hdr *eth_hdr; ++ struct vlan_eth_hdr *vlan_eth_hdr; ++ union addr s_addr, d_addr; ++ int prepend = 0; ++ uint16_t 
type = 0; ++ uint16_t iph_len = 0; ++ uint8_t proto = 0; ++ ++ int af; ++ ++ if (filter->af == 0 && filter->s_port == 0 && ++ filter->d_port == 0 && filter->proto == 0 && ++ filter->proto_port == 0) ++ return 0; ++ ++ eth_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *); ++ ++ if (eth_hdr->ether_type == htons(ETH_P_8021Q)) { ++ prepend += sizeof(struct vlan_eth_hdr); ++ vlan_eth_hdr = rte_pktmbuf_mtod(m, struct vlan_eth_hdr *); ++ type = vlan_eth_hdr->h_vlan_encapsulated_proto; ++ } else { ++ prepend += sizeof(struct rte_ether_hdr); ++ eth_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *); ++ type = eth_hdr->ether_type; ++ } ++ ++ if (rte_pktmbuf_adj(m, prepend) == NULL) ++ goto prepend; ++ ++ if (type == rte_cpu_to_be_16(RTE_ETHER_TYPE_ARP)) { ++ struct rte_arp_hdr *arp = rte_pktmbuf_mtod(m, struct rte_arp_hdr *); ++ af = AF_INET; ++ s_addr.in.s_addr = arp->arp_data.arp_sip; ++ d_addr.in.s_addr = arp->arp_data.arp_tip; ++ } else if (type == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4)) { ++ struct rte_ipv4_hdr *ip4 = rte_pktmbuf_mtod(m, struct rte_ipv4_hdr *); ++ af = AF_INET; ++ s_addr.in.s_addr = ip4->src_addr; ++ d_addr.in.s_addr = ip4->dst_addr; ++ proto = ip4->next_proto_id; ++ iph_len = (ip4->version_ihl & 0xf) << 2; ++ } else if (type == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6)) { ++ struct rte_ipv6_hdr *ip6 = rte_pktmbuf_mtod(m, struct rte_ipv6_hdr *); ++ af = AF_INET6; ++ rte_memcpy(&s_addr.in6, &ip6->src_addr, 16); ++ rte_memcpy(&d_addr.in6, &ip6->dst_addr, 16); ++ proto = ip6->proto; ++ iph_len = sizeof(struct rte_ipv6_hdr); ++ } else { ++ goto prepend; ++ } ++ ++ /*filter*/ ++ if (!inet_is_addr_any(af, &filter->s_addr) && ++ !inet_addr_equal(af, &filter->s_addr, &s_addr)) ++ goto prepend; ++ if (!inet_is_addr_any(af, &filter->d_addr) && ++ !inet_addr_equal(af, &filter->d_addr, &d_addr)) ++ goto prepend; ++ if (!inet_is_addr_any(af, &filter->host_addr) && ++ !inet_addr_equal(af, &filter->host_addr, &s_addr) && ++ !inet_addr_equal(af, &filter->host_addr, 
&d_addr)) ++ goto prepend; ++ ++ if (filter->proto && filter->proto != proto) ++ goto prepend; ++ ++ if (filter->s_port || filter->d_port || filter->proto_port) { ++ if (proto != IPPROTO_TCP && proto != IPPROTO_UDP) ++ goto prepend; ++ struct rte_udp_hdr _uh; ++ const struct rte_udp_hdr *uh; ++ uh = rte_pktmbuf_read(m, iph_len, sizeof(_uh), &_uh); ++ if (uh == NULL) ++ goto prepend; ++ if (filter->s_port && filter->s_port != rte_cpu_to_be_16(uh->src_port)) ++ goto prepend; ++ ++ if (filter->d_port && filter->d_port != rte_cpu_to_be_16(uh->dst_port)) ++ goto prepend; ++ ++ if (filter->proto_port && ++ filter->proto_port != rte_cpu_to_be_16(uh->src_port) && ++ filter->proto_port != rte_cpu_to_be_16(uh->dst_port)) ++ goto prepend; ++ } ++ ++ rte_pktmbuf_prepend(m, prepend); ++ return 0; ++ ++prepend: ++ rte_pktmbuf_prepend(m, prepend); ++ return -1; ++} + + static inline void + pdump_copy(struct rte_mbuf **pkts, uint16_t nb_pkts, void *user_params) +@@ -86,6 +216,8 @@ struct pdump_response { + ring = cbs->ring; + mp = cbs->mp; + for (i = 0; i < nb_pkts; i++) { ++ if (pdump_filter(pkts[i], cbs->filter) != 0) ++ continue; + p = rte_pktmbuf_copy(pkts[i], mp, 0, UINT32_MAX); + if (p) + dup_bufs[d_pkts++] = p; +@@ -122,7 +254,7 @@ struct pdump_response { + static int + pdump_register_rx_callbacks(uint16_t end_q, uint16_t port, uint16_t queue, + struct rte_ring *ring, struct rte_mempool *mp, +- uint16_t operation) ++ struct pdump_filter *filter, uint16_t operation) + { + uint16_t qid; + struct pdump_rxtx_cbs *cbs = NULL; +@@ -140,6 +272,7 @@ struct pdump_response { + } + cbs->ring = ring; + cbs->mp = mp; ++ cbs->filter = filter; + cbs->cb = rte_eth_add_first_rx_callback(port, qid, + pdump_rx, cbs); + if (cbs->cb == NULL) { +@@ -176,7 +309,7 @@ struct pdump_response { + static int + pdump_register_tx_callbacks(uint16_t end_q, uint16_t port, uint16_t queue, + struct rte_ring *ring, struct rte_mempool *mp, +- uint16_t operation) ++ struct pdump_filter *filter, uint16_t 
operation) + { + + uint16_t qid; +@@ -195,6 +328,7 @@ struct pdump_response { + } + cbs->ring = ring; + cbs->mp = mp; ++ cbs->filter = filter; + cbs->cb = rte_eth_add_tx_callback(port, qid, pdump_tx, + cbs); + if (cbs->cb == NULL) { +@@ -238,6 +372,7 @@ struct pdump_response { + uint16_t operation; + struct rte_ring *ring; + struct rte_mempool *mp; ++ struct pdump_filter *filter; + + flags = p->flags; + operation = p->op; +@@ -253,6 +388,7 @@ struct pdump_response { + queue = p->data.en_v1.queue; + ring = p->data.en_v1.ring; + mp = p->data.en_v1.mp; ++ filter = p->data.en_v1.filter; + } else { + ret = rte_eth_dev_get_port_by_name(p->data.dis_v1.device, + &port); +@@ -265,6 +401,7 @@ struct pdump_response { + queue = p->data.dis_v1.queue; + ring = p->data.dis_v1.ring; + mp = p->data.dis_v1.mp; ++ filter = p->data.dis_v1.filter; + } + + /* validation if packet capture is for all queues */ +@@ -303,7 +440,7 @@ struct pdump_response { + if (flags & RTE_PDUMP_FLAG_RX) { + end_q = (queue == RTE_PDUMP_ALL_QUEUES) ? nb_rx_q : queue + 1; + ret = pdump_register_rx_callbacks(end_q, port, queue, ring, mp, +- operation); ++ filter, operation); + if (ret < 0) + return ret; + } +@@ -312,7 +449,7 @@ struct pdump_response { + if (flags & RTE_PDUMP_FLAG_TX) { + end_q = (queue == RTE_PDUMP_ALL_QUEUES) ? 
nb_tx_q : queue + 1; + ret = pdump_register_tx_callbacks(end_q, port, queue, ring, mp, +- operation); ++ filter, operation); + if (ret < 0) + return ret; + } +diff --git a/lib/librte_pdump/rte_pdump.h b/lib/librte_pdump/rte_pdump.h +index 6b00fc1..3986b07 100644 +--- a/lib/librte_pdump/rte_pdump.h ++++ b/lib/librte_pdump/rte_pdump.h +@@ -15,6 +15,8 @@ + #include + #include + #include ++#include ++#include + + #ifdef __cplusplus + extern "C" { +@@ -29,6 +31,31 @@ enum { + RTE_PDUMP_FLAG_RXTX = (RTE_PDUMP_FLAG_RX|RTE_PDUMP_FLAG_TX) + }; + ++union addr { ++ struct in_addr in; ++ struct in6_addr in6; ++}; ++ ++struct pdump_filter { ++ int af; ++ union addr s_addr; ++ union addr d_addr; ++ union addr host_addr; //s_addr or d_addr ++ ++ uint8_t proto; ++ uint16_t proto_port; //s_port or d_port ++ uint16_t s_port; ++ uint16_t d_port; ++}; ++ ++struct vlan_eth_hdr { ++ unsigned char h_dest[ETH_ALEN]; ++ unsigned char h_source[ETH_ALEN]; ++ unsigned short h_vlan_proto; ++ unsigned short h_vlan_TCI; ++ unsigned short h_vlan_encapsulated_proto; ++}; ++ + /** + * Initialize packet capturing handling + * +-- +1.8.3.1 + diff --git a/patch/dpdk-stable-20.11.1/0003-for-debug-only-enable-dpdk-eal-memory-debug.patch b/patch/dpdk-stable-20.11.1/0003-for-debug-only-enable-dpdk-eal-memory-debug.patch new file mode 100644 index 000000000..04e37b323 --- /dev/null +++ b/patch/dpdk-stable-20.11.1/0003-for-debug-only-enable-dpdk-eal-memory-debug.patch @@ -0,0 +1,61 @@ +From 906f4690d1f3cadca260b23fd1c839d12db9e629 Mon Sep 17 00:00:00 2001 +From: wencyu +Date: Fri, 18 Jun 2021 11:43:07 +0800 +Subject: [PATCH 3/4] [for debug only] enable dpdk eal memory debug + +The patch is used for memory debug. To use the patch, configure meson with option +-Dc_args="-DRTE_MALLOC_DEBUG" when building dpdk. 
For example, + +meson -Dc_args="-DRTE_MALLOC_DEBUG" -Dbuildtype=debug -Dprefix=$(pwd)/dpdklib dpdkbuild +ninja -C dpdkbuild + +Signed-off-by: wencyu +--- + lib/librte_eal/common/rte_malloc.c | 4 ++++ + lib/librte_eal/include/rte_malloc.h | 15 +++++++++++++++ + 2 files changed, 19 insertions(+) + +diff --git a/lib/librte_eal/common/rte_malloc.c b/lib/librte_eal/common/rte_malloc.c +index 9d39e58..2b6d1ab 100644 +--- a/lib/librte_eal/common/rte_malloc.c ++++ b/lib/librte_eal/common/rte_malloc.c +@@ -30,6 +30,10 @@ + #include "eal_memcfg.h" + #include "eal_private.h" + ++int rte_memmory_ok(void *addr) ++{ ++ return malloc_elem_cookies_ok(RTE_PTR_SUB(addr, MALLOC_ELEM_HEADER_LEN)); ++} + + /* Free the memory space back to heap */ + static void +diff --git a/lib/librte_eal/include/rte_malloc.h b/lib/librte_eal/include/rte_malloc.h +index 3af64f8..671e4f2 100644 +--- a/lib/librte_eal/include/rte_malloc.h ++++ b/lib/librte_eal/include/rte_malloc.h +@@ -248,6 +248,21 @@ struct rte_malloc_socket_stats { + __rte_alloc_size(2, 3); + + /** ++ * Check the header/tailer cookies of memory pointed to by the provided pointer. ++ * ++ * This pointer must have been returned by a previous call to ++ * rte_malloc(), rte_zmalloc(), rte_calloc() or rte_realloc(). ++ * ++ * @param ptr ++ * The pointer to memory to be checked. ++ * @return ++ * - true if the header/tailer cookies are OK. ++ * - Otherwise, false. ++ */ ++int ++rte_memmory_ok(void *ptr); ++ ++/** + * Frees the memory space pointed to by the provided pointer. 
+ * + * This pointer must have been returned by a previous call to +-- +1.8.3.1 + diff --git a/patch/dpdk-stable-20.11.1/0004-ixgbe_flow-patch-ixgbe-fdir-rte_flow-for-dpvs.patch b/patch/dpdk-stable-20.11.1/0004-ixgbe_flow-patch-ixgbe-fdir-rte_flow-for-dpvs.patch new file mode 100644 index 000000000..96e21e69a --- /dev/null +++ b/patch/dpdk-stable-20.11.1/0004-ixgbe_flow-patch-ixgbe-fdir-rte_flow-for-dpvs.patch @@ -0,0 +1,150 @@ +From 83ba9cf5e6eb111f8becc1e9c05301fabb40e16b Mon Sep 17 00:00:00 2001 +From: wencyu +Date: Fri, 18 Jun 2021 14:00:24 +0800 +Subject: [PATCH 4/4] ixgbe_flow: patch ixgbe fdir rte_flow for dpvs + +1. Ignore fdir flow rule priority attribute. +2. Use different fdir soft-id for flow rules configured for the same queue. +3. Disable fdir mask settings by rte_flow. +4. Allow IPv6 to pass flow rule ETH item validation. + +Signed-off-by: wencyu +--- + drivers/net/ixgbe/ixgbe_flow.c | 62 ++++++++++++++++++++++++++++++++++-------- + 1 file changed, 51 insertions(+), 11 deletions(-) + +diff --git a/drivers/net/ixgbe/ixgbe_flow.c b/drivers/net/ixgbe/ixgbe_flow.c +index 9aeb2e4..97d5ca0 100644 +--- a/drivers/net/ixgbe/ixgbe_flow.c ++++ b/drivers/net/ixgbe/ixgbe_flow.c +@@ -1419,11 +1419,8 @@ const struct rte_flow_action *next_no_void_action( + + /* not supported */ + if (attr->priority) { +- memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); +- rte_flow_error_set(error, EINVAL, +- RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY, +- attr, "Not support priority."); +- return -rte_errno; ++ PMD_DRV_LOG(WARNING, "ixgbe flow doesn't support priority %d " ++ "(priority must be 0), ignore and continue....\n", attr->priority); + } + + /* check if the first not void action is QUEUE or DROP. */ +@@ -1642,7 +1639,7 @@ static inline uint8_t signature_match(const struct rte_flow_item pattern[]) + * value. So, we need not do anything for the not provided fields later. 
+ */ + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); +- memset(&rule->mask, 0xFF, sizeof(struct ixgbe_hw_fdir_mask)); ++ memset(&rule->mask, 0, sizeof(struct ixgbe_hw_fdir_mask)); /* mask default zero */ + rule->mask.vlan_tci_mask = 0; + rule->mask.flex_bytes_mask = 0; + +@@ -1760,6 +1757,8 @@ static inline uint8_t signature_match(const struct rte_flow_item pattern[]) + } + } else { + if (item->type != RTE_FLOW_ITEM_TYPE_IPV4 && ++ /* Signature mode supports IPv6. */ ++ item->type != RTE_FLOW_ITEM_TYPE_IPV6 && + item->type != RTE_FLOW_ITEM_TYPE_VLAN) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, +@@ -1888,6 +1887,9 @@ static inline uint8_t signature_match(const struct rte_flow_item pattern[]) + rule->ixgbe_fdir.formatted.flow_type = + IXGBE_ATR_FLOW_TYPE_IPV6; + ++ /* Update flow rule mode by global param. */ ++ rule->mode = dev->data->dev_conf.fdir_conf.mode; ++ + /** + * 1. must signature match + * 2. not support last +@@ -2748,12 +2750,45 @@ static inline uint8_t signature_match(const struct rte_flow_item pattern[]) + return ixgbe_parse_fdir_act_attr(attr, actions, rule, error); + } + ++static inline int ++ixgbe_fdir_rule_patch(struct rte_eth_dev *dev, struct ixgbe_fdir_rule *rule) ++{ ++ static uint32_t softid[IXGBE_MAX_RX_QUEUE_NUM] = { 0 }; ++ ++ if (!rule) ++ return 0; ++ ++ if (!dev || !dev->data) ++ return -EINVAL; ++ if (rule->queue >= IXGBE_MAX_RX_QUEUE_NUM) ++ return -EINVAL; ++ ++ /* Soft-id for different rx-queue should be different. */ ++ rule->soft_id = softid[rule->queue]++; ++ ++ /* Disable mask config from rte_flow. ++ * FIXME: ++ * Ixgbe only supports one global mask, all the masks should be the same. ++ * Generally, fdir masks should be configured globally before port start. ++ * But the rte_flow configures masks at flow creation. So we disable fdir ++ * mask configs in rte_flow and configure it globally when port start. ++ * Refer to `ixgbe_dev_start/ixgbe_fdir_configure` for details. 
The global ++ * masks are configured into device initially with user specified params. ++ */ ++ rule->b_mask = 0; ++ ++ /* Use user-defined mode. */ ++ rule->mode = dev->data->dev_conf.fdir_conf.mode; ++ ++ return 0; ++} ++ + static int + ixgbe_parse_fdir_filter(struct rte_eth_dev *dev, + const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], +- struct ixgbe_fdir_rule *rule, ++ struct ixgbe_fdir_rule *rule, bool b_patch, + struct rte_flow_error *error) + { + int ret; +@@ -2787,13 +2822,18 @@ static inline uint8_t signature_match(const struct rte_flow_item pattern[]) + rule->ixgbe_fdir.formatted.dst_port != 0)) + return -ENOTSUP; + +- if (fdir_mode == RTE_FDIR_MODE_NONE || +- fdir_mode != rule->mode) ++ if (fdir_mode == RTE_FDIR_MODE_NONE) + return -ENOTSUP; + + if (rule->queue >= dev->data->nb_rx_queues) + return -ENOTSUP; + ++ if (ret) ++ return ret; ++ ++ if (b_patch) ++ return ixgbe_fdir_rule_patch(dev, rule); ++ + return ret; + } + +@@ -3128,7 +3168,7 @@ static inline uint8_t signature_match(const struct rte_flow_item pattern[]) + + memset(&fdir_rule, 0, sizeof(struct ixgbe_fdir_rule)); + ret = ixgbe_parse_fdir_filter(dev, attr, pattern, +- actions, &fdir_rule, error); ++ actions, &fdir_rule, true, error); + if (!ret) { + /* A mask cannot be deleted. */ + if (fdir_rule.b_mask) { +@@ -3299,7 +3339,7 @@ static inline uint8_t signature_match(const struct rte_flow_item pattern[]) + + memset(&fdir_rule, 0, sizeof(struct ixgbe_fdir_rule)); + ret = ixgbe_parse_fdir_filter(dev, attr, pattern, +- actions, &fdir_rule, error); ++ actions, &fdir_rule, false, error); + if (!ret) + return 0; + +-- +1.8.3.1 + From 4cb29138877c7fbb5cd65d7b4f475c0d49f1df3f Mon Sep 17 00:00:00 2001 From: ywc689 Date: Fri, 18 Jun 2021 18:54:29 +0800 Subject: [PATCH 18/41] script: add helper script to facilitate dpdk build. 
Signed-off-by: ywc689 --- scripts/dpdk-build.sh | 110 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 110 insertions(+) create mode 100755 scripts/dpdk-build.sh diff --git a/scripts/dpdk-build.sh b/scripts/dpdk-build.sh new file mode 100755 index 000000000..9e7741292 --- /dev/null +++ b/scripts/dpdk-build.sh @@ -0,0 +1,110 @@ +#!/bin/env bash +############################################################################ +# usage: $0 [-v dpdk-version] [-d] [-w work-directory] [-p patch-directory] + +build_options="-Denable_kmods=true" +debug_options="-Dbuildtype=debug -Dc_args=-DRTE_MALLOC_DEBUG" + +dpdkver=20.11.1 # default dpdk version (use stable version) +tarball=dpdk-${dpdkver}.tar.xz +srcdir=dpdk-stable-$dpdkver + +workdir=$(pwd)/dpdk # default work directory +patchdir=$(pwd)/patch/dpdk-stable-$dpdkver # default dpdk patch directory + + +function help() +{ + echo -e "\033[31musage: $0 [-d] [-w work-directory] [-p patch-directory]\033[0m" + echo -e "\033[31mOPTIONS:\033[0m" + echo -e "\033[31m -v specify the dpdk version, default $dpdkver\033[0m" + echo -e "\033[31m -d build dpdk libary with debug info\033[0m" + echo -e "\033[31m -w specify the work directory prefix, default $(pwd)\033[0m" + echo -e "\033[31m -p specify the dpdk patch directory, default $(pwd)/patch/dpdk-stable-$dpdkver\033[0m" +} + +function getfullpath() +{ + local dir=$(dirname $1) + local base=$(basename $1) + if test -d ${dir}; then + pushd ${dir} >/dev/null 2>&1 + echo ${PWD}/${base} + popd >/dev/null 2>&1 + return 0 + fi + return 1 +} + +function set_work_directory() +{ + [ ! -d $1 ] && return 1 + workdir=$(getfullpath $1)/dpdk +} + +function set_patch_directory() +{ + [ ! -d $1 ] && return 1 + patchdir=$(getfullpath $1) +} + +## parse args +while getopts "hw:p:dv:" OPT; do + case $OPT in + v) dpdkver=$OPTARG;; + w) set_work_directory $OPTARG ;; + p) set_patch_directory $OPTARG;; + d) build_options="${build_options} ${debug_options}";; + ?) help && exit 1;; + esac +done + +[ ! 
-d $workdir ] && mkdir $workdir +echo -e "\033[32mwork directory: $workdir\033[0m" + +[ ! -d $patchdir ] && echo -e "\033[31mdpdk patch file directory doesn't exist: $patchdir\033[0m" && exit 1 +echo -e "\033[32mdpdk patch directory: $patchdir\033[0m" + +echo -e "\033[32mbuild options: $build_options\033[0m" + +## prepare dpdk sources +cd $workdir +if [ ! -f $tarball ]; then + wget https://fast.dpdk.org/rel/$tarball -P $workdir + [ ! -f $tarball ] && echo -e "\033[31mfail to download $tarball\033[0m" && exit 1 +fi + +[ -d $workdir/$srcdir ] && echo -e "\033[33mremoving old source directory: $workdir/$srcdir\033[0m" && rm -rf $workdir/$srcdir +tar xf $tarball -C $workdir +echo "$(pwd), $workdir, $srcdir" +[ ! -d $workdir/$srcdir ] && echo -e "\033[31m$workdir/$srcdir directory is missing\033[0m" && exit 1 + +## patch dpdk +for patchfile in $(ls $patchdir) +do + patch -p1 -d $workdir/$srcdir < $patchdir/$patchfile + [ $? -ne 0 ] && echo -e "\033[31mfail to patch: $patchfile\033[0m" && exit 1 + echo -e "\033[32msucceed to patch: $patchfile\033[0m" +done + +## build dpdk and install +[ -d dpdkbuild ] && rm -rf dpdkbuild/* || mkdir dpdkbuild +[ -d dpdklib ] && rm -rf dpdklib/* || mkdir dpdklib + +meson $build_options -Dprefix=$(pwd)/dpdklib $srcdir dpdkbuild + +ninja -C dpdkbuild +[ $? -ne 0 ] && echo -e "\033[31mfail to build dpdk\033[0m" && exit 1 +ninja -C dpdkbuild install +[ $? 
-ne 0 ] && echo -e "\033[31mfail to install dpdk\033[0m" && exit 1 + +kni=dpdkbuild/kernel/linux/kni/rte_kni.ko +[ -f $kni ] && install -m 644 $kni dpdklib + +echo -e "DPDK library installed successfully into directory: \033[32m$(pwd)/dpdklib\033[0m" + +## export dpdk lib +echo -e "You can use this library in dpvs by running the command below:" +echo -e "\033[32m" +echo -e "export PKG_CONFIG_PATH=$(pwd)/dpdklib/lib64/pkgconfig" +echo -e "\033[0m" From 39760a14551781e7f91ee0a94ce0343c49c7d223 Mon Sep 17 00:00:00 2001 From: ywc689 Date: Mon, 21 Jun 2021 10:24:24 +0800 Subject: [PATCH 19/41] patch: remove patches of old dpdk versions Signed-off-by: ywc689 --- ...link-event-for-multicast-driver-part.patch | 108 -- ...iable-IP-header-len-for-checksum-API.patch | 48 - .../0003-pdump-support-filter.patch | 926 ------------------ ....2-to-support-rh75-provided-by-Jason.patch | 48 - ...link-event-for-multicast-driver-part.patch | 108 -- ...iable-IP-header-len-for-checksum-API.patch | 56 -- ...-and-change-dpdk-pdump-tool-for-DPVS.patch | 920 ----------------- .../enable-dpdk-eal-memory-debug.patch | 68 -- ...add-debug-log-for-ixgbe-fdir-setting.patch | 53 - ...link-event-for-multicast-driver-part.patch | 124 --- ...2-Patch-ixgbe-fdir-rte_flow-for-DPVS.patch | 147 --- ...iable-IP-header-len-for-checksum-API.patch | 56 -- ...e-flow_item-type-comparsion-in-flow_.patch | 33 - ...imental-attribute-of-rte_memseg_walk.patch | 42 - ...-and-change-dpdk-pdump-tool-for-dpvs.patch | 581 ----------- .../0006-enable-dpdk-eal-memory-debug.patch | 68 -- ...ode-4-problem-caused-by-LACP-failure.patch | 62 -- 17 files changed, 3448 deletions(-) delete mode 100644 patch/dpdk-stable-17.11.2/0001-kni-use-netlink-event-for-multicast-driver-part.patch delete mode 100644 patch/dpdk-stable-17.11.2/0002-net-support-variable-IP-header-len-for-checksum-API.patch delete mode 100644 patch/dpdk-stable-17.11.2/0003-pdump-support-filter.patch delete mode 100644 
patch/dpdk-stable-17.11.2/0004-patch-dpdk-17.11.2-to-support-rh75-provided-by-Jason.patch delete mode 100644 patch/dpdk-stable-17.11.6/0001-kni-use-netlink-event-for-multicast-driver-part.patch delete mode 100644 patch/dpdk-stable-17.11.6/0002-net-support-variable-IP-header-len-for-checksum-API.patch delete mode 100644 patch/dpdk-stable-17.11.6/0003-pdump-enable-and-change-dpdk-pdump-tool-for-DPVS.patch delete mode 100644 patch/dpdk-stable-17.11.6/enable-dpdk-eal-memory-debug.patch delete mode 100644 patch/dpdk-stable-18.11.2/0001-add-debug-log-for-ixgbe-fdir-setting.patch delete mode 100644 patch/dpdk-stable-18.11.2/0001-kni-use-netlink-event-for-multicast-driver-part.patch delete mode 100644 patch/dpdk-stable-18.11.2/0002-Patch-ixgbe-fdir-rte_flow-for-DPVS.patch delete mode 100644 patch/dpdk-stable-18.11.2/0002-net-support-variable-IP-header-len-for-checksum-API.patch delete mode 100644 patch/dpdk-stable-18.11.2/0003-driver-kni-enable-flow_item-type-comparsion-in-flow_.patch delete mode 100644 patch/dpdk-stable-18.11.2/0004-rm-rte_experimental-attribute-of-rte_memseg_walk.patch delete mode 100644 patch/dpdk-stable-18.11.2/0005-enable-pdump-and-change-dpdk-pdump-tool-for-dpvs.patch delete mode 100644 patch/dpdk-stable-18.11.2/0006-enable-dpdk-eal-memory-debug.patch delete mode 100644 patch/dpdk-stable-18.11.2/0007-Fix-bonding-mode-4-problem-caused-by-LACP-failure.patch diff --git a/patch/dpdk-stable-17.11.2/0001-kni-use-netlink-event-for-multicast-driver-part.patch b/patch/dpdk-stable-17.11.2/0001-kni-use-netlink-event-for-multicast-driver-part.patch deleted file mode 100644 index 05b10eba2..000000000 --- a/patch/dpdk-stable-17.11.2/0001-kni-use-netlink-event-for-multicast-driver-part.patch +++ /dev/null @@ -1,108 +0,0 @@ -From 52f4389c80b4b41386c53daf16d860305252f325 Mon Sep 17 00:00:00 2001 -From: Lei Chen -Date: Tue, 23 Jan 2018 12:39:56 +0800 -Subject: [PATCH 1/4] kni: use netlink event for multicast (driver part). 
- -kni driver send netlink event every time hw-multicast list updated by -kernel, the user kni app should capture the event and update multicast -to kni device. - -original way is using rte_kni_request to pass hw-multicast to user kni -module. that method works but finally memory corruption found, which is -not easy to address. ---- - lib/librte_eal/linuxapp/kni/kni_net.c | 68 +++++++++++++++++++++++++++++++++++ - 1 file changed, 68 insertions(+) - -diff --git a/lib/librte_eal/linuxapp/kni/kni_net.c b/lib/librte_eal/linuxapp/kni/kni_net.c -index db9f489..fab94d1 100644 ---- a/lib/librte_eal/linuxapp/kni/kni_net.c -+++ b/lib/librte_eal/linuxapp/kni/kni_net.c -@@ -35,6 +35,8 @@ - #include - #include - #include -+#include -+#include - - #include - #include -@@ -579,9 +581,75 @@ - return 0; - } - -+static size_t -+kni_nlmsg_size(void) -+{ -+ return NLMSG_ALIGN(sizeof(struct ifaddrmsg)) -+ + nla_total_size(4) /* IFA_ADDRESS */ -+ + nla_total_size(4) /* IFA_LOCAL */ -+ + nla_total_size(4) /* IFA_BROADCAST */ -+ + nla_total_size(IFNAMSIZ) /* IFA_LABEL */ -+ + nla_total_size(4) /* IFA_FLAGS */ -+ + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */ -+} -+ - static void - kni_net_set_rx_mode(struct net_device *dev) - { -+ /* -+ * send event to notify user (DPDK KNI app) that multicast list changed, -+ * so that it can monitor multicast join/leave and set HW mc-addrs to -+ * kni dev accordinglly. -+ * -+ * this event is just an notification, we do not save any mc-addr here -+ * (so attribute space for us). user kni app should get maddrs after -+ * receive this notification. -+ * -+ * I was expecting kernel send some rtnl event for multicast join/leave, -+ * but it doesn't. By checking the call-chain of SIOCADDMULTI (ip maddr, -+ * manages only hardware multicast) and IP_ADD_MEMBERSHIP (ip_mc_join_group, -+ * used to for IPv4 multicast), no rtnl event sent. -+ * -+ * so as workaround, modify kni driver here to send RTM_NEWADDR. 
-+ * it may not suitalbe to use this event for mcast, but that should works. -+ * hope that won't affect other listener to this event. -+ * -+ * previous solution was using rte_kni_request to pass hw-maddr list to user. -+ * it "works" for times but finally memory corruption found, which is -+ * not easy to address (lock was added and reviewed). That's why we use -+ * netlink event instead. -+ */ -+ struct sk_buff *skb; -+ struct net *net = dev_net(dev); -+ struct nlmsghdr *nlh; -+ struct ifaddrmsg *ifm; -+ -+ skb = nlmsg_new(kni_nlmsg_size(), GFP_ATOMIC); -+ if (!skb) -+ return; -+ -+ /* no other event for us ? */ -+ nlh = nlmsg_put(skb, 0, 0, RTM_NEWADDR, sizeof(*ifm), 0); -+ if (!nlh) { -+ kfree_skb(skb); -+ return; -+ } -+ -+ /* just send an notification so no other info */ -+ ifm = nlmsg_data(nlh); -+ memset(ifm, 0, sizeof(*ifm)); -+ ifm->ifa_family = AF_UNSPEC; -+ ifm->ifa_prefixlen = 0; -+ ifm->ifa_flags = 0; -+ ifm->ifa_scope = RT_SCOPE_NOWHERE; -+ ifm->ifa_index = 0; -+ -+ nlmsg_end(skb, nlh); -+ -+ /* other group ? */ -+ pr_debug("%s: rx-mode/multicast-list changed\n", __func__); -+ rtnl_notify(skb, net, 0, RTNLGRP_NOTIFY, NULL, GFP_ATOMIC); -+ return; - } - - static int --- -1.8.3.1 - diff --git a/patch/dpdk-stable-17.11.2/0002-net-support-variable-IP-header-len-for-checksum-API.patch b/patch/dpdk-stable-17.11.2/0002-net-support-variable-IP-header-len-for-checksum-API.patch deleted file mode 100644 index 96dd76ea8..000000000 --- a/patch/dpdk-stable-17.11.2/0002-net-support-variable-IP-header-len-for-checksum-API.patch +++ /dev/null @@ -1,48 +0,0 @@ -From a949f95267849630a750f1e72ee468d58b806589 Mon Sep 17 00:00:00 2001 -From: Lei Chen -Date: Tue, 6 Mar 2018 16:04:36 +0800 -Subject: [PATCH 2/4] net: support variable IP header len for checksum API. - -IPv4 checksum APIs use fixe IP header length, it will failed if there is -any IP option. Now calculating header length by "ihl" field, so that we -can support options. 
- -Signed-off-by: Lei Chen ---- - lib/librte_net/rte_ip.h | 6 +++--- - 1 file changed, 3 insertions(+), 3 deletions(-) - -diff --git a/lib/librte_net/rte_ip.h b/lib/librte_net/rte_ip.h -index 73ec398..e03f707 100644 ---- a/lib/librte_net/rte_ip.h -+++ b/lib/librte_net/rte_ip.h -@@ -314,7 +314,7 @@ struct ipv4_hdr { - rte_ipv4_cksum(const struct ipv4_hdr *ipv4_hdr) - { - uint16_t cksum; -- cksum = rte_raw_cksum(ipv4_hdr, sizeof(struct ipv4_hdr)); -+ cksum = rte_raw_cksum(ipv4_hdr, (ipv4_hdr->version_ihl & 0xf) * 4); - return (cksum == 0xffff) ? cksum : ~cksum; - } - -@@ -356,7 +356,7 @@ struct ipv4_hdr { - } else { - psd_hdr.len = rte_cpu_to_be_16( - (uint16_t)(rte_be_to_cpu_16(ipv4_hdr->total_length) -- - sizeof(struct ipv4_hdr))); -+ - (ipv4_hdr->version_ihl & 0xf) * 4)); - } - return rte_raw_cksum(&psd_hdr, sizeof(psd_hdr)); - } -@@ -381,7 +381,7 @@ struct ipv4_hdr { - uint32_t l4_len; - - l4_len = rte_be_to_cpu_16(ipv4_hdr->total_length) - -- sizeof(struct ipv4_hdr); -+ (ipv4_hdr->version_ihl & 0xf) * 4; - - cksum = rte_raw_cksum(l4_hdr, l4_len); - cksum += rte_ipv4_phdr_cksum(ipv4_hdr, 0); --- -1.8.3.1 - diff --git a/patch/dpdk-stable-17.11.2/0003-pdump-support-filter.patch b/patch/dpdk-stable-17.11.2/0003-pdump-support-filter.patch deleted file mode 100644 index 0a4ed7e80..000000000 --- a/patch/dpdk-stable-17.11.2/0003-pdump-support-filter.patch +++ /dev/null @@ -1,926 +0,0 @@ -diff -uparN /home/weiyanhua/dpvs/dpdk-stable-17.11.2/app/pdump/main.c dpdk-stable-17.11.2/app/pdump/main.c ---- /home/weiyanhua/dpvs/dpdk-stable-17.11.2/app/pdump/main.c 2018-04-19 22:09:22.000000000 +0800 -+++ dpdk-stable-17.11.2/app/pdump/main.c 2019-10-17 18:57:38.979759023 +0800 -@@ -54,6 +54,7 @@ - #include - #include - #include -+#include - - #define CMD_LINE_OPT_PDUMP "pdump" - #define PDUMP_PORT_ARG "port" -@@ -65,6 +66,13 @@ - #define PDUMP_RING_SIZE_ARG "ring-size" - #define PDUMP_MSIZE_ARG "mbuf-size" - #define PDUMP_NUM_MBUFS_ARG "total-num-mbufs" -+#define PDUMP_HOST_ARG 
"host" -+#define PDUMP_SRC_ARG "src-host" -+#define PDUMP_DST_ARG "dst-host" -+#define PDUMP_PROTO_PORT_AGE "proto-port" -+#define PDUMP_SPORT_ARG "src-port" -+#define PDUMP_DPORT_ARG "dst-port" -+#define PDUMP_PROTO_ARG "proto" - #define CMD_LINE_OPT_SER_SOCK_PATH "server-socket-path" - #define CMD_LINE_OPT_CLI_SOCK_PATH "client-socket-path" - -@@ -120,6 +128,13 @@ const char *valid_pdump_arguments[] = { - PDUMP_RING_SIZE_ARG, - PDUMP_MSIZE_ARG, - PDUMP_NUM_MBUFS_ARG, -+ PDUMP_HOST_ARG, -+ PDUMP_SRC_ARG, -+ PDUMP_DST_ARG, -+ PDUMP_PROTO_PORT_AGE, -+ PDUMP_SPORT_ARG, -+ PDUMP_DPORT_ARG, -+ PDUMP_PROTO_ARG, - NULL - }; - -@@ -153,6 +168,7 @@ struct pdump_tuples { - enum pcap_stream rx_vdev_stream_type; - enum pcap_stream tx_vdev_stream_type; - bool single_pdump_dev; -+ struct pdump_filter *filter; - - /* stats */ - struct pdump_stats stats; -@@ -180,6 +196,11 @@ pdump_usage(const char *prgname) - "(queue=)," - "(rx-dev= |" - " tx-dev=," -+ "[host= | src-host= |" -+ "dst-host=]," -+ "[proto=support:tcp/udp/icmp]," -+ "[proto-port= |src-port= |" -+ "dst-port=]," - "[ring-size=default:16384]," - "[mbuf-size=default:2176]," - "[total-num-mbufs=default:65535]'\n" -@@ -270,6 +291,64 @@ parse_uint_value(const char *key, const - } - - static int -+parse_host(const char *key __rte_unused, const char *value, void *extra_args) -+{ -+ struct pdump_tuples *pt = extra_args; -+ struct in_addr inaddr; -+ struct in6_addr inaddr6; -+ union addr addr; -+ int af = 0; -+ -+ if (inet_pton(AF_INET6, value, &inaddr6) > 0) { -+ af = AF_INET6; -+ addr.in6 = inaddr6; -+ } else if (inet_pton(AF_INET, value, &inaddr) > 0){ -+ af = AF_INET; -+ addr.in = inaddr; -+ } else { -+ printf("IP address invaled\n"); -+ return -EINVAL; -+ } -+ -+ if (pt->filter && pt->filter->af != 0 && af != pt->filter->af) { -+ printf("IPv4 and IPv6 conflict\n"); -+ return -EINVAL; -+ } else { -+ pt->filter->af = af; -+ } -+ -+ if (!strcmp(key, PDUMP_HOST_ARG)) { -+ rte_memcpy(&pt->filter->host_addr, &addr, 
sizeof(addr)); -+ } else if (!strcmp(key, PDUMP_SRC_ARG)) { -+ rte_memcpy(&pt->filter->s_addr, &addr, sizeof(addr)); -+ } else if (!strcmp(key, PDUMP_DST_ARG)) { -+ rte_memcpy(&pt->filter->d_addr, &addr, sizeof(addr)); -+ } -+ -+ return 0; -+} -+ -+static int -+parse_proto(const char *key __rte_unused, const char *value, void *extra_args) -+{ -+ struct pdump_tuples *pt = extra_args; -+ -+ if (!strcmp(value, "tcp")) { -+ pt->filter->proto = IPPROTO_TCP; -+ } else if (!strcmp(value, "udp")) { -+ pt->filter->proto = IPPROTO_UDP; -+ } else if (!strcmp(value, "icmp")) { -+ pt->filter->proto = IPPROTO_ICMP; -+ } else { -+ printf("invalid value:\"%s\" for key:\"%s\", " -+ "value must be tcp/udp/icmp\n", value, key); -+ return -EINVAL; -+ } -+ -+ return 0; -+} -+ -+static int - parse_pdump(const char *optarg) - { - struct rte_kvargs *kvlist; -@@ -396,6 +475,75 @@ parse_pdump(const char *optarg) - } else - pt->total_num_mbufs = MBUFS_PER_POOL; - -+ /* filter parsing and validation */ -+ pt->filter = rte_zmalloc("pdump_filter", -+ sizeof(struct pdump_filter), 0); -+ -+ cnt1 = rte_kvargs_count(kvlist, PDUMP_HOST_ARG); -+ if (cnt1 == 1) { -+ ret = rte_kvargs_process(kvlist, PDUMP_HOST_ARG, -+ &parse_host, pt); -+ if (ret < 0) -+ goto free_kvlist; -+ } -+ -+ cnt1 = rte_kvargs_count(kvlist, PDUMP_SRC_ARG); -+ if (cnt1 == 1) { -+ ret = rte_kvargs_process(kvlist, PDUMP_SRC_ARG, -+ &parse_host, pt); -+ if (ret < 0) -+ goto free_kvlist; -+ } -+ -+ cnt1 = rte_kvargs_count(kvlist, PDUMP_DST_ARG); -+ if (cnt1 == 1) { -+ ret = rte_kvargs_process(kvlist, PDUMP_DST_ARG, -+ &parse_host, pt); -+ if (ret < 0) -+ goto free_kvlist; -+ } -+ -+ cnt1 = rte_kvargs_count(kvlist, PDUMP_PROTO_PORT_AGE); -+ if (cnt1 == 1) { -+ v.min = 1; -+ v.max = UINT16_MAX; -+ ret = rte_kvargs_process(kvlist, PDUMP_PROTO_PORT_AGE, -+ &parse_uint_value, &v); -+ if (ret < 0) -+ goto free_kvlist; -+ pt->filter->proto_port = (uint16_t) v.val; -+ } -+ -+ cnt1 = rte_kvargs_count(kvlist, PDUMP_SPORT_ARG); -+ if (cnt1 == 
1) { -+ v.min = 1; -+ v.max = UINT16_MAX; -+ ret = rte_kvargs_process(kvlist, PDUMP_SPORT_ARG, -+ &parse_uint_value, &v); -+ if (ret < 0) -+ goto free_kvlist; -+ pt->filter->s_port = (uint16_t) v.val; -+ } -+ -+ cnt1 = rte_kvargs_count(kvlist, PDUMP_DPORT_ARG); -+ if (cnt1 == 1) { -+ v.min = 1; -+ v.max = UINT16_MAX; -+ ret = rte_kvargs_process(kvlist, PDUMP_DPORT_ARG, -+ &parse_uint_value, &v); -+ if (ret < 0) -+ goto free_kvlist; -+ pt->filter->d_port = (uint16_t) v.val; -+ } -+ -+ cnt1 = rte_kvargs_count(kvlist, PDUMP_PROTO_ARG); -+ if (cnt1 == 1) { -+ ret = rte_kvargs_process(kvlist, PDUMP_PROTO_ARG, -+ &parse_proto, pt); -+ if (ret < 0) -+ goto free_kvlist; -+ } -+ - num_tuples++; - - free_kvlist: -@@ -540,6 +688,8 @@ cleanup_rings(void) - rte_ring_free(pt->rx_ring); - if (pt->tx_ring) - rte_ring_free(pt->tx_ring); -+ if (pt->filter) -+ rte_free(pt->filter); - } - } - -@@ -583,11 +733,10 @@ configure_vdev(uint16_t port_id) - { - struct ether_addr addr; - const uint16_t rxRings = 0, txRings = 1; -- const uint8_t nb_ports = rte_eth_dev_count(); - int ret; - uint16_t q; - -- if (port_id > nb_ports) -+ if (!rte_eth_dev_is_valid_port(port_id)) - return -1; - - ret = rte_eth_dev_configure(port_id, rxRings, txRings, -@@ -799,20 +948,20 @@ enable_pdump(void) - pt->queue, - RTE_PDUMP_FLAG_RX, - pt->rx_ring, -- pt->mp, NULL); -+ pt->mp, pt->filter); - ret1 = rte_pdump_enable_by_deviceid( - pt->device_id, - pt->queue, - RTE_PDUMP_FLAG_TX, - pt->tx_ring, -- pt->mp, NULL); -+ pt->mp, pt->filter); - } else if (pt->dump_by_type == PORT_ID) { - ret = rte_pdump_enable(pt->port, pt->queue, - RTE_PDUMP_FLAG_RX, -- pt->rx_ring, pt->mp, NULL); -+ pt->rx_ring, pt->mp, pt->filter); - ret1 = rte_pdump_enable(pt->port, pt->queue, - RTE_PDUMP_FLAG_TX, -- pt->tx_ring, pt->mp, NULL); -+ pt->tx_ring, pt->mp, pt->filter); - } - } else if (pt->dir == RTE_PDUMP_FLAG_RX) { - if (pt->dump_by_type == DEVICE_ID) -@@ -820,22 +969,22 @@ enable_pdump(void) - pt->device_id, - pt->queue, - pt->dir, 
pt->rx_ring, -- pt->mp, NULL); -+ pt->mp, pt->filter); - else if (pt->dump_by_type == PORT_ID) - ret = rte_pdump_enable(pt->port, pt->queue, - pt->dir, -- pt->rx_ring, pt->mp, NULL); -+ pt->rx_ring, pt->mp, pt->filter); - } else if (pt->dir == RTE_PDUMP_FLAG_TX) { - if (pt->dump_by_type == DEVICE_ID) - ret = rte_pdump_enable_by_deviceid( - pt->device_id, - pt->queue, - pt->dir, -- pt->tx_ring, pt->mp, NULL); -+ pt->tx_ring, pt->mp, pt->filter); - else if (pt->dump_by_type == PORT_ID) - ret = rte_pdump_enable(pt->port, pt->queue, - pt->dir, -- pt->tx_ring, pt->mp, NULL); -+ pt->tx_ring, pt->mp, pt->filter); - } - if (ret < 0 || ret1 < 0) { - cleanup_pdump_resources(); -diff -uparN /home/weiyanhua/dpvs/dpdk-stable-17.11.2/app/pdump/Makefile dpdk-stable-17.11.2/app/pdump/Makefile ---- /home/weiyanhua/dpvs/dpdk-stable-17.11.2/app/pdump/Makefile 2018-04-19 22:09:22.000000000 +0800 -+++ dpdk-stable-17.11.2/app/pdump/Makefile 2019-10-16 20:21:23.939178027 +0800 -@@ -41,6 +41,6 @@ CFLAGS += $(WERROR_FLAGS) - - SRCS-y := main.c - --include $(RTE_SDK)/mk/rte.app.mk -+include $(RTE_SDK)/mk/rte.pdump.mk - - endif -diff -uparN /home/weiyanhua/dpvs/dpdk-stable-17.11.2/config/common_base dpdk-stable-17.11.2/config/common_base ---- /home/weiyanhua/dpvs/dpdk-stable-17.11.2/config/common_base 2018-04-19 22:09:22.000000000 +0800 -+++ dpdk-stable-17.11.2/config/common_base 2019-10-16 20:21:23.940178039 +0800 -@@ -397,7 +397,7 @@ CONFIG_RTE_PMD_RING_MAX_TX_RINGS=16 - # - # Compile software PMD backed by PCAP files - # --CONFIG_RTE_LIBRTE_PMD_PCAP=n -+CONFIG_RTE_LIBRTE_PMD_PCAP=y - - # - # Compile link bonding PMD library -diff -uparN /home/weiyanhua/dpvs/dpdk-stable-17.11.2/lib/librte_pdump/rte_pdump.c dpdk-stable-17.11.2/lib/librte_pdump/rte_pdump.c ---- /home/weiyanhua/dpvs/dpdk-stable-17.11.2/lib/librte_pdump/rte_pdump.c 2018-04-19 22:09:22.000000000 +0800 -+++ dpdk-stable-17.11.2/lib/librte_pdump/rte_pdump.c 2019-10-17 18:07:28.821435563 +0800 -@@ -46,6 +46,10 @@ - #include - 
#include - #include -+#include -+#include -+#include -+#include - - #include "rte_pdump.h" - -@@ -177,6 +181,132 @@ pdump_pktmbuf_copy(struct rte_mbuf *m, s - return m_dup; - } - -+static bool -+inet_addr_equal(int af, const union addr *a1, -+ const union addr *a2) -+{ -+ switch (af) { -+ case AF_INET: -+ return a1->in.s_addr == a2->in.s_addr; -+ case AF_INET6: -+ return memcmp(a1->in6.s6_addr, a2->in6.s6_addr, 16) == 0; -+ default: -+ return memcmp(a1, a2, sizeof(union addr)) == 0; -+ } -+} -+ -+static bool -+inet_is_addr_any(int af, const union addr *addr) -+{ -+ switch (af) { -+ case AF_INET: -+ return addr->in.s_addr == htonl(INADDR_ANY); -+ case AF_INET6: -+ return IN6_ARE_ADDR_EQUAL(&addr->in6, &in6addr_any); -+ default: -+ return false; -+ } -+} -+ -+/* support vlan/arp/ipv4/ipv6 */ -+static int -+pdump_filter(struct rte_mbuf *m, struct pdump_filter *filter) -+{ -+ struct ether_hdr *eth_hdr; -+ struct vlan_eth_hdr *vlan_eth_hdr; -+ union addr s_addr, d_addr; -+ int prepend = 0; -+ uint16_t type = 0; -+ uint16_t iph_len = 0; -+ uint8_t proto = 0; -+ -+ int af; -+ -+ if (filter->af == 0 && filter->s_port == 0 && -+ filter->d_port == 0 && filter->proto == 0) -+ return 0; -+ -+ eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *); -+ -+ if (eth_hdr->ether_type == htons(ETH_P_8021Q)) { -+ prepend += sizeof(struct vlan_eth_hdr); -+ vlan_eth_hdr = rte_pktmbuf_mtod(m, struct vlan_eth_hdr *); -+ type = vlan_eth_hdr->h_vlan_encapsulated_proto; -+ } else { -+ prepend += sizeof(struct ether_hdr); -+ eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *); -+ type = eth_hdr->ether_type; -+ } -+ -+ if (rte_pktmbuf_adj(m, prepend) == NULL) -+ goto prepend; -+ -+ if (type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) { -+ struct arp_hdr *arp = rte_pktmbuf_mtod(m, struct arp_hdr *); -+ af = AF_INET; -+ s_addr.in.s_addr = arp->arp_data.arp_sip; -+ d_addr.in.s_addr = arp->arp_data.arp_tip; -+ } else if (type == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) { -+ struct ipv4_hdr *ip4 = rte_pktmbuf_mtod(m, 
struct ipv4_hdr *); -+ af = AF_INET; -+ s_addr.in.s_addr = ip4->src_addr; -+ d_addr.in.s_addr = ip4->dst_addr; -+ proto = ip4->next_proto_id; -+ iph_len = (ip4->version_ihl & 0xf) << 2; -+ } else if (type == rte_cpu_to_be_16(ETHER_TYPE_IPv6)) { -+ struct ipv6_hdr *ip6 = rte_pktmbuf_mtod(m, struct ipv6_hdr *); -+ af = AF_INET6; -+ rte_memcpy(&s_addr.in6, &ip6->src_addr, 16); -+ rte_memcpy(&d_addr.in6, &ip6->dst_addr, 16); -+ proto = ip6->proto; -+ iph_len = sizeof(struct ipv6_hdr); -+ } else { -+ goto prepend; -+ } -+ -+ /*filter*/ -+ if (!inet_is_addr_any(af, &filter->s_addr) && -+ !inet_addr_equal(af, &filter->s_addr, &s_addr)) -+ goto prepend; -+ if (!inet_is_addr_any(af, &filter->d_addr) && -+ !inet_addr_equal(af, &filter->d_addr, &d_addr)) -+ goto prepend; -+ if (!inet_is_addr_any(af, &filter->host_addr) && -+ !inet_addr_equal(af, &filter->host_addr, &s_addr) && -+ !inet_addr_equal(af, &filter->host_addr, &d_addr)) -+ goto prepend; -+ -+ if (filter->proto && filter->proto != proto) -+ goto prepend; -+ -+ if (filter->s_port || filter->d_port) { -+ if (proto != IPPROTO_TCP && proto != IPPROTO_UDP) -+ goto prepend; -+ struct udp_hdr _uh; -+ const struct udp_hdr *uh; -+ uh = rte_pktmbuf_read(m, iph_len, sizeof(_uh), &_uh); -+ if (uh == NULL) -+ goto prepend; -+ if (filter->s_port && filter->s_port != rte_cpu_to_be_16(uh->src_port)) -+ goto prepend; -+ -+ if (filter->d_port && filter->d_port != rte_cpu_to_be_16(uh->dst_port)) -+ goto prepend; -+ -+ if (filter->proto_port && -+ filter->proto_port != rte_cpu_to_be_16(uh->src_port) && -+ filter->proto_port != rte_cpu_to_be_16(uh->dst_port)) -+ goto prepend; -+ } -+ -+ rte_pktmbuf_prepend(m, prepend); -+ return 0; -+ -+prepend: -+ rte_pktmbuf_prepend(m, prepend); -+ return -1; -+} -+ - static inline void - pdump_copy(struct rte_mbuf **pkts, uint16_t nb_pkts, void *user_params) - { -@@ -193,6 +323,8 @@ pdump_copy(struct rte_mbuf **pkts, uint1 - ring = cbs->ring; - mp = cbs->mp; - for (i = 0; i < nb_pkts; i++) { -+ if 
(pdump_filter(pkts[i], cbs->filter) != 0) -+ continue; - p = pdump_pktmbuf_copy(pkts[i], mp); - if (p) - dup_bufs[d_pkts++] = p; -@@ -229,7 +361,7 @@ pdump_tx(uint16_t port __rte_unused, uin - static int - pdump_register_rx_callbacks(uint16_t end_q, uint16_t port, uint16_t queue, - struct rte_ring *ring, struct rte_mempool *mp, -- uint16_t operation) -+ struct pdump_filter *filter, uint16_t operation) - { - uint16_t qid; - struct pdump_rxtx_cbs *cbs = NULL; -@@ -247,6 +379,7 @@ pdump_register_rx_callbacks(uint16_t end - } - cbs->ring = ring; - cbs->mp = mp; -+ cbs->filter = filter; - cbs->cb = rte_eth_add_first_rx_callback(port, qid, - pdump_rx, cbs); - if (cbs->cb == NULL) { -@@ -283,7 +416,7 @@ pdump_register_rx_callbacks(uint16_t end - static int - pdump_register_tx_callbacks(uint16_t end_q, uint16_t port, uint16_t queue, - struct rte_ring *ring, struct rte_mempool *mp, -- uint16_t operation) -+ struct pdump_filter *filter, uint16_t operation) - { - - uint16_t qid; -@@ -302,6 +435,7 @@ pdump_register_tx_callbacks(uint16_t end - } - cbs->ring = ring; - cbs->mp = mp; -+ cbs->filter = filter; - cbs->cb = rte_eth_add_tx_callback(port, qid, pdump_tx, - cbs); - if (cbs->cb == NULL) { -@@ -345,6 +479,7 @@ set_pdump_rxtx_cbs(struct pdump_request - uint16_t operation; - struct rte_ring *ring; - struct rte_mempool *mp; -+ struct pdump_filter *filter; - - flags = p->flags; - operation = p->op; -@@ -360,6 +495,7 @@ set_pdump_rxtx_cbs(struct pdump_request - queue = p->data.en_v1.queue; - ring = p->data.en_v1.ring; - mp = p->data.en_v1.mp; -+ filter = p->data.en_v1.filter; - } else { - ret = rte_eth_dev_get_port_by_name(p->data.dis_v1.device, - &port); -@@ -372,6 +508,7 @@ set_pdump_rxtx_cbs(struct pdump_request - queue = p->data.dis_v1.queue; - ring = p->data.dis_v1.ring; - mp = p->data.dis_v1.mp; -+ filter = p->data.dis_v1.filter; - } - - /* validation if packet capture is for all queues */ -@@ -403,7 +540,7 @@ set_pdump_rxtx_cbs(struct pdump_request - if (flags & 
RTE_PDUMP_FLAG_RX) { - end_q = (queue == RTE_PDUMP_ALL_QUEUES) ? nb_rx_q : queue + 1; - ret = pdump_register_rx_callbacks(end_q, port, queue, ring, mp, -- operation); -+ filter, operation); - if (ret < 0) - return ret; - } -@@ -412,7 +549,7 @@ set_pdump_rxtx_cbs(struct pdump_request - if (flags & RTE_PDUMP_FLAG_TX) { - end_q = (queue == RTE_PDUMP_ALL_QUEUES) ? nb_tx_q : queue + 1; - ret = pdump_register_tx_callbacks(end_q, port, queue, ring, mp, -- operation); -+ filter, operation); - if (ret < 0) - return ret; - } -diff -uparN /home/weiyanhua/dpvs/dpdk-stable-17.11.2/lib/librte_pdump/rte_pdump.h dpdk-stable-17.11.2/lib/librte_pdump/rte_pdump.h ---- /home/weiyanhua/dpvs/dpdk-stable-17.11.2/lib/librte_pdump/rte_pdump.h 2018-04-19 22:09:22.000000000 +0800 -+++ dpdk-stable-17.11.2/lib/librte_pdump/rte_pdump.h 2019-10-17 17:54:59.401175031 +0800 -@@ -44,6 +44,8 @@ - #include - #include - #include -+#include -+#include - - #ifdef __cplusplus - extern "C" { -@@ -63,6 +65,31 @@ enum rte_pdump_socktype { - RTE_PDUMP_SOCKET_CLIENT = 2 - }; - -+union addr { -+ struct in_addr in; -+ struct in6_addr in6; -+}; -+ -+struct pdump_filter { -+ int af; -+ union addr s_addr; -+ union addr d_addr; -+ union addr host_addr; //s_addr or d_addr -+ -+ uint8_t proto; -+ uint16_t proto_port; //s_port or d_port -+ uint16_t s_port; -+ uint16_t d_port; -+}; -+ -+struct vlan_eth_hdr { -+ unsigned char h_dest[ETH_ALEN]; -+ unsigned char h_source[ETH_ALEN]; -+ unsigned short h_vlan_proto; -+ unsigned short h_vlan_TCI; -+ unsigned short h_vlan_encapsulated_proto; -+}; -+ - /** - * Initialize packet capturing handling - * -diff -uparN /home/weiyanhua/dpvs/dpdk-stable-17.11.2/mk/rte.pdump.mk dpdk-stable-17.11.2/mk/rte.pdump.mk ---- /home/weiyanhua/dpvs/dpdk-stable-17.11.2/mk/rte.pdump.mk 1970-01-01 08:00:00.000000000 +0800 -+++ dpdk-stable-17.11.2/mk/rte.pdump.mk 2019-10-16 20:21:23.941178051 +0800 -@@ -0,0 +1,349 @@ -+# BSD LICENSE -+# -+# Copyright(c) 2010-2017 Intel Corporation. 
All rights reserved. -+# Copyright(c) 2014-2015 6WIND S.A. -+# All rights reserved. -+# -+# Redistribution and use in source and binary forms, with or without -+# modification, are permitted provided that the following conditions -+# are met: -+# -+# * Redistributions of source code must retain the above copyright -+# notice, this list of conditions and the following disclaimer. -+# * Redistributions in binary form must reproduce the above copyright -+# notice, this list of conditions and the following disclaimer in -+# the documentation and/or other materials provided with the -+# distribution. -+# * Neither the name of Intel Corporation nor the names of its -+# contributors may be used to endorse or promote products derived -+# from this software without specific prior written permission. -+# -+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-+ -+include $(RTE_SDK)/mk/internal/rte.compile-pre.mk -+include $(RTE_SDK)/mk/internal/rte.install-pre.mk -+include $(RTE_SDK)/mk/internal/rte.clean-pre.mk -+include $(RTE_SDK)/mk/internal/rte.build-pre.mk -+ -+# VPATH contains at least SRCDIR -+VPATH += $(SRCDIR) -+ -+_BUILD = $(APP) -+_INSTALL = $(INSTALL-FILES-y) $(SYMLINK-FILES-y) -+_INSTALL += $(RTE_OUTPUT)/app/$(APP) $(RTE_OUTPUT)/app/$(APP).map -+POSTINSTALL += target-appinstall -+_CLEAN = doclean -+POSTCLEAN += target-appclean -+ -+ifeq ($(NO_LDSCRIPT),) -+LDSCRIPT = $(RTE_LDSCRIPT) -+endif -+ -+# Link only the libraries used in the application -+LDFLAGS += --as-needed -+ -+# default path for libs -+_LDLIBS-y += -L$(RTE_SDK_BIN)/lib -+ -+# -+# Order is important: from higher level to lower level -+# -+_LDLIBS-$(CONFIG_RTE_LIBRTE_FLOW_CLASSIFY) += -lrte_flow_classify -+_LDLIBS-$(CONFIG_RTE_LIBRTE_PIPELINE) += -lrte_pipeline -+_LDLIBS-$(CONFIG_RTE_LIBRTE_TABLE) += -lrte_table -+_LDLIBS-$(CONFIG_RTE_LIBRTE_PORT) += -lrte_port -+ -+_LDLIBS-$(CONFIG_RTE_LIBRTE_PDUMP) += -lrte_pdump -+_LDLIBS-$(CONFIG_RTE_LIBRTE_DISTRIBUTOR) += -lrte_distributor -+_LDLIBS-$(CONFIG_RTE_LIBRTE_IP_FRAG) += -lrte_ip_frag -+_LDLIBS-$(CONFIG_RTE_LIBRTE_GRO) += -lrte_gro -+_LDLIBS-$(CONFIG_RTE_LIBRTE_GSO) += -lrte_gso -+_LDLIBS-$(CONFIG_RTE_LIBRTE_METER) += -lrte_meter -+_LDLIBS-$(CONFIG_RTE_LIBRTE_LPM) += -lrte_lpm -+# librte_acl needs --whole-archive because of weak functions -+_LDLIBS-$(CONFIG_RTE_LIBRTE_ACL) += --whole-archive -+_LDLIBS-$(CONFIG_RTE_LIBRTE_ACL) += --no-whole-archive -+_LDLIBS-$(CONFIG_RTE_LIBRTE_JOBSTATS) += -lrte_jobstats -+_LDLIBS-$(CONFIG_RTE_LIBRTE_METRICS) += -lrte_metrics -+_LDLIBS-$(CONFIG_RTE_LIBRTE_BITRATE) += -lrte_bitratestats -+_LDLIBS-$(CONFIG_RTE_LIBRTE_LATENCY_STATS) += -lrte_latencystats -+_LDLIBS-$(CONFIG_RTE_LIBRTE_POWER) += -lrte_power -+ -+_LDLIBS-$(CONFIG_RTE_LIBRTE_TIMER) += -lrte_timer -+_LDLIBS-$(CONFIG_RTE_LIBRTE_EFD) += -lrte_efd -+ -+_LDLIBS-y += --whole-archive -+ 
-+_LDLIBS-$(CONFIG_RTE_LIBRTE_CFGFILE) += -lrte_cfgfile -+_LDLIBS-$(CONFIG_RTE_LIBRTE_HASH) += -lrte_hash -+_LDLIBS-$(CONFIG_RTE_LIBRTE_VHOST) += -lrte_vhost -+_LDLIBS-$(CONFIG_RTE_LIBRTE_KVARGS) += -lrte_kvargs -+_LDLIBS-$(CONFIG_RTE_LIBRTE_MBUF) += -lrte_mbuf -+_LDLIBS-$(CONFIG_RTE_LIBRTE_NET) += -lrte_net -+_LDLIBS-$(CONFIG_RTE_LIBRTE_ETHER) += -lrte_ethdev -+_LDLIBS-$(CONFIG_RTE_LIBRTE_CRYPTODEV) += -lrte_cryptodev -+_LDLIBS-$(CONFIG_RTE_LIBRTE_SECURITY) += -lrte_security -+_LDLIBS-$(CONFIG_RTE_LIBRTE_MEMPOOL) += -lrte_mempool -+_LDLIBS-$(CONFIG_RTE_DRIVER_MEMPOOL_RING) += -lrte_mempool_ring -+_LDLIBS-$(CONFIG_RTE_LIBRTE_RING) += -lrte_ring -+_LDLIBS-$(CONFIG_RTE_LIBRTE_PCI) += -lrte_pci -+_LDLIBS-$(CONFIG_RTE_LIBRTE_EAL) += -lrte_eal -+_LDLIBS-$(CONFIG_RTE_LIBRTE_CMDLINE) += -lrte_cmdline -+_LDLIBS-$(CONFIG_RTE_LIBRTE_SCHED) += -lrte_sched -+ -+ifeq ($(CONFIG_RTE_EXEC_ENV_LINUXAPP),y) -+_LDLIBS-$(CONFIG_RTE_LIBRTE_KNI) += -lrte_kni -+endif -+ -+_LDLIBS-$(CONFIG_RTE_LIBRTE_PCI_BUS) += -lrte_bus_pci -+_LDLIBS-$(CONFIG_RTE_LIBRTE_VDEV_BUS) += -lrte_bus_vdev -+ -+ifeq ($(CONFIG_RTE_BUILD_SHARED_LIB),n) -+# plugins (link only if static libraries) -+ -+_LDLIBS-$(CONFIG_RTE_DRIVER_MEMPOOL_STACK) += -lrte_mempool_stack -+ -+_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_AF_PACKET) += -lrte_pmd_af_packet -+_LDLIBS-$(CONFIG_RTE_LIBRTE_ARK_PMD) += -lrte_pmd_ark -+_LDLIBS-$(CONFIG_RTE_LIBRTE_AVP_PMD) += -lrte_pmd_avp -+_LDLIBS-$(CONFIG_RTE_LIBRTE_BNX2X_PMD) += -lrte_pmd_bnx2x -lz -+_LDLIBS-$(CONFIG_RTE_LIBRTE_BNXT_PMD) += -lrte_pmd_bnxt -+_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += -lrte_pmd_bond -+_LDLIBS-$(CONFIG_RTE_LIBRTE_CXGBE_PMD) += -lrte_pmd_cxgbe -+ifeq ($(CONFIG_RTE_LIBRTE_DPAA_BUS),y) -+_LDLIBS-$(CONFIG_RTE_LIBRTE_DPAA_BUS) += -lrte_bus_dpaa -+_LDLIBS-$(CONFIG_RTE_LIBRTE_DPAA_MEMPOOL) += -lrte_mempool_dpaa -+_LDLIBS-$(CONFIG_RTE_LIBRTE_DPAA_PMD) += -lrte_pmd_dpaa -+endif -+_LDLIBS-$(CONFIG_RTE_LIBRTE_DPAA2_PMD) += -lrte_pmd_dpaa2 -+_LDLIBS-$(CONFIG_RTE_LIBRTE_E1000_PMD) += 
-lrte_pmd_e1000 -+_LDLIBS-$(CONFIG_RTE_LIBRTE_ENA_PMD) += -lrte_pmd_ena -+_LDLIBS-$(CONFIG_RTE_LIBRTE_ENIC_PMD) += -lrte_pmd_enic -+_LDLIBS-$(CONFIG_RTE_LIBRTE_FM10K_PMD) += -lrte_pmd_fm10k -+_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += -lrte_pmd_failsafe -+_LDLIBS-$(CONFIG_RTE_LIBRTE_I40E_PMD) += -lrte_pmd_i40e -+_LDLIBS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += -lrte_pmd_ixgbe -+ifeq ($(CONFIG_RTE_LIBRTE_KNI),y) -+_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_KNI) += -lrte_pmd_kni -+endif -+_LDLIBS-$(CONFIG_RTE_LIBRTE_LIO_PMD) += -lrte_pmd_lio -+_LDLIBS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += -lrte_pmd_mlx4 -libverbs -lmlx4 -+_LDLIBS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += -lrte_pmd_mlx5 -libverbs -lmlx5 -+_LDLIBS-$(CONFIG_RTE_LIBRTE_MRVL_PMD) += -lrte_pmd_mrvl -L$(LIBMUSDK_PATH)/lib -lmusdk -+_LDLIBS-$(CONFIG_RTE_LIBRTE_NFP_PMD) += -lrte_pmd_nfp -+_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_NULL) += -lrte_pmd_null -+_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_PCAP) += -lrte_pmd_pcap -lpcap -+_LDLIBS-$(CONFIG_RTE_LIBRTE_QEDE_PMD) += -lrte_pmd_qede -+_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_RING) += -lrte_pmd_ring -+ifeq ($(CONFIG_RTE_LIBRTE_SCHED),y) -+_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_SOFTNIC) += -lrte_pmd_softnic -+endif -+_LDLIBS-$(CONFIG_RTE_LIBRTE_SFC_EFX_PMD) += -lrte_pmd_sfc_efx -+_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_SZEDATA2) += -lrte_pmd_szedata2 -lsze2 -+_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += -lrte_pmd_tap -+_LDLIBS-$(CONFIG_RTE_LIBRTE_THUNDERX_NICVF_PMD) += -lrte_pmd_thunderx_nicvf -+_LDLIBS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += -lrte_pmd_virtio -+ifeq ($(CONFIG_RTE_LIBRTE_VHOST),y) -+_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_VHOST) += -lrte_pmd_vhost -+endif # $(CONFIG_RTE_LIBRTE_VHOST) -+_LDLIBS-$(CONFIG_RTE_LIBRTE_VMXNET3_PMD) += -lrte_pmd_vmxnet3_uio -+ -+ifeq ($(CONFIG_RTE_LIBRTE_CRYPTODEV),y) -+_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_AESNI_MB) += -lrte_pmd_aesni_mb -+_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_AESNI_MB) += -L$(AESNI_MULTI_BUFFER_LIB_PATH) -lIPSec_MB -+_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_AESNI_GCM) += -lrte_pmd_aesni_gcm 
-+_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_AESNI_GCM) += -L$(AESNI_MULTI_BUFFER_LIB_PATH) -lIPSec_MB -+_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_OPENSSL) += -lrte_pmd_openssl -lcrypto -+_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_NULL_CRYPTO) += -lrte_pmd_null_crypto -+_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_QAT) += -lrte_pmd_qat -lcrypto -+_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_SNOW3G) += -lrte_pmd_snow3g -+_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_SNOW3G) += -L$(LIBSSO_SNOW3G_PATH)/build -lsso_snow3g -+_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_KASUMI) += -lrte_pmd_kasumi -+_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_KASUMI) += -L$(LIBSSO_KASUMI_PATH)/build -lsso_kasumi -+_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_ZUC) += -lrte_pmd_zuc -+_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_ZUC) += -L$(LIBSSO_ZUC_PATH)/build -lsso_zuc -+_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_ARMV8_CRYPTO) += -lrte_pmd_armv8 -+_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_ARMV8_CRYPTO) += -L$(ARMV8_CRYPTO_LIB_PATH) -larmv8_crypto -+_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_MRVL_CRYPTO) += -L$(LIBMUSDK_PATH)/lib -lrte_pmd_mrvl_crypto -lmusdk -+_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_CRYPTO_SCHEDULER) += -lrte_pmd_crypto_scheduler -+ifeq ($(CONFIG_RTE_LIBRTE_FSLMC_BUS),y) -+_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_DPAA2_SEC) += -lrte_pmd_dpaa2_sec -+_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_DPAA2_SEC) += -lrte_mempool_dpaa2 -+_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_DPAA2_SEC) += -lrte_bus_fslmc -+endif # CONFIG_RTE_LIBRTE_FSLMC_BUS -+ -+ifeq ($(CONFIG_RTE_LIBRTE_DPAA_BUS),y) -+_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_DPAA_SEC) += -lrte_bus_dpaa -+_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_DPAA_SEC) += -lrte_pmd_dpaa_sec -+endif # CONFIG_RTE_LIBRTE_DPAA_BUS -+ -+endif # CONFIG_RTE_LIBRTE_CRYPTODEV -+ -+ifeq ($(CONFIG_RTE_LIBRTE_DPAA2_PMD),y) -+_LDLIBS-$(CONFIG_RTE_LIBRTE_DPAA2_PMD) += -lrte_bus_fslmc -+_LDLIBS-$(CONFIG_RTE_LIBRTE_DPAA2_PMD) += -lrte_mempool_dpaa2 -+endif # CONFIG_RTE_LIBRTE_DPAA2_PMD -+ -+endif # !CONFIG_RTE_BUILD_SHARED_LIBS -+ -+_LDLIBS-y += --no-whole-archive -+ -+ifeq ($(CONFIG_RTE_BUILD_SHARED_LIB),n) -+# The static libraries do not know their 
dependencies. -+# So linking with static library requires explicit dependencies. -+_LDLIBS-$(CONFIG_RTE_LIBRTE_EAL) += -lrt -+ifeq ($(CONFIG_RTE_EXEC_ENV_LINUXAPP)$(CONFIG_RTE_EAL_NUMA_AWARE_HUGEPAGES),yy) -+_LDLIBS-$(CONFIG_RTE_LIBRTE_EAL) += -lnuma -+endif -+_LDLIBS-$(CONFIG_RTE_LIBRTE_SCHED) += -lm -+_LDLIBS-$(CONFIG_RTE_LIBRTE_SCHED) += -lrt -+_LDLIBS-$(CONFIG_RTE_LIBRTE_MEMBER) += -lm -+_LDLIBS-$(CONFIG_RTE_LIBRTE_METER) += -lm -+ifeq ($(CONFIG_RTE_LIBRTE_VHOST_NUMA),y) -+_LDLIBS-$(CONFIG_RTE_LIBRTE_VHOST) += -lnuma -+endif -+_LDLIBS-$(CONFIG_RTE_PORT_PCAP) += -lpcap -+endif # !CONFIG_RTE_BUILD_SHARED_LIBS -+ -+_LDLIBS-y += $(EXECENV_LDLIBS) -+ -+LDLIBS += $(_LDLIBS-y) $(CPU_LDLIBS) $(EXTRA_LDLIBS) -+ -+# all the words except the first one -+allbutfirst = $(wordlist 2,$(words $(1)),$(1)) -+ -+# Eliminate duplicates without sorting, only keep the last occurrence -+filter-libs = \ -+ $(if $(1),$(strip\ -+ $(if \ -+ $(and \ -+ $(filter $(firstword $(1)),$(call allbutfirst,$(1))),\ -+ $(filter -l%,$(firstword $(1)))),\ -+ ,\ -+ $(firstword $(1))) \ -+ $(call filter-libs,$(call allbutfirst,$(1))))) -+ -+LDLIBS := $(call filter-libs,$(LDLIBS)) -+ -+ifeq ($(RTE_DEVEL_BUILD)$(CONFIG_RTE_BUILD_SHARED_LIB),yy) -+LDFLAGS += -rpath=$(RTE_SDK_BIN)/lib -+endif -+ -+MAPFLAGS = -Map=$@.map --cref -+ -+.PHONY: all -+all: install -+ -+.PHONY: install -+install: build _postinstall -+ -+_postinstall: build -+ -+.PHONY: build -+build: _postbuild -+ -+exe2cmd = $(strip $(call dotfile,$(patsubst %,%.cmd,$(1)))) -+ -+ifeq ($(LINK_USING_CC),1) -+O_TO_EXE = $(CC) -o $@ $(CFLAGS) $(EXTRA_CFLAGS) $(OBJS-y) $(call linkerprefix, \ -+ $(LDLIBS) $(LDFLAGS) $(LDFLAGS_$(@)) $(EXTRA_LDFLAGS) \ -+ $(MAPFLAGS)) -+else -+O_TO_EXE = $(LD) -o $@ $(OBJS-y) \ -+ $(LDLIBS) $(LDFLAGS) $(LDFLAGS_$(@)) $(EXTRA_LDFLAGS) \ -+ $(MAPFLAGS) -+endif -+O_TO_EXE_STR = $(subst ','\'',$(O_TO_EXE)) #'# fix syntax highlight -+O_TO_EXE_DISP = $(if $(V),"$(O_TO_EXE_STR)"," LD $(@)") -+O_TO_EXE_CMD = "cmd_$@ = 
$(O_TO_EXE_STR)" -+O_TO_EXE_DO = @set -e; \ -+ echo $(O_TO_EXE_DISP); \ -+ $(O_TO_EXE) && \ -+ echo $(O_TO_EXE_CMD) > $(call exe2cmd,$(@)) -+ -+-include .$(APP).cmd -+ -+# path where libraries are retrieved -+LDLIBS_PATH := $(subst -Wl$(comma)-L,,$(filter -Wl$(comma)-L%,$(LDLIBS))) -+LDLIBS_PATH += $(subst -L,,$(filter -L%,$(LDLIBS))) -+ -+# list of .a files that are linked to this application -+LDLIBS_NAMES := $(patsubst -l%,lib%.a,$(filter -l%,$(LDLIBS))) -+LDLIBS_NAMES += $(patsubst -Wl$(comma)-l%,lib%.a,$(filter -Wl$(comma)-l%,$(LDLIBS))) -+ -+# list of found libraries files (useful for deps). If not found, the -+# library is silently ignored and dep won't be checked -+LDLIBS_FILES := $(sort $(wildcard $(foreach dir,$(LDLIBS_PATH),\ -+ $(addprefix $(dir)/,$(LDLIBS_NAMES))))) -+ -+# -+# Compile executable file if needed -+# -+$(APP): $(OBJS-y) $(LDLIBS_FILES) $(DEP_$(APP)) $(LDSCRIPT) FORCE -+ @[ -d $(dir $@) ] || mkdir -p $(dir $@) -+ $(if $(D),\ -+ @echo -n "$< -> $@ " ; \ -+ echo -n "file_missing=$(call boolean,$(file_missing)) " ; \ -+ echo -n "cmdline_changed=$(call boolean,$(call cmdline_changed,$(O_TO_EXE_STR))) " ; \ -+ echo -n "depfile_missing=$(call boolean,$(depfile_missing)) " ; \ -+ echo "depfile_newer=$(call boolean,$(depfile_newer)) ") -+ $(if $(or \ -+ $(file_missing),\ -+ $(call cmdline_changed,$(O_TO_EXE_STR)),\ -+ $(depfile_missing),\ -+ $(depfile_newer)),\ -+ $(O_TO_EXE_DO)) -+ -+# -+# install app in $(RTE_OUTPUT)/app -+# -+$(RTE_OUTPUT)/app/$(APP): $(APP) -+ @echo " INSTALL-APP $(APP)" -+ @[ -d $(RTE_OUTPUT)/app ] || mkdir -p $(RTE_OUTPUT)/app -+ $(Q)cp -f $(APP) $(RTE_OUTPUT)/app -+ -+# -+# install app map file in $(RTE_OUTPUT)/app -+# -+$(RTE_OUTPUT)/app/$(APP).map: $(APP) -+ @echo " INSTALL-MAP $(APP).map" -+ @[ -d $(RTE_OUTPUT)/app ] || mkdir -p $(RTE_OUTPUT)/app -+ $(Q)cp -f $(APP).map $(RTE_OUTPUT)/app -+ -+# -+# Clean all generated files -+# -+.PHONY: clean -+clean: _postclean -+ $(Q)rm -f $(_BUILD_TARGETS) $(_INSTALL_TARGETS) 
$(_CLEAN_TARGETS) -+ -+.PHONY: doclean -+doclean: -+ $(Q)rm -rf $(APP) $(OBJS-all) $(DEPS-all) $(DEPSTMP-all) \ -+ $(CMDS-all) $(INSTALL-FILES-all) .$(APP).cmd $(APP).map -+ -+ -+include $(RTE_SDK)/mk/internal/rte.compile-post.mk -+include $(RTE_SDK)/mk/internal/rte.install-post.mk -+include $(RTE_SDK)/mk/internal/rte.clean-post.mk -+include $(RTE_SDK)/mk/internal/rte.build-post.mk -+ -+ifneq ($(wildcard $(RTE_SDK)/mk/target/$(RTE_TARGET)/rte.app.mk),) -+include $(RTE_SDK)/mk/target/$(RTE_TARGET)/rte.app.mk -+else -+include $(RTE_SDK)/mk/target/generic/rte.app.mk -+endif -+ -+.PHONY: FORCE -+FORCE: -+ diff --git a/patch/dpdk-stable-17.11.2/0004-patch-dpdk-17.11.2-to-support-rh75-provided-by-Jason.patch b/patch/dpdk-stable-17.11.2/0004-patch-dpdk-17.11.2-to-support-rh75-provided-by-Jason.patch deleted file mode 100644 index 3cb205562..000000000 --- a/patch/dpdk-stable-17.11.2/0004-patch-dpdk-17.11.2-to-support-rh75-provided-by-Jason.patch +++ /dev/null @@ -1,48 +0,0 @@ -From 95e115dd2bfe5f7a7e54af0f73577af0a68fdba0 Mon Sep 17 00:00:00 2001 -From: wencyu -Date: Wed, 13 Nov 2019 10:17:35 +0800 -Subject: [PATCH 4/4] patch dpdk-17.11.2 to support rh75 (provided by Jason Joo - ) - ---- - lib/librte_eal/linuxapp/kni/compat.h | 6 ++++++ - lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h | 6 ++++++ - 2 files changed, 12 insertions(+) - -diff --git a/lib/librte_eal/linuxapp/kni/compat.h b/lib/librte_eal/linuxapp/kni/compat.h -index 3f8c0bc..be707bc 100644 ---- a/lib/librte_eal/linuxapp/kni/compat.h -+++ b/lib/librte_eal/linuxapp/kni/compat.h -@@ -101,6 +101,12 @@ - #undef NET_NAME_UNKNOWN - #endif - -+#if (defined(RHEL_RELEASE_CODE) && \ -+ (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7, 5)) && \ -+ (RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(8, 0))) -+#define ndo_change_mtu ndo_change_mtu_rh74 -+#endif -+ - #if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0) - #define HAVE_SIGNAL_FUNCTIONS_OWN_HEADER - #endif -diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h 
b/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h -index 443a3f2..46cad90 100644 ---- a/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h -+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h -@@ -3932,6 +3932,12 @@ static inline struct sk_buff *__kc__vlan_hwaccel_put_tag(struct sk_buff *skb, - #define vlan_tx_tag_present skb_vlan_tag_present - #endif - -+#if (defined(RHEL_RELEASE_CODE) && \ -+ (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7, 5)) && \ -+ (RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(8, 0))) -+#define ndo_change_mtu ndo_change_mtu_rh74 -+#endif -+ - #if ((LINUX_VERSION_CODE >= KERNEL_VERSION(4, 9, 0)) || \ - (SLE_VERSION_CODE && SLE_VERSION_CODE >= SLE_VERSION(12, 3, 0))) - #define HAVE_VF_VLAN_PROTO --- -1.8.3.1 - diff --git a/patch/dpdk-stable-17.11.6/0001-kni-use-netlink-event-for-multicast-driver-part.patch b/patch/dpdk-stable-17.11.6/0001-kni-use-netlink-event-for-multicast-driver-part.patch deleted file mode 100644 index 0934c174a..000000000 --- a/patch/dpdk-stable-17.11.6/0001-kni-use-netlink-event-for-multicast-driver-part.patch +++ /dev/null @@ -1,108 +0,0 @@ -From b5dc636f0ccdccb3d4e94f3453b6e95a631bb10a Mon Sep 17 00:00:00 2001 -From: ywc689 -Date: Fri, 28 Jun 2019 17:52:13 +0800 -Subject: [PATCH 1/3] kni: use netlink event for multicast (driver part) - -kni driver send netlink event every time hw-multicast list updated by -kernel, the user kni app should capture the event and update multicast -to kni device. - -original way is using rte_kni_request to pass hw-multicast to user kni -module. that method works but finally memory corruption found, which is -to kni device. 
---- - lib/librte_eal/linuxapp/kni/kni_net.c | 68 +++++++++++++++++++++++++++++++++++ - 1 file changed, 68 insertions(+) - -diff --git a/lib/librte_eal/linuxapp/kni/kni_net.c b/lib/librte_eal/linuxapp/kni/kni_net.c -index db9f489..fab94d1 100644 ---- a/lib/librte_eal/linuxapp/kni/kni_net.c -+++ b/lib/librte_eal/linuxapp/kni/kni_net.c -@@ -35,6 +35,8 @@ - #include - #include - #include -+#include -+#include - - #include - #include -@@ -579,9 +581,75 @@ - return 0; - } - -+static size_t -+kni_nlmsg_size(void) -+{ -+ return NLMSG_ALIGN(sizeof(struct ifaddrmsg)) -+ + nla_total_size(4) /* IFA_ADDRESS */ -+ + nla_total_size(4) /* IFA_LOCAL */ -+ + nla_total_size(4) /* IFA_BROADCAST */ -+ + nla_total_size(IFNAMSIZ) /* IFA_LABEL */ -+ + nla_total_size(4) /* IFA_FLAGS */ -+ + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */ -+} -+ - static void - kni_net_set_rx_mode(struct net_device *dev) - { -+ /* -+ * send event to notify user (DPDK KNI app) that multicast list changed, -+ * so that it can monitor multicast join/leave and set HW mc-addrs to -+ * kni dev accordinglly. -+ * -+ * this event is just an notification, we do not save any mc-addr here -+ * (so attribute space for us). user kni app should get maddrs after -+ * receive this notification. -+ * -+ * I was expecting kernel send some rtnl event for multicast join/leave, -+ * but it doesn't. By checking the call-chain of SIOCADDMULTI (ip maddr, -+ * manages only hardware multicast) and IP_ADD_MEMBERSHIP (ip_mc_join_group, -+ * used to for IPv4 multicast), no rtnl event sent. -+ * -+ * so as workaround, modify kni driver here to send RTM_NEWADDR. -+ * it may not suitalbe to use this event for mcast, but that should works. -+ * hope that won't affect other listener to this event. -+ * -+ * previous solution was using rte_kni_request to pass hw-maddr list to user. -+ * it "works" for times but finally memory corruption found, which is -+ * not easy to address (lock was added and reviewed). 
That's why we use -+ * netlink event instead. -+ */ -+ struct sk_buff *skb; -+ struct net *net = dev_net(dev); -+ struct nlmsghdr *nlh; -+ struct ifaddrmsg *ifm; -+ -+ skb = nlmsg_new(kni_nlmsg_size(), GFP_ATOMIC); -+ if (!skb) -+ return; -+ -+ /* no other event for us ? */ -+ nlh = nlmsg_put(skb, 0, 0, RTM_NEWADDR, sizeof(*ifm), 0); -+ if (!nlh) { -+ kfree_skb(skb); -+ return; -+ } -+ -+ /* just send an notification so no other info */ -+ ifm = nlmsg_data(nlh); -+ memset(ifm, 0, sizeof(*ifm)); -+ ifm->ifa_family = AF_UNSPEC; -+ ifm->ifa_prefixlen = 0; -+ ifm->ifa_flags = 0; -+ ifm->ifa_scope = RT_SCOPE_NOWHERE; -+ ifm->ifa_index = 0; -+ -+ nlmsg_end(skb, nlh); -+ -+ /* other group ? */ -+ pr_debug("%s: rx-mode/multicast-list changed\n", __func__); -+ rtnl_notify(skb, net, 0, RTNLGRP_NOTIFY, NULL, GFP_ATOMIC); -+ return; - } - - static int --- -1.8.3.1 - diff --git a/patch/dpdk-stable-17.11.6/0002-net-support-variable-IP-header-len-for-checksum-API.patch b/patch/dpdk-stable-17.11.6/0002-net-support-variable-IP-header-len-for-checksum-API.patch deleted file mode 100644 index 0a24083af..000000000 --- a/patch/dpdk-stable-17.11.6/0002-net-support-variable-IP-header-len-for-checksum-API.patch +++ /dev/null @@ -1,56 +0,0 @@ -From 4be86649fd61173409040093eecffdbc30570988 Mon Sep 17 00:00:00 2001 -From: ywc689 -Date: Fri, 28 Jun 2019 17:48:12 +0800 -Subject: [PATCH 2/3] net: support variable IP header len for checksum API. - -IPv4 checksum APIs use fixe IP header length, it will failed if there is -any IP option. Now calculating header length by "ihl" field, so that we -can support options. 
---- - lib/librte_net/rte_ip.h | 13 +++++++------ - 1 file changed, 7 insertions(+), 6 deletions(-) - -diff --git a/lib/librte_net/rte_ip.h b/lib/librte_net/rte_ip.h -index 8d4907f..0d504f6 100644 ---- a/lib/librte_net/rte_ip.h -+++ b/lib/librte_net/rte_ip.h -@@ -314,7 +314,7 @@ struct ipv4_hdr { - rte_ipv4_cksum(const struct ipv4_hdr *ipv4_hdr) - { - uint16_t cksum; -- cksum = rte_raw_cksum(ipv4_hdr, sizeof(struct ipv4_hdr)); -+ cksum = rte_raw_cksum(ipv4_hdr, (ipv4_hdr->version_ihl & 0xf) * 4); - return (cksum == 0xffff) ? cksum : (uint16_t)~cksum; - } - -@@ -356,7 +356,7 @@ struct ipv4_hdr { - } else { - psd_hdr.len = rte_cpu_to_be_16( - (uint16_t)(rte_be_to_cpu_16(ipv4_hdr->total_length) -- - sizeof(struct ipv4_hdr))); -+ - (ipv4_hdr->version_ihl & 0xf) * 4)); - } - return rte_raw_cksum(&psd_hdr, sizeof(psd_hdr)); - } -@@ -379,13 +379,14 @@ struct ipv4_hdr { - rte_ipv4_udptcp_cksum(const struct ipv4_hdr *ipv4_hdr, const void *l4_hdr) - { - uint32_t cksum; -- uint32_t l3_len, l4_len; -+ uint32_t l3_len, l4_len, iphlen; - - l3_len = rte_be_to_cpu_16(ipv4_hdr->total_length); -- if (l3_len < sizeof(struct ipv4_hdr)) -- return 0; -+ iphlen = (ipv4_hdr->version_ihl & 0xf) * 4; - -- l4_len = l3_len - sizeof(struct ipv4_hdr); -+ if (l3_len < iphlen) -+ return 0; -+ l4_len = l3_len - iphlen; - - cksum = rte_raw_cksum(l4_hdr, l4_len); - cksum += rte_ipv4_phdr_cksum(ipv4_hdr, 0); --- -1.8.3.1 - diff --git a/patch/dpdk-stable-17.11.6/0003-pdump-enable-and-change-dpdk-pdump-tool-for-DPVS.patch b/patch/dpdk-stable-17.11.6/0003-pdump-enable-and-change-dpdk-pdump-tool-for-DPVS.patch deleted file mode 100644 index b7eebfdcd..000000000 --- a/patch/dpdk-stable-17.11.6/0003-pdump-enable-and-change-dpdk-pdump-tool-for-DPVS.patch +++ /dev/null @@ -1,920 +0,0 @@ -diff -uparN dpdk-stable-17.11.6/app/pdump/main.c dpdk-stable-17.11.6-new/app/pdump/main.c ---- dpdk-stable-17.11.6/app/pdump/main.c 2019-05-22 03:15:57.000000000 +0800 -+++ dpdk-stable-17.11.6-new/app/pdump/main.c 2020-08-24 
11:09:32.166622729 +0800 -@@ -54,6 +54,7 @@ - #include - #include - #include -+#include - - #define CMD_LINE_OPT_PDUMP "pdump" - #define PDUMP_PORT_ARG "port" -@@ -65,6 +66,13 @@ - #define PDUMP_RING_SIZE_ARG "ring-size" - #define PDUMP_MSIZE_ARG "mbuf-size" - #define PDUMP_NUM_MBUFS_ARG "total-num-mbufs" -+#define PDUMP_HOST_ARG "host" -+#define PDUMP_SRC_ARG "src-host" -+#define PDUMP_DST_ARG "dst-host" -+#define PDUMP_PROTO_PORT_AGE "proto-port" -+#define PDUMP_SPORT_ARG "src-port" -+#define PDUMP_DPORT_ARG "dst-port" -+#define PDUMP_PROTO_ARG "proto" - #define CMD_LINE_OPT_SER_SOCK_PATH "server-socket-path" - #define CMD_LINE_OPT_CLI_SOCK_PATH "client-socket-path" - -@@ -120,6 +128,13 @@ const char *valid_pdump_arguments[] = { - PDUMP_RING_SIZE_ARG, - PDUMP_MSIZE_ARG, - PDUMP_NUM_MBUFS_ARG, -+ PDUMP_HOST_ARG, -+ PDUMP_SRC_ARG, -+ PDUMP_DST_ARG, -+ PDUMP_PROTO_PORT_AGE, -+ PDUMP_SPORT_ARG, -+ PDUMP_DPORT_ARG, -+ PDUMP_PROTO_ARG, - NULL - }; - -@@ -153,6 +168,7 @@ struct pdump_tuples { - enum pcap_stream rx_vdev_stream_type; - enum pcap_stream tx_vdev_stream_type; - bool single_pdump_dev; -+ struct pdump_filter *filter; - - /* stats */ - struct pdump_stats stats; -@@ -180,6 +196,11 @@ pdump_usage(const char *prgname) - "(queue=)," - "(rx-dev= |" - " tx-dev=," -+ "[host= | src-host= |" -+ "dst-host=]," -+ "[proto=support:tcp/udp/icmp]," -+ "[proto-port= |src-port= |" -+ "dst-port=]," - "[ring-size=default:16384]," - "[mbuf-size=default:2176]," - "[total-num-mbufs=default:65535]'\n" -@@ -270,6 +291,60 @@ parse_uint_value(const char *key, const - } - - static int -+parse_host(const char *key __rte_unused, const char *value, void *extra_args) { -+ struct pdump_tuples *pt =extra_args; -+ struct in_addr inaddr; -+ struct in6_addr inaddr6; -+ union addr addr; -+ int af = 0; -+ -+ if (inet_pton(AF_INET6, value, &inaddr6) > 0) { -+ af = AF_INET6; -+ addr.in6 = inaddr6; -+ } else if (inet_pton(AF_INET, value, &inaddr) > 0){ -+ af = AF_INET; -+ addr.in = inaddr; -+ } else { 
-+ printf("IP address invalid\n"); -+ return -EINVAL; -+ } -+ -+ if (pt->filter && pt->filter->af != 0 && af != pt->filter->af) { -+ printf("IPV4 and IPV6 conflict \n"); -+ return -EINVAL; -+ } else { -+ pt->filter->af = af; -+ } -+ -+ if (!strcmp(key, PDUMP_HOST_ARG)) { -+ rte_memcpy(&pt->filter->host_addr, &addr, sizeof(addr)); -+ } else if (!strcmp(key, PDUMP_SRC_ARG)){ -+ rte_memcpy(&pt->filter->s_addr, &addr, sizeof(addr)); -+ } else if (!strcmp(key, PDUMP_DST_ARG)){ -+ rte_memcpy(&pt->filter->d_addr, &addr, sizeof(addr)); -+ } -+ return 0; -+} -+ -+static int -+parse_proto(const char *key __rte_unused, const char *value, void *extra_args) { -+ struct pdump_tuples *pt =extra_args; -+ -+ if (!strcmp(value, "tcp")) { -+ pt->filter->proto = IPPROTO_TCP; -+ } else if (!strcmp(value, "udp")) { -+ pt->filter->proto = IPPROTO_UDP; -+ } else if (!strcmp(value, "icmp")) { -+ pt->filter->proto = IPPROTO_ICMP; -+ } else { -+ printf("invalid value:\"%s\" for key:\"%s\", " -+ "value must be tcp/udp/icmp\n", value, key); -+ return -EINVAL; -+ } -+ return 0; -+} -+ -+static int - parse_pdump(const char *optarg) - { - struct rte_kvargs *kvlist; -@@ -396,6 +471,73 @@ parse_pdump(const char *optarg) - } else - pt->total_num_mbufs = MBUFS_PER_POOL; - -+ /*filter parsing and validation*/ -+ pt->filter = rte_zmalloc("pdump_filter", -+ sizeof(struct pdump_filter), 0); -+ cnt1 = rte_kvargs_count(kvlist, PDUMP_HOST_ARG); -+ if (cnt1 == 1) { -+ ret = rte_kvargs_process(kvlist, PDUMP_HOST_ARG, -+ &parse_host, pt); -+ if (ret < 0) -+ goto free_kvlist; -+ } -+ -+ cnt1 = rte_kvargs_count(kvlist, PDUMP_SRC_ARG); -+ if (cnt1 == 1) { -+ ret = rte_kvargs_process(kvlist, PDUMP_SRC_ARG, -+ &parse_host, pt); -+ if (ret < 0) -+ goto free_kvlist; -+ } -+ -+ cnt1 = rte_kvargs_count(kvlist, PDUMP_DST_ARG); -+ if (cnt1 == 1) { -+ ret = rte_kvargs_process(kvlist, PDUMP_DST_ARG, -+ &parse_host, pt); -+ if (ret < 0) -+ goto free_kvlist; -+ } -+ -+ cnt1 = rte_kvargs_count(kvlist, PDUMP_PROTO_PORT_AGE); 
-+ if (cnt1 == 1) { -+ v.min = 1; -+ v.max = UINT16_MAX; -+ ret = rte_kvargs_process(kvlist, PDUMP_PROTO_PORT_AGE, -+ &parse_uint_value, &v); -+ if (ret < 0) -+ goto free_kvlist; -+ pt->filter->proto_port = (uint16_t) v.val; -+ } -+ -+ cnt1 = rte_kvargs_count(kvlist, PDUMP_SPORT_ARG); -+ if (cnt1 == 1) { -+ v.min = 1; -+ v.max = UINT16_MAX; -+ ret = rte_kvargs_process(kvlist, PDUMP_SPORT_ARG, -+ &parse_uint_value, &v); -+ if (ret < 0) -+ goto free_kvlist; -+ pt->filter->s_port = (uint16_t) v.val; -+ } -+ -+ cnt1 = rte_kvargs_count(kvlist, PDUMP_DPORT_ARG); -+ if (cnt1 == 1) { -+ v.min = 1; -+ v.max = UINT16_MAX; -+ ret = rte_kvargs_process(kvlist, PDUMP_DPORT_ARG, -+ &parse_uint_value, &v); -+ if (ret < 0) -+ goto free_kvlist; -+ pt->filter->d_port = (uint16_t) v.val; -+ } -+ cnt1 = rte_kvargs_count(kvlist, PDUMP_PROTO_ARG); -+ if (cnt1 == 1) { -+ ret = rte_kvargs_process(kvlist, PDUMP_PROTO_ARG, -+ &parse_proto, pt); -+ if (ret < 0) -+ goto free_kvlist; -+ } -+ - num_tuples++; - - free_kvlist: -@@ -540,6 +682,8 @@ cleanup_rings(void) - rte_ring_free(pt->rx_ring); - if (pt->tx_ring) - rte_ring_free(pt->tx_ring); -+ if (pt->filter) -+ rte_free(pt->filter); - } - } - -@@ -583,11 +727,10 @@ configure_vdev(uint16_t port_id) - { - struct ether_addr addr; - const uint16_t rxRings = 0, txRings = 1; -- const uint8_t nb_ports = rte_eth_dev_count(); - int ret; - uint16_t q; - -- if (port_id > nb_ports) -+ if (!rte_eth_dev_is_valid_port(port_id)) - return -1; - - ret = rte_eth_dev_configure(port_id, rxRings, txRings, -@@ -799,20 +942,20 @@ enable_pdump(void) - pt->queue, - RTE_PDUMP_FLAG_RX, - pt->rx_ring, -- pt->mp, NULL); -+ pt->mp, pt->filter); - ret1 = rte_pdump_enable_by_deviceid( - pt->device_id, - pt->queue, - RTE_PDUMP_FLAG_TX, - pt->tx_ring, -- pt->mp, NULL); -+ pt->mp, pt->filter); - } else if (pt->dump_by_type == PORT_ID) { - ret = rte_pdump_enable(pt->port, pt->queue, - RTE_PDUMP_FLAG_RX, -- pt->rx_ring, pt->mp, NULL); -+ pt->rx_ring, pt->mp, pt->filter); - ret1 = 
rte_pdump_enable(pt->port, pt->queue, - RTE_PDUMP_FLAG_TX, -- pt->tx_ring, pt->mp, NULL); -+ pt->tx_ring, pt->mp, pt->filter); - } - } else if (pt->dir == RTE_PDUMP_FLAG_RX) { - if (pt->dump_by_type == DEVICE_ID) -@@ -820,22 +963,22 @@ enable_pdump(void) - pt->device_id, - pt->queue, - pt->dir, pt->rx_ring, -- pt->mp, NULL); -+ pt->mp, pt->filter); - else if (pt->dump_by_type == PORT_ID) - ret = rte_pdump_enable(pt->port, pt->queue, - pt->dir, -- pt->rx_ring, pt->mp, NULL); -+ pt->rx_ring, pt->mp, pt->filter); - } else if (pt->dir == RTE_PDUMP_FLAG_TX) { - if (pt->dump_by_type == DEVICE_ID) - ret = rte_pdump_enable_by_deviceid( - pt->device_id, - pt->queue, - pt->dir, -- pt->tx_ring, pt->mp, NULL); -+ pt->tx_ring, pt->mp, pt->filter); - else if (pt->dump_by_type == PORT_ID) - ret = rte_pdump_enable(pt->port, pt->queue, - pt->dir, -- pt->tx_ring, pt->mp, NULL); -+ pt->tx_ring, pt->mp, pt->filter); - } - if (ret < 0 || ret1 < 0) { - cleanup_pdump_resources(); -diff -uparN dpdk-stable-17.11.6/app/pdump/Makefile dpdk-stable-17.11.6-new/app/pdump/Makefile ---- dpdk-stable-17.11.6/app/pdump/Makefile 2019-05-22 03:15:57.000000000 +0800 -+++ dpdk-stable-17.11.6-new/app/pdump/Makefile 2020-08-19 19:12:09.011111072 +0800 -@@ -41,6 +41,6 @@ CFLAGS += $(WERROR_FLAGS) - - SRCS-y := main.c - --include $(RTE_SDK)/mk/rte.app.mk -+include $(RTE_SDK)/mk/rte.pdump.mk - - endif -diff -uparN dpdk-stable-17.11.6/config/common_base dpdk-stable-17.11.6-new/config/common_base ---- dpdk-stable-17.11.6/config/common_base 2019-05-22 03:15:57.000000000 +0800 -+++ dpdk-stable-17.11.6-new/config/common_base 2020-08-19 19:12:09.011111072 +0800 -@@ -399,7 +399,7 @@ CONFIG_RTE_PMD_RING_MAX_TX_RINGS=16 - # - # Compile software PMD backed by PCAP files - # --CONFIG_RTE_LIBRTE_PMD_PCAP=n -+CONFIG_RTE_LIBRTE_PMD_PCAP=y - - # - # Compile link bonding PMD library -diff -uparN dpdk-stable-17.11.6/lib/librte_pdump/rte_pdump.c dpdk-stable-17.11.6-new/lib/librte_pdump/rte_pdump.c ---- 
dpdk-stable-17.11.6/lib/librte_pdump/rte_pdump.c 2019-05-22 03:15:57.000000000 +0800 -+++ dpdk-stable-17.11.6-new/lib/librte_pdump/rte_pdump.c 2020-08-24 12:03:10.436176887 +0800 -@@ -46,6 +46,10 @@ - #include - #include - #include -+#include -+#include -+#include -+#include - - #include "rte_pdump.h" - -@@ -177,6 +181,132 @@ pdump_pktmbuf_copy(struct rte_mbuf *m, s - return m_dup; - } - -+static bool -+inet_addr_equal(int af, const union addr *a1, const union addr *a2) -+{ -+ switch (af) { -+ case AF_INET: -+ return a1->in.s_addr == a2->in.s_addr; -+ case AF_INET6: -+ return memcmp(a1->in6.s6_addr, a2->in6.s6_addr, 16) == 0; -+ default: -+ return memcmp(a1, a2, sizeof(union addr)) == 0; -+ } -+} -+ -+static bool -+inet_is_addr_any(int af, const union addr *addr) -+{ -+ switch (af) { -+ case AF_INET: -+ return addr->in.s_addr == htonl(INADDR_ANY); -+ case AF_INET6: -+ return IN6_ARE_ADDR_EQUAL(&addr->in6, &in6addr_any); -+ default: -+ return false; -+ } -+ -+} -+ -+/* support vlan/arp/ipv4ipv6 */ -+static int -+pdump_filter(struct rte_mbuf *m, struct pdump_filter *filter) -+{ -+ struct ether_hdr *eth_hdr; -+ struct vlan_eth_hdr *vlan_eth_hdr; -+ union addr s_addr, d_addr; -+ int prepend = 0; -+ uint16_t type = 0; -+ uint16_t iph_len = 0; -+ uint8_t proto = 0; -+ -+ int af; -+ -+ if (filter->af == 0 && filter->s_port == 0 && -+ filter->d_port == 0 && filter->proto == 0) -+ return 0; -+ -+ eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *); -+ -+ if (eth_hdr->ether_type == htons(ETH_P_8021Q)) { -+ prepend += sizeof(struct vlan_eth_hdr); -+ vlan_eth_hdr = rte_pktmbuf_mtod(m, struct vlan_eth_hdr *); -+ type = vlan_eth_hdr->h_vlan_encapsulated_proto; -+ } else { -+ prepend += sizeof(struct ether_hdr); -+ eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *); -+ type = eth_hdr->ether_type; -+ } -+ -+ if (rte_pktmbuf_adj(m, prepend) == NULL) -+ goto prepend; -+ -+ if (type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) { -+ struct arp_hdr *arp = rte_pktmbuf_mtod(m, struct arp_hdr *); -+ 
af = AF_INET; -+ s_addr.in.s_addr = arp->arp_data.arp_sip; -+ d_addr.in.s_addr = arp->arp_data.arp_tip; -+ //proto = IPPROTO_ICMP; -+ } else if (type == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) { -+ struct ipv4_hdr *ip4 = rte_pktmbuf_mtod(m, struct ipv4_hdr *); -+ af = AF_INET; -+ s_addr.in.s_addr = ip4->src_addr; -+ d_addr.in.s_addr = ip4->dst_addr; -+ proto = ip4->next_proto_id; -+ iph_len = (ip4->version_ihl & 0xf) << 2; -+ } else if (type == rte_cpu_to_be_16(ETHER_TYPE_IPv6)) { -+ struct ipv6_hdr *ip6 = rte_pktmbuf_mtod(m, struct ipv6_hdr *); -+ af = AF_INET6; -+ rte_memcpy(&s_addr.in6, &ip6->src_addr, 16); -+ rte_memcpy(&d_addr.in6, &ip6->dst_addr, 16); -+ proto = ip6->proto; -+ iph_len = sizeof(struct ipv6_hdr); -+ } else { -+ goto prepend; -+ } -+ -+ /* filter */ -+ if (!inet_is_addr_any(af, &filter->s_addr) && -+ !inet_addr_equal(af, &filter->s_addr, &s_addr)) -+ goto prepend; -+ if (!inet_is_addr_any(af, &filter->d_addr) && -+ !inet_addr_equal(af, &filter->d_addr, &d_addr)) -+ goto prepend; -+ if (!inet_is_addr_any(af, &filter->host_addr) && -+ !inet_addr_equal(af, &filter->host_addr, &s_addr) && -+ !inet_addr_equal(af, &filter->host_addr, &d_addr)) -+ goto prepend; -+ -+ if (filter->proto && filter->proto != proto) -+ goto prepend; -+ -+ if (filter->s_port || filter->d_port) { -+ if (proto != IPPROTO_TCP && proto != IPPROTO_UDP) -+ goto prepend; -+ struct udp_hdr _uh; -+ const struct udp_hdr *uh; -+ uh = rte_pktmbuf_read(m, iph_len, sizeof(_uh), &_uh); -+ if (uh == NULL) -+ goto prepend; -+ if (filter->s_port && filter->s_port != rte_cpu_to_be_16(uh->src_port)) -+ goto prepend; -+ if (filter->d_port && filter->d_port != rte_cpu_to_be_16(uh->dst_port)) -+ goto prepend; -+ -+ if (filter->proto_port && -+ filter->proto_port != rte_cpu_to_be_16(uh->src_port) && -+ filter->proto_port != rte_cpu_to_be_16(uh->dst_port)) -+ goto prepend; -+ } -+ -+ rte_pktmbuf_prepend(m, prepend); -+ return 0; -+ -+prepend: -+ rte_pktmbuf_prepend(m, prepend); -+ return -1; -+} -+ - 
static inline void - pdump_copy(struct rte_mbuf **pkts, uint16_t nb_pkts, void *user_params) - { -@@ -193,6 +323,8 @@ pdump_copy(struct rte_mbuf **pkts, uint1 - ring = cbs->ring; - mp = cbs->mp; - for (i = 0; i < nb_pkts; i++) { -+ if (pdump_filter(pkts[i], cbs->filter) != 0) -+ continue; - p = pdump_pktmbuf_copy(pkts[i], mp); - if (p) - dup_bufs[d_pkts++] = p; -@@ -229,7 +361,7 @@ pdump_tx(uint16_t port __rte_unused, uin - static int - pdump_register_rx_callbacks(uint16_t end_q, uint16_t port, uint16_t queue, - struct rte_ring *ring, struct rte_mempool *mp, -- uint16_t operation) -+ struct pdump_filter *filter, uint16_t operation) - { - uint16_t qid; - struct pdump_rxtx_cbs *cbs = NULL; -@@ -247,6 +379,7 @@ pdump_register_rx_callbacks(uint16_t end - } - cbs->ring = ring; - cbs->mp = mp; -+ cbs->filter = filter; - cbs->cb = rte_eth_add_first_rx_callback(port, qid, - pdump_rx, cbs); - if (cbs->cb == NULL) { -@@ -283,7 +416,7 @@ pdump_register_rx_callbacks(uint16_t end - static int - pdump_register_tx_callbacks(uint16_t end_q, uint16_t port, uint16_t queue, - struct rte_ring *ring, struct rte_mempool *mp, -- uint16_t operation) -+ struct pdump_filter *filter, uint16_t operation) - { - - uint16_t qid; -@@ -302,6 +435,7 @@ pdump_register_tx_callbacks(uint16_t end - } - cbs->ring = ring; - cbs->mp = mp; -+ cbs->filter = filter; - cbs->cb = rte_eth_add_tx_callback(port, qid, pdump_tx, - cbs); - if (cbs->cb == NULL) { -@@ -345,6 +479,7 @@ set_pdump_rxtx_cbs(struct pdump_request - uint16_t operation; - struct rte_ring *ring; - struct rte_mempool *mp; -+ struct pdump_filter *filter; - - flags = p->flags; - operation = p->op; -@@ -360,6 +495,7 @@ set_pdump_rxtx_cbs(struct pdump_request - queue = p->data.en_v1.queue; - ring = p->data.en_v1.ring; - mp = p->data.en_v1.mp; -+ filter = p->data.en_v1.filter; - } else { - ret = rte_eth_dev_get_port_by_name(p->data.dis_v1.device, - &port); -@@ -372,6 +508,7 @@ set_pdump_rxtx_cbs(struct pdump_request - queue = p->data.dis_v1.queue; - 
ring = p->data.dis_v1.ring; - mp = p->data.dis_v1.mp; -+ filter = p->data.dis_v1.filter; - } - - /* validation if packet capture is for all queues */ -@@ -403,7 +540,7 @@ set_pdump_rxtx_cbs(struct pdump_request - if (flags & RTE_PDUMP_FLAG_RX) { - end_q = (queue == RTE_PDUMP_ALL_QUEUES) ? nb_rx_q : queue + 1; - ret = pdump_register_rx_callbacks(end_q, port, queue, ring, mp, -- operation); -+ filter, operation); - if (ret < 0) - return ret; - } -@@ -412,7 +549,7 @@ set_pdump_rxtx_cbs(struct pdump_request - if (flags & RTE_PDUMP_FLAG_TX) { - end_q = (queue == RTE_PDUMP_ALL_QUEUES) ? nb_tx_q : queue + 1; - ret = pdump_register_tx_callbacks(end_q, port, queue, ring, mp, -- operation); -+ filter, operation); - if (ret < 0) - return ret; - } -diff -uparN dpdk-stable-17.11.6/lib/librte_pdump/rte_pdump.h dpdk-stable-17.11.6-new/lib/librte_pdump/rte_pdump.h ---- dpdk-stable-17.11.6/lib/librte_pdump/rte_pdump.h 2019-05-22 03:15:57.000000000 +0800 -+++ dpdk-stable-17.11.6-new/lib/librte_pdump/rte_pdump.h 2020-08-19 19:12:09.012111071 +0800 -@@ -44,6 +44,8 @@ - #include - #include - #include -+#include -+#include - - #ifdef __cplusplus - extern "C" { -@@ -63,6 +65,31 @@ enum rte_pdump_socktype { - RTE_PDUMP_SOCKET_CLIENT = 2 - }; - -+union addr { -+ struct in_addr in; -+ struct in6_addr in6; -+}; -+ -+struct pdump_filter { -+ int af; -+ union addr s_addr; //s_addr or dst_addr -+ union addr d_addr; //s_addr or dst_addr -+ union addr host_addr; //s_addr or dst_addr -+ -+ uint8_t proto; -+ uint16_t proto_port; -+ uint16_t s_port; -+ uint16_t d_port; -+}; -+ -+struct vlan_eth_hdr { -+ unsigned char h_dest[ETH_ALEN]; -+ unsigned char h_source[ETH_ALEN]; -+ unsigned short h_vlan_proto; -+ unsigned short h_vlan_TCI; -+ unsigned short h_vlan_encapsulated_proto; -+}; -+ - /** - * Initialize packet capturing handling - * -diff -uparN dpdk-stable-17.11.6/mk/rte.pdump.mk dpdk-stable-17.11.6-new/mk/rte.pdump.mk ---- dpdk-stable-17.11.6/mk/rte.pdump.mk 1970-01-01 08:00:00.000000000 +0800 
-+++ dpdk-stable-17.11.6-new/mk/rte.pdump.mk 2020-08-19 19:12:09.012111071 +0800 -@@ -0,0 +1,349 @@ -+# BSD LICENSE -+# -+# Copyright(c) 2010-2017 Intel Corporation. All rights reserved. -+# Copyright(c) 2014-2015 6WIND S.A. -+# All rights reserved. -+# -+# Redistribution and use in source and binary forms, with or without -+# modification, are permitted provided that the following conditions -+# are met: -+# -+# * Redistributions of source code must retain the above copyright -+# notice, this list of conditions and the following disclaimer. -+# * Redistributions in binary form must reproduce the above copyright -+# notice, this list of conditions and the following disclaimer in -+# the documentation and/or other materials provided with the -+# distribution. -+# * Neither the name of Intel Corporation nor the names of its -+# contributors may be used to endorse or promote products derived -+# from this software without specific prior written permission. -+# -+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-+ -+include $(RTE_SDK)/mk/internal/rte.compile-pre.mk -+include $(RTE_SDK)/mk/internal/rte.install-pre.mk -+include $(RTE_SDK)/mk/internal/rte.clean-pre.mk -+include $(RTE_SDK)/mk/internal/rte.build-pre.mk -+ -+# VPATH contains at least SRCDIR -+VPATH += $(SRCDIR) -+ -+_BUILD = $(APP) -+_INSTALL = $(INSTALL-FILES-y) $(SYMLINK-FILES-y) -+_INSTALL += $(RTE_OUTPUT)/app/$(APP) $(RTE_OUTPUT)/app/$(APP).map -+POSTINSTALL += target-appinstall -+_CLEAN = doclean -+POSTCLEAN += target-appclean -+ -+ifeq ($(NO_LDSCRIPT),) -+LDSCRIPT = $(RTE_LDSCRIPT) -+endif -+ -+# Link only the libraries used in the application -+LDFLAGS += --as-needed -+ -+# default path for libs -+_LDLIBS-y += -L$(RTE_SDK_BIN)/lib -+ -+# -+# Order is important: from higher level to lower level -+# -+_LDLIBS-$(CONFIG_RTE_LIBRTE_FLOW_CLASSIFY) += -lrte_flow_classify -+_LDLIBS-$(CONFIG_RTE_LIBRTE_PIPELINE) += -lrte_pipeline -+_LDLIBS-$(CONFIG_RTE_LIBRTE_TABLE) += -lrte_table -+_LDLIBS-$(CONFIG_RTE_LIBRTE_PORT) += -lrte_port -+ -+_LDLIBS-$(CONFIG_RTE_LIBRTE_PDUMP) += -lrte_pdump -+_LDLIBS-$(CONFIG_RTE_LIBRTE_DISTRIBUTOR) += -lrte_distributor -+_LDLIBS-$(CONFIG_RTE_LIBRTE_IP_FRAG) += -lrte_ip_frag -+_LDLIBS-$(CONFIG_RTE_LIBRTE_GRO) += -lrte_gro -+_LDLIBS-$(CONFIG_RTE_LIBRTE_GSO) += -lrte_gso -+_LDLIBS-$(CONFIG_RTE_LIBRTE_METER) += -lrte_meter -+_LDLIBS-$(CONFIG_RTE_LIBRTE_LPM) += -lrte_lpm -+# librte_acl needs --whole-archive because of weak functions -+_LDLIBS-$(CONFIG_RTE_LIBRTE_ACL) += --whole-archive -+_LDLIBS-$(CONFIG_RTE_LIBRTE_ACL) += --no-whole-archive -+_LDLIBS-$(CONFIG_RTE_LIBRTE_JOBSTATS) += -lrte_jobstats -+_LDLIBS-$(CONFIG_RTE_LIBRTE_METRICS) += -lrte_metrics -+_LDLIBS-$(CONFIG_RTE_LIBRTE_BITRATE) += -lrte_bitratestats -+_LDLIBS-$(CONFIG_RTE_LIBRTE_LATENCY_STATS) += -lrte_latencystats -+_LDLIBS-$(CONFIG_RTE_LIBRTE_POWER) += -lrte_power -+ -+_LDLIBS-$(CONFIG_RTE_LIBRTE_TIMER) += -lrte_timer -+_LDLIBS-$(CONFIG_RTE_LIBRTE_EFD) += -lrte_efd -+ -+_LDLIBS-y += --whole-archive -+ 
-+_LDLIBS-$(CONFIG_RTE_LIBRTE_CFGFILE) += -lrte_cfgfile -+_LDLIBS-$(CONFIG_RTE_LIBRTE_HASH) += -lrte_hash -+_LDLIBS-$(CONFIG_RTE_LIBRTE_VHOST) += -lrte_vhost -+_LDLIBS-$(CONFIG_RTE_LIBRTE_KVARGS) += -lrte_kvargs -+_LDLIBS-$(CONFIG_RTE_LIBRTE_MBUF) += -lrte_mbuf -+_LDLIBS-$(CONFIG_RTE_LIBRTE_NET) += -lrte_net -+_LDLIBS-$(CONFIG_RTE_LIBRTE_ETHER) += -lrte_ethdev -+_LDLIBS-$(CONFIG_RTE_LIBRTE_CRYPTODEV) += -lrte_cryptodev -+_LDLIBS-$(CONFIG_RTE_LIBRTE_SECURITY) += -lrte_security -+_LDLIBS-$(CONFIG_RTE_LIBRTE_MEMPOOL) += -lrte_mempool -+_LDLIBS-$(CONFIG_RTE_DRIVER_MEMPOOL_RING) += -lrte_mempool_ring -+_LDLIBS-$(CONFIG_RTE_LIBRTE_RING) += -lrte_ring -+_LDLIBS-$(CONFIG_RTE_LIBRTE_PCI) += -lrte_pci -+_LDLIBS-$(CONFIG_RTE_LIBRTE_EAL) += -lrte_eal -+_LDLIBS-$(CONFIG_RTE_LIBRTE_CMDLINE) += -lrte_cmdline -+_LDLIBS-$(CONFIG_RTE_LIBRTE_SCHED) += -lrte_sched -+ -+ifeq ($(CONFIG_RTE_EXEC_ENV_LINUXAPP),y) -+_LDLIBS-$(CONFIG_RTE_LIBRTE_KNI) += -lrte_kni -+endif -+ -+_LDLIBS-$(CONFIG_RTE_LIBRTE_PCI_BUS) += -lrte_bus_pci -+_LDLIBS-$(CONFIG_RTE_LIBRTE_VDEV_BUS) += -lrte_bus_vdev -+ -+ifeq ($(CONFIG_RTE_BUILD_SHARED_LIB),n) -+# plugins (link only if static libraries) -+ -+_LDLIBS-$(CONFIG_RTE_DRIVER_MEMPOOL_STACK) += -lrte_mempool_stack -+ -+_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_AF_PACKET) += -lrte_pmd_af_packet -+_LDLIBS-$(CONFIG_RTE_LIBRTE_ARK_PMD) += -lrte_pmd_ark -+_LDLIBS-$(CONFIG_RTE_LIBRTE_AVP_PMD) += -lrte_pmd_avp -+_LDLIBS-$(CONFIG_RTE_LIBRTE_BNX2X_PMD) += -lrte_pmd_bnx2x -lz -+_LDLIBS-$(CONFIG_RTE_LIBRTE_BNXT_PMD) += -lrte_pmd_bnxt -+_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += -lrte_pmd_bond -+_LDLIBS-$(CONFIG_RTE_LIBRTE_CXGBE_PMD) += -lrte_pmd_cxgbe -+ifeq ($(CONFIG_RTE_LIBRTE_DPAA_BUS),y) -+_LDLIBS-$(CONFIG_RTE_LIBRTE_DPAA_BUS) += -lrte_bus_dpaa -+_LDLIBS-$(CONFIG_RTE_LIBRTE_DPAA_MEMPOOL) += -lrte_mempool_dpaa -+_LDLIBS-$(CONFIG_RTE_LIBRTE_DPAA_PMD) += -lrte_pmd_dpaa -+endif -+_LDLIBS-$(CONFIG_RTE_LIBRTE_DPAA2_PMD) += -lrte_pmd_dpaa2 -+_LDLIBS-$(CONFIG_RTE_LIBRTE_E1000_PMD) += 
-lrte_pmd_e1000 -+_LDLIBS-$(CONFIG_RTE_LIBRTE_ENA_PMD) += -lrte_pmd_ena -+_LDLIBS-$(CONFIG_RTE_LIBRTE_ENIC_PMD) += -lrte_pmd_enic -+_LDLIBS-$(CONFIG_RTE_LIBRTE_FM10K_PMD) += -lrte_pmd_fm10k -+_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += -lrte_pmd_failsafe -+_LDLIBS-$(CONFIG_RTE_LIBRTE_I40E_PMD) += -lrte_pmd_i40e -+_LDLIBS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += -lrte_pmd_ixgbe -+ifeq ($(CONFIG_RTE_LIBRTE_KNI),y) -+_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_KNI) += -lrte_pmd_kni -+endif -+_LDLIBS-$(CONFIG_RTE_LIBRTE_LIO_PMD) += -lrte_pmd_lio -+_LDLIBS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += -lrte_pmd_mlx4 -libverbs -lmlx4 -+_LDLIBS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += -lrte_pmd_mlx5 -libverbs -lmlx5 -+_LDLIBS-$(CONFIG_RTE_LIBRTE_MRVL_PMD) += -lrte_pmd_mrvl -L$(LIBMUSDK_PATH)/lib -lmusdk -+_LDLIBS-$(CONFIG_RTE_LIBRTE_NFP_PMD) += -lrte_pmd_nfp -+_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_NULL) += -lrte_pmd_null -+_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_PCAP) += -lrte_pmd_pcap -lpcap -+_LDLIBS-$(CONFIG_RTE_LIBRTE_QEDE_PMD) += -lrte_pmd_qede -+_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_RING) += -lrte_pmd_ring -+ifeq ($(CONFIG_RTE_LIBRTE_SCHED),y) -+_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_SOFTNIC) += -lrte_pmd_softnic -+endif -+_LDLIBS-$(CONFIG_RTE_LIBRTE_SFC_EFX_PMD) += -lrte_pmd_sfc_efx -+_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_SZEDATA2) += -lrte_pmd_szedata2 -lsze2 -+_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += -lrte_pmd_tap -+_LDLIBS-$(CONFIG_RTE_LIBRTE_THUNDERX_NICVF_PMD) += -lrte_pmd_thunderx_nicvf -+_LDLIBS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += -lrte_pmd_virtio -+ifeq ($(CONFIG_RTE_LIBRTE_VHOST),y) -+_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_VHOST) += -lrte_pmd_vhost -+endif # $(CONFIG_RTE_LIBRTE_VHOST) -+_LDLIBS-$(CONFIG_RTE_LIBRTE_VMXNET3_PMD) += -lrte_pmd_vmxnet3_uio -+ -+ifeq ($(CONFIG_RTE_LIBRTE_CRYPTODEV),y) -+_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_AESNI_MB) += -lrte_pmd_aesni_mb -+_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_AESNI_MB) += -L$(AESNI_MULTI_BUFFER_LIB_PATH) -lIPSec_MB -+_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_AESNI_GCM) += -lrte_pmd_aesni_gcm 
-+_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_AESNI_GCM) += -L$(AESNI_MULTI_BUFFER_LIB_PATH) -lIPSec_MB -+_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_OPENSSL) += -lrte_pmd_openssl -lcrypto -+_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_NULL_CRYPTO) += -lrte_pmd_null_crypto -+_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_QAT) += -lrte_pmd_qat -lcrypto -+_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_SNOW3G) += -lrte_pmd_snow3g -+_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_SNOW3G) += -L$(LIBSSO_SNOW3G_PATH)/build -lsso_snow3g -+_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_KASUMI) += -lrte_pmd_kasumi -+_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_KASUMI) += -L$(LIBSSO_KASUMI_PATH)/build -lsso_kasumi -+_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_ZUC) += -lrte_pmd_zuc -+_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_ZUC) += -L$(LIBSSO_ZUC_PATH)/build -lsso_zuc -+_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_ARMV8_CRYPTO) += -lrte_pmd_armv8 -+_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_ARMV8_CRYPTO) += -L$(ARMV8_CRYPTO_LIB_PATH) -larmv8_crypto -+_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_MRVL_CRYPTO) += -L$(LIBMUSDK_PATH)/lib -lrte_pmd_mrvl_crypto -lmusdk -+_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_CRYPTO_SCHEDULER) += -lrte_pmd_crypto_scheduler -+ifeq ($(CONFIG_RTE_LIBRTE_FSLMC_BUS),y) -+_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_DPAA2_SEC) += -lrte_pmd_dpaa2_sec -+_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_DPAA2_SEC) += -lrte_mempool_dpaa2 -+_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_DPAA2_SEC) += -lrte_bus_fslmc -+endif # CONFIG_RTE_LIBRTE_FSLMC_BUS -+ -+ifeq ($(CONFIG_RTE_LIBRTE_DPAA_BUS),y) -+_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_DPAA_SEC) += -lrte_bus_dpaa -+_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_DPAA_SEC) += -lrte_pmd_dpaa_sec -+endif # CONFIG_RTE_LIBRTE_DPAA_BUS -+ -+endif # CONFIG_RTE_LIBRTE_CRYPTODEV -+ -+ifeq ($(CONFIG_RTE_LIBRTE_DPAA2_PMD),y) -+_LDLIBS-$(CONFIG_RTE_LIBRTE_DPAA2_PMD) += -lrte_bus_fslmc -+_LDLIBS-$(CONFIG_RTE_LIBRTE_DPAA2_PMD) += -lrte_mempool_dpaa2 -+endif # CONFIG_RTE_LIBRTE_DPAA2_PMD -+ -+endif # !CONFIG_RTE_BUILD_SHARED_LIBS -+ -+_LDLIBS-y += --no-whole-archive -+ -+ifeq ($(CONFIG_RTE_BUILD_SHARED_LIB),n) -+# The static libraries do not know their 
dependencies. -+# So linking with static library requires explicit dependencies. -+_LDLIBS-$(CONFIG_RTE_LIBRTE_EAL) += -lrt -+ifeq ($(CONFIG_RTE_EXEC_ENV_LINUXAPP)$(CONFIG_RTE_EAL_NUMA_AWARE_HUGEPAGES),yy) -+_LDLIBS-$(CONFIG_RTE_LIBRTE_EAL) += -lnuma -+endif -+_LDLIBS-$(CONFIG_RTE_LIBRTE_SCHED) += -lm -+_LDLIBS-$(CONFIG_RTE_LIBRTE_SCHED) += -lrt -+_LDLIBS-$(CONFIG_RTE_LIBRTE_MEMBER) += -lm -+_LDLIBS-$(CONFIG_RTE_LIBRTE_METER) += -lm -+ifeq ($(CONFIG_RTE_LIBRTE_VHOST_NUMA),y) -+_LDLIBS-$(CONFIG_RTE_LIBRTE_VHOST) += -lnuma -+endif -+_LDLIBS-$(CONFIG_RTE_PORT_PCAP) += -lpcap -+endif # !CONFIG_RTE_BUILD_SHARED_LIBS -+ -+_LDLIBS-y += $(EXECENV_LDLIBS) -+ -+LDLIBS += $(_LDLIBS-y) $(CPU_LDLIBS) $(EXTRA_LDLIBS) -+ -+# all the words except the first one -+allbutfirst = $(wordlist 2,$(words $(1)),$(1)) -+ -+# Eliminate duplicates without sorting, only keep the last occurrence -+filter-libs = \ -+ $(if $(1),$(strip\ -+ $(if \ -+ $(and \ -+ $(filter $(firstword $(1)),$(call allbutfirst,$(1))),\ -+ $(filter -l%,$(firstword $(1)))),\ -+ ,\ -+ $(firstword $(1))) \ -+ $(call filter-libs,$(call allbutfirst,$(1))))) -+ -+LDLIBS := $(call filter-libs,$(LDLIBS)) -+ -+ifeq ($(RTE_DEVEL_BUILD)$(CONFIG_RTE_BUILD_SHARED_LIB),yy) -+LDFLAGS += -rpath=$(RTE_SDK_BIN)/lib -+endif -+ -+MAPFLAGS = -Map=$@.map --cref -+ -+.PHONY: all -+all: install -+ -+.PHONY: install -+install: build _postinstall -+ -+_postinstall: build -+ -+.PHONY: build -+build: _postbuild -+ -+exe2cmd = $(strip $(call dotfile,$(patsubst %,%.cmd,$(1)))) -+ -+ifeq ($(LINK_USING_CC),1) -+O_TO_EXE = $(CC) -o $@ $(CFLAGS) $(EXTRA_CFLAGS) $(OBJS-y) $(call linkerprefix, \ -+ $(LDLIBS) $(LDFLAGS) $(LDFLAGS_$(@)) $(EXTRA_LDFLAGS) \ -+ $(MAPFLAGS)) -+else -+O_TO_EXE = $(LD) -o $@ $(OBJS-y) \ -+ $(LDLIBS) $(LDFLAGS) $(LDFLAGS_$(@)) $(EXTRA_LDFLAGS) \ -+ $(MAPFLAGS) -+endif -+O_TO_EXE_STR = $(subst ','\'',$(O_TO_EXE)) #'# fix syntax highlight -+O_TO_EXE_DISP = $(if $(V),"$(O_TO_EXE_STR)"," LD $(@)") -+O_TO_EXE_CMD = "cmd_$@ = 
$(O_TO_EXE_STR)" -+O_TO_EXE_DO = @set -e; \ -+ echo $(O_TO_EXE_DISP); \ -+ $(O_TO_EXE) && \ -+ echo $(O_TO_EXE_CMD) > $(call exe2cmd,$(@)) -+ -+-include .$(APP).cmd -+ -+# path where libraries are retrieved -+LDLIBS_PATH := $(subst -Wl$(comma)-L,,$(filter -Wl$(comma)-L%,$(LDLIBS))) -+LDLIBS_PATH += $(subst -L,,$(filter -L%,$(LDLIBS))) -+ -+# list of .a files that are linked to this application -+LDLIBS_NAMES := $(patsubst -l%,lib%.a,$(filter -l%,$(LDLIBS))) -+LDLIBS_NAMES += $(patsubst -Wl$(comma)-l%,lib%.a,$(filter -Wl$(comma)-l%,$(LDLIBS))) -+ -+# list of found libraries files (useful for deps). If not found, the -+# library is silently ignored and dep won't be checked -+LDLIBS_FILES := $(sort $(wildcard $(foreach dir,$(LDLIBS_PATH),\ -+ $(addprefix $(dir)/,$(LDLIBS_NAMES))))) -+ -+# -+# Compile executable file if needed -+# -+$(APP): $(OBJS-y) $(LDLIBS_FILES) $(DEP_$(APP)) $(LDSCRIPT) FORCE -+ @[ -d $(dir $@) ] || mkdir -p $(dir $@) -+ $(if $(D),\ -+ @echo -n "$< -> $@ " ; \ -+ echo -n "file_missing=$(call boolean,$(file_missing)) " ; \ -+ echo -n "cmdline_changed=$(call boolean,$(call cmdline_changed,$(O_TO_EXE_STR))) " ; \ -+ echo -n "depfile_missing=$(call boolean,$(depfile_missing)) " ; \ -+ echo "depfile_newer=$(call boolean,$(depfile_newer)) ") -+ $(if $(or \ -+ $(file_missing),\ -+ $(call cmdline_changed,$(O_TO_EXE_STR)),\ -+ $(depfile_missing),\ -+ $(depfile_newer)),\ -+ $(O_TO_EXE_DO)) -+ -+# -+# install app in $(RTE_OUTPUT)/app -+# -+$(RTE_OUTPUT)/app/$(APP): $(APP) -+ @echo " INSTALL-APP $(APP)" -+ @[ -d $(RTE_OUTPUT)/app ] || mkdir -p $(RTE_OUTPUT)/app -+ $(Q)cp -f $(APP) $(RTE_OUTPUT)/app -+ -+# -+# install app map file in $(RTE_OUTPUT)/app -+# -+$(RTE_OUTPUT)/app/$(APP).map: $(APP) -+ @echo " INSTALL-MAP $(APP).map" -+ @[ -d $(RTE_OUTPUT)/app ] || mkdir -p $(RTE_OUTPUT)/app -+ $(Q)cp -f $(APP).map $(RTE_OUTPUT)/app -+ -+# -+# Clean all generated files -+# -+.PHONY: clean -+clean: _postclean -+ $(Q)rm -f $(_BUILD_TARGETS) $(_INSTALL_TARGETS) 
$(_CLEAN_TARGETS) -+ -+.PHONY: doclean -+doclean: -+ $(Q)rm -rf $(APP) $(OBJS-all) $(DEPS-all) $(DEPSTMP-all) \ -+ $(CMDS-all) $(INSTALL-FILES-all) .$(APP).cmd $(APP).map -+ -+ -+include $(RTE_SDK)/mk/internal/rte.compile-post.mk -+include $(RTE_SDK)/mk/internal/rte.install-post.mk -+include $(RTE_SDK)/mk/internal/rte.clean-post.mk -+include $(RTE_SDK)/mk/internal/rte.build-post.mk -+ -+ifneq ($(wildcard $(RTE_SDK)/mk/target/$(RTE_TARGET)/rte.app.mk),) -+include $(RTE_SDK)/mk/target/$(RTE_TARGET)/rte.app.mk -+else -+include $(RTE_SDK)/mk/target/generic/rte.app.mk -+endif -+ -+.PHONY: FORCE -+FORCE: -+ diff --git a/patch/dpdk-stable-17.11.6/enable-dpdk-eal-memory-debug.patch b/patch/dpdk-stable-17.11.6/enable-dpdk-eal-memory-debug.patch deleted file mode 100644 index 96c4940b8..000000000 --- a/patch/dpdk-stable-17.11.6/enable-dpdk-eal-memory-debug.patch +++ /dev/null @@ -1,68 +0,0 @@ -From e21a4e12c4e8dd60a68041a7b52f07e9e68053ff Mon Sep 17 00:00:00 2001 -From: wencyu -Date: Mon, 6 Jan 2020 20:20:17 +0800 -Subject: [PATCH] debug: enable dpdk eal memory debug - ---- - config/common_base | 2 +- - lib/librte_eal/common/include/rte_malloc.h | 15 +++++++++++++++ - lib/librte_eal/common/rte_malloc.c | 4 ++++ - 3 files changed, 20 insertions(+), 1 deletion(-) - -diff --git a/config/common_base b/config/common_base -index 31f50b4..4cb1957 100644 ---- a/config/common_base -+++ b/config/common_base -@@ -103,7 +103,7 @@ CONFIG_RTE_EAL_ALLOW_INV_SOCKET_ID=n - CONFIG_RTE_EAL_ALWAYS_PANIC_ON_ERROR=n - CONFIG_RTE_EAL_IGB_UIO=n - CONFIG_RTE_EAL_VFIO=n --CONFIG_RTE_MALLOC_DEBUG=n -+CONFIG_RTE_MALLOC_DEBUG=y - CONFIG_RTE_EAL_NUMA_AWARE_HUGEPAGES=n - CONFIG_RTE_USE_LIBBSD=n - -diff --git a/lib/librte_eal/common/include/rte_malloc.h b/lib/librte_eal/common/include/rte_malloc.h -index 5d4c11a..0d3833e 100644 ---- a/lib/librte_eal/common/include/rte_malloc.h -+++ b/lib/librte_eal/common/include/rte_malloc.h -@@ -242,6 +242,21 @@ struct rte_malloc_socket_stats { - rte_calloc_socket(const 
char *type, size_t num, size_t size, unsigned align, int socket); - - /** -+ * Check the header/tailer cookies of memory pointed to by the provided pointer. -+ * -+ * This pointer must have been returned by a previous call to -+ * rte_malloc(), rte_zmalloc(), rte_calloc() or rte_realloc(). -+ * -+ * @param ptr -+ * The pointer to memory to be checked. -+ * @return -+ * - true if the header/tailer cookies are OK. -+ * - Otherwise, false. -+ */ -+int -+rte_memmory_ok(void *ptr); -+ -+/** - * Frees the memory space pointed to by the provided pointer. - * - * This pointer must have been returned by a previous call to -diff --git a/lib/librte_eal/common/rte_malloc.c b/lib/librte_eal/common/rte_malloc.c -index fe2278b..81d22f0 100644 ---- a/lib/librte_eal/common/rte_malloc.c -+++ b/lib/librte_eal/common/rte_malloc.c -@@ -53,6 +53,10 @@ - #include "malloc_elem.h" - #include "malloc_heap.h" - -+int rte_memmory_ok(void *addr) -+{ -+ return malloc_elem_cookies_ok(RTE_PTR_SUB(addr, MALLOC_ELEM_HEADER_LEN)); -+} - - /* Free the memory space back to heap */ - void rte_free(void *addr) --- -1.8.3.1 - diff --git a/patch/dpdk-stable-18.11.2/0001-add-debug-log-for-ixgbe-fdir-setting.patch b/patch/dpdk-stable-18.11.2/0001-add-debug-log-for-ixgbe-fdir-setting.patch deleted file mode 100644 index 69293f320..000000000 --- a/patch/dpdk-stable-18.11.2/0001-add-debug-log-for-ixgbe-fdir-setting.patch +++ /dev/null @@ -1,53 +0,0 @@ -From ecd84bd29fd7eff2b8db4c04e92224929322a51f Mon Sep 17 00:00:00 2001 -From: wencyu -Date: Tue, 29 Sep 2020 14:42:52 +0800 -Subject: [PATCH 1/2] add debug log for ixgbe fdir setting - ---- - drivers/net/ixgbe/ixgbe_fdir.c | 23 +++++++++++++++++++++++ - 1 file changed, 23 insertions(+) - -diff --git a/drivers/net/ixgbe/ixgbe_fdir.c b/drivers/net/ixgbe/ixgbe_fdir.c -index e559f0f..c17642a 100644 ---- a/drivers/net/ixgbe/ixgbe_fdir.c -+++ b/drivers/net/ixgbe/ixgbe_fdir.c -@@ -278,6 +278,14 @@ static void ixgbe_fdir_stats_get(struct rte_eth_dev *dev, - uint32_t 
fdiripv6m; /* IPv6 source and destination masks. */ - volatile uint32_t *reg; - -+ PMD_INIT_LOG(DEBUG, "%s: tci 0x%04x, ip4src 0x%08x, ip4dst 0x%08x, " -+ "ip6src 0x%04x, ip6dst 0x%04x, src_port 0x%04x, dst_port 0x%04x, " -+ "flex 0x%04x, mac_addr 0x%02x, tunid 0x%08x, tuntype 0x02%x\n", __func__, -+ info->mask.vlan_tci_mask, info->mask.src_ipv4_mask, info->mask.dst_ipv4_mask, -+ info->mask.src_ipv6_mask, info->mask.dst_ipv6_mask, info->mask.src_port_mask, -+ info->mask.dst_port_mask, info->mask.flex_bytes_mask, info->mask.mac_addr_byte_mask, -+ info->mask.tunnel_id_mask, info->mask.tunnel_type_mask); -+ - PMD_INIT_FUNC_TRACE(); - - /* -@@ -1242,6 +1250,21 @@ static void ixgbe_fdir_stats_get(struct rte_eth_dev *dev, - struct ixgbe_fdir_filter *node; - bool add_node = FALSE; - -+ PMD_DRV_LOG(DEBUG, "%s: ixgbe_fdir_rule: b_spec %d, b_mask %d, mode %d, flags 0x%08x, softid %d, " -+ "queue %d, flex_off %d....ixgbe_fdir: vm_pool %d, flow_type %d, vlan_id %d, dst_ip 0x%8x, src_ip " -+ "0x%8x, inner_mac %02x:%02x:%02x:%02x:%02x:%02x, tuntype 0x%4x, tni_vni 0x%08x, src_port 0x%04x, " -+ "dst_port 0x%04x, flexbytes %d, bkt_hash %d\n", __func__, rule->b_spec, rule->b_mask, -+ rule->mode, rule->fdirflags, rule->soft_id, rule->queue, rule->flex_bytes_offset, -+ rule->ixgbe_fdir.formatted.vm_pool, rule->ixgbe_fdir.formatted.flow_type, -+ rule->ixgbe_fdir.formatted.vlan_id, -+ *((uint32_t *)&rule->ixgbe_fdir.formatted.dst_ip[0]), -+ *((uint32_t *)&rule->ixgbe_fdir.formatted.src_ip[0]), -+ rule->ixgbe_fdir.formatted.inner_mac[0], rule->ixgbe_fdir.formatted.inner_mac[1], -+ rule->ixgbe_fdir.formatted.inner_mac[2], rule->ixgbe_fdir.formatted.inner_mac[3], -+ rule->ixgbe_fdir.formatted.inner_mac[4], rule->ixgbe_fdir.formatted.inner_mac[5], -+ rule->ixgbe_fdir.formatted.tunnel_type, rule->ixgbe_fdir.formatted.tni_vni, -+ rule->ixgbe_fdir.formatted.src_port, rule->ixgbe_fdir.formatted.dst_port, -+ rule->ixgbe_fdir.formatted.flex_bytes, rule->ixgbe_fdir.formatted.bkt_hash); - if 
(fdir_mode == RTE_FDIR_MODE_NONE || - fdir_mode != rule->mode) - return -ENOTSUP; --- -1.8.3.1 - diff --git a/patch/dpdk-stable-18.11.2/0001-kni-use-netlink-event-for-multicast-driver-part.patch b/patch/dpdk-stable-18.11.2/0001-kni-use-netlink-event-for-multicast-driver-part.patch deleted file mode 100644 index 0bc548181..000000000 --- a/patch/dpdk-stable-18.11.2/0001-kni-use-netlink-event-for-multicast-driver-part.patch +++ /dev/null @@ -1,124 +0,0 @@ -From 76eb58e26ec25be8e0d281085a3c0274d9abfede Mon Sep 17 00:00:00 2001 -From: ywc689 -Date: Fri, 28 Jun 2019 16:52:24 +0800 -Subject: [PATCH 1/7] kni: use netlink event for multicast (driver part) - -kni driver send netlink event every time hw-multicast list updated by -kernel, the user kni app should capture the event and update multicast -to kni device. - -original way is using rte_kni_request to pass hw-multicast to user kni -module. that method works but finally memory corruption found, which is -to kni device. ---- - kernel/linux/kni/kni_net.c | 70 ++++++++++++++++++++++++++++++++++++++++++++++ - 1 file changed, 70 insertions(+) - -diff --git a/kernel/linux/kni/kni_net.c b/kernel/linux/kni/kni_net.c -index 7371b6d..edc1416 100644 ---- a/kernel/linux/kni/kni_net.c -+++ b/kernel/linux/kni/kni_net.c -@@ -16,6 +16,8 @@ - #include - #include - #include -+#include -+#include - - #include - #include -@@ -103,6 +105,7 @@ - ret_val = wait_event_interruptible_timeout(kni->wq, - kni_fifo_count(kni->resp_q), 3 * HZ); - if (signal_pending(current) || ret_val <= 0) { -+ pr_err("%s: wait_event_interruptible timeout\n", __func__); - ret = -ETIME; - goto fail; - } -@@ -605,9 +608,75 @@ void kni_net_release_fifo_phy(struct kni_dev *kni) - return -EOPNOTSUPP; - } - -+static size_t -+kni_nlmsg_size(void) -+{ -+ return NLMSG_ALIGN(sizeof(struct ifaddrmsg)) -+ + nla_total_size(4) /* IFA_ADDRESS */ -+ + nla_total_size(4) /* IFA_LOCAL */ -+ + nla_total_size(4) /* IFA_BROADCAST */ -+ + nla_total_size(IFNAMSIZ) /* IFA_LABEL */ -+ + 
nla_total_size(4) /* IFA_FLAGS */ -+ + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */ -+} -+ - static void - kni_net_set_rx_mode(struct net_device *dev) - { -+ /* -+ * send event to notify user (DPDK KNI app) that multicast list changed, -+ * so that it can monitor multicast join/leave and set HW mc-addrs to -+ * kni dev accordinglly. -+ * -+ * this event is just an notification, we do not save any mc-addr here -+ * (so attribute space for us). user kni app should get maddrs after -+ * receive this notification. -+ * -+ * I was expecting kernel send some rtnl event for multicast join/leave, -+ * but it doesn't. By checking the call-chain of SIOCADDMULTI (ip maddr, -+ * manages only hardware multicast) and IP_ADD_MEMBERSHIP (ip_mc_join_group, -+ * used to for IPv4 multicast), no rtnl event sent. -+ * -+ * so as workaround, modify kni driver here to send RTM_NEWADDR. -+ * it may not suitalbe to use this event for mcast, but that should works. -+ * hope that won't affect other listener to this event. -+ * -+ * previous solution was using rte_kni_request to pass hw-maddr list to user. -+ * it "works" for times but finally memory corruption found, which is -+ * not easy to address (lock was added and reviewed). That's why we use -+ * netlink event instead. -+ */ -+ struct sk_buff *skb; -+ struct net *net = dev_net(dev); -+ struct nlmsghdr *nlh; -+ struct ifaddrmsg *ifm; -+ -+ skb = nlmsg_new(kni_nlmsg_size(), GFP_ATOMIC); -+ if (!skb) -+ return; -+ -+ /* no other event for us ? */ -+ nlh = nlmsg_put(skb, 0, 0, RTM_NEWADDR, sizeof(*ifm), 0); -+ if (!nlh) { -+ kfree_skb(skb); -+ return; -+ } -+ -+ /* just send an notification so no other info */ -+ ifm = nlmsg_data(nlh); -+ memset(ifm, 0, sizeof(*ifm)); -+ ifm->ifa_family = AF_UNSPEC; -+ ifm->ifa_prefixlen = 0; -+ ifm->ifa_flags = 0; -+ ifm->ifa_scope = RT_SCOPE_NOWHERE; -+ ifm->ifa_index = 0; -+ -+ nlmsg_end(skb, nlh); -+ -+ /* other group ? 
*/ -+ pr_debug("%s: rx-mode/multicast-list changed\n", __func__); -+ rtnl_notify(skb, net, 0, RTNLGRP_NOTIFY, NULL, GFP_ATOMIC); -+ return; - } - - static int -@@ -727,6 +796,7 @@ void kni_net_release_fifo_phy(struct kni_dev *kni) - kni = netdev_priv(netdev); - ret = kni_net_process_request(kni, &req); - -+ pr_info("%s request returns %d!\n", __func__, ret); - return (ret == 0 ? req.result : ret); - } - --- -1.8.3.1 - diff --git a/patch/dpdk-stable-18.11.2/0002-Patch-ixgbe-fdir-rte_flow-for-DPVS.patch b/patch/dpdk-stable-18.11.2/0002-Patch-ixgbe-fdir-rte_flow-for-DPVS.patch deleted file mode 100644 index dd5d1c39a..000000000 --- a/patch/dpdk-stable-18.11.2/0002-Patch-ixgbe-fdir-rte_flow-for-DPVS.patch +++ /dev/null @@ -1,147 +0,0 @@ -From 09a4a420427dda9084669512e7c9c95ebe8586f4 Mon Sep 17 00:00:00 2001 -From: wencyu -Date: Tue, 29 Sep 2020 14:45:08 +0800 -Subject: [PATCH 2/2] Patch ixgbe fdir rte_flow for DPVS. 1. Ignore fdir flow - rule priority attribute. 2. Use different fdir soft-id for flow rules - configured for the same queue. 3. Disable fdir mask settings by rte_flow. 4. - Allow IPv6 to pass flow rule ETH item validation. - ---- - drivers/net/ixgbe/ixgbe_flow.c | 62 ++++++++++++++++++++++++++++++++++-------- - 1 file changed, 51 insertions(+), 11 deletions(-) - -diff --git a/drivers/net/ixgbe/ixgbe_flow.c b/drivers/net/ixgbe/ixgbe_flow.c -index f0fafeb..05dd5df 100644 ---- a/drivers/net/ixgbe/ixgbe_flow.c -+++ b/drivers/net/ixgbe/ixgbe_flow.c -@@ -1428,11 +1428,8 @@ const struct rte_flow_action *next_no_void_action( - - /* not supported */ - if (attr->priority) { -- memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); -- rte_flow_error_set(error, EINVAL, -- RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY, -- attr, "Not support priority."); -- return -rte_errno; -+ PMD_DRV_LOG(WARNING, "Ixgbe fdir not support flow priority %d (only 0 is supported), " -+ "ignore and continue....\n", attr->priority); - } - - /* check if the first not void action is QUEUE or DROP. 
*/ -@@ -1651,7 +1648,7 @@ static inline uint8_t signature_match(const struct rte_flow_item pattern[]) - * value. So, we need not do anything for the not provided fields later. - */ - memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); -- memset(&rule->mask, 0xFF, sizeof(struct ixgbe_hw_fdir_mask)); -+ memset(&rule->mask, 0, sizeof(struct ixgbe_hw_fdir_mask)); /* mask default zero */ - rule->mask.vlan_tci_mask = 0; - rule->mask.flex_bytes_mask = 0; - -@@ -1769,6 +1766,8 @@ static inline uint8_t signature_match(const struct rte_flow_item pattern[]) - } - } else { - if (item->type != RTE_FLOW_ITEM_TYPE_IPV4 && -+ /* Signature mode supports IPv6. */ -+ item->type != RTE_FLOW_ITEM_TYPE_IPV6 && - item->type != RTE_FLOW_ITEM_TYPE_VLAN) { - memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); - rte_flow_error_set(error, EINVAL, -@@ -1897,6 +1896,9 @@ static inline uint8_t signature_match(const struct rte_flow_item pattern[]) - rule->ixgbe_fdir.formatted.flow_type = - IXGBE_ATR_FLOW_TYPE_IPV6; - -+ /* Update flow rule mode by global param. */ -+ rule->mode = dev->data->dev_conf.fdir_conf.mode; -+ - /** - * 1. must signature match - * 2. not support last -@@ -2757,12 +2759,45 @@ static inline uint8_t signature_match(const struct rte_flow_item pattern[]) - return ixgbe_parse_fdir_act_attr(attr, actions, rule, error); - } - -+static inline int -+ixgbe_fdir_rule_patch(struct rte_eth_dev *dev, struct ixgbe_fdir_rule *rule) -+{ -+ static uint32_t softid[IXGBE_MAX_RX_QUEUE_NUM] = { 0 }; -+ -+ if (!rule) -+ return 0; -+ -+ if (!dev || !dev->data) -+ return -EINVAL; -+ if (rule->queue >= IXGBE_MAX_RX_QUEUE_NUM) -+ return -EINVAL; -+ -+ /* Soft-id for different rx-queue should be different. */ -+ rule->soft_id = softid[rule->queue]++; -+ -+ /* Disable mask config from rte_flow. -+ * FIXME: -+ * Ixgbe only supports one global mask, all the masks should be the same. -+ * Generally, fdir masks should be configured globally before port start. 
-+ * But the rte_flow configures masks at flow creation. So we disable fdir -+ * mask configs in rte_flow and configure it globally when port start. -+ * Refer to `ixgbe_dev_start/ixgbe_fdir_configure` for details. The global -+ * masks are configured into device initially with user specified params. -+ */ -+ rule->b_mask = 0; -+ -+ /* Use user-defined mode. */ -+ rule->mode = dev->data->dev_conf.fdir_conf.mode; -+ -+ return 0; -+} -+ - static int - ixgbe_parse_fdir_filter(struct rte_eth_dev *dev, - const struct rte_flow_attr *attr, - const struct rte_flow_item pattern[], - const struct rte_flow_action actions[], -- struct ixgbe_fdir_rule *rule, -+ struct ixgbe_fdir_rule *rule, bool b_patch, - struct rte_flow_error *error) - { - int ret; -@@ -2796,13 +2831,18 @@ static inline uint8_t signature_match(const struct rte_flow_item pattern[]) - rule->ixgbe_fdir.formatted.dst_port != 0)) - return -ENOTSUP; - -- if (fdir_mode == RTE_FDIR_MODE_NONE || -- fdir_mode != rule->mode) -+ if (fdir_mode == RTE_FDIR_MODE_NONE) - return -ENOTSUP; - - if (rule->queue >= dev->data->nb_rx_queues) - return -ENOTSUP; - -+ if (ret) -+ return ret; -+ -+ if (b_patch) -+ return ixgbe_fdir_rule_patch(dev, rule); -+ - return ret; - } - -@@ -3137,7 +3177,7 @@ static inline uint8_t signature_match(const struct rte_flow_item pattern[]) - - memset(&fdir_rule, 0, sizeof(struct ixgbe_fdir_rule)); - ret = ixgbe_parse_fdir_filter(dev, attr, pattern, -- actions, &fdir_rule, error); -+ actions, &fdir_rule, true, error); - if (!ret) { - /* A mask cannot be deleted. 
*/ - if (fdir_rule.b_mask) { -@@ -3307,7 +3347,7 @@ static inline uint8_t signature_match(const struct rte_flow_item pattern[]) - - memset(&fdir_rule, 0, sizeof(struct ixgbe_fdir_rule)); - ret = ixgbe_parse_fdir_filter(dev, attr, pattern, -- actions, &fdir_rule, error); -+ actions, &fdir_rule, false, error); - if (!ret) - return 0; - --- -1.8.3.1 - diff --git a/patch/dpdk-stable-18.11.2/0002-net-support-variable-IP-header-len-for-checksum-API.patch b/patch/dpdk-stable-18.11.2/0002-net-support-variable-IP-header-len-for-checksum-API.patch deleted file mode 100644 index 70a2de217..000000000 --- a/patch/dpdk-stable-18.11.2/0002-net-support-variable-IP-header-len-for-checksum-API.patch +++ /dev/null @@ -1,56 +0,0 @@ -From 30a0939aa7c1f2de926b7af1d881144fe8a315bb Mon Sep 17 00:00:00 2001 -From: ywc689 -Date: Fri, 28 Jun 2019 16:27:08 +0800 -Subject: [PATCH 2/7] net: support variable IP header len for checksum API. - -IPv4 checksum APIs use fixe IP header length, it will failed if there is -any IP option. Now calculating header length by "ihl" field, so that we -can support options. ---- - lib/librte_net/rte_ip.h | 13 +++++++------ - 1 file changed, 7 insertions(+), 6 deletions(-) - -diff --git a/lib/librte_net/rte_ip.h b/lib/librte_net/rte_ip.h -index f9b9090..635bdcc 100644 ---- a/lib/librte_net/rte_ip.h -+++ b/lib/librte_net/rte_ip.h -@@ -252,7 +252,7 @@ struct ipv4_hdr { - rte_ipv4_cksum(const struct ipv4_hdr *ipv4_hdr) - { - uint16_t cksum; -- cksum = rte_raw_cksum(ipv4_hdr, sizeof(struct ipv4_hdr)); -+ cksum = rte_raw_cksum(ipv4_hdr, (ipv4_hdr->version_ihl & 0xf) * 4); - return (cksum == 0xffff) ? 
cksum : (uint16_t)~cksum; - } - -@@ -294,7 +294,7 @@ struct ipv4_hdr { - } else { - psd_hdr.len = rte_cpu_to_be_16( - (uint16_t)(rte_be_to_cpu_16(ipv4_hdr->total_length) -- - sizeof(struct ipv4_hdr))); -+ - (ipv4_hdr->version_ihl & 0xf) * 4)); - } - return rte_raw_cksum(&psd_hdr, sizeof(psd_hdr)); - } -@@ -317,13 +317,14 @@ struct ipv4_hdr { - rte_ipv4_udptcp_cksum(const struct ipv4_hdr *ipv4_hdr, const void *l4_hdr) - { - uint32_t cksum; -- uint32_t l3_len, l4_len; -+ uint32_t l3_len, l4_len, iphlen; - - l3_len = rte_be_to_cpu_16(ipv4_hdr->total_length); -- if (l3_len < sizeof(struct ipv4_hdr)) -- return 0; -+ iphlen = (ipv4_hdr->version_ihl & 0xf) * 4; - -- l4_len = l3_len - sizeof(struct ipv4_hdr); -+ if (l3_len < iphlen) -+ return 0; -+ l4_len = l3_len - iphlen; - - cksum = rte_raw_cksum(l4_hdr, l4_len); - cksum += rte_ipv4_phdr_cksum(ipv4_hdr, 0); --- -1.8.3.1 - diff --git a/patch/dpdk-stable-18.11.2/0003-driver-kni-enable-flow_item-type-comparsion-in-flow_.patch b/patch/dpdk-stable-18.11.2/0003-driver-kni-enable-flow_item-type-comparsion-in-flow_.patch deleted file mode 100644 index cfaf95059..000000000 --- a/patch/dpdk-stable-18.11.2/0003-driver-kni-enable-flow_item-type-comparsion-in-flow_.patch +++ /dev/null @@ -1,33 +0,0 @@ -From e3d1e01d57ebca7feb9a602a2949661e4351c5ad Mon Sep 17 00:00:00 2001 -From: ywc689 -Date: Fri, 28 Jun 2019 17:02:40 +0800 -Subject: [PATCH 3/7] driver:kni: enable flow_item type comparsion in - flow_fdir_cmp - -the existence is checked before adding/deleting a fdir flow, but -the flow type is not compared in 'flow_fdir_cmp', which resulting -in the failure or unwanted behavior in adding/deleting two same -fdir flows with flow type(such as ipv4 tcp/udp) different only. 
---- - drivers/net/mlx5/mlx5_flow.c | 5 +++++ - 1 file changed, 5 insertions(+) - -diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c -index 222cd81..d99edce 100644 ---- a/drivers/net/mlx5/mlx5_flow.c -+++ b/drivers/net/mlx5/mlx5_flow.c -@@ -2668,6 +2668,11 @@ struct rte_flow * - static int - flow_fdir_cmp(const struct mlx5_fdir *f1, const struct mlx5_fdir *f2) - { -+ unsigned i; -+ for (i = 0; i < sizeof(f1->items)/sizeof(f1->items[0]); i++) { -+ if (f1->items[i].type != f2->items[i].type) -+ return 1; -+ } - if (FLOW_FDIR_CMP(f1, f2, attr) || - FLOW_FDIR_CMP(f1, f2, l2) || - FLOW_FDIR_CMP(f1, f2, l2_mask) || --- -1.8.3.1 - diff --git a/patch/dpdk-stable-18.11.2/0004-rm-rte_experimental-attribute-of-rte_memseg_walk.patch b/patch/dpdk-stable-18.11.2/0004-rm-rte_experimental-attribute-of-rte_memseg_walk.patch deleted file mode 100644 index b44843c45..000000000 --- a/patch/dpdk-stable-18.11.2/0004-rm-rte_experimental-attribute-of-rte_memseg_walk.patch +++ /dev/null @@ -1,42 +0,0 @@ -From 5230e76681ade3da2f25a896887ce31fac5ff397 Mon Sep 17 00:00:00 2001 -From: liuchuanqi -Date: Fri, 7 Aug 2020 19:20:57 +0800 -Subject: [PATCH 4/7] rm rte_experimental attribute of rte_memseg_walk - -there is no __rte_experimental attribute in function rte_mempool_walk and rte_memzone_walk of dpdk 18.11, -and there is no __rte_experimental attribute in function rte_memseg_walk of the higher version's dpdk(eg: dpdk 20.05). -so remove it to prevent compilation error when dpdk application calls the function. 
---- - lib/librte_eal/common/eal_common_memory.c | 2 +- - lib/librte_eal/common/include/rte_memory.h | 2 +- - 2 files changed, 2 insertions(+), 2 deletions(-) - -diff --git a/lib/librte_eal/common/eal_common_memory.c b/lib/librte_eal/common/eal_common_memory.c -index e3ef371..ed442e7 100644 ---- a/lib/librte_eal/common/eal_common_memory.c -+++ b/lib/librte_eal/common/eal_common_memory.c -@@ -606,7 +606,7 @@ unsigned rte_memory_get_nrank(void) - return ret; - } - --int __rte_experimental -+int - rte_memseg_walk_thread_unsafe(rte_memseg_walk_t func, void *arg) - { - struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; -diff --git a/lib/librte_eal/common/include/rte_memory.h b/lib/librte_eal/common/include/rte_memory.h -index d970825..71bee8b 100644 ---- a/lib/librte_eal/common/include/rte_memory.h -+++ b/lib/librte_eal/common/include/rte_memory.h -@@ -227,7 +227,7 @@ typedef int (*rte_memseg_list_walk_t)(const struct rte_memseg_list *msl, - * 1 if stopped by the user - * -1 if user function reported error - */ --int __rte_experimental -+int - rte_memseg_walk(rte_memseg_walk_t func, void *arg); - - /** --- -1.8.3.1 - diff --git a/patch/dpdk-stable-18.11.2/0005-enable-pdump-and-change-dpdk-pdump-tool-for-dpvs.patch b/patch/dpdk-stable-18.11.2/0005-enable-pdump-and-change-dpdk-pdump-tool-for-dpvs.patch deleted file mode 100644 index 53c027ed4..000000000 --- a/patch/dpdk-stable-18.11.2/0005-enable-pdump-and-change-dpdk-pdump-tool-for-dpvs.patch +++ /dev/null @@ -1,581 +0,0 @@ -From 769e98ca325dee673ff297d5906e5ef3c9632862 Mon Sep 17 00:00:00 2001 -From: yuwenchao -Date: Tue, 8 Jun 2021 09:52:12 +0800 -Subject: [PATCH 5/7] enable pdump and change dpdk pdump tool for dpvs - -Signed-off-by: yuwenchao ---- - app/pdump/main.c | 170 +++++++++++++++++++++++++++++++++++++++++-- - config/common_base | 2 +- - lib/librte_pdump/rte_pdump.c | 146 ++++++++++++++++++++++++++++++++++++- - lib/librte_pdump/rte_pdump.h | 27 +++++++ - 4 files changed, 332 insertions(+), 
13 deletions(-) - -diff --git a/app/pdump/main.c b/app/pdump/main.c -index ccf2a1d..3913515 100644 ---- a/app/pdump/main.c -+++ b/app/pdump/main.c -@@ -26,6 +26,8 @@ - #include - #include - #include -+#include -+ - - #define CMD_LINE_OPT_PDUMP "pdump" - #define PDUMP_PORT_ARG "port" -@@ -38,6 +40,14 @@ - #define PDUMP_MSIZE_ARG "mbuf-size" - #define PDUMP_NUM_MBUFS_ARG "total-num-mbufs" - -+#define PDUMP_HOST_ARG "host" -+#define PDUMP_SRC_ARG "src-host" -+#define PDUMP_DST_ARG "dst-host" -+#define PDUMP_PROTO_PORT_AGE "proto-port" -+#define PDUMP_SPORT_ARG "src-port" -+#define PDUMP_DPORT_ARG "dst-port" -+#define PDUMP_PROTO_ARG "proto" -+ - #define VDEV_NAME_FMT "net_pcap_%s_%d" - #define VDEV_PCAP_ARGS_FMT "tx_pcap=%s" - #define VDEV_IFACE_ARGS_FMT "tx_iface=%s" -@@ -91,6 +101,13 @@ enum pdump_by { - PDUMP_RING_SIZE_ARG, - PDUMP_MSIZE_ARG, - PDUMP_NUM_MBUFS_ARG, -+ PDUMP_HOST_ARG, -+ PDUMP_SRC_ARG, -+ PDUMP_DST_ARG, -+ PDUMP_PROTO_PORT_AGE, -+ PDUMP_SPORT_ARG, -+ PDUMP_DPORT_ARG, -+ PDUMP_PROTO_ARG, - NULL - }; - -@@ -124,6 +141,7 @@ struct pdump_tuples { - enum pcap_stream rx_vdev_stream_type; - enum pcap_stream tx_vdev_stream_type; - bool single_pdump_dev; -+ struct pdump_filter *filter; - - /* stats */ - struct pdump_stats stats; -@@ -149,6 +167,11 @@ struct parse_val { - "(queue=)," - "(rx-dev= |" - " tx-dev=," -+ "[host= | src-host= |" -+ "dst-host=]," -+ "[proto=support:tcp/udp/icmp]," -+ "[proto-port= |src-port= |" -+ "dst-port=]," - "[ring-size=default:16384]," - "[mbuf-size=default:2176]," - "[total-num-mbufs=default:65535]'\n", -@@ -235,6 +258,65 @@ struct parse_val { - } - - static int -+parse_host(const char *key __rte_unused, const char *value, void *extra_args) -+{ -+ struct pdump_tuples *pt = extra_args; -+ struct in_addr inaddr; -+ struct in6_addr inaddr6; -+ union addr addr; -+ int af = 0; -+ -+ if (inet_pton(AF_INET6, value, &inaddr6) > 0) { -+ af = AF_INET6; -+ addr.in6 = inaddr6; -+ } else if (inet_pton(AF_INET, value, &inaddr) > 0){ -+ af = 
AF_INET; -+ addr.in = inaddr; -+ } else { -+ printf("IP address invaled\n"); -+ return -EINVAL; -+ } -+ -+ if (pt->filter && pt->filter->af != 0 && af != pt->filter->af) { -+ printf("IPv4 and IPv6 conflict\n"); -+ return -EINVAL; -+ } else { -+ pt->filter->af = af; -+ } -+ -+ if (!strcmp(key, PDUMP_HOST_ARG)) { -+ rte_memcpy(&pt->filter->host_addr, &addr, sizeof(addr)); -+ } else if (!strcmp(key, PDUMP_SRC_ARG)) { -+ rte_memcpy(&pt->filter->s_addr, &addr, sizeof(addr)); -+ } else if (!strcmp(key, PDUMP_DST_ARG)) { -+ rte_memcpy(&pt->filter->d_addr, &addr, sizeof(addr)); -+ } -+ -+ return 0; -+} -+ -+static int -+parse_proto(const char *key __rte_unused, const char *value, void *extra_args) -+{ -+ struct pdump_tuples *pt = extra_args; -+ -+ if (!strcmp(value, "tcp")) { -+ pt->filter->proto = IPPROTO_TCP; -+ } else if (!strcmp(value, "udp")) { -+ pt->filter->proto = IPPROTO_UDP; -+ } else if (!strcmp(value, "icmp")) { -+ pt->filter->proto = IPPROTO_ICMP; -+ } else { -+ printf("invalid value:\"%s\" for key:\"%s\", " -+ "value must be tcp/udp/icmp\n", value, key); -+ return -EINVAL; -+ } -+ -+ return 0; -+} -+ -+ -+static int - parse_pdump(const char *optarg) - { - struct rte_kvargs *kvlist; -@@ -361,6 +443,75 @@ struct parse_val { - } else - pt->total_num_mbufs = MBUFS_PER_POOL; - -+ /* filter parsing and validation */ -+ pt->filter = rte_zmalloc("pdump_filter", -+ sizeof(struct pdump_filter), 0); -+ cnt1 = rte_kvargs_count(kvlist, PDUMP_HOST_ARG); -+ if (cnt1 == 1) { -+ ret = rte_kvargs_process(kvlist, PDUMP_HOST_ARG, -+ &parse_host, pt); -+ if (ret < 0) -+ goto free_kvlist; -+ } -+ -+ cnt1 = rte_kvargs_count(kvlist, PDUMP_SRC_ARG); -+ if (cnt1 == 1) { -+ ret = rte_kvargs_process(kvlist, PDUMP_SRC_ARG, -+ &parse_host, pt); -+ if (ret < 0) -+ goto free_kvlist; -+ } -+ -+ cnt1 = rte_kvargs_count(kvlist, PDUMP_DST_ARG); -+ if (cnt1 == 1) { -+ ret = rte_kvargs_process(kvlist, PDUMP_DST_ARG, -+ &parse_host, pt); -+ if (ret < 0) -+ goto free_kvlist; -+ } -+ -+ -+ cnt1 = 
rte_kvargs_count(kvlist, PDUMP_PROTO_PORT_AGE); -+ if (cnt1 == 1) { -+ v.min = 1; -+ v.max = UINT16_MAX; -+ ret = rte_kvargs_process(kvlist, PDUMP_PROTO_PORT_AGE, -+ &parse_uint_value, &v); -+ if (ret < 0) -+ goto free_kvlist; -+ pt->filter->proto_port = (uint16_t) v.val; -+ } -+ -+ cnt1 = rte_kvargs_count(kvlist, PDUMP_SPORT_ARG); -+ if (cnt1 == 1) { -+ v.min = 1; -+ v.max = UINT16_MAX; -+ ret = rte_kvargs_process(kvlist, PDUMP_SPORT_ARG, -+ &parse_uint_value, &v); -+ if (ret < 0) -+ goto free_kvlist; -+ pt->filter->s_port = (uint16_t) v.val; -+ } -+ -+ cnt1 = rte_kvargs_count(kvlist, PDUMP_DPORT_ARG); -+ if (cnt1 == 1) { -+ v.min = 1; -+ v.max = UINT16_MAX; -+ ret = rte_kvargs_process(kvlist, PDUMP_DPORT_ARG, -+ &parse_uint_value, &v); -+ if (ret < 0) -+ goto free_kvlist; -+ pt->filter->d_port = (uint16_t) v.val; -+ } -+ -+ cnt1 = rte_kvargs_count(kvlist, PDUMP_PROTO_ARG); -+ if (cnt1 == 1) { -+ ret = rte_kvargs_process(kvlist, PDUMP_PROTO_ARG, -+ &parse_proto, pt); -+ if (ret < 0) -+ goto free_kvlist; -+ } -+ - num_tuples++; - - free_kvlist: -@@ -486,6 +637,8 @@ struct parse_val { - rte_ring_free(pt->rx_ring); - if (pt->tx_ring) - rte_ring_free(pt->tx_ring); -+ if (pt->filter) -+ rte_free(pt->filter); - } - } - -@@ -527,6 +680,7 @@ struct parse_val { - } - - } -+ - cleanup_rings(); - } - -@@ -789,20 +943,20 @@ struct parse_val { - pt->queue, - RTE_PDUMP_FLAG_RX, - pt->rx_ring, -- pt->mp, NULL); -+ pt->mp, pt->filter); - ret1 = rte_pdump_enable_by_deviceid( - pt->device_id, - pt->queue, - RTE_PDUMP_FLAG_TX, - pt->tx_ring, -- pt->mp, NULL); -+ pt->mp, pt->filter); - } else if (pt->dump_by_type == PORT_ID) { - ret = rte_pdump_enable(pt->port, pt->queue, - RTE_PDUMP_FLAG_RX, -- pt->rx_ring, pt->mp, NULL); -+ pt->rx_ring, pt->mp, pt->filter); - ret1 = rte_pdump_enable(pt->port, pt->queue, - RTE_PDUMP_FLAG_TX, -- pt->tx_ring, pt->mp, NULL); -+ pt->tx_ring, pt->mp, pt->filter); - } - } else if (pt->dir == RTE_PDUMP_FLAG_RX) { - if (pt->dump_by_type == DEVICE_ID) -@@ 
-810,22 +964,22 @@ struct parse_val { - pt->device_id, - pt->queue, - pt->dir, pt->rx_ring, -- pt->mp, NULL); -+ pt->mp, pt->filter); - else if (pt->dump_by_type == PORT_ID) - ret = rte_pdump_enable(pt->port, pt->queue, - pt->dir, -- pt->rx_ring, pt->mp, NULL); -+ pt->rx_ring, pt->mp, pt->filter); - } else if (pt->dir == RTE_PDUMP_FLAG_TX) { - if (pt->dump_by_type == DEVICE_ID) - ret = rte_pdump_enable_by_deviceid( - pt->device_id, - pt->queue, - pt->dir, -- pt->tx_ring, pt->mp, NULL); -+ pt->tx_ring, pt->mp, pt->filter); - else if (pt->dump_by_type == PORT_ID) - ret = rte_pdump_enable(pt->port, pt->queue, - pt->dir, -- pt->tx_ring, pt->mp, NULL); -+ pt->tx_ring, pt->mp, pt->filter); - } - if (ret < 0 || ret1 < 0) { - cleanup_pdump_resources(); -diff --git a/config/common_base b/config/common_base -index d12ae98..5ac8d11 100644 ---- a/config/common_base -+++ b/config/common_base -@@ -451,7 +451,7 @@ CONFIG_RTE_LIBRTE_PMD_NULL=y - # - # Compile software PMD backed by PCAP files - # --CONFIG_RTE_LIBRTE_PMD_PCAP=n -+CONFIG_RTE_LIBRTE_PMD_PCAP=y - - # - # Compile example software rings based PMD -diff --git a/lib/librte_pdump/rte_pdump.c b/lib/librte_pdump/rte_pdump.c -index 6c3a885..971e095 100644 ---- a/lib/librte_pdump/rte_pdump.c -+++ b/lib/librte_pdump/rte_pdump.c -@@ -9,6 +9,10 @@ - #include - #include - #include -+#include -+#include -+#include -+#include - - #include "rte_pdump.h" - -@@ -132,6 +136,133 @@ struct pdump_response { - return m_dup; - } - -+static int -+inet_addr_equal(int af, const union addr *a1, -+ const union addr *a2) -+{ -+ switch (af) { -+ case AF_INET: -+ return a1->in.s_addr == a2->in.s_addr; -+ case AF_INET6: -+ return memcmp(a1->in6.s6_addr, a2->in6.s6_addr, 16) == 0; -+ default: -+ return memcmp(a1, a2, sizeof(union addr)) == 0; -+ } -+} -+ -+static int -+inet_is_addr_any(int af, const union addr *addr) -+{ -+ switch (af) { -+ case AF_INET: -+ return addr->in.s_addr == htonl(INADDR_ANY); -+ case AF_INET6: -+ return 
IN6_ARE_ADDR_EQUAL(&addr->in6, &in6addr_any); -+ default: -+ return -1; -+ } -+ -+ return -1; -+} -+static int -+pdump_filter(struct rte_mbuf *m, struct pdump_filter *filter) -+{ -+ struct ether_hdr *eth_hdr; -+ struct vlan_eth_hdr *vlan_eth_hdr; -+ union addr s_addr, d_addr; -+ int prepend = 0; -+ uint16_t type = 0; -+ uint16_t iph_len = 0; -+ uint8_t proto = 0; -+ -+ int af; -+ -+ if (filter->af == 0 && filter->s_port == 0 && -+ filter->d_port == 0 && filter->proto == 0 && -+ filter->proto_port == 0) -+ return 0; -+ -+ eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *); -+ -+ if (eth_hdr->ether_type == htons(ETH_P_8021Q)) { -+ prepend += sizeof(struct vlan_eth_hdr); -+ vlan_eth_hdr = rte_pktmbuf_mtod(m, struct vlan_eth_hdr *); -+ type = vlan_eth_hdr->h_vlan_encapsulated_proto; -+ } else { -+ prepend += sizeof(struct ether_hdr); -+ eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *); -+ type = eth_hdr->ether_type; -+ } -+ -+ if (rte_pktmbuf_adj(m, prepend) == NULL) -+ goto prepend; -+ -+ if (type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) { -+ struct arp_hdr *arp = rte_pktmbuf_mtod(m, struct arp_hdr *); -+ af = AF_INET; -+ s_addr.in.s_addr = arp->arp_data.arp_sip; -+ d_addr.in.s_addr = arp->arp_data.arp_tip; -+ } else if (type == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) { -+ struct ipv4_hdr *ip4 = rte_pktmbuf_mtod(m, struct ipv4_hdr *); -+ af = AF_INET; -+ s_addr.in.s_addr = ip4->src_addr; -+ d_addr.in.s_addr = ip4->dst_addr; -+ proto = ip4->next_proto_id; -+ iph_len = (ip4->version_ihl & 0xf) << 2; -+ } else if (type == rte_cpu_to_be_16(ETHER_TYPE_IPv6)) { -+ struct ipv6_hdr *ip6 = rte_pktmbuf_mtod(m, struct ipv6_hdr *); -+ af = AF_INET6; -+ rte_memcpy(&s_addr.in6, &ip6->src_addr, 16); -+ rte_memcpy(&d_addr.in6, &ip6->dst_addr, 16); -+ proto = ip6->proto; -+ iph_len = sizeof(struct ipv6_hdr); -+ } else { -+ goto prepend; -+ } -+ -+ /*filter*/ -+ if (!inet_is_addr_any(af, &filter->s_addr) && -+ !inet_addr_equal(af, &filter->s_addr, &s_addr)) -+ goto prepend; -+ if 
(!inet_is_addr_any(af, &filter->d_addr) && -+ !inet_addr_equal(af, &filter->d_addr, &d_addr)) -+ goto prepend; -+ if (!inet_is_addr_any(af, &filter->host_addr) && -+ !inet_addr_equal(af, &filter->host_addr, &s_addr) && -+ !inet_addr_equal(af, &filter->host_addr, &d_addr)) -+ goto prepend; -+ -+ if (filter->proto && filter->proto != proto) -+ goto prepend; -+ -+ if (filter->s_port || filter->d_port || filter->proto_port) { -+ if (proto != IPPROTO_TCP && proto != IPPROTO_UDP) -+ goto prepend; -+ struct udp_hdr _uh; -+ const struct udp_hdr *uh; -+ uh = rte_pktmbuf_read(m, iph_len, sizeof(_uh), &_uh); -+ if (uh == NULL) -+ goto prepend; -+ if (filter->s_port && filter->s_port != rte_cpu_to_be_16(uh->src_port)) -+ goto prepend; -+ -+ if (filter->d_port && filter->d_port != rte_cpu_to_be_16(uh->dst_port)) -+ goto prepend; -+ -+ if (filter->proto_port && -+ filter->proto_port != rte_cpu_to_be_16(uh->src_port) && -+ filter->proto_port != rte_cpu_to_be_16(uh->dst_port)) -+ goto prepend; -+ } -+ -+ rte_pktmbuf_prepend(m, prepend); -+ return 0; -+ -+prepend: -+ rte_pktmbuf_prepend(m, prepend); -+ return -1; -+} -+ - static inline void - pdump_copy(struct rte_mbuf **pkts, uint16_t nb_pkts, void *user_params) - { -@@ -148,6 +279,8 @@ struct pdump_response { - ring = cbs->ring; - mp = cbs->mp; - for (i = 0; i < nb_pkts; i++) { -+ if (pdump_filter(pkts[i], cbs->filter) != 0) -+ continue; - p = pdump_pktmbuf_copy(pkts[i], mp); - if (p) - dup_bufs[d_pkts++] = p; -@@ -184,7 +317,7 @@ struct pdump_response { - static int - pdump_register_rx_callbacks(uint16_t end_q, uint16_t port, uint16_t queue, - struct rte_ring *ring, struct rte_mempool *mp, -- uint16_t operation) -+ struct pdump_filter *filter, uint16_t operation) - { - uint16_t qid; - struct pdump_rxtx_cbs *cbs = NULL; -@@ -202,6 +335,7 @@ struct pdump_response { - } - cbs->ring = ring; - cbs->mp = mp; -+ cbs->filter = filter; - cbs->cb = rte_eth_add_first_rx_callback(port, qid, - pdump_rx, cbs); - if (cbs->cb == NULL) { -@@ 
-238,7 +372,7 @@ struct pdump_response { - static int - pdump_register_tx_callbacks(uint16_t end_q, uint16_t port, uint16_t queue, - struct rte_ring *ring, struct rte_mempool *mp, -- uint16_t operation) -+ struct pdump_filter *filter, uint16_t operation) - { - - uint16_t qid; -@@ -257,6 +391,7 @@ struct pdump_response { - } - cbs->ring = ring; - cbs->mp = mp; -+ cbs->filter = filter; - cbs->cb = rte_eth_add_tx_callback(port, qid, pdump_tx, - cbs); - if (cbs->cb == NULL) { -@@ -300,6 +435,7 @@ struct pdump_response { - uint16_t operation; - struct rte_ring *ring; - struct rte_mempool *mp; -+ struct pdump_filter *filter; - - flags = p->flags; - operation = p->op; -@@ -315,6 +451,7 @@ struct pdump_response { - queue = p->data.en_v1.queue; - ring = p->data.en_v1.ring; - mp = p->data.en_v1.mp; -+ filter = p->data.en_v1.filter; - } else { - ret = rte_eth_dev_get_port_by_name(p->data.dis_v1.device, - &port); -@@ -327,6 +464,7 @@ struct pdump_response { - queue = p->data.dis_v1.queue; - ring = p->data.dis_v1.ring; - mp = p->data.dis_v1.mp; -+ filter = p->data.dis_v1.filter; - } - - /* validation if packet capture is for all queues */ -@@ -358,7 +496,7 @@ struct pdump_response { - if (flags & RTE_PDUMP_FLAG_RX) { - end_q = (queue == RTE_PDUMP_ALL_QUEUES) ? nb_rx_q : queue + 1; - ret = pdump_register_rx_callbacks(end_q, port, queue, ring, mp, -- operation); -+ filter, operation); - if (ret < 0) - return ret; - } -@@ -367,7 +505,7 @@ struct pdump_response { - if (flags & RTE_PDUMP_FLAG_TX) { - end_q = (queue == RTE_PDUMP_ALL_QUEUES) ? 
nb_tx_q : queue + 1; - ret = pdump_register_tx_callbacks(end_q, port, queue, ring, mp, -- operation); -+ filter, operation); - if (ret < 0) - return ret; - } -diff --git a/lib/librte_pdump/rte_pdump.h b/lib/librte_pdump/rte_pdump.h -index 673a2b0..e9568e0 100644 ---- a/lib/librte_pdump/rte_pdump.h -+++ b/lib/librte_pdump/rte_pdump.h -@@ -15,6 +15,8 @@ - #include - #include - #include -+#include -+#include - - #ifdef __cplusplus - extern "C" { -@@ -34,6 +36,31 @@ enum rte_pdump_socktype { - RTE_PDUMP_SOCKET_CLIENT = 2 - }; - -+union addr { -+ struct in_addr in; -+ struct in6_addr in6; -+}; -+ -+struct pdump_filter { -+ int af; -+ union addr s_addr; -+ union addr d_addr; -+ union addr host_addr; //s_addr or d_addr -+ -+ uint8_t proto; -+ uint16_t proto_port; //s_port or d_port -+ uint16_t s_port; -+ uint16_t d_port; -+}; -+ -+struct vlan_eth_hdr { -+ unsigned char h_dest[ETH_ALEN]; -+ unsigned char h_source[ETH_ALEN]; -+ unsigned short h_vlan_proto; -+ unsigned short h_vlan_TCI; -+ unsigned short h_vlan_encapsulated_proto; -+}; -+ - /** - * Initialize packet capturing handling - * --- -1.8.3.1 - diff --git a/patch/dpdk-stable-18.11.2/0006-enable-dpdk-eal-memory-debug.patch b/patch/dpdk-stable-18.11.2/0006-enable-dpdk-eal-memory-debug.patch deleted file mode 100644 index 3b13fd0bd..000000000 --- a/patch/dpdk-stable-18.11.2/0006-enable-dpdk-eal-memory-debug.patch +++ /dev/null @@ -1,68 +0,0 @@ -From 51aa71b8c000a55e9caac0fe12b216d8bde05ac8 Mon Sep 17 00:00:00 2001 -From: ywc -Date: Mon, 25 Jan 2021 10:27:52 +0800 -Subject: [PATCH 6/7] enable dpdk eal memory debug - ---- - config/common_base | 2 +- - lib/librte_eal/common/include/rte_malloc.h | 15 +++++++++++++++ - lib/librte_eal/common/rte_malloc.c | 4 ++++ - 3 files changed, 20 insertions(+), 1 deletion(-) - -diff --git a/config/common_base b/config/common_base -index 5ac8d11..ef15b0a 100644 ---- a/config/common_base -+++ b/config/common_base -@@ -94,7 +94,7 @@ CONFIG_RTE_EAL_IGB_UIO=n - CONFIG_RTE_EAL_VFIO=n - 
CONFIG_RTE_MAX_VFIO_GROUPS=64 - CONFIG_RTE_MAX_VFIO_CONTAINERS=64 --CONFIG_RTE_MALLOC_DEBUG=n -+CONFIG_RTE_MALLOC_DEBUG=y - CONFIG_RTE_EAL_NUMA_AWARE_HUGEPAGES=n - CONFIG_RTE_USE_LIBBSD=n - -diff --git a/lib/librte_eal/common/include/rte_malloc.h b/lib/librte_eal/common/include/rte_malloc.h -index e0be13c..f3bcdc6 100644 ---- a/lib/librte_eal/common/include/rte_malloc.h -+++ b/lib/librte_eal/common/include/rte_malloc.h -@@ -214,6 +214,21 @@ struct rte_malloc_socket_stats { - rte_calloc_socket(const char *type, size_t num, size_t size, unsigned align, int socket); - - /** -+ * Check the header/tailer cookies of memory pointed to by the provided pointer. -+ * -+ * This pointer must have been returned by a previous call to -+ * rte_malloc(), rte_zmalloc(), rte_calloc() or rte_realloc(). -+ * -+ * @param ptr -+ * The pointer to memory to be checked. -+ * @return -+ * - true if the header/tailer cookies are OK. -+ * - Otherwise, false. -+ */ -+int -+rte_memmory_ok(void *ptr); -+ -+/** - * Frees the memory space pointed to by the provided pointer. 
- * - * This pointer must have been returned by a previous call to -diff --git a/lib/librte_eal/common/rte_malloc.c b/lib/librte_eal/common/rte_malloc.c -index 47c2bec..1fab27c 100644 ---- a/lib/librte_eal/common/rte_malloc.c -+++ b/lib/librte_eal/common/rte_malloc.c -@@ -26,6 +26,10 @@ - #include "malloc_heap.h" - #include "eal_memalloc.h" - -+int rte_memmory_ok(void *addr) -+{ -+ return malloc_elem_cookies_ok(RTE_PTR_SUB(addr, MALLOC_ELEM_HEADER_LEN)); -+} - - /* Free the memory space back to heap */ - void rte_free(void *addr) --- -1.8.3.1 - diff --git a/patch/dpdk-stable-18.11.2/0007-Fix-bonding-mode-4-problem-caused-by-LACP-failure.patch b/patch/dpdk-stable-18.11.2/0007-Fix-bonding-mode-4-problem-caused-by-LACP-failure.patch deleted file mode 100644 index 6fe3edfdc..000000000 --- a/patch/dpdk-stable-18.11.2/0007-Fix-bonding-mode-4-problem-caused-by-LACP-failure.patch +++ /dev/null @@ -1,62 +0,0 @@ -From 9b1bcdc0419328b4a88128922567dacbe5630dd0 Mon Sep 17 00:00:00 2001 -From: yuwenchao -Date: Tue, 8 Jun 2021 11:45:11 +0800 -Subject: [PATCH 7/7] Fix bonding mode 4 problem caused by LACP failure. - -The problem is disscussed in Issue #725 of iqiyi/dpvs in detail. -https://github.com/iqiyi/dpvs/issues/725 - -Signed-off-by: yuwenchao ---- - drivers/net/bonding/rte_eth_bond_8023ad.c | 20 +++++++++++--------- - 1 file changed, 11 insertions(+), 9 deletions(-) - -diff --git a/drivers/net/bonding/rte_eth_bond_8023ad.c b/drivers/net/bonding/rte_eth_bond_8023ad.c -index 1e6a3fc..6d1cca5 100644 ---- a/drivers/net/bonding/rte_eth_bond_8023ad.c -+++ b/drivers/net/bonding/rte_eth_bond_8023ad.c -@@ -811,7 +811,6 @@ - struct port *port; - struct rte_eth_link link_info; - struct ether_addr slave_addr; -- struct rte_mbuf *lacp_pkt = NULL; - uint16_t slave_id; - uint16_t i; - -@@ -876,6 +875,7 @@ - /* Find LACP packet to this port. 
Do not check subtype, - * it is done in function that queued packet - */ -+ struct rte_mbuf *lacp_pkt = NULL; - int retval = rte_ring_dequeue(port->rx_ring, - (void **)&lacp_pkt); - -@@ -884,15 +884,17 @@ - - rx_machine_update(internals, slave_id, lacp_pkt); - } else { -- uint16_t rx_count = rte_eth_rx_burst(slave_id, -- internals->mode4.dedicated_queues.rx_qid, -- &lacp_pkt, 1); -- -- if (rx_count == 1) -- bond_mode_8023ad_handle_slow_pkt(internals, -- slave_id, lacp_pkt); -- else -+ uint16_t rx_count, j; -+ struct rte_mbuf *lacp_pkt[16] = { NULL }; -+ -+ rx_count = rte_eth_rx_burst(slave_id, internals->mode4.dedicated_queues.rx_qid, -+ &lacp_pkt[0], sizeof(lacp_pkt)/sizeof(struct rte_mbuf *)); -+ if (rx_count > 0) { -+ for (j = 0; j < rx_count; j++) -+ bond_mode_8023ad_handle_slow_pkt(internals, slave_id, lacp_pkt[j]); -+ } else { - rx_machine_update(internals, slave_id, NULL); -+ } - } - - periodic_machine(internals, slave_id); --- -1.8.3.1 - From c53a255f681d0b15ad13ee0956382d71ff2434ef Mon Sep 17 00:00:00 2001 From: ywc689 Date: Mon, 21 Jun 2021 14:41:18 +0800 Subject: [PATCH 20/41] main: add dpdk version check Signed-off-by: ywc689 --- conf/dpvs.conf.sample | 5 ----- src/main.c | 9 +++++++++ src/netif.c | 4 ++-- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/conf/dpvs.conf.sample b/conf/dpvs.conf.sample index 069fad9a8..a79622ff2 100644 --- a/conf/dpvs.conf.sample +++ b/conf/dpvs.conf.sample @@ -49,11 +49,6 @@ netif_defs { queue_number 8 descriptor_number 1024 } - fdir { - mode perfect - pballoc 64k - status matched - } ! mtu 1500 ! promisc_mode kni_name dpdk1.kni diff --git a/src/main.c b/src/main.c index 627c4fe30..0efc4728e 100644 --- a/src/main.c +++ b/src/main.c @@ -59,6 +59,13 @@ extern bool g_dpvs_pdump; extern int log_slave_init(void); +static void inline dpdk_version_check(void) +{ +#if RTE_VERSION < RTE_VERSION_NUM(20, 11, 1, 0) + rte_panic("The current DPVS needs dpdk-stable-20.11.1 or higher. 
" + "Try old releases if you are using earlier dpdk versions."); +#endif +} /* * the initialization order of all the modules @@ -253,6 +260,8 @@ int main(int argc, char *argv[]) char pql_conf_buf[LCORE_CONF_BUFFER_LEN]; int pql_conf_buf_len = LCORE_CONF_BUFFER_LEN; + dpdk_version_check(); + /** * add application agruments parse before EAL ones. * use it like the following: diff --git a/src/netif.c b/src/netif.c index 0e06f51e4..67a26fe17 100644 --- a/src/netif.c +++ b/src/netif.c @@ -264,7 +264,7 @@ static void fdir_mode_handler(vector_t tokens) mode, "perfect"); g_fdir_mode = RTE_FDIR_MODE_PERFECT; } - RTE_LOG(INFO, NETIF, "%s:g_fdir_mode = %s\n", mode); + RTE_LOG(INFO, NETIF, "g_fdir_mode = %s\n", mode); FREE_PTR(str); } @@ -445,7 +445,7 @@ static void kni_name_handler(vector_t tokens) struct port_conf_stream, port_list_node); assert(str); - RTE_LOG(INFO, NETIF, "%s: kni_name = %s\n",current_device->name, str); + RTE_LOG(INFO, NETIF, "%s:kni_name = %s\n",current_device->name, str); strncpy(current_device->kni_name, str, sizeof(current_device->kni_name)); FREE_PTR(str); From 1918017961a0f27d44e1ebca308b3cf59e673658 Mon Sep 17 00:00:00 2001 From: ywc689 Date: Mon, 21 Jun 2021 17:28:20 +0800 Subject: [PATCH 21/41] doc: update docs with dpdk 20.11 Signed-off-by: ywc689 --- README.md | 82 ++++++++++++++++++++++--------------------------- doc/faq.md | 30 ++++++++++++------ doc/tutorial.md | 30 +++++++++--------- 3 files changed, 73 insertions(+), 69 deletions(-) diff --git a/README.md b/README.md index 2f1f71aa5..6bddd6c08 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ `DPVS` is a high performance **Layer-4 load balancer** based on [DPDK](http://dpdk.org). It's derived from Linux Virtual Server [LVS](http://www.linuxvirtualserver.org/) and its modification [alibaba/LVS](https://github.com/alibaba/LVS). -> The name `DPVS` comes from "DPDK-LVS". +> Notes: The name `DPVS` comes from "DPDK-LVS". 
![dpvs.png](./pic/dpvs.png) @@ -52,7 +52,9 @@ This *quick start* is tested with the environment below. Other environments should also be OK if DPDK works, please check [dpdk.org](http://www.dpdk.org) for more info. * Please check this link for NICs supported by DPDK: http://dpdk.org/doc/nics. -* Note `flow-director` ([fdir](http://dpdk.org/doc/guides/nics/overview.html#id1)) is needed for `FNAT` and `SNAT` mode with multi-cores. +* Note `flow control` ([rte_flow](http://dpdk.org/doc/guides/nics/overview.html#id1)) is needed for `FNAT` and `SNAT` mode with multi-cores. + +> Notes: To let dpvs work properly with multi-cores, rte_flow items must support "ipv4, ipv6, tcp, udp" four items, and rte_flow actions must support "drop, queue" at least. ## Clone DPVS @@ -65,60 +67,49 @@ Well, let's start from DPDK then. ## DPDK setup. -Currently, `dpdk-stable-18.11.2` is recommended for `DPVS`. `dpdk-stable-17.11.2` and `dpdk-stable-17.11.6` are supported until the lifecycle end of DPVS v1.8. +Currently, `dpdk-stable-20.11.1` is recommended for `DPVS`, and we will not support dpdk version earlier than dpdk-20.11 any more. If you are still using earlier dpdk versions, such as `dpdk-stable-17.11.2`, `dpdk-stable-17.11.6` and `dpdk-stable-18.11.2`, please use earlier dpvs releases, such as [v1.8.10](https://github.com/iqiyi/dpvs/releases/tag/v1.8.10). -> You can skip this section if experienced with DPDK, and refer the [link](http://dpdk.org/doc/guides/linux_gsg/index.html) for details. +> Notes: You can skip this section if experienced with DPDK, and refer the [link](http://dpdk.org/doc/guides/linux_gsg/index.html) for details. ```bash -$ wget https://fast.dpdk.org/rel/dpdk-18.11.2.tar.xz # download from dpdk.org if link failed. -$ tar xf dpdk-18.11.2.tar.xz +$ wget https://fast.dpdk.org/rel/dpdk-20.11.1.tar.xz # download from dpdk.org if link failed. 
+$ tar xf dpdk-20.11.1.tar.xz ``` ### DPDK patchs There are some patches for DPDK to support extra features needed by DPVS. Apply them if needed. For example, there's a patch for DPDK `kni` driver for hardware multicast, apply it if you are to launch `ospfd` on `kni` device. -> Assuming we are in DPVS root directory and dpdk-stable-18.11.2 is under it, please note it's not mandatory, just for convenience. +> Notes: Assuming we are in DPVS root directory and dpdk-stable-20.11.1 is under it, please note it's not mandatory, just for convenience. ``` $ cd -$ cp patch/dpdk-stable-18.11.2/*.patch dpdk-stable-18.11.2/ -$ cd dpdk-stable-18.11.2/ +$ cp patch/dpdk-stable-20.11.1/*.patch dpdk-stable-20.11.1/ +$ cd dpdk-stable-20.11.1/ $ patch -p1 < 0001-kni-use-netlink-event-for-multicast-driver-part.patch -$ patch -p1 < 0002-net-support-variable-IP-header-len-for-checksum-API.patch +$ patch -p1 < 0002-pdump-change-dpdk-pdump-tool-for-dpvs.patch $ ... ``` -> It's advised to patch all if your are not sure about what they are meant for. +> Tips: It's advised to patch all if your are not sure about what they are meant for. ### DPDK build and install -1. Using meson-ninja for building DPDK libraries. The `dpdk.mk` checks for presence of libdpdk +Use meson-ninja to build DPDK libraries, and export environment variable `PKG_CONFIG_PATH` for DPDK app (DPVS). The `dpdk.mk` in DPVS checks the presence of libdpdk. ```bash -$ cd dpdk-stable-18.11.11 -$ mkdir [user desired install folder - dpdklib] -$ mkdir [user desired build folder - dpdkbuild] -$ meson -Dprefix=[dpdklib] dpdkbuild +$ cd dpdk-stable-20.11.1 +$ mkdir dpdklib # user desired install folder +$ mkdir dpdkbuild # user desired build folder +$ meson -Denable_kmods=true -Dprefix=dpdklib dpdkbuild $ ninja -C dpdkbuild $ cd dpdkbuild; ninja install -$ export PKG_CONFIG_PATH=${PKG_CONFIG_PATH:+$PKG_CONFIG_PATH:}$(pwd)/../dpdklib/lib64/pkgconfig/libdpdk.pc -``` - -2. 
Using Makefile for build DPDK linraries and export `RTE_SDK` env variable for DPDK app (DPVS). - -```bash -$ cd dpdk-stable-18.11.2/ -$ make config T=x86_64-native-linuxapp-gcc -Configuration done -$ make # or make -j40 to save time, where 40 is the cpu core number. -$ export RTE_SDK=$PWD -$ export RTE_TARGET=build +$ export PKG_CONFIG_PATH=$(pwd)/../dpdklib/lib64/pkgconfig/libdpdk.pc ``` -In our tutorial, `RTE_TARGET` is set to the default "build", thus DPDK libs and header files can be found in `dpdk-stable-18.11.2/build`. +> Tips: You can use script [dpdk-build.sh](./scripts/dpdk-build.sh) to facilitate dpdk build. Run `dpdk-build.sh -h` for the usage of the script. -Now to set up DPDK hugepage, our test environment is NUMA system. For single-node system please refer to the [link](http://dpdk.org/doc/guides/linux_gsg/sys_reqs.html). +Next is to set up DPDK hugepage. Our test environment is NUMA system. For single-node system please refer to the [link](http://dpdk.org/doc/guides/linux_gsg/sys_reqs.html). ```bash $ # for NUMA machine @@ -129,40 +120,41 @@ $ mkdir /mnt/huge $ mount -t hugetlbfs nodev /mnt/huge ``` -Install kernel modules and bind NIC with `igb_uio` driver. Quick start uses only one NIC, normally we use 2 for FNAT cluster, even 4 for bonding mode. For example, suppose the NIC we would use to run DPVS is eth0, in the meantime, we still keep another standalone NIC eth1 for debugging. +Install kernel modules and bind NIC with `uio_pci_generic` driver. Quick start uses only one NIC, normally we use two for FNAT cluster, even four for bonding mode. For example, suppose the NIC we would use to run DPVS is eth0, in the meantime, we still keep another standalone NIC eth1 for debugging. 
```bash -$ modprobe uio -$ cd dpdk-stable-18.11.2 +$ modprobe uio_pci_generic -$ insmod build/kmod/igb_uio.ko -$ insmod build/kmod/rte_kni.ko carrier=on +$ cd dpdk-stable-20.11.1 +$ insmod dpdkbuild/kernel/linux/kni/rte_kni.ko carrier=on $ ./usertools/dpdk-devbind.py --status -$ ifconfig eth0 down # assuming eth0 is 0000:06:00.0 -$ ./usertools/dpdk-devbind.py -b igb_uio 0000:06:00.0 +$ ifconfig eth0 down # assuming eth0 is 0000:06:00.0 +$ ./usertools/dpdk-devbind.py -b uio_pci_generic 0000:06:00.0 ``` -> Note that a kernel parameter `carrier` is added to `rte_kni.ko` since [DPDK v18.11](https://elixir.bootlin.com/dpdk/v18.11/source/kernel/linux/kni/kni_misc.c), and the default value for it is "off". We need to load `rte_kni.ko` with the extra parameter `carrier=on` to make KNI devices work properly. +> Notes: +> 1. An alternative to the `uio_pci_generic` is `igb_uio`, which is moved to a separated repository [dpdk-kmods](http://git.dpdk.org/dpdk-kmods). +> 2. A kernel module parameter `carrier` is added to `rte_kni.ko` since [DPDK v18.11](https://elixir.bootlin.com/dpdk/v18.11/source/kernel/linux/kni/kni_misc.c), and the default value for it is "off". We need to load `rte_kni.ko` with the extra parameter `carrier=on` to make KNI devices work properly. `dpdk-devbind.py -u` can be used to unbind driver and switch it back to Linux driver like `ixgbe`. You can also use `lspci` or `ethtool -i eth0` to check the NIC PCI bus-id. Please refer to [DPDK site](http://www.dpdk.org) for more details. -> Note: PMD of Mellanox NIC is built on top of libibverbs using the Raw Ethernet Accelerated Verbs AP. It doesn't rely on UIO/VFIO driver. Thus, Mellanox NICs should not bind the `igb_uio` driver. Refer to [Mellanox DPDK](https://community.mellanox.com/s/article/mellanox-dpdk) for details. +> Notes: PMD of Mellanox NIC is built on top of libibverbs using the Raw Ethernet Accelerated Verbs AP. It doesn't rely on UIO/VFIO driver. 
Thus, Mellanox NICs should not bind the `igb_uio` driver. Refer to [Mellanox DPDK](https://community.mellanox.com/s/article/mellanox-dpdk) for details. ## Build DPVS It's simple, just set `RTE_SDK` and build it. ```bash -$ cd dpdk-stable-18.11.2/ -$ export RTE_SDK=$PWD +$ export PKG_CONFIG_PATH= # normally located at dpdklib/lib64/pkgconfig/libdpdk.pc $ cd -$ make # or "make -j40" to speed up. +$ make # or "make -j" to speed up $ make install ``` - -> Build dependencies may be needed, such as `pkg-config`(latest version),`automake`, `libnl3`, `libnl-genl-3.0`, `openssl`, `popt` and `numactl`. You can install the missing dependencies by using the package manager of the system, e.g., `yum install popt-devel` (CentOS). +> Notes: +> 1. Build dependencies may be needed, such as `pkg-config`(version 0.29.2+),`automake`, `libnl3`, `libnl-genl-3.0`, `openssl`, `popt` and `numactl`. You can install the missing dependencies by using the package manager of the system, e.g., `yum install popt-devel` (CentOS). +> 2. Early `pkg-config` versions (v0.29.2 before) may cause dpvs build failure. If so, please upgrade this tool. Output files are installed to `dpvs/bin`. @@ -208,7 +200,7 @@ EAL: Error - exiting with code: 1 ``` >It means the NIC count of DPVS does not match `/etc/dpvs.conf`. Please use `dpdk-devbind` to adjust the NIC number or modify `dpvs.conf`. We'll improve this part to make DPVS more "clever" to avoid modify config file when NIC count does not match. -What config items does `dpvs.conf` support and how to configure them? Well, `DPVS` maintains a config item file `conf/dpvs.conf.items` which lists all supported config entries and corresponding feasible values. +What config items does `dpvs.conf` support? How to configure them? Well, `DPVS` maintains a config item file `conf/dpvs.conf.items` which lists all supported config entries and corresponding feasible values. 
Besides, some config sample files maintained as `./conf/dpvs.*.sample` show the configurations of dpvs in some specified cases. ## Test Full-NAT (FNAT) Load Balancer diff --git a/doc/faq.md b/doc/faq.md index 86f601aeb..f9e7a5da5 100644 --- a/doc/faq.md +++ b/doc/faq.md @@ -27,12 +27,12 @@ DPVS Frequently Asked Questions (FAQ) Please try to follow `README.md` and `doc/tutorial.md` first. And if you still have problem, possible reasons are: -1. NIC do not support DPDK or *flow-director* (`fdir`), please check this [answer](#nic). -2. DPDK not compatible with Kernel Version, it cause build error, please refer to [DPDK.org](https://www.dpdk.org/) or consider upgrade the Kernel. +1. NIC does not support DPDK or *flow control* (`rte_flow`), please check this [answer](#nic). +2. DPDK is not compatible with Kernel Version, it cause build error, please refer to [DPDK.org](https://www.dpdk.org/) or consider upgrade the Kernel. 3. CPU core (`lcore`) and NIC queue's configure is miss-match. Please read `conf/*.sample`, note worker-CPU/NIC-queue are 1:1 mapping and you need one more cpu for master. 4. DPDK NIC's link is not up ? please check NIC cable first. -5. `curl` VIP in FullNAT mode fails (or sometime fails)? Please check if NIC support [fdir](#nic). +5. `curl` VIP in FullNAT mode fails (or sometime fails)? Please check if NIC support [rte_flow](#nic). 6. `curl` still fails. Please check route and arp by `dpip route show`, `dpip neigh show`. 6. The patchs in `patch/` are not applied. @@ -42,16 +42,28 @@ And you may find other similar issues and solutions from Github's issues list. ### Does my NIC support DPVS ? -Actaully, it's the question about if the NIC support DPDK as well as "flow-director (fdir)". +Actaully, it's the question about if the NIC support DPDK as well as "flow control(rte_flow)". -First, please make sure the NIC support `DPDK`, you can check the [link](https://core.dpdk.org/supported/). 
Second, DPVS's FullNAT/SNAT mode need flow-director feature, *unless you configure only one worker*. For `fdir` support, this [link](http://doc.dpdk.org/guides/nics/overview.html#id1) can be checked. +First, please make sure the NIC support `DPDK`, you can check the [link](https://core.dpdk.org/supported/). Second, DPVS's FullNAT/SNAT mode need flow control(rte_flow) feature, *unless you configure only one worker*. For `rte_flow` support, this [link](http://doc.dpdk.org/guides/nics/overview.html#id1) can be checked. -Please find the DPDK driver name according to your NIC by the first link. And check `fdir` support for each drivers from the matrix in the second link. +Please find the DPDK driver name according to your NIC by the first link. And check `rte_flow` support for each drivers from the matrix in the second link. 1. https://core.dpdk.org/supported/ 2. http://doc.dpdk.org/guides/nics/overview.html#id1 -> `Fdir` is replaced with `rte_flow` in the lastest DPDK. DPVS is making efforts to adapt to the change. +The PMD of your NIC should support the following rte_flow items, + +* ipv4 +* ipv6 +* tcp +* udp + +and the following rte_flow actions at least. + +* queue +* drop + +> If you are using only one worker, you can turn off dpvs flow control by setting `sa_pool/flow_enable` to `off` in dpvs.conf. @@ -106,7 +118,7 @@ Yes, it does support UDP. In order to get the real client IP/port in FullNAT mod ### Does DPVS support IP fragment ? -No, since connection table is per-lcore (per-CPU), and RSS/fdir are used for FNAT. Assuming RSS mode is TCP and fdir uses L4 info ``. Considered that IP fragment doesn't have L4 info, it needs reassembling first and re-schedule the pkt to **correct** lcore which the 5-tuple flow (connection) belongs to. +No, since connection table is per-lcore (per-CPU), and RSS/rte_flow are used for FNAT. Assuming RSS mode is TCP and rte_flow uses L4 info ``. 
Considered that IP fragment doesn't have L4 info, it needs reassembling first and re-schedule the pkt to **correct** lcore which the 5-tuple flow (connection) belongs to. May be someday in the future, we will support "pkt re-schedule" on lcores or use L3 (IP) info only for `RSS`/`FDIR`, then we may support fragment. But even we support fragment, it may hurt the performance (reassemble, re-schedule effort) or security. @@ -116,7 +128,7 @@ Actually, IPv4 fragment is not recommended, while IPv6 even not support fragment ### How to launch DPVS on Virtual Machine ? -Please refer to the [tutorial.md](../doc/tutorial.md), there's an exmaple to run DPVS on `Ubuntu`. Basically, you may need to reduce memory usage. And for VM's NIC, `fdir` is not supported, so if you want to config FullNAT/SNAT mode, you have to configure **only one** worker (cpu), and another CPU core for master. +Please refer to the [tutorial.md](../doc/tutorial.md), there's an exmaple to run DPVS on `Ubuntu`. Basically, you may need to reduce memory usage. And for VM's NIC, `rte_flow` is not supported, so if you want to config FullNAT/SNAT mode, you have to configure **only one** worker (cpu), and another CPU core for master. diff --git a/doc/tutorial.md b/doc/tutorial.md index 5c39850ce..bd69d37a3 100644 --- a/doc/tutorial.md +++ b/doc/tutorial.md @@ -639,9 +639,9 @@ A strict limitation exists for DPVS NAT mode: **DPVS `NAT` mode can only work in * DPVS session entries are splited and distributed on lcores by RSS. * NAT forwarding requires both inbound and outbound traffic go through DPVS. * Only dest IP/port is translated in NAT forwarding, source IP/port is not changed. -* Very limited maximum flow director rules can be set for a NIC. +* Very limited maximum rte_flow rules can be set for a NIC. -So, if no other control of the traffic flow, outbound packets may arrive at different lcore from inbound packets. If so, outbound packets would be dropped because session lookup miss. 
Full-NAT fixes the problem by using Flow Director(FDIR). However, there are very limited rules can be added for a NIC, i.e. 8K for XT-540. Unlike Full-NAT, NAT does not have local IP/port, so FDIR rules can only be set on source IP/port, which means only thousands concurrency is supported. Therefore, FDIR is not feasible for NAT. +So, if no other control of the traffic flow, outbound packets may arrive at different lcore from inbound packets. If so, outbound packets would be dropped because session lookup miss. Full-NAT fixes the problem by using Flow Control (rte_flow). However, there are very limited rules can be added for a NIC, i.e. 8K for XT-540. Unlike Full-NAT, NAT does not have local IP/port, so flow rules can only be set on source IP/port, which means only thousands concurrency is supported. Therefore, rte_flow is not feasible for NAT. Whatever, we give a simple example for NAT mode. Remind it only works single lcore. @@ -994,31 +994,28 @@ DPVS supports IPv6-IPv4 for fullnat, which means VIP/client IP can be IPv6 and l ``` OSPF can just be configured like IPv6-IPv6. If you prefer keepalived, you can configure it like IPv6-IPv6 except real_server/local_address_group. -**IPv6 and Flow Director** +**IPv6 and Flow Control** -We found there exists some NICs do not (fully) support Flow Director for IPv6. -For example, 82599 10GE Controller do not support IPv6 *perfect mode*, and IPv4/IPv6 *signature mode* supports only one locall IP. - -If you would like to use Flow Director signature mode, add the following lines into the device configs of `dpvs.conf`: +We found there exists some NICs do not (fully) support Flow Control of IPv6 required by IPv6. +For example, the rte_flow of 82599 10GE Controller (ixgbe PMD) relies on an old fashion flow type `flow director` (fdir), which doesn't support IPv6 in its *perfect mode*, and support only one local IPv4 or IPv6 in its *signature mode*. DPVS supports the fdir mode config for compatibility. 
``` -fdir { +netif_defs { + ... mode signature - pballoc 64k - status matched } ``` -Another method to avoid Flow Director problem is to use the redirect forwarding, which forwards the recieved packets to the right lcore where the session resides by using lockless DPDK rings. +Another method to avoid not (fully) supported rte_flow problem is to use the redirect forwarding, which forwards the recieved packets to the correct worker lcore where the session resides by using lockless DPDK rings. If you want to try this method, turn on the `redirect` switch in the `dpvs.conf`. ``` ipvs_defs { conn { - ...... + ... redirect on } - ...... + ... } ``` It should note that the redirect forwarding may harm performance to a certain degree. Keep it in `off` state unless you have no other solutions. @@ -1090,7 +1087,7 @@ Please also check `dpip tunnel help` for details. > Notes: > 1. RSS schedule all packets to same queue/CPU since underlay source IP may the same. > If one lcore's `sa_pool` gets full, `sa_miss` happens. This is not a problem for some NICs which support inner RSS for tunnelling. -> 2. `fdir`/`rss` won't works well on tunnel deivce, do not use tunnel for FNAT. +> 2. `rte_flow`/`rss` won't works well on tunnel deivce, do not use tunnel for FNAT. @@ -1161,7 +1158,7 @@ Now, `dpvs.conf` must be put at `/etc/dpvs.conf`, just copy it from `conf/dpvs.c $ cp conf/dpvs.conf.single-nic.sample /etc/dpvs.conf ``` -The NIC for Ubuntu may not support flow-director(fdir),for that case ,please use 'single worker',may decrease conn_pool_size . +The NIC for Ubuntu may not support flow control(rte_flow) required by DPVS. For that case, please use 'single worker', and disable flow control. 
```bash queue_number 1 @@ -1183,6 +1180,9 @@ worker_defs { } } + sa_pool { + flow_enable off + } ``` From c57a0db7c6f4311b0ecef17d528d8a7dd83aa022 Mon Sep 17 00:00:00 2001 From: ywc689 Date: Tue, 22 Jun 2021 20:26:59 +0800 Subject: [PATCH 22/41] makefile: update config.mk --- src/config.mk | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/config.mk b/src/config.mk index 5142e00d0..930353ac6 100644 --- a/src/config.mk +++ b/src/config.mk @@ -21,7 +21,7 @@ # TODO: use standard way to define compile flags. # -CONFIG_MLX5=n +CONFIG_IXGEB_PMD=y CONFIG_PDUMP=y CFLAGS += -D DPVS_MAX_SOCKET=2 @@ -47,7 +47,7 @@ CFLAGS += -D DPVS_MAX_LCORE=64 #CFLAGS += -D CONFIG_ICMP_REDIRECT_CORE # for ixgbe nic -ifneq ($(CONFIG_MLX5), y) +ifeq ($(CONFIG_IXGEB_PMD), y) CFLAGS += -D CONFIG_DPVS_FDIR endif @@ -59,6 +59,3 @@ GCC_MAJOR = $(shell echo __GNUC__ | $(CC) -E -x c - | tail -n 1) GCC_MINOR = $(shell echo __GNUC_MINOR__ | $(CC) -E -x c - | tail -n 1) GCC_VERSION = $(GCC_MAJOR)$(GCC_MINOR) -ifeq ($(CONFIG_MLX5), y) -LIBS += -libverbs -lmlx5 -lmnl -endif From 9a5d303d969147ea4c3669c2bac09c815d6800e9 Mon Sep 17 00:00:00 2001 From: ywc689 Date: Tue, 22 Jun 2021 21:10:01 +0800 Subject: [PATCH 23/41] netif: fix rte_flow flush problem for bonding slaves --- src/netif_flow.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/netif_flow.c b/src/netif_flow.c index ab723fcf2..ed4da0874 100644 --- a/src/netif_flow.c +++ b/src/netif_flow.c @@ -220,6 +220,9 @@ int netif_flow_flush(struct netif_port *dev) if (unlikely(!dev)) return EDPVS_INVAL; + if (dev->type == PORT_TYPE_BOND_SLAVE) + return EDPVS_OK; + if (dev->type == PORT_TYPE_VLAN) { struct vlan_dev_priv *vlan = netif_priv(dev); if (unlikely(!vlan || !vlan->real_dev)) @@ -244,7 +247,7 @@ int netif_flow_flush(struct netif_port *dev) return err; } - return EDPVS_INVAL; + return EDPVS_NOTSUPP; } /* From 82f66c519e9171bba39e1142ce6caa3b623d7b82 Mon Sep 17 00:00:00 2001 From: ywc689 Date: Thu, 24 
Jun 2021 17:07:04 +0800 Subject: [PATCH 24/41] ci: adapt ci to dpdk-20.11 Signed-off-by: ywc689 --- .github/workflows/build.yaml | 33 ++++----------------------------- .github/workflows/run.yaml | 5 ++--- 2 files changed, 6 insertions(+), 32 deletions(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index cb43a6d8f..ccdfd6688 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -16,44 +16,19 @@ jobs: build-basic: runs-on: self-hosted env: - RTE_SDK: /data/dpdk/intel/dpdk-stable-18.11.2 - RTE_TARGET: x86_64-native-linuxapp-gcc + PKG_CONFIG_PATH: /data/dpdk/dpdklib/lib64/pkgconfig steps: - uses: actions/checkout@v2 - name: make - run: make -j32 + run: make -j - build-mlnx: - runs-on: self-hosted - env: - RTE_SDK: /data/dpdk/mlnx/dpdk-stable-18.11.2 - RTE_TARGET: x86_64-native-linuxapp-gcc - steps: - - uses: actions/checkout@v2 - - name: config - run: sed -i 's/^CONFIG_MLX5=./CONFIG_MLX5=y/' src/config.mk - - name: make - run: make -j32 - build-debug: runs-on: self-hosted env: - RTE_SDK: /data/dpdk/intel/dpdk-stable-18.11.2 - RTE_TARGET: x86_64-native-linuxapp-gcc + PKG_CONFIG_PATH: /data/dpdk/dpdklib/lib64/pkgconfig steps: - uses: actions/checkout@v2 - name: config run: sed -i 's/#CFLAGS +=/CFLAGS +=/' src/config.mk && sed -i 's/^#DEBUG := 1/DEBUG := 1/' src/Makefile - name: make - run: make -j32 - - build-olddpdk: - runs-on: self-hosted - env: - RTE_SDK: /data/dpdk/intel/dpdk-stable-17.11.6 - RTE_TARGET: x86_64-native-linuxapp-gcc - steps: - - uses: actions/checkout@v2 - - name: make - run: make -j32 - + run: make -j diff --git a/.github/workflows/run.yaml b/.github/workflows/run.yaml index cf3350f1b..41a77bcba 100644 --- a/.github/workflows/run.yaml +++ b/.github/workflows/run.yaml @@ -16,12 +16,11 @@ jobs: run-dpvs: runs-on: self-hosted env: - RTE_SDK: /data/dpdk/intel/dpdk-stable-18.11.2 - RTE_TARGET: x86_64-native-linuxapp-gcc + PKG_CONFIG_PATH: /data/dpdk/dpdklib/lib64/pkgconfig steps: - uses: 
actions/checkout@v2 - name: make - run: make -j32 + run: make -j - name: install run: make install - name: run-dpvs From 5e23cc95eb7e5836521a847814a7bc9425a9b162 Mon Sep 17 00:00:00 2001 From: ywc689 Date: Thu, 24 Jun 2021 17:31:01 +0800 Subject: [PATCH 25/41] patch: add dpdk 20.11.1 bonding mode 4 patch for mlx5 Signed-off-by: ywc689 --- ...link-event-for-multicast-driver-part.patch | 2 +- ...dump-change-dpdk-pdump-tool-for-dpvs.patch | 2 +- ...ug-only-enable-dpdk-eal-memory-debug.patch | 2 +- ...w-patch-ixgbe-fdir-rte_flow-for-dpvs.patch | 2 +- ...ode-4-problem-caused-by-LACP-failure.patch | 62 +++++++++++++++++++ 5 files changed, 66 insertions(+), 4 deletions(-) create mode 100644 patch/dpdk-stable-20.11.1/0005-Fix-bonding-mode-4-problem-caused-by-LACP-failure.patch diff --git a/patch/dpdk-stable-20.11.1/0001-kni-use-netlink-event-for-multicast-driver-part.patch b/patch/dpdk-stable-20.11.1/0001-kni-use-netlink-event-for-multicast-driver-part.patch index fce646ee4..b3ddee908 100644 --- a/patch/dpdk-stable-20.11.1/0001-kni-use-netlink-event-for-multicast-driver-part.patch +++ b/patch/dpdk-stable-20.11.1/0001-kni-use-netlink-event-for-multicast-driver-part.patch @@ -1,7 +1,7 @@ From fc25cda5bab943feac5455779fb6a6f00ee2a87d Mon Sep 17 00:00:00 2001 From: wencyu Date: Thu, 17 Jun 2021 20:39:55 +0800 -Subject: [PATCH 1/4] kni: use netlink event for multicast (driver part) +Subject: [PATCH 1/5] kni: use netlink event for multicast (driver part) Kni driver sends netlink event every time hw-multicast list updated by kernel, the user kni app should capture the event and update multicast diff --git a/patch/dpdk-stable-20.11.1/0002-pdump-change-dpdk-pdump-tool-for-dpvs.patch b/patch/dpdk-stable-20.11.1/0002-pdump-change-dpdk-pdump-tool-for-dpvs.patch index ea3dd924c..8f2604ef3 100644 --- a/patch/dpdk-stable-20.11.1/0002-pdump-change-dpdk-pdump-tool-for-dpvs.patch +++ b/patch/dpdk-stable-20.11.1/0002-pdump-change-dpdk-pdump-tool-for-dpvs.patch @@ -1,7 +1,7 @@ From 
6a99af8a3f9067a18211d15b4a65bcafa9430875 Mon Sep 17 00:00:00 2001 From: wencyu Date: Fri, 18 Jun 2021 10:20:45 +0800 -Subject: [PATCH 2/4] pdump: change dpdk-pdump tool for dpvs +Subject: [PATCH 2/5] pdump: change dpdk-pdump tool for dpvs Signed-off-by: wencyu --- diff --git a/patch/dpdk-stable-20.11.1/0003-for-debug-only-enable-dpdk-eal-memory-debug.patch b/patch/dpdk-stable-20.11.1/0003-for-debug-only-enable-dpdk-eal-memory-debug.patch index 04e37b323..848a0cf4e 100644 --- a/patch/dpdk-stable-20.11.1/0003-for-debug-only-enable-dpdk-eal-memory-debug.patch +++ b/patch/dpdk-stable-20.11.1/0003-for-debug-only-enable-dpdk-eal-memory-debug.patch @@ -1,7 +1,7 @@ From 906f4690d1f3cadca260b23fd1c839d12db9e629 Mon Sep 17 00:00:00 2001 From: wencyu Date: Fri, 18 Jun 2021 11:43:07 +0800 -Subject: [PATCH 3/4] [for debug only] enable dpdk eal memory debug +Subject: [PATCH 3/5] [for debug only] enable dpdk eal memory debug The patch is used for memory debug. To use the patch, configure meson with option -Dc_args="-DRTE_MALLOC_DEBUG" when building dpdk. For example, diff --git a/patch/dpdk-stable-20.11.1/0004-ixgbe_flow-patch-ixgbe-fdir-rte_flow-for-dpvs.patch b/patch/dpdk-stable-20.11.1/0004-ixgbe_flow-patch-ixgbe-fdir-rte_flow-for-dpvs.patch index 96e21e69a..ce7126acf 100644 --- a/patch/dpdk-stable-20.11.1/0004-ixgbe_flow-patch-ixgbe-fdir-rte_flow-for-dpvs.patch +++ b/patch/dpdk-stable-20.11.1/0004-ixgbe_flow-patch-ixgbe-fdir-rte_flow-for-dpvs.patch @@ -1,7 +1,7 @@ From 83ba9cf5e6eb111f8becc1e9c05301fabb40e16b Mon Sep 17 00:00:00 2001 From: wencyu Date: Fri, 18 Jun 2021 14:00:24 +0800 -Subject: [PATCH 4/4] ixgbe_flow: patch ixgbe fdir rte_flow for dpvs +Subject: [PATCH 4/5] ixgbe_flow: patch ixgbe fdir rte_flow for dpvs 1. Ignore fdir flow rule priority attribute. 2. Use different fdir soft-id for flow rules configured for the same queue. 
diff --git a/patch/dpdk-stable-20.11.1/0005-Fix-bonding-mode-4-problem-caused-by-LACP-failure.patch b/patch/dpdk-stable-20.11.1/0005-Fix-bonding-mode-4-problem-caused-by-LACP-failure.patch new file mode 100644 index 000000000..d20fa5cb7 --- /dev/null +++ b/patch/dpdk-stable-20.11.1/0005-Fix-bonding-mode-4-problem-caused-by-LACP-failure.patch @@ -0,0 +1,62 @@ +From b83dc422e4de55dbaa2f1376d56254d8fa00dbea Mon Sep 17 00:00:00 2001 +From: wencyu +Date: Thu, 24 Jun 2021 17:24:51 +0800 +Subject: [PATCH 5/5] Fix bonding mode 4 problem caused by LACP failure. + +The problem is disscussed in Issue #725 of iqiyi/dpvs in detail. +https://github.com/iqiyi/dpvs/issues/725 + +Signed-off-by: wencyu +--- + drivers/net/bonding/rte_eth_bond_8023ad.c | 20 +++++++++++--------- + 1 file changed, 11 insertions(+), 9 deletions(-) + +diff --git a/drivers/net/bonding/rte_eth_bond_8023ad.c b/drivers/net/bonding/rte_eth_bond_8023ad.c +index 5fe004e..52bd960 100644 +--- a/drivers/net/bonding/rte_eth_bond_8023ad.c ++++ b/drivers/net/bonding/rte_eth_bond_8023ad.c +@@ -831,7 +831,6 @@ + struct port *port; + struct rte_eth_link link_info; + struct rte_ether_addr slave_addr; +- struct rte_mbuf *lacp_pkt = NULL; + uint16_t slave_id; + uint16_t i; + +@@ -903,6 +902,7 @@ + /* Find LACP packet to this port. 
Do not check subtype, + * it is done in function that queued packet + */ ++ struct rte_mbuf *lacp_pkt = NULL; + int retval = rte_ring_dequeue(port->rx_ring, + (void **)&lacp_pkt); + +@@ -911,15 +911,17 @@ + + rx_machine_update(internals, slave_id, lacp_pkt); + } else { +- uint16_t rx_count = rte_eth_rx_burst(slave_id, +- internals->mode4.dedicated_queues.rx_qid, +- &lacp_pkt, 1); +- +- if (rx_count == 1) +- bond_mode_8023ad_handle_slow_pkt(internals, +- slave_id, lacp_pkt); +- else ++ uint16_t rx_count, j; ++ struct rte_mbuf *lacp_pkt[16] = { NULL }; ++ ++ rx_count = rte_eth_rx_burst(slave_id, internals->mode4.dedicated_queues.rx_qid, ++ &lacp_pkt[0], sizeof(lacp_pkt)/sizeof(struct rte_mbuf *)); ++ if (rx_count > 0) { ++ for (j = 0; j < rx_count; j++) ++ bond_mode_8023ad_handle_slow_pkt(internals, slave_id, lacp_pkt[j]); ++ } else { + rx_machine_update(internals, slave_id, NULL); ++ } + } + + periodic_machine(internals, slave_id); +-- +1.8.3.1 + From 291f4a41aefdd46de998896136653b5db490de52 Mon Sep 17 00:00:00 2001 From: huangyichen Date: Tue, 29 Jun 2021 17:57:57 +0800 Subject: [PATCH 26/41] single worker rte_flow invalid process bugfix patch --- ...kni-use-netlink-event-for-multicast.patch} | 18 +++--- ...002-change-dpdk-pdump-tool-for-dpvs.patch} | 45 +++++++------- ...ug-only-enable-dpdk-eal-memory-debug.patch | 10 ++- ...w-patch-ixgbe-fdir-rte_flow-for-dpvs.patch | 61 ++++++++++++++----- ...ode-4-problem-caused-by-LACP-failure.patch | 14 ++--- 5 files changed, 85 insertions(+), 63 deletions(-) rename patch/dpdk-stable-20.11.1/{0001-kni-use-netlink-event-for-multicast-driver-part.patch => 0001-kni-use-netlink-event-for-multicast.patch} (88%) rename patch/dpdk-stable-20.11.1/{0002-pdump-change-dpdk-pdump-tool-for-dpvs.patch => 0002-change-dpdk-pdump-tool-for-dpvs.patch} (90%) diff --git a/patch/dpdk-stable-20.11.1/0001-kni-use-netlink-event-for-multicast-driver-part.patch b/patch/dpdk-stable-20.11.1/0001-kni-use-netlink-event-for-multicast.patch similarity index 
88% rename from patch/dpdk-stable-20.11.1/0001-kni-use-netlink-event-for-multicast-driver-part.patch rename to patch/dpdk-stable-20.11.1/0001-kni-use-netlink-event-for-multicast.patch index b3ddee908..d12d30b97 100644 --- a/patch/dpdk-stable-20.11.1/0001-kni-use-netlink-event-for-multicast-driver-part.patch +++ b/patch/dpdk-stable-20.11.1/0001-kni-use-netlink-event-for-multicast.patch @@ -1,7 +1,7 @@ -From fc25cda5bab943feac5455779fb6a6f00ee2a87d Mon Sep 17 00:00:00 2001 -From: wencyu -Date: Thu, 17 Jun 2021 20:39:55 +0800 -Subject: [PATCH 1/5] kni: use netlink event for multicast (driver part) +From db292243dcd6ef371fc6268e54b9ec16116a9d87 Mon Sep 17 00:00:00 2001 +From: huangyichen +Date: Tue, 29 Jun 2021 11:19:12 +0800 +Subject: [PATCH 1/5] kni: use netlink event for multicast Kni driver sends netlink event every time hw-multicast list updated by kernel, the user kni app should capture the event and update multicast @@ -10,8 +10,6 @@ to kni device. Original way is using rte_kni_request to pass hw-multicast to user kni module. That method works but finally memory corruption found, which is not easy to address. That's why we use netlink event instead. - -Signed-off-by: wencyu --- kernel/linux/kni/kni_net.c | 76 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 76 insertions(+) @@ -29,7 +27,7 @@ index 4b75208..cde565e 100644 #include #include -@@ -128,6 +130,7 @@ +@@ -128,6 +130,7 @@ kni_net_process_request(struct kni_dev *kni, struct rte_kni_request *req) ret_val = wait_event_interruptible_timeout(kni->wq, kni_fifo_count(kni->resp_q), 3 * HZ); if (signal_pending(current) || ret_val <= 0) { @@ -37,7 +35,7 @@ index 4b75208..cde565e 100644 ret = -ETIME; goto fail; } -@@ -657,6 +660,77 @@ void kni_net_release_fifo_phy(struct kni_dev *kni) +@@ -657,6 +660,77 @@ kni_net_change_mtu(struct net_device *dev, int new_mtu) return (ret == 0) ? 
req.result : ret; } @@ -115,7 +113,7 @@ index 4b75208..cde565e 100644 static void kni_net_change_rx_flags(struct net_device *netdev, int flags) { -@@ -757,6 +831,7 @@ void kni_net_release_fifo_phy(struct kni_dev *kni) +@@ -757,6 +831,7 @@ kni_net_set_mac(struct net_device *netdev, void *p) kni = netdev_priv(netdev); ret = kni_net_process_request(kni, &req); @@ -123,7 +121,7 @@ index 4b75208..cde565e 100644 return (ret == 0 ? req.result : ret); } -@@ -788,6 +863,7 @@ void kni_net_release_fifo_phy(struct kni_dev *kni) +@@ -788,6 +863,7 @@ static const struct net_device_ops kni_net_netdev_ops = { .ndo_change_rx_flags = kni_net_change_rx_flags, .ndo_start_xmit = kni_net_tx, .ndo_change_mtu = kni_net_change_mtu, diff --git a/patch/dpdk-stable-20.11.1/0002-pdump-change-dpdk-pdump-tool-for-dpvs.patch b/patch/dpdk-stable-20.11.1/0002-change-dpdk-pdump-tool-for-dpvs.patch similarity index 90% rename from patch/dpdk-stable-20.11.1/0002-pdump-change-dpdk-pdump-tool-for-dpvs.patch rename to patch/dpdk-stable-20.11.1/0002-change-dpdk-pdump-tool-for-dpvs.patch index 8f2604ef3..4ded5ade0 100644 --- a/patch/dpdk-stable-20.11.1/0002-pdump-change-dpdk-pdump-tool-for-dpvs.patch +++ b/patch/dpdk-stable-20.11.1/0002-change-dpdk-pdump-tool-for-dpvs.patch @@ -1,9 +1,8 @@ -From 6a99af8a3f9067a18211d15b4a65bcafa9430875 Mon Sep 17 00:00:00 2001 -From: wencyu -Date: Fri, 18 Jun 2021 10:20:45 +0800 -Subject: [PATCH 2/5] pdump: change dpdk-pdump tool for dpvs +From fad87988c197755a353f113613d8f3b96be7025a Mon Sep 17 00:00:00 2001 +From: huangyichen +Date: Tue, 29 Jun 2021 11:22:43 +0800 +Subject: [PATCH 2/5] change dpdk-pdump tool for dpvs -Signed-off-by: wencyu --- app/pdump/main.c | 167 ++++++++++++++++++++++++++++++++++++++++--- lib/librte_pdump/rte_pdump.c | 145 +++++++++++++++++++++++++++++++++++-- @@ -37,7 +36,7 @@ index b34bf33..9d14474 100644 #define VDEV_NAME_FMT "net_pcap_%s_%d" #define VDEV_PCAP_ARGS_FMT "tx_pcap=%s" #define VDEV_IFACE_ARGS_FMT "tx_iface=%s" -@@ -97,6 +106,13 @@ 
enum pdump_by { +@@ -97,6 +106,13 @@ static const char * const valid_pdump_arguments[] = { PDUMP_RING_SIZE_ARG, PDUMP_MSIZE_ARG, PDUMP_NUM_MBUFS_ARG, @@ -59,7 +58,7 @@ index b34bf33..9d14474 100644 /* stats */ struct pdump_stats stats; -@@ -158,6 +175,11 @@ struct parse_val { +@@ -158,6 +175,11 @@ pdump_usage(const char *prgname) "(queue=)," "(rx-dev= |" " tx-dev=," @@ -71,7 +70,7 @@ index b34bf33..9d14474 100644 "[ring-size=default:16384]," "[mbuf-size=default:2176]," "[total-num-mbufs=default:65535]'\n", -@@ -244,6 +266,64 @@ struct parse_val { +@@ -244,6 +266,64 @@ parse_uint_value(const char *key, const char *value, void *extra_args) } static int @@ -136,7 +135,7 @@ index b34bf33..9d14474 100644 parse_pdump(const char *optarg) { struct rte_kvargs *kvlist; -@@ -370,6 +450,75 @@ struct parse_val { +@@ -370,6 +450,75 @@ parse_pdump(const char *optarg) } else pt->total_num_mbufs = MBUFS_PER_POOL; @@ -212,7 +211,7 @@ index b34bf33..9d14474 100644 num_tuples++; free_kvlist: -@@ -510,6 +659,8 @@ struct parse_val { +@@ -510,6 +659,8 @@ cleanup_rings(void) rte_ring_free(pt->rx_ring); if (pt->tx_ring) rte_ring_free(pt->tx_ring); @@ -221,7 +220,7 @@ index b34bf33..9d14474 100644 } } -@@ -837,20 +988,20 @@ struct parse_val { +@@ -837,20 +988,20 @@ enable_pdump(void) pt->queue, RTE_PDUMP_FLAG_RX, pt->rx_ring, @@ -246,7 +245,7 @@ index b34bf33..9d14474 100644 } } else if (pt->dir == RTE_PDUMP_FLAG_RX) { if (pt->dump_by_type == DEVICE_ID) -@@ -858,22 +1009,22 @@ struct parse_val { +@@ -858,22 +1009,22 @@ enable_pdump(void) pt->device_id, pt->queue, pt->dir, pt->rx_ring, @@ -288,7 +287,7 @@ index b3c8d5c..b73fb8f 100644 #include "rte_pdump.h" -@@ -69,6 +73,132 @@ struct pdump_response { +@@ -69,6 +73,132 @@ static struct pdump_rxtx_cbs { } rx_cbs[RTE_MAX_ETHPORTS][RTE_MAX_QUEUES_PER_PORT], tx_cbs[RTE_MAX_ETHPORTS][RTE_MAX_QUEUES_PER_PORT]; @@ -421,7 +420,7 @@ index b3c8d5c..b73fb8f 100644 static inline void pdump_copy(struct rte_mbuf **pkts, uint16_t nb_pkts, void 
*user_params) -@@ -86,6 +216,8 @@ struct pdump_response { +@@ -86,6 +216,8 @@ pdump_copy(struct rte_mbuf **pkts, uint16_t nb_pkts, void *user_params) ring = cbs->ring; mp = cbs->mp; for (i = 0; i < nb_pkts; i++) { @@ -430,7 +429,7 @@ index b3c8d5c..b73fb8f 100644 p = rte_pktmbuf_copy(pkts[i], mp, 0, UINT32_MAX); if (p) dup_bufs[d_pkts++] = p; -@@ -122,7 +254,7 @@ struct pdump_response { +@@ -122,7 +254,7 @@ pdump_tx(uint16_t port __rte_unused, uint16_t qidx __rte_unused, static int pdump_register_rx_callbacks(uint16_t end_q, uint16_t port, uint16_t queue, struct rte_ring *ring, struct rte_mempool *mp, @@ -439,7 +438,7 @@ index b3c8d5c..b73fb8f 100644 { uint16_t qid; struct pdump_rxtx_cbs *cbs = NULL; -@@ -140,6 +272,7 @@ struct pdump_response { +@@ -140,6 +272,7 @@ pdump_register_rx_callbacks(uint16_t end_q, uint16_t port, uint16_t queue, } cbs->ring = ring; cbs->mp = mp; @@ -447,7 +446,7 @@ index b3c8d5c..b73fb8f 100644 cbs->cb = rte_eth_add_first_rx_callback(port, qid, pdump_rx, cbs); if (cbs->cb == NULL) { -@@ -176,7 +309,7 @@ struct pdump_response { +@@ -176,7 +309,7 @@ pdump_register_rx_callbacks(uint16_t end_q, uint16_t port, uint16_t queue, static int pdump_register_tx_callbacks(uint16_t end_q, uint16_t port, uint16_t queue, struct rte_ring *ring, struct rte_mempool *mp, @@ -456,7 +455,7 @@ index b3c8d5c..b73fb8f 100644 { uint16_t qid; -@@ -195,6 +328,7 @@ struct pdump_response { +@@ -195,6 +328,7 @@ pdump_register_tx_callbacks(uint16_t end_q, uint16_t port, uint16_t queue, } cbs->ring = ring; cbs->mp = mp; @@ -464,7 +463,7 @@ index b3c8d5c..b73fb8f 100644 cbs->cb = rte_eth_add_tx_callback(port, qid, pdump_tx, cbs); if (cbs->cb == NULL) { -@@ -238,6 +372,7 @@ struct pdump_response { +@@ -238,6 +372,7 @@ set_pdump_rxtx_cbs(const struct pdump_request *p) uint16_t operation; struct rte_ring *ring; struct rte_mempool *mp; @@ -472,7 +471,7 @@ index b3c8d5c..b73fb8f 100644 flags = p->flags; operation = p->op; -@@ -253,6 +388,7 @@ struct pdump_response { +@@ -253,6 
+388,7 @@ set_pdump_rxtx_cbs(const struct pdump_request *p) queue = p->data.en_v1.queue; ring = p->data.en_v1.ring; mp = p->data.en_v1.mp; @@ -480,7 +479,7 @@ index b3c8d5c..b73fb8f 100644 } else { ret = rte_eth_dev_get_port_by_name(p->data.dis_v1.device, &port); -@@ -265,6 +401,7 @@ struct pdump_response { +@@ -265,6 +401,7 @@ set_pdump_rxtx_cbs(const struct pdump_request *p) queue = p->data.dis_v1.queue; ring = p->data.dis_v1.ring; mp = p->data.dis_v1.mp; @@ -488,7 +487,7 @@ index b3c8d5c..b73fb8f 100644 } /* validation if packet capture is for all queues */ -@@ -303,7 +440,7 @@ struct pdump_response { +@@ -303,7 +440,7 @@ set_pdump_rxtx_cbs(const struct pdump_request *p) if (flags & RTE_PDUMP_FLAG_RX) { end_q = (queue == RTE_PDUMP_ALL_QUEUES) ? nb_rx_q : queue + 1; ret = pdump_register_rx_callbacks(end_q, port, queue, ring, mp, @@ -497,7 +496,7 @@ index b3c8d5c..b73fb8f 100644 if (ret < 0) return ret; } -@@ -312,7 +449,7 @@ struct pdump_response { +@@ -312,7 +449,7 @@ set_pdump_rxtx_cbs(const struct pdump_request *p) if (flags & RTE_PDUMP_FLAG_TX) { end_q = (queue == RTE_PDUMP_ALL_QUEUES) ? nb_tx_q : queue + 1; ret = pdump_register_tx_callbacks(end_q, port, queue, ring, mp, diff --git a/patch/dpdk-stable-20.11.1/0003-for-debug-only-enable-dpdk-eal-memory-debug.patch b/patch/dpdk-stable-20.11.1/0003-for-debug-only-enable-dpdk-eal-memory-debug.patch index 848a0cf4e..4d71bc5c0 100644 --- a/patch/dpdk-stable-20.11.1/0003-for-debug-only-enable-dpdk-eal-memory-debug.patch +++ b/patch/dpdk-stable-20.11.1/0003-for-debug-only-enable-dpdk-eal-memory-debug.patch @@ -1,6 +1,6 @@ -From 906f4690d1f3cadca260b23fd1c839d12db9e629 Mon Sep 17 00:00:00 2001 -From: wencyu -Date: Fri, 18 Jun 2021 11:43:07 +0800 +From 02aa011332843b7b6b349ffe1845a8e2bb9a1c7f Mon Sep 17 00:00:00 2001 +From: huangyichen +Date: Tue, 29 Jun 2021 11:23:44 +0800 Subject: [PATCH 3/5] [for debug only] enable dpdk eal memory debug The patch is used for memory debug. 
To use the patch, configure meson with option @@ -8,8 +8,6 @@ The patch is used for memory debug. To use the patch, configure meson with optio meson -Dc_args="-DRTE_MALLOC_DEBUG" -Dbuildtype=debug -Dprefix=$(pwd)/dpdklib dpdkbuild ninja -C dpdkbuild - -Signed-off-by: wencyu --- lib/librte_eal/common/rte_malloc.c | 4 ++++ lib/librte_eal/include/rte_malloc.h | 15 +++++++++++++++ @@ -34,7 +32,7 @@ diff --git a/lib/librte_eal/include/rte_malloc.h b/lib/librte_eal/include/rte_ma index 3af64f8..671e4f2 100644 --- a/lib/librte_eal/include/rte_malloc.h +++ b/lib/librte_eal/include/rte_malloc.h -@@ -248,6 +248,21 @@ struct rte_malloc_socket_stats { +@@ -248,6 +248,21 @@ rte_calloc_socket(const char *type, size_t num, size_t size, unsigned align, int __rte_alloc_size(2, 3); /** diff --git a/patch/dpdk-stable-20.11.1/0004-ixgbe_flow-patch-ixgbe-fdir-rte_flow-for-dpvs.patch b/patch/dpdk-stable-20.11.1/0004-ixgbe_flow-patch-ixgbe-fdir-rte_flow-for-dpvs.patch index ce7126acf..9dc7ad280 100644 --- a/patch/dpdk-stable-20.11.1/0004-ixgbe_flow-patch-ixgbe-fdir-rte_flow-for-dpvs.patch +++ b/patch/dpdk-stable-20.11.1/0004-ixgbe_flow-patch-ixgbe-fdir-rte_flow-for-dpvs.patch @@ -1,23 +1,52 @@ -From 83ba9cf5e6eb111f8becc1e9c05301fabb40e16b Mon Sep 17 00:00:00 2001 -From: wencyu -Date: Fri, 18 Jun 2021 14:00:24 +0800 +From 9636cd6bd00d8de7dde8ec969647f4afb0343102 Mon Sep 17 00:00:00 2001 +From: huangyichen +Date: Tue, 29 Jun 2021 11:27:21 +0800 Subject: [PATCH 4/5] ixgbe_flow: patch ixgbe fdir rte_flow for dpvs 1. Ignore fdir flow rule priority attribute. 2. Use different fdir soft-id for flow rules configured for the same queue. 3. Disable fdir mask settings by rte_flow. 4. Allow IPv6 to pass flow rule ETH item validation. - -Signed-off-by: wencyu +5. 
TCP & UDP flow item dest port = 0 is invalid of ixgbe_parse_ntuple_filter() --- - drivers/net/ixgbe/ixgbe_flow.c | 62 ++++++++++++++++++++++++++++++++++-------- - 1 file changed, 51 insertions(+), 11 deletions(-) + drivers/net/ixgbe/ixgbe_flow.c | 78 ++++++++++++++++++++++++++++++++++++------ + 1 file changed, 67 insertions(+), 11 deletions(-) diff --git a/drivers/net/ixgbe/ixgbe_flow.c b/drivers/net/ixgbe/ixgbe_flow.c -index 9aeb2e4..97d5ca0 100644 +index 9aeb2e4..c16ddad 100644 --- a/drivers/net/ixgbe/ixgbe_flow.c +++ b/drivers/net/ixgbe/ixgbe_flow.c -@@ -1419,11 +1419,8 @@ const struct rte_flow_action *next_no_void_action( +@@ -468,6 +468,14 @@ cons_parse_ntuple_filter(const struct rte_flow_attr *attr, + } + + tcp_spec = item->spec; ++ if (tcp_spec->hdr.dst_port == 0 && ++ tcp_mask->hdr.dst_port == 0) { ++ memset(filter, 0, sizeof(struct rte_eth_ntuple_filter)); ++ rte_flow_error_set(error, EINVAL, ++ RTE_FLOW_ERROR_TYPE_ITEM, ++ item, "Not supported by ntuple filter"); ++ return -rte_errno; ++ } + filter->dst_port = tcp_spec->hdr.dst_port; + filter->src_port = tcp_spec->hdr.src_port; + filter->tcp_flags = tcp_spec->hdr.tcp_flags; +@@ -501,6 +509,14 @@ cons_parse_ntuple_filter(const struct rte_flow_attr *attr, + filter->src_port_mask = udp_mask->hdr.src_port; + + udp_spec = item->spec; ++ if (udp_spec->hdr.dst_port == 0 && ++ udp_mask->hdr.dst_port == 0) { ++ memset(filter, 0, sizeof(struct rte_eth_ntuple_filter)); ++ rte_flow_error_set(error, EINVAL, ++ RTE_FLOW_ERROR_TYPE_ITEM, ++ item, "Not supported by ntuple filter"); ++ return -rte_errno; ++ } + filter->dst_port = udp_spec->hdr.dst_port; + filter->src_port = udp_spec->hdr.src_port; + } else if (item->type == RTE_FLOW_ITEM_TYPE_SCTP) { +@@ -1419,11 +1435,8 @@ ixgbe_parse_fdir_act_attr(const struct rte_flow_attr *attr, /* not supported */ if (attr->priority) { @@ -31,7 +60,7 @@ index 9aeb2e4..97d5ca0 100644 } /* check if the first not void action is QUEUE or DROP. 
*/ -@@ -1642,7 +1639,7 @@ static inline uint8_t signature_match(const struct rte_flow_item pattern[]) +@@ -1642,7 +1655,7 @@ ixgbe_parse_fdir_filter_normal(struct rte_eth_dev *dev, * value. So, we need not do anything for the not provided fields later. */ memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); @@ -40,7 +69,7 @@ index 9aeb2e4..97d5ca0 100644 rule->mask.vlan_tci_mask = 0; rule->mask.flex_bytes_mask = 0; -@@ -1760,6 +1757,8 @@ static inline uint8_t signature_match(const struct rte_flow_item pattern[]) +@@ -1760,6 +1773,8 @@ ixgbe_parse_fdir_filter_normal(struct rte_eth_dev *dev, } } else { if (item->type != RTE_FLOW_ITEM_TYPE_IPV4 && @@ -49,7 +78,7 @@ index 9aeb2e4..97d5ca0 100644 item->type != RTE_FLOW_ITEM_TYPE_VLAN) { memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); rte_flow_error_set(error, EINVAL, -@@ -1888,6 +1887,9 @@ static inline uint8_t signature_match(const struct rte_flow_item pattern[]) +@@ -1888,6 +1903,9 @@ ixgbe_parse_fdir_filter_normal(struct rte_eth_dev *dev, rule->ixgbe_fdir.formatted.flow_type = IXGBE_ATR_FLOW_TYPE_IPV6; @@ -59,7 +88,7 @@ index 9aeb2e4..97d5ca0 100644 /** * 1. must signature match * 2. 
not support last -@@ -2748,12 +2750,45 @@ static inline uint8_t signature_match(const struct rte_flow_item pattern[]) +@@ -2748,12 +2766,45 @@ ixgbe_parse_fdir_filter_tunnel(const struct rte_flow_attr *attr, return ixgbe_parse_fdir_act_attr(attr, actions, rule, error); } @@ -106,7 +135,7 @@ index 9aeb2e4..97d5ca0 100644 struct rte_flow_error *error) { int ret; -@@ -2787,13 +2822,18 @@ static inline uint8_t signature_match(const struct rte_flow_item pattern[]) +@@ -2787,13 +2838,18 @@ step_next: rule->ixgbe_fdir.formatted.dst_port != 0)) return -ENOTSUP; @@ -127,7 +156,7 @@ index 9aeb2e4..97d5ca0 100644 return ret; } -@@ -3128,7 +3168,7 @@ static inline uint8_t signature_match(const struct rte_flow_item pattern[]) +@@ -3128,7 +3184,7 @@ ixgbe_flow_create(struct rte_eth_dev *dev, memset(&fdir_rule, 0, sizeof(struct ixgbe_fdir_rule)); ret = ixgbe_parse_fdir_filter(dev, attr, pattern, @@ -136,7 +165,7 @@ index 9aeb2e4..97d5ca0 100644 if (!ret) { /* A mask cannot be deleted. */ if (fdir_rule.b_mask) { -@@ -3299,7 +3339,7 @@ static inline uint8_t signature_match(const struct rte_flow_item pattern[]) +@@ -3299,7 +3355,7 @@ ixgbe_flow_validate(struct rte_eth_dev *dev, memset(&fdir_rule, 0, sizeof(struct ixgbe_fdir_rule)); ret = ixgbe_parse_fdir_filter(dev, attr, pattern, diff --git a/patch/dpdk-stable-20.11.1/0005-Fix-bonding-mode-4-problem-caused-by-LACP-failure.patch b/patch/dpdk-stable-20.11.1/0005-Fix-bonding-mode-4-problem-caused-by-LACP-failure.patch index d20fa5cb7..42f8cf580 100644 --- a/patch/dpdk-stable-20.11.1/0005-Fix-bonding-mode-4-problem-caused-by-LACP-failure.patch +++ b/patch/dpdk-stable-20.11.1/0005-Fix-bonding-mode-4-problem-caused-by-LACP-failure.patch @@ -1,12 +1,10 @@ -From b83dc422e4de55dbaa2f1376d56254d8fa00dbea Mon Sep 17 00:00:00 2001 -From: wencyu -Date: Thu, 24 Jun 2021 17:24:51 +0800 +From f90f9a21d7fed4d18ff8fd2429db1d8055737ce7 Mon Sep 17 00:00:00 2001 +From: huangyichen +Date: Tue, 29 Jun 2021 11:28:36 +0800 Subject: [PATCH 5/5] Fix bonding 
mode 4 problem caused by LACP failure. The problem is disscussed in Issue #725 of iqiyi/dpvs in detail. https://github.com/iqiyi/dpvs/issues/725 - -Signed-off-by: wencyu --- drivers/net/bonding/rte_eth_bond_8023ad.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) @@ -15,7 +13,7 @@ diff --git a/drivers/net/bonding/rte_eth_bond_8023ad.c b/drivers/net/bonding/rte index 5fe004e..52bd960 100644 --- a/drivers/net/bonding/rte_eth_bond_8023ad.c +++ b/drivers/net/bonding/rte_eth_bond_8023ad.c -@@ -831,7 +831,6 @@ +@@ -831,7 +831,6 @@ bond_mode_8023ad_periodic_cb(void *arg) struct port *port; struct rte_eth_link link_info; struct rte_ether_addr slave_addr; @@ -23,7 +21,7 @@ index 5fe004e..52bd960 100644 uint16_t slave_id; uint16_t i; -@@ -903,6 +902,7 @@ +@@ -903,6 +902,7 @@ bond_mode_8023ad_periodic_cb(void *arg) /* Find LACP packet to this port. Do not check subtype, * it is done in function that queued packet */ @@ -31,7 +29,7 @@ index 5fe004e..52bd960 100644 int retval = rte_ring_dequeue(port->rx_ring, (void **)&lacp_pkt); -@@ -911,15 +911,17 @@ +@@ -911,15 +911,17 @@ bond_mode_8023ad_periodic_cb(void *arg) rx_machine_update(internals, slave_id, lacp_pkt); } else { From 2ec96f021bdf2d24debdbaaa7b09b5ecdfa14067 Mon Sep 17 00:00:00 2001 From: huangyichen Date: Fri, 2 Jul 2021 11:22:11 +0800 Subject: [PATCH 27/41] The reason of error return in cons_parse_ntuple_filter() comment And safe free rte_flow_list item in rte_flow_destroy --- ...ink-event-for-multicast-driver-part.patch} | 8 +- ...ump-change-dpdk-pdump-tool-for-dpvs.patch} | 8 +- ...ug-only-enable-dpdk-eal-memory-debug.patch | 6 +- ...w-patch-ixgbe-fdir-rte_flow-for-dpvs.patch | 111 +++++++++++++++--- ...ode-4-problem-caused-by-LACP-failure.patch | 6 +- 5 files changed, 108 insertions(+), 31 deletions(-) rename patch/dpdk-stable-20.11.1/{0001-kni-use-netlink-event-for-multicast.patch => 0001-kni-use-netlink-event-for-multicast-driver-part.patch} (95%) rename 
patch/dpdk-stable-20.11.1/{0002-change-dpdk-pdump-tool-for-dpvs.patch => 0002-pdump-change-dpdk-pdump-tool-for-dpvs.patch} (98%) diff --git a/patch/dpdk-stable-20.11.1/0001-kni-use-netlink-event-for-multicast.patch b/patch/dpdk-stable-20.11.1/0001-kni-use-netlink-event-for-multicast-driver-part.patch similarity index 95% rename from patch/dpdk-stable-20.11.1/0001-kni-use-netlink-event-for-multicast.patch rename to patch/dpdk-stable-20.11.1/0001-kni-use-netlink-event-for-multicast-driver-part.patch index d12d30b97..6baf6bcc0 100644 --- a/patch/dpdk-stable-20.11.1/0001-kni-use-netlink-event-for-multicast.patch +++ b/patch/dpdk-stable-20.11.1/0001-kni-use-netlink-event-for-multicast-driver-part.patch @@ -1,7 +1,7 @@ -From db292243dcd6ef371fc6268e54b9ec16116a9d87 Mon Sep 17 00:00:00 2001 -From: huangyichen -Date: Tue, 29 Jun 2021 11:19:12 +0800 -Subject: [PATCH 1/5] kni: use netlink event for multicast +From 8e66b7600c7d8d0e59dd85565986436c0e03a2c8 Mon Sep 17 00:00:00 2001 +From: huangyichen +Date: Thu, 1 Jul 2021 21:21:16 +0800 +Subject: [PATCH 1/5] kni: use netlink event for multicast (driver part) Kni driver sends netlink event every time hw-multicast list updated by kernel, the user kni app should capture the event and update multicast diff --git a/patch/dpdk-stable-20.11.1/0002-change-dpdk-pdump-tool-for-dpvs.patch b/patch/dpdk-stable-20.11.1/0002-pdump-change-dpdk-pdump-tool-for-dpvs.patch similarity index 98% rename from patch/dpdk-stable-20.11.1/0002-change-dpdk-pdump-tool-for-dpvs.patch rename to patch/dpdk-stable-20.11.1/0002-pdump-change-dpdk-pdump-tool-for-dpvs.patch index 4ded5ade0..7003eed4d 100644 --- a/patch/dpdk-stable-20.11.1/0002-change-dpdk-pdump-tool-for-dpvs.patch +++ b/patch/dpdk-stable-20.11.1/0002-pdump-change-dpdk-pdump-tool-for-dpvs.patch @@ -1,7 +1,7 @@ -From fad87988c197755a353f113613d8f3b96be7025a Mon Sep 17 00:00:00 2001 -From: huangyichen -Date: Tue, 29 Jun 2021 11:22:43 +0800 -Subject: [PATCH 2/5] change dpdk-pdump tool for dpvs +From 
0ef2e126c8ca9b4a246f680a2a5110b734e9782b Mon Sep 17 00:00:00 2001 +From: huangyichen +Date: Thu, 1 Jul 2021 21:23:50 +0800 +Subject: [PATCH 2/5] pdump: change dpdk-pdump tool for dpvs --- app/pdump/main.c | 167 ++++++++++++++++++++++++++++++++++++++++--- diff --git a/patch/dpdk-stable-20.11.1/0003-for-debug-only-enable-dpdk-eal-memory-debug.patch b/patch/dpdk-stable-20.11.1/0003-for-debug-only-enable-dpdk-eal-memory-debug.patch index 4d71bc5c0..4b05ab2ae 100644 --- a/patch/dpdk-stable-20.11.1/0003-for-debug-only-enable-dpdk-eal-memory-debug.patch +++ b/patch/dpdk-stable-20.11.1/0003-for-debug-only-enable-dpdk-eal-memory-debug.patch @@ -1,6 +1,6 @@ -From 02aa011332843b7b6b349ffe1845a8e2bb9a1c7f Mon Sep 17 00:00:00 2001 -From: huangyichen -Date: Tue, 29 Jun 2021 11:23:44 +0800 +From 446e9d2ae65c25ca382323bdd7cdec765f357886 Mon Sep 17 00:00:00 2001 +From: huangyichen +Date: Thu, 1 Jul 2021 21:24:47 +0800 Subject: [PATCH 3/5] [for debug only] enable dpdk eal memory debug The patch is used for memory debug. To use the patch, configure meson with option diff --git a/patch/dpdk-stable-20.11.1/0004-ixgbe_flow-patch-ixgbe-fdir-rte_flow-for-dpvs.patch b/patch/dpdk-stable-20.11.1/0004-ixgbe_flow-patch-ixgbe-fdir-rte_flow-for-dpvs.patch index 9dc7ad280..0699c914e 100644 --- a/patch/dpdk-stable-20.11.1/0004-ixgbe_flow-patch-ixgbe-fdir-rte_flow-for-dpvs.patch +++ b/patch/dpdk-stable-20.11.1/0004-ixgbe_flow-patch-ixgbe-fdir-rte_flow-for-dpvs.patch @@ -1,6 +1,6 @@ -From 9636cd6bd00d8de7dde8ec969647f4afb0343102 Mon Sep 17 00:00:00 2001 -From: huangyichen -Date: Tue, 29 Jun 2021 11:27:21 +0800 +From fc81d45d6bf23bd6d9b0e7eb9fdc7fe41b559e65 Mon Sep 17 00:00:00 2001 +From: huangyichen +Date: Fri, 2 Jul 2021 11:55:47 +0800 Subject: [PATCH 4/5] ixgbe_flow: patch ixgbe fdir rte_flow for dpvs 1. Ignore fdir flow rule priority attribute. @@ -8,18 +8,50 @@ Subject: [PATCH 4/5] ixgbe_flow: patch ixgbe fdir rte_flow for dpvs 3. Disable fdir mask settings by rte_flow. 4. 
Allow IPv6 to pass flow rule ETH item validation. 5. TCP & UDP flow item dest port = 0 is invalid of ixgbe_parse_ntuple_filter() +6. Safe free ixgbe_flow_list item of MARCO RTE_MALLOC_DEBUG is define (configure meson with option -Dc_args="-DRTE_MALLOC_DEBUG") --- - drivers/net/ixgbe/ixgbe_flow.c | 78 ++++++++++++++++++++++++++++++++++++------ - 1 file changed, 67 insertions(+), 11 deletions(-) + drivers/net/ixgbe/ixgbe_flow.c | 119 ++++++++++++++++++++++++++++++++++++----- + 1 file changed, 105 insertions(+), 14 deletions(-) diff --git a/drivers/net/ixgbe/ixgbe_flow.c b/drivers/net/ixgbe/ixgbe_flow.c -index 9aeb2e4..c16ddad 100644 +index 9aeb2e4..481a06f 100644 --- a/drivers/net/ixgbe/ixgbe_flow.c +++ b/drivers/net/ixgbe/ixgbe_flow.c -@@ -468,6 +468,14 @@ cons_parse_ntuple_filter(const struct rte_flow_attr *attr, +@@ -2,7 +2,6 @@ + * Copyright(c) 2010-2016 Intel Corporation + */ + +-#include + #include + #include + #include +@@ -15,6 +14,7 @@ + #include + #include + ++#include + #include + #include + #include +@@ -468,6 +468,29 @@ cons_parse_ntuple_filter(const struct rte_flow_attr *attr, } tcp_spec = item->spec; ++ /* ++ * DPVS filted by fdir is expected, ++ * With dpvs single worker mode pattern had set: ++ * ----------------------------------------------- ++ * ITEM Spec Mask ++ * ETH NULL NULL ++ * IPV4|6 src_addr 0 0 ++ * dst_addr laddr 0xFFFFFFFF ++ * UDP|TCP src_port 0 0 ++ * dst_port 0 0 ++ * END ++ * ----------------------------------------------- ++ * It should return error here ++ * And continue by ixgbe_parse_fdir_filter() ++ * */ + if (tcp_spec->hdr.dst_port == 0 && + tcp_mask->hdr.dst_port == 0) { + memset(filter, 0, sizeof(struct rte_eth_ntuple_filter)); @@ -31,10 +63,26 @@ index 9aeb2e4..c16ddad 100644 filter->dst_port = tcp_spec->hdr.dst_port; filter->src_port = tcp_spec->hdr.src_port; filter->tcp_flags = tcp_spec->hdr.tcp_flags; -@@ -501,6 +509,14 @@ cons_parse_ntuple_filter(const struct rte_flow_attr *attr, +@@ -501,6 +524,30 @@ 
cons_parse_ntuple_filter(const struct rte_flow_attr *attr, filter->src_port_mask = udp_mask->hdr.src_port; udp_spec = item->spec; ++ /* ++ * DPVS filted by fdir is expected, ++ * With dpvs single worker mode pattern had set: ++ * ----------------------------------------------- ++ * ITEM Spec Mask ++ * ETH NULL NULL ++ * IPV4|6 src_addr 0 0 ++ * dst_addr laddr 0xFFFFFFFF ++ * UDP|TCP src_port 0 0 ++ * dst_port 0 0 ++ * END ++ * ----------------------------------------------- ++ * It should return error here ++ * And continue by ixgbe_parse_fdir_filter() ++ * */ ++ + if (udp_spec->hdr.dst_port == 0 && + udp_mask->hdr.dst_port == 0) { + memset(filter, 0, sizeof(struct rte_eth_ntuple_filter)); @@ -46,7 +94,7 @@ index 9aeb2e4..c16ddad 100644 filter->dst_port = udp_spec->hdr.dst_port; filter->src_port = udp_spec->hdr.src_port; } else if (item->type == RTE_FLOW_ITEM_TYPE_SCTP) { -@@ -1419,11 +1435,8 @@ ixgbe_parse_fdir_act_attr(const struct rte_flow_attr *attr, +@@ -1419,11 +1466,8 @@ ixgbe_parse_fdir_act_attr(const struct rte_flow_attr *attr, /* not supported */ if (attr->priority) { @@ -55,12 +103,12 @@ index 9aeb2e4..c16ddad 100644 - RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY, - attr, "Not support priority."); - return -rte_errno; -+ PMD_DRV_LOG(WARNING, "ixgbe flow doesn't support priority %d " ++ PMD_DRV_LOG(INFO, "ixgbe flow doesn't support priority %d " + "(priority must be 0), ignore and continue....\n", attr->priority); } /* check if the first not void action is QUEUE or DROP. */ -@@ -1642,7 +1655,7 @@ ixgbe_parse_fdir_filter_normal(struct rte_eth_dev *dev, +@@ -1642,7 +1686,7 @@ ixgbe_parse_fdir_filter_normal(struct rte_eth_dev *dev, * value. So, we need not do anything for the not provided fields later. 
*/ memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); @@ -69,7 +117,7 @@ index 9aeb2e4..c16ddad 100644 rule->mask.vlan_tci_mask = 0; rule->mask.flex_bytes_mask = 0; -@@ -1760,6 +1773,8 @@ ixgbe_parse_fdir_filter_normal(struct rte_eth_dev *dev, +@@ -1760,6 +1804,8 @@ ixgbe_parse_fdir_filter_normal(struct rte_eth_dev *dev, } } else { if (item->type != RTE_FLOW_ITEM_TYPE_IPV4 && @@ -78,7 +126,18 @@ index 9aeb2e4..c16ddad 100644 item->type != RTE_FLOW_ITEM_TYPE_VLAN) { memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); rte_flow_error_set(error, EINVAL, -@@ -1888,6 +1903,9 @@ ixgbe_parse_fdir_filter_normal(struct rte_eth_dev *dev, +@@ -1815,6 +1861,10 @@ ixgbe_parse_fdir_filter_normal(struct rte_eth_dev *dev, + */ + rule->ixgbe_fdir.formatted.flow_type = + IXGBE_ATR_FLOW_TYPE_IPV4; ++ ++ /* Update flow rule mode by global param. */ ++ rule->mode = dev->data->dev_conf.fdir_conf.mode; ++ + /*Not supported last point for range*/ + if (item->last) { + rte_flow_error_set(error, EINVAL, +@@ -1888,6 +1938,9 @@ ixgbe_parse_fdir_filter_normal(struct rte_eth_dev *dev, rule->ixgbe_fdir.formatted.flow_type = IXGBE_ATR_FLOW_TYPE_IPV6; @@ -88,7 +147,7 @@ index 9aeb2e4..c16ddad 100644 /** * 1. must signature match * 2. 
not support last -@@ -2748,12 +2766,45 @@ ixgbe_parse_fdir_filter_tunnel(const struct rte_flow_attr *attr, +@@ -2748,12 +2801,45 @@ ixgbe_parse_fdir_filter_tunnel(const struct rte_flow_attr *attr, return ixgbe_parse_fdir_act_attr(attr, actions, rule, error); } @@ -135,7 +194,7 @@ index 9aeb2e4..c16ddad 100644 struct rte_flow_error *error) { int ret; -@@ -2787,13 +2838,18 @@ step_next: +@@ -2787,13 +2873,18 @@ step_next: rule->ixgbe_fdir.formatted.dst_port != 0)) return -ENOTSUP; @@ -156,7 +215,7 @@ index 9aeb2e4..c16ddad 100644 return ret; } -@@ -3128,7 +3184,7 @@ ixgbe_flow_create(struct rte_eth_dev *dev, +@@ -3128,7 +3219,7 @@ ixgbe_flow_create(struct rte_eth_dev *dev, memset(&fdir_rule, 0, sizeof(struct ixgbe_fdir_rule)); ret = ixgbe_parse_fdir_filter(dev, attr, pattern, @@ -165,7 +224,7 @@ index 9aeb2e4..c16ddad 100644 if (!ret) { /* A mask cannot be deleted. */ if (fdir_rule.b_mask) { -@@ -3299,7 +3355,7 @@ ixgbe_flow_validate(struct rte_eth_dev *dev, +@@ -3299,7 +3390,7 @@ ixgbe_flow_validate(struct rte_eth_dev *dev, memset(&fdir_rule, 0, sizeof(struct ixgbe_fdir_rule)); ret = ixgbe_parse_fdir_filter(dev, attr, pattern, @@ -174,6 +233,24 @@ index 9aeb2e4..c16ddad 100644 if (!ret) return 0; +@@ -3335,7 +3426,7 @@ ixgbe_flow_destroy(struct rte_eth_dev *dev, + struct ixgbe_eth_syn_filter_ele *syn_filter_ptr; + struct ixgbe_eth_l2_tunnel_conf_ele *l2_tn_filter_ptr; + struct ixgbe_fdir_rule_ele *fdir_rule_ptr; +- struct ixgbe_flow_mem *ixgbe_flow_mem_ptr; ++ struct ixgbe_flow_mem *ixgbe_flow_mem_ptr, *next_ptr; + struct ixgbe_hw_fdir_info *fdir_info = + IXGBE_DEV_PRIVATE_TO_FDIR_INFO(dev->data->dev_private); + struct ixgbe_rss_conf_ele *rss_filter_ptr; +@@ -3432,7 +3523,7 @@ ixgbe_flow_destroy(struct rte_eth_dev *dev, + return ret; + } + +- TAILQ_FOREACH(ixgbe_flow_mem_ptr, &ixgbe_flow_list, entries) { ++ TAILQ_FOREACH_SAFE(ixgbe_flow_mem_ptr, &ixgbe_flow_list, entries, next_ptr) { + if (ixgbe_flow_mem_ptr->flow == pmd_flow) { + TAILQ_REMOVE(&ixgbe_flow_list, + 
ixgbe_flow_mem_ptr, entries); -- 1.8.3.1 diff --git a/patch/dpdk-stable-20.11.1/0005-Fix-bonding-mode-4-problem-caused-by-LACP-failure.patch b/patch/dpdk-stable-20.11.1/0005-Fix-bonding-mode-4-problem-caused-by-LACP-failure.patch index 42f8cf580..665ba4e56 100644 --- a/patch/dpdk-stable-20.11.1/0005-Fix-bonding-mode-4-problem-caused-by-LACP-failure.patch +++ b/patch/dpdk-stable-20.11.1/0005-Fix-bonding-mode-4-problem-caused-by-LACP-failure.patch @@ -1,6 +1,6 @@ -From f90f9a21d7fed4d18ff8fd2429db1d8055737ce7 Mon Sep 17 00:00:00 2001 -From: huangyichen -Date: Tue, 29 Jun 2021 11:28:36 +0800 +From ad725a88c0acbd795646cf1d6280c5de7a012044 Mon Sep 17 00:00:00 2001 +From: huangyichen +Date: Fri, 2 Jul 2021 11:57:03 +0800 Subject: [PATCH 5/5] Fix bonding mode 4 problem caused by LACP failure. The problem is disscussed in Issue #725 of iqiyi/dpvs in detail. From 294796361d8d3dd9ea051a3dc5ffb3161bcc417a Mon Sep 17 00:00:00 2001 From: ywc689 Date: Thu, 8 Jul 2021 18:14:17 +0800 Subject: [PATCH 28/41] fix mbuf dynfield size problem and uniform some coding styles --- doc/faq.md | 4 ++-- include/netif.h | 4 ++-- src/mbuf.c | 4 ++-- src/neigh.c | 18 +++++++++--------- 4 files changed, 15 insertions(+), 15 deletions(-) diff --git a/doc/faq.md b/doc/faq.md index f9e7a5da5..e61ae86e9 100644 --- a/doc/faq.md +++ b/doc/faq.md @@ -118,9 +118,9 @@ Yes, it does support UDP. In order to get the real client IP/port in FullNAT mod ### Does DPVS support IP fragment ? -No, since connection table is per-lcore (per-CPU), and RSS/rte_flow are used for FNAT. Assuming RSS mode is TCP and rte_flow uses L4 info ``. Considered that IP fragment doesn't have L4 info, it needs reassembling first and re-schedule the pkt to **correct** lcore which the 5-tuple flow (connection) belongs to. +No, since connection table is per-lcore (per-CPU), and RSS/rte_flow are used for FNAT. Assuming RSS mode is TCP and rte_flow uses L4 info ``. 
Considered that IP fragment doesn't have L4 info, it needs reassembling first and re-schedule the packet to **correct** lcore which the 5-tuple flow (connection) belongs to. -May be someday in the future, we will support "pkt re-schedule" on lcores or use L3 (IP) info only for `RSS`/`FDIR`, then we may support fragment. But even we support fragment, it may hurt the performance (reassemble, re-schedule effort) or security. +May be someday in the future, we will support "packet re-schedule" on lcores or use L3 (IP) info only for `RSS` or `flow control`, then we may support fragment. But even we support fragment, it may hurt the performance (reassemble, re-schedule effort) or security. Actually, IPv4 fragment is not recommended, while IPv6 even not support fragment by fixed header, and do not allow re-fragment on middle-boxes. The applications, especially for the datagram-oriented apps, like UDP-apps, should perform PMTU discover algorithm to avoid fragment. TCP is sending sliced *segments*, notifying MSS to peer side and *PMTU discover* is built-in, TCP-app should not need worry about fragment. 
diff --git a/include/netif.h b/include/netif.h index 8338f0ab6..f95998335 100644 --- a/include/netif.h +++ b/include/netif.h @@ -168,11 +168,11 @@ typedef enum { struct netif_kni { char name[IFNAMSIZ]; - struct rte_kni * kni; + struct rte_kni *kni; struct rte_ether_addr addr; struct dpvs_timer kni_rtnl_timer; int kni_rtnl_fd; - struct rte_ring * rx_ring; + struct rte_ring *rx_ring; } __rte_cache_aligned; union netif_bond { diff --git a/src/mbuf.c b/src/mbuf.c index ab3d930ac..444082e2d 100644 --- a/src/mbuf.c +++ b/src/mbuf.c @@ -197,12 +197,12 @@ int mbuf_init(void) const struct rte_mbuf_dynfield rte_mbuf_userdata_fields[] = { [ MBUF_FIELD_PROTO ] = { - .name = "route", + .name = "protocol", .size = sizeof(mbuf_userdata_field_proto_t), .align = 8, }, [ MBUF_FIELD_ROUTE ] = { - .name = "protocol", + .name = "route", .size = sizeof(mbuf_userdata_field_route_t), .align = 8, }, diff --git a/src/neigh.c b/src/neigh.c index acd0287f4..80341b763 100644 --- a/src/neigh.c +++ b/src/neigh.c @@ -54,7 +54,7 @@ struct raw_neigh { int af; union inet_addr ip_addr; struct rte_ether_addr eth_addr; - struct netif_port * port; + struct netif_port *port; bool add; uint8_t flag; } __rte_cache_aligned; @@ -147,7 +147,7 @@ static lcoreid_t master_cid = 0; static struct list_head neigh_table[DPVS_MAX_LCORE][NEIGH_TAB_SIZE]; -static struct raw_neigh* neigh_ring_clone_entry(const struct neighbour_entry* neighbour, +static struct raw_neigh *neigh_ring_clone_entry(const struct neighbour_entry *neighbour, bool add); static int neigh_send_arp(struct netif_port *port, uint32_t src_ip, uint32_t dst_ip); @@ -245,7 +245,7 @@ static inline int neigh_unhash(struct neighbour_entry *neighbour) } static inline bool neigh_key_cmp(int af, const struct neighbour_entry *neighbour, - const union inet_addr *key, const struct netif_port* port) + const union inet_addr *key, const struct netif_port *port) { return (inet_addr_equal(af, key, &neighbour->ip_addr)) && @@ -333,7 +333,7 @@ static int 
neighbour_timer_event(void *data) } struct neighbour_entry *neigh_lookup_entry(int af, const union inet_addr *key, - const struct netif_port* port, + const struct netif_port *port, unsigned int hashkey) { struct neighbour_entry *neighbour; @@ -501,7 +501,7 @@ int neigh_resolve_input(struct rte_mbuf *m, struct netif_port *port) unsigned int hashkey; struct inet_ifaddr *ifa; - ifa = inet_addr_ifa_get(AF_INET, port, (union inet_addr*)&arp->arp_data.arp_tip); + ifa = inet_addr_ifa_get(AF_INET, port, (union inet_addr *)&arp->arp_data.arp_tip); if (!ifa) return EDPVS_KNICONTINUE; inet_addr_ifa_put(ifa); @@ -760,10 +760,10 @@ static int neigh_ring_init(void) return EDPVS_OK; } -static struct raw_neigh* neigh_ring_clone_entry(const struct neighbour_entry* neighbour, +static struct raw_neigh *neigh_ring_clone_entry(const struct neighbour_entry *neighbour, bool add) { - struct raw_neigh* mac_param; + struct raw_neigh *mac_param; mac_param = dpvs_mempool_get(neigh_mempool, sizeof(struct raw_neigh)); if (unlikely(mac_param == NULL)) @@ -779,11 +779,11 @@ static struct raw_neigh* neigh_ring_clone_entry(const struct neighbour_entry* ne return mac_param; } -static struct raw_neigh* neigh_ring_clone_param(const struct dp_vs_neigh_conf *param, +static struct raw_neigh *neigh_ring_clone_param(const struct dp_vs_neigh_conf *param, bool add) { struct netif_port *port; - struct raw_neigh* mac_param; + struct raw_neigh *mac_param; mac_param = dpvs_mempool_get(neigh_mempool, sizeof(struct raw_neigh)); if (unlikely(mac_param == NULL)) From 4cdb9e58ec625e41b5e55f32a02a098ecde9d8b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9E=97=E5=89=91=E5=BD=B1?= Date: Wed, 28 Jul 2021 18:03:02 +0800 Subject: [PATCH 29/41] fix dpvs_sockopts sockoptid_t register duplicated in sockopts_exist Signed-off-by: linjianying --- src/ctrl.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/ctrl.c b/src/ctrl.c index 9487e70c8..4b0c55b09 100644 --- a/src/ctrl.c +++ b/src/ctrl.c @@ -1198,10 +1198,18 @@ 
static inline int sockopts_exist(struct dpvs_sockopts *sockopts) judge_id_betw(sockopts->set_opt_max, skopt->set_opt_min, skopt->set_opt_max)) { return 1; } + if (judge_id_betw(skopt->set_opt_min, sockopts->set_opt_min, sockopts->set_opt_max) || + judge_id_betw(skopt->set_opt_max, sockopts->set_opt_min, sockopts->set_opt_max)) { + return 1; + } if (judge_id_betw(sockopts->get_opt_min, skopt->get_opt_min, skopt->get_opt_max) || judge_id_betw(sockopts->get_opt_max, skopt->get_opt_min, skopt->get_opt_max)) { return 1; } + if (judge_id_betw(skopt->get_opt_min, sockopts->get_opt_min, sockopts->get_opt_max) || + judge_id_betw(skopt->get_opt_max, sockopts->get_opt_min, sockopts->get_opt_max)) { + return 1; + } } return 0; } From c68bd026bb5376ea14313e9e5274715e2e19cdb9 Mon Sep 17 00:00:00 2001 From: ywc689 Date: Mon, 2 Aug 2021 14:12:31 +0800 Subject: [PATCH 30/41] patch: allow bonding slaves from different numa nodes Note the patch may have a negative influence on performance. It's not a good practice to bond slaves across numa nodes. 
Signed-off-by: ywc689 --- ...link-event-for-multicast-driver-part.patch | 4 +- ...dump-change-dpdk-pdump-tool-for-dpvs.patch | 4 +- ...ug-only-enable-dpdk-eal-memory-debug.patch | 4 +- ...w-patch-ixgbe-fdir-rte_flow-for-dpvs.patch | 4 +- ...ode-4-problem-caused-by-LACP-failure.patch | 4 +- ...low-slaves-from-different-numa-nodes.patch | 52 +++++++++++++++++++ 6 files changed, 62 insertions(+), 10 deletions(-) create mode 100644 patch/dpdk-stable-20.11.1/0006-bonding-allow-slaves-from-different-numa-nodes.patch diff --git a/patch/dpdk-stable-20.11.1/0001-kni-use-netlink-event-for-multicast-driver-part.patch b/patch/dpdk-stable-20.11.1/0001-kni-use-netlink-event-for-multicast-driver-part.patch index 6baf6bcc0..4672afcc1 100644 --- a/patch/dpdk-stable-20.11.1/0001-kni-use-netlink-event-for-multicast-driver-part.patch +++ b/patch/dpdk-stable-20.11.1/0001-kni-use-netlink-event-for-multicast-driver-part.patch @@ -1,7 +1,7 @@ -From 8e66b7600c7d8d0e59dd85565986436c0e03a2c8 Mon Sep 17 00:00:00 2001 +From 09760dd24ceaa9a6ce2435cdf9105e1da5d95616 Mon Sep 17 00:00:00 2001 From: huangyichen Date: Thu, 1 Jul 2021 21:21:16 +0800 -Subject: [PATCH 1/5] kni: use netlink event for multicast (driver part) +Subject: [PATCH 1/6] kni: use netlink event for multicast (driver part) Kni driver sends netlink event every time hw-multicast list updated by kernel, the user kni app should capture the event and update multicast diff --git a/patch/dpdk-stable-20.11.1/0002-pdump-change-dpdk-pdump-tool-for-dpvs.patch b/patch/dpdk-stable-20.11.1/0002-pdump-change-dpdk-pdump-tool-for-dpvs.patch index 7003eed4d..3c87c614a 100644 --- a/patch/dpdk-stable-20.11.1/0002-pdump-change-dpdk-pdump-tool-for-dpvs.patch +++ b/patch/dpdk-stable-20.11.1/0002-pdump-change-dpdk-pdump-tool-for-dpvs.patch @@ -1,7 +1,7 @@ -From 0ef2e126c8ca9b4a246f680a2a5110b734e9782b Mon Sep 17 00:00:00 2001 +From 60e1eb0a50fc10ca69bcdc4148fdec35576eeedc Mon Sep 17 00:00:00 2001 From: huangyichen Date: Thu, 1 Jul 2021 21:23:50 +0800 
-Subject: [PATCH 2/5] pdump: change dpdk-pdump tool for dpvs +Subject: [PATCH 2/6] pdump: change dpdk-pdump tool for dpvs --- app/pdump/main.c | 167 ++++++++++++++++++++++++++++++++++++++++--- diff --git a/patch/dpdk-stable-20.11.1/0003-for-debug-only-enable-dpdk-eal-memory-debug.patch b/patch/dpdk-stable-20.11.1/0003-for-debug-only-enable-dpdk-eal-memory-debug.patch index 4b05ab2ae..40d1cbc6f 100644 --- a/patch/dpdk-stable-20.11.1/0003-for-debug-only-enable-dpdk-eal-memory-debug.patch +++ b/patch/dpdk-stable-20.11.1/0003-for-debug-only-enable-dpdk-eal-memory-debug.patch @@ -1,7 +1,7 @@ -From 446e9d2ae65c25ca382323bdd7cdec765f357886 Mon Sep 17 00:00:00 2001 +From c9763a0b83bdf0fbf9a5a0ca8c96d999cca62196 Mon Sep 17 00:00:00 2001 From: huangyichen Date: Thu, 1 Jul 2021 21:24:47 +0800 -Subject: [PATCH 3/5] [for debug only] enable dpdk eal memory debug +Subject: [PATCH 3/6] [for debug only] enable dpdk eal memory debug The patch is used for memory debug. To use the patch, configure meson with option -Dc_args="-DRTE_MALLOC_DEBUG" when building dpdk. For example, diff --git a/patch/dpdk-stable-20.11.1/0004-ixgbe_flow-patch-ixgbe-fdir-rte_flow-for-dpvs.patch b/patch/dpdk-stable-20.11.1/0004-ixgbe_flow-patch-ixgbe-fdir-rte_flow-for-dpvs.patch index 0699c914e..e77189368 100644 --- a/patch/dpdk-stable-20.11.1/0004-ixgbe_flow-patch-ixgbe-fdir-rte_flow-for-dpvs.patch +++ b/patch/dpdk-stable-20.11.1/0004-ixgbe_flow-patch-ixgbe-fdir-rte_flow-for-dpvs.patch @@ -1,7 +1,7 @@ -From fc81d45d6bf23bd6d9b0e7eb9fdc7fe41b559e65 Mon Sep 17 00:00:00 2001 +From 54aad262b48f5904d7f1688b56526908151f5574 Mon Sep 17 00:00:00 2001 From: huangyichen Date: Fri, 2 Jul 2021 11:55:47 +0800 -Subject: [PATCH 4/5] ixgbe_flow: patch ixgbe fdir rte_flow for dpvs +Subject: [PATCH 4/6] ixgbe_flow: patch ixgbe fdir rte_flow for dpvs 1. Ignore fdir flow rule priority attribute. 2. Use different fdir soft-id for flow rules configured for the same queue. 
diff --git a/patch/dpdk-stable-20.11.1/0005-Fix-bonding-mode-4-problem-caused-by-LACP-failure.patch b/patch/dpdk-stable-20.11.1/0005-Fix-bonding-mode-4-problem-caused-by-LACP-failure.patch index 665ba4e56..da517b839 100644 --- a/patch/dpdk-stable-20.11.1/0005-Fix-bonding-mode-4-problem-caused-by-LACP-failure.patch +++ b/patch/dpdk-stable-20.11.1/0005-Fix-bonding-mode-4-problem-caused-by-LACP-failure.patch @@ -1,7 +1,7 @@ -From ad725a88c0acbd795646cf1d6280c5de7a012044 Mon Sep 17 00:00:00 2001 +From 0d0ba57681393cd9c11a0ac069c6f676b9f34707 Mon Sep 17 00:00:00 2001 From: huangyichen Date: Fri, 2 Jul 2021 11:57:03 +0800 -Subject: [PATCH 5/5] Fix bonding mode 4 problem caused by LACP failure. +Subject: [PATCH 5/6] Fix bonding mode 4 problem caused by LACP failure. The problem is disscussed in Issue #725 of iqiyi/dpvs in detail. https://github.com/iqiyi/dpvs/issues/725 diff --git a/patch/dpdk-stable-20.11.1/0006-bonding-allow-slaves-from-different-numa-nodes.patch b/patch/dpdk-stable-20.11.1/0006-bonding-allow-slaves-from-different-numa-nodes.patch new file mode 100644 index 000000000..805fefca7 --- /dev/null +++ b/patch/dpdk-stable-20.11.1/0006-bonding-allow-slaves-from-different-numa-nodes.patch @@ -0,0 +1,52 @@ +From e6d97675f818b6225209a3cada1e53a0756daedb Mon Sep 17 00:00:00 2001 +From: wencyu +Date: Mon, 2 Aug 2021 13:52:24 +0800 +Subject: [PATCH 6/6] bonding: allow slaves from different numa nodes +
+Note the patch may have a negative influence on performance. +It's not a good practice to bond slaves across numa nodes. 
+ +Signed-off-by: wencyu +--- + drivers/net/bonding/rte_eth_bond_pmd.c | 18 ++++++++++++++++-- + 1 file changed, 16 insertions(+), 2 deletions(-) + +diff --git a/drivers/net/bonding/rte_eth_bond_pmd.c b/drivers/net/bonding/rte_eth_bond_pmd.c +index 057b1ad..53f8ba3 100644 +--- a/drivers/net/bonding/rte_eth_bond_pmd.c ++++ b/drivers/net/bonding/rte_eth_bond_pmd.c +@@ -1762,7 +1762,14 @@ slave_configure(struct rte_eth_dev *bonded_eth_dev, + + errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id, + bd_rx_q->nb_rx_desc, +- rte_eth_dev_socket_id(slave_eth_dev->data->port_id), ++ // In spite of performance problem, bonding slaves had better to support ++ // slaves from different numa nodes. Considering that numa node on which ++ // the resources of bonding port is allocated from is specified by ++ // rte_eth_bond_create() at bonding creation, the slave's queue_setup ++ // would fail if specified with the slave's numa node id that is different ++ // from the one of the bonding port. See rte_eth_dma_zone_reserve() for ++ // details. ++ SOCKET_ID_ANY, + &(bd_rx_q->rx_conf), bd_rx_q->mb_pool); + if (errval != 0) { + RTE_BOND_LOG(ERR, +@@ -1778,7 +1785,14 @@ slave_configure(struct rte_eth_dev *bonded_eth_dev, + + errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id, + bd_tx_q->nb_tx_desc, +- rte_eth_dev_socket_id(slave_eth_dev->data->port_id), ++ // In spite of performance problem, bonding slaves had better to support ++ // slaves from different numa nodes. Considering that numa node on which ++ // the resources of bonding port is allocated from is specified by ++ // rte_eth_bond_create() at bonding creation, the slave's queue_setup ++ // would fail if specified with the slave's numa node id that is different ++ // from the one of the bonding port. See rte_eth_dma_zone_reserve() for ++ // details. 
++ SOCKET_ID_ANY, + &bd_tx_q->tx_conf); + if (errval != 0) { + RTE_BOND_LOG(ERR, +-- +1.8.3.1 + From 19b3475aaa619db006c02d7789057c2e759abf49 Mon Sep 17 00:00:00 2001 From: ywc689 Date: Mon, 2 Aug 2021 17:37:15 +0800 Subject: [PATCH 31/41] netif: make bonding numa node configurable Signed-off-by: ywc689 --- conf/dpvs.bond.conf.sample | 2 + conf/dpvs.conf.items | 3 +- conf/dpvs.conf.single-bond.sample | 1 + src/netif.c | 125 +++++++++++++++++++----------- 4 files changed, 84 insertions(+), 47 deletions(-) diff --git a/conf/dpvs.bond.conf.sample b/conf/dpvs.bond.conf.sample index a03f2f0e1..7e0d9b3db 100644 --- a/conf/dpvs.bond.conf.sample +++ b/conf/dpvs.bond.conf.sample @@ -90,6 +90,7 @@ netif_defs { slave dpdk0 slave dpdk1 primary dpdk0 + ! numa_node 1 ! /sys/bus/pci/devices/[slaves' pci]/numa_node kni_name bond0.kni } @@ -98,6 +99,7 @@ netif_defs { slave dpdk2 slave dpdk3 primary dpdk2 + ! numa_node 1 ! /sys/bus/pci/devices/[slaves' pci]/numa_node kni_name bond1.kni } } diff --git a/conf/dpvs.conf.items b/conf/dpvs.conf.items index 585f97813..e23c0316a 100644 --- a/conf/dpvs.conf.items +++ b/conf/dpvs.conf.items @@ -56,11 +56,12 @@ netif_defs { ! kni_name dpdk1.kni } - device bond0 { + bonding bond0 { mode 4 <0-6> slave dpdk0 slave dpdk1 primary dpdk0 + numa_node 0 <0, int value from /sys/bus/pci/devices/[pci_bus]/numa_node> kni_name bond0.kni ! supported options: diff --git a/conf/dpvs.conf.single-bond.sample b/conf/dpvs.conf.single-bond.sample index 7df1a4a78..9532e257b 100644 --- a/conf/dpvs.conf.single-bond.sample +++ b/conf/dpvs.conf.single-bond.sample @@ -57,6 +57,7 @@ netif_defs { slave dpdk0 slave dpdk2 primary dpdk0 + ! numa_node 1 ! 
/sys/bus/pci/devices/[slaves' pci]/numa_node kni_name bond0.kni options dedicated_queues=off } diff --git a/src/netif.c b/src/netif.c index 67a26fe17..5691021cd 100644 --- a/src/netif.c +++ b/src/netif.c @@ -85,7 +85,8 @@ static uint16_t g_nports; /*for arp process*/ static struct rte_ring *arp_ring[DPVS_MAX_LCORE]; -#define NETIF_BOND_MODE_DEF BONDING_MODE_ROUND_ROBIN +#define NETIF_BOND_MODE_DEF BONDING_MODE_ROUND_ROBIN +#define NETIF_BOND_NUMA_NODE_DEF 0 struct port_conf_stream { int port_id; @@ -114,6 +115,7 @@ struct bond_conf_stream { char name[32]; char kni_name[32]; int mode; + int numa_node; char primary[32]; char slaves[NETIF_MAX_BOND_SLAVES][32]; struct bond_options options; @@ -468,6 +470,7 @@ static void bonding_handler(vector_t tokens) RTE_LOG(INFO, NETIF, "netif bonding config: %s\n", str); strncpy(bond_cfg->name, str, sizeof(bond_cfg->name)); bond_cfg->mode = NETIF_BOND_MODE_DEF; + bond_cfg->numa_node = NETIF_BOND_NUMA_NODE_DEF; bond_cfg->options.dedicated_queues_enable = true; list_add(&bond_cfg->bond_list_node, &bond_list); @@ -544,6 +547,27 @@ static void bonding_primary_handler(vector_t tokens) FREE_PTR(str); } +static void bonding_numa_node_handler(vector_t tokens) +{ + char *str = set_value(tokens); + int numa_node; + struct bond_conf_stream *current_bond = list_entry(bond_list.next, + struct bond_conf_stream, bond_list_node); + assert(str); + + numa_node = atoi(str); + if (numa_node >= get_numa_nodes()) { + RTE_LOG(WARNING, NETIF, "invalid bonding %s:numa_node %d, using default %d\n", + current_bond->name, numa_node, NETIF_BOND_NUMA_NODE_DEF); + current_bond->numa_node = NETIF_BOND_NUMA_NODE_DEF; + } else { + RTE_LOG(INFO, NETIF, "bonding %s:numa_node=%d\n", current_bond->name, numa_node); + current_bond->numa_node = numa_node; + } + + FREE_PTR(str); +} + static void bonding_kni_name_handler(vector_t tokens) { char *str = set_value(tokens); @@ -879,6 +903,7 @@ void install_netif_keywords(void) install_keyword("mode", bonding_mode_handler, 
KW_TYPE_INIT); install_keyword("slave", bonding_slave_handler, KW_TYPE_INIT); install_keyword("primary", bonding_primary_handler, KW_TYPE_INIT); + install_keyword("numa_node", bonding_numa_node_handler, KW_TYPE_INIT); install_keyword("kni_name", bonding_kni_name_handler, KW_TYPE_INIT); install_keyword("options", bonding_options_handler, KW_TYPE_INIT); install_sublevel_end(); @@ -3767,8 +3792,13 @@ static int relate_bonding_device(void) if (!strcmp(bond_conf->slaves[i], bond_conf->primary)) mport->bond->master.primary = sport; assert(sport->type == PORT_TYPE_GENERAL); - /* FIXME: all slaves share the same socket with master, otherwise kernel crash */ - sport->socket = mport->socket; + if (sport->socket != mport->socket) { + /* FIXME: all slaves share the same socket with master, otherwise kernel crash */ + RTE_LOG(WARNING, NETIF, "%s: %s is created on numa node %d, while its slave %s" + " is on numa node %d\n", __func__, mport->name, mport->socket, + sport->name, sport->socket); + sport->socket = mport->socket; + } sport->type = PORT_TYPE_BOND_SLAVE; sport->bond->slave.master = mport; } @@ -3963,88 +3993,91 @@ static int obtain_dpdk_bond_name(char *dst, const char *ori, size_t size) } /* - * netif_virtual_devices_add must be called before lcore_init and port_init, so calling the - * function immediately after cfgfile_init is recommended. + * netif_virtual_devices_add must be called before lcore_init and port_init, + * so it's recommended to call this function immediately after cfgfile_init. 
*/ int netif_vdevs_add(void) { - portid_t pid; - int socket_id; + int ret; struct bond_conf_stream *bond_cfg; #ifdef NETIF_BONDING_DEBUG - int ii; + int ii, len = 0; + char slavenames[NETIF_MAX_BOND_SLAVES*IFNAMSIZ]; list_for_each_entry_reverse(bond_cfg, &bond_list, bond_list_node) { - if (!bond_cfg->primary[0]) - strncpy(bond_cfg->primary, bond_cfg->slaves[0], sizeof(bond_cfg->primary)); - printf("Add bonding device \"%s\": mode=%d, primary=%s, slaves=\"", - bond_cfg->name, bond_cfg->mode, bond_cfg->primary); - for (ii = 0; ii < NETIF_MAX_BOND_SLAVES && bond_cfg->slaves[ii][0]; ii++) - printf("%s ", bond_cfg->slaves[ii]); - printf("\"\n"); + for (ii = 0; ii < NETIF_MAX_BOND_SLAVES && bond_cfg->slaves[ii][0]; ii++) { + ret = snprintf(&slavenames[len], sizeof(slavenames)-len-1, "%s ", bond_cfg->slaves[ii]); + if (ret >= 0) + len += ret; + } + RTE_LOG(DEBUG, NETIF, "Add bonding device \"%s\"" + "\n\tmode: %d" + "\n\tprimary: %s" + "\n\tnuma_node: %d" + "\n\tslaves: %s\n", + bond_cfg->name, + bond_cfg->mode, + bond_cfg->primary[0] ? bond_cfg->primary : ii > 0 ? 
bond_cfg->slaves[0] : "", + bond_cfg->numa_node, + slavenames); } #endif + /* set phy_pid_end/bond_pid_base before create bonding device */ phy_pid_end = dpvs_rte_eth_dev_count(); - port_id_end = max(port_id_end, phy_pid_end); - /* set bond_pid_offset before create bonding device */ if (!list_empty(&bond_list)) bond_pid_base = phy_pid_end; list_for_each_entry_reverse(bond_cfg, &bond_list, bond_list_node) { + char bondname[IFNAMSIZ] = {'\0'}; + if (!bond_cfg->slaves[0][0]) { RTE_LOG(WARNING, NETIF, "%s: no slaves configured for %s, skip ...\n", __func__, bond_cfg->name); return EDPVS_INVAL; } + /* use the first slave as primary if not configured */ - if (!bond_cfg->primary[0]) + if (!bond_cfg->primary[0]) { + RTE_LOG(INFO, NETIF, "%s: %s primary slave is not configured, using %s\n", + __func__, bond_cfg->name, bond_cfg->slaves[0]); strncpy(bond_cfg->primary, bond_cfg->slaves[0], sizeof(bond_cfg->primary)); - /* FIXME: which socket should bonding device located on? Ideally, socket of the primary - * bonding slave. But here we cannot obtain slave port id from its name by - * "rte_lcore_to_socket_id" due to the uninitialized netif_port list. - * Here we use master lcore's socket as the compromise. Another solution is to appoint - * the socket id in the cfgfile. 
- * */ - socket_id = rte_lcore_to_socket_id(rte_lcore_id()); - if (socket_id < 0) { - RTE_LOG(ERR, NETIF, "%s: fail to get socket id for %s\n", - __func__, bond_cfg->name); - return EDPVS_INVAL; } - char dummy_name[IFNAMSIZ] = {'\0'}; - int rc = obtain_dpdk_bond_name(dummy_name, bond_cfg->name, IFNAMSIZ); - if (rc != EDPVS_OK) { - RTE_LOG(ERR, NETIF, "%s: wrong bond device name in config file %s\n", + ret = obtain_dpdk_bond_name(bondname, bond_cfg->name, IFNAMSIZ); + if (ret != EDPVS_OK) { + RTE_LOG(ERR, NETIF, "%s: invalid bonding device name in config file %s\n", __func__, bond_cfg->name); return EDPVS_INVAL; } - /* int pid_rc = rte_eth_bond_create(bond_cfg->name, bond_cfg->mode, socket_id); */ - int pid_rc = rte_eth_bond_create(dummy_name, bond_cfg->mode, socket_id); - if (pid_rc < 0) { - RTE_LOG(ERR, NETIF, "%s: fail to create bonding device %s(mode=%d, socket=%d)\n", - __func__, bond_cfg->name, bond_cfg->mode, socket_id); + + /* Note that all slaves' numa nodes should be the same as the one of bonding, + * otherwise the bonding and slaves cannot link up. Nevertheless, if you are + * to use slaves from different numa nodes, the dpdk patch + * [bonding: allow slaves from different numa nodes] + * should be applied, which may cause negative influence on performance. 
*/ + ret = rte_eth_bond_create(bondname, bond_cfg->mode, bond_cfg->numa_node); + if (ret < 0) { + RTE_LOG(ERR, NETIF, "%s: fail to create bonding device %s: mode=%d, numa_node=%d\n", + __func__, bond_cfg->name, bond_cfg->mode, bond_cfg->numa_node); return EDPVS_CALLBACKFAIL; } - pid = pid_rc; - RTE_LOG(INFO, NETIF, "create bondig device %s: mode=%d, primary=%s, socket=%d\n", - bond_cfg->name, bond_cfg->mode, bond_cfg->primary, socket_id); - bond_cfg->port_id = pid; /* relate port_id with port_name, used by netif_rte_port_alloc */ + bond_cfg->port_id = ret; /* relate port_id with port_name, used by netif_rte_port_alloc */ + RTE_LOG(INFO, NETIF, "created bondig device %s: mode=%d, primary=%s, numa_node=%d\n", + bond_cfg->name, bond_cfg->mode, bond_cfg->primary, bond_cfg->numa_node); + if (bond_cfg->mode == BONDING_MODE_8023AD && bond_cfg->options.dedicated_queues_enable) { if (!rte_eth_bond_8023ad_dedicated_queues_enable(bond_cfg->port_id)) { - RTE_LOG(INFO, NETIF, "bonding mode4 dedicated queues enable failed!\n"); + RTE_LOG(INFO, NETIF, "%s: bonding mode4 dedicated queues enable failed!\n", __func__); } } } if (!list_empty(&bond_list)) { bond_pid_end = dpvs_rte_eth_dev_count(); - port_id_end = max(port_id_end, bond_pid_end); - RTE_LOG(INFO, NETIF, "bonding device port id range: [%d, %d)\n", - bond_pid_base, bond_pid_end); + RTE_LOG(INFO, NETIF, "bonding device port id range: [%d, %d)\n", bond_pid_base, bond_pid_end); } return EDPVS_OK; From 531b178384eac8ecf980c846b7000579e00f49b3 Mon Sep 17 00:00:00 2001 From: ywc689 Date: Tue, 3 Aug 2021 18:47:37 +0800 Subject: [PATCH 32/41] netif: fix kni mac address update problem Signed-off-by: ywc689 --- conf/dpvs.bond.conf.sample | 2 +- conf/dpvs.conf.sample | 2 +- conf/dpvs.conf.single-bond.sample | 2 +- conf/dpvs.conf.single-nic.sample | 2 +- include/conf/common.h | 1 + include/netif.h | 1 + src/common.c | 67 ++++++++++++++++++++++++++----- src/ctrl.c | 2 +- src/ipv6/ipv6.c | 2 +- src/kni.c | 22 ++++------ src/netif.c | 48 
+++++++++++----------- 11 files changed, 99 insertions(+), 52 deletions(-) diff --git a/conf/dpvs.bond.conf.sample b/conf/dpvs.bond.conf.sample index 7e0d9b3db..aaea16e98 100644 --- a/conf/dpvs.bond.conf.sample +++ b/conf/dpvs.bond.conf.sample @@ -233,7 +233,7 @@ worker_defs { worker cpu8 { type slave cpu_id 8 - icmp_redirect_core + ! icmp_redirect_core port bond0 { rx_queue_ids 7 tx_queue_ids 7 diff --git a/conf/dpvs.conf.sample b/conf/dpvs.conf.sample index a79622ff2..0585b80c4 100644 --- a/conf/dpvs.conf.sample +++ b/conf/dpvs.conf.sample @@ -193,7 +193,7 @@ worker_defs { worker cpu8 { type slave cpu_id 8 - icmp_redirect_core + ! icmp_redirect_core port dpdk0 { rx_queue_ids 7 tx_queue_ids 7 diff --git a/conf/dpvs.conf.single-bond.sample b/conf/dpvs.conf.single-bond.sample index 9532e257b..ce6a16562 100644 --- a/conf/dpvs.conf.single-bond.sample +++ b/conf/dpvs.conf.single-bond.sample @@ -150,7 +150,7 @@ worker_defs { worker cpu8 { type slave cpu_id 8 - icmp_redirect_core + ! icmp_redirect_core port bond0 { rx_queue_ids 7 tx_queue_ids 7 diff --git a/conf/dpvs.conf.single-nic.sample b/conf/dpvs.conf.single-nic.sample index 7fed5b4e9..faa58b9e4 100644 --- a/conf/dpvs.conf.single-nic.sample +++ b/conf/dpvs.conf.single-nic.sample @@ -125,7 +125,7 @@ worker_defs { worker cpu8 { type slave cpu_id 8 - icmp_redirect_core + ! 
icmp_redirect_core port dpdk0 { rx_queue_ids 7 tx_queue_ids 7 diff --git a/include/conf/common.h b/include/conf/common.h index 00648ed67..7472ad8f1 100644 --- a/include/conf/common.h +++ b/include/conf/common.h @@ -138,6 +138,7 @@ extern const char *dpvs_strerror(int err); int get_numa_nodes(void); +int linux_get_link_status(const char *ifname, int *if_flags, char *if_flags_str, size_t len); int linux_set_if_mac(const char *ifname, const unsigned char mac[ETH_ALEN]); int linux_hw_mc_add(const char *ifname, const uint8_t hwma[ETH_ALEN]); int linux_hw_mc_del(const char *ifname, const uint8_t hwma[ETH_ALEN]); diff --git a/include/netif.h b/include/netif.h index f95998335..c80a1746c 100644 --- a/include/netif.h +++ b/include/netif.h @@ -193,6 +193,7 @@ struct netif_ops { int (*op_open)(struct netif_port *dev); int (*op_stop)(struct netif_port *dev); int (*op_xmit)(struct rte_mbuf *m, struct netif_port *dev); + int (*op_update_addr)(struct netif_port *dev); int (*op_set_mc_list)(struct netif_port *dev); int (*op_get_queue)(struct netif_port *dev, lcoreid_t cid, queueid_t *qid); int (*op_get_link)(struct netif_port *dev, struct rte_eth_link *link); diff --git a/src/common.c b/src/common.c index 2cde7a9dd..c10505cf4 100644 --- a/src/common.c +++ b/src/common.c @@ -130,34 +130,83 @@ bool is_power2(int num, int offset, int *lower) return ret; } -int linux_set_if_mac(const char *ifname, const unsigned char mac[ETH_ALEN]) +int linux_get_link_status(const char *ifname, int *if_flags, char *if_flags_str, size_t len) { int sock_fd; struct ifreq ifr = {}; - if (!ifname || !mac || !strncmp(ifname, "lo", 2)) + if (!ifname || !if_flags) return EDPVS_INVAL; + *if_flags= 0; + sock_fd = socket(PF_INET, SOCK_DGRAM, 0); if (sock_fd < 0) return EDPVS_SYSCALL; snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s", ifname); + if (ioctl(sock_fd, SIOCGIFFLAGS, &ifr)) { + fprintf(stderr, "%s: fail to get %s's flags -- %s\n", + __func__, ifname, strerror(errno)); + close(sock_fd); + return 
EDPVS_IO; + } + close(sock_fd); + + *if_flags = ifr.ifr_flags; + + if (if_flags_str) { + int idx = 0; + idx += snprintf(&if_flags_str[idx], len-idx-1, "%s:", ifname); + if(*if_flags & IFF_UP) + idx += snprintf(&if_flags_str[idx], len-idx-1, " UP"); + if(*if_flags & IFF_MULTICAST) + idx += snprintf(&if_flags_str[idx], len-idx-1, " MULTICAST"); + if(*if_flags & IFF_BROADCAST) + idx += snprintf(&if_flags_str[idx], len-idx-1, " BROADCAST"); + if(*if_flags & IFF_LOOPBACK) + idx += snprintf(&if_flags_str[idx], len-idx-1, " LOOPBACK"); + if(*if_flags & IFF_POINTOPOINT) + idx += snprintf(&if_flags_str[idx], len-idx-1, " P2P"); + } + + return EDPVS_OK; +} + +int linux_set_if_mac(const char *ifname, const unsigned char mac[ETH_ALEN]) +{ + int err; + int sock_fd, if_flags; + struct ifreq ifr = {}; + + if (!ifname || !mac || !strncmp(ifname, "lo", 2)) + return EDPVS_INVAL; + + err = linux_get_link_status(ifname, &if_flags, NULL, 0); + if (err != EDPVS_OK) + return err; + + if (!(if_flags & IFF_UP)) { + fprintf(stderr, "%s: skip MAC address update of link down device %s\n", + __func__, ifname); + return EDPVS_RESOURCE; + } + + sock_fd = socket(PF_INET, SOCK_DGRAM, 0); + if (sock_fd < 0) + return EDPVS_SYSCALL; + snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s", ifname); ifr.ifr_hwaddr.sa_family = 1; memcpy(ifr.ifr_hwaddr.sa_data, mac, ETH_ALEN); if (ioctl(sock_fd, SIOCSIFHWADDR, &ifr)) { - /* DPDK 18.11, 'kni_net_process_request' is called when updating - * device's mac address, in which 'wait_event_interruptible_timeout' - * is used to wait for setting results, which may easily get timeout and - * return fail. 
We ignore the error here and return OK nevertheless.*/ - fprintf(stderr, "%s: fail to set %s's MAC address: %s\n", + fprintf(stderr, "%s: fail to set %s's MAC address -- %s\n", __func__, ifname, strerror(errno)); close(sock_fd); - return EDPVS_OK; + return EDPVS_IO; } - close(sock_fd); + return EDPVS_OK; } diff --git a/src/ctrl.c b/src/ctrl.c index 9487e70c8..512f06ee0 100644 --- a/src/ctrl.c +++ b/src/ctrl.c @@ -852,7 +852,7 @@ int msg_type_table_print(char *buf, int len) rte_rwlock_read_lock(&mt_lock[ii][jj]); list_for_each_entry(mt, &mt_array[ii][jj], list) { memset(line, 0, sizeof(line)); - snprintf(line, sizeof(line), "mt_array[%-2d][%-4d] type %-8d mode %-12s" + snprintf(line, sizeof(line), "mt_array[%-2d][%-2d] type %-8d mode %-12s" " unicast_cb %p multicast_cb %p\n", ii, jj, mt->type, mt->mode == DPVS_MSG_UNICAST ? "UNICAST" : "MULITICAST", mt->unicast_msg_cb, mt->multicast_msg_cb); diff --git a/src/ipv6/ipv6.c b/src/ipv6/ipv6.c index f7d3a0f11..1fa712110 100644 --- a/src/ipv6/ipv6.c +++ b/src/ipv6/ipv6.c @@ -139,7 +139,7 @@ static void ip6_conf_disable(vector_t tokens) else RTE_LOG(WARNING, IPV6, "invalid ipv6:disable %s\n", str); - RTE_LOG(INFO, IPV6, "ipv6:disable = %s", conf_ipv6_disable ? "on" : "off"); + RTE_LOG(INFO, IPV6, "ipv6:disable = %s\n", conf_ipv6_disable ? "on" : "off"); FREE_PTR(str); } diff --git a/src/kni.c b/src/kni.c index cca74ac81..475f2fdcf 100644 --- a/src/kni.c +++ b/src/kni.c @@ -55,6 +55,14 @@ static void kni_fill_conf(const struct netif_port *dev, const char *ifname, conf->group_id = dev->id; conf->mbuf_size = KNI_DEF_MBUF_SIZE; + /* + * kni device should use same mac as real device, + * because it may config same IP of real device. + * diff mac means kni cannot accept packets sent + * to real-device. 
+ */ + memcpy(conf->mac_addr, dev->addr.addr_bytes, sizeof(conf->mac_addr)); + if (dev->type == PORT_TYPE_GENERAL) { /* dpdk phy device */ rte_eth_dev_info_get(dev->id, &info); #if RTE_VERSION < RTE_VERSION_NUM(18, 11, 0, 0) @@ -361,20 +369,6 @@ int kni_add_dev(struct netif_port *dev, const char *kniname) return err; } - /* - * kni device should use same mac as real device, - * because it may config same IP of real device. - * diff mac means kni cannot accept packets sent - * to real-device. - */ - err = linux_set_if_mac(conf.name, (unsigned char *)&dev->addr); - if (err != EDPVS_OK) { - char mac[18]; - rte_ether_format_addr(mac, sizeof(mac), &dev->addr); - RTE_LOG(WARNING, Kni, "%s: fail to set mac %s for %s: %s\n", - __func__, mac, conf.name, strerror(errno)); - } - snprintf(ring_name, sizeof(ring_name), "kni_rx_ring_%s", conf.name); rb = rte_ring_create(ring_name, KNI_DEF_MBUF_SIZE, diff --git a/src/netif.c b/src/netif.c index 5691021cd..4c287bbd8 100644 --- a/src/netif.c +++ b/src/netif.c @@ -2652,15 +2652,17 @@ static int update_bond_macaddr(struct netif_port *port) { assert(port->type == PORT_TYPE_BOND_MASTER); - int ret = EDPVS_OK; - rte_eth_macaddr_get(port->id, &port->addr); + if (rte_eth_macaddr_get(port->id, &port->addr)) + return EDPVS_NOTEXIST; + if (kni_dev_exist(port)) { - ret = linux_set_if_mac(port->kni.name, (unsigned char *)&port->addr); - if (ret == EDPVS_OK) - rte_ether_addr_copy(&port->addr, &port->kni.addr); + /* if kni device isn't link up, linux_set_if_mac would fail(Timer expired), + * and in this case the warning can be ingored.*/ + linux_set_if_mac(port->kni.name, (unsigned char *)&port->addr); + rte_ether_addr_copy(&port->addr, &port->kni.addr); } - return ret; + return EDPVS_OK; } static inline void free_mbufs(struct rte_mbuf **pkts, unsigned num) @@ -3003,6 +3005,7 @@ static struct netif_ops dpdk_netif_ops = { }; static struct netif_ops bond_netif_ops = { + .op_update_addr = update_bond_macaddr, .op_set_mc_list = bond_set_mc_list, }; 
@@ -3094,7 +3097,7 @@ static struct netif_port* netif_rte_port_alloc(portid_t id, int nrxq, if (port->socket == SOCKET_ID_ANY) port->socket = rte_socket_id(); port->mbuf_pool = pktmbuf_pool[port->socket]; - rte_eth_macaddr_get((uint8_t)id, &port->addr); + rte_eth_macaddr_get((uint8_t)id, &port->addr); // bonding mac is zero here rte_eth_dev_get_mtu((uint8_t)id, &port->mtu); rte_eth_dev_info_get((uint8_t)id, &port->dev_info); port->dev_conf = *conf; @@ -3452,10 +3455,6 @@ static int add_bond_slaves(struct netif_port *port) __func__, port->name, port->bond->master.primary->name); } - if (update_bond_macaddr(port) != EDPVS_OK) { - RTE_LOG(ERR, NETIF, "%s: fail to update %s's macaddr!\n", __func__, port->name); - return EDPVS_INVAL; - } /* Add a MAC address to an internal array of addresses used to enable whitelist * * filtering to accept packets only if the destination MAC address matches */ for (ii = 0; ii < port->bond->master.slave_nb; ii++) { @@ -3589,7 +3588,7 @@ int netif_port_start(struct netif_port *port) } netif_print_port_conf(&port->dev_conf, buf, &buflen); - RTE_LOG(INFO, NETIF, "device %s configuration:\n%s\n\n", port->name, buf); + RTE_LOG(INFO, NETIF, "device %s configuration:\n%s\n", port->name, buf); // build port-queue-lcore mapping array build_port_queue_lcore_map(); @@ -3627,10 +3626,9 @@ int netif_port_start(struct netif_port *port) rte_eth_promiscuous_enable(port->id); } - /* bonding device's macaddr is updated by its primary device when start, - * so we should update its macaddr after start. 
*/ - if (port->type == PORT_TYPE_BOND_MASTER) - update_bond_macaddr(port); + /* update mac addr to netif_port and netif_kni after start */ + if (port->netif_ops->op_update_addr) + port->netif_ops->op_update_addr(port); /* add in6_addr multicast address */ int cid = 0; @@ -3789,8 +3787,10 @@ static int relate_bonding_device(void) return EDPVS_EXIST; } mport->bond->master.slaves[i] = sport; - if (!strcmp(bond_conf->slaves[i], bond_conf->primary)) + if (!strcmp(bond_conf->slaves[i], bond_conf->primary)) { mport->bond->master.primary = sport; + rte_ether_addr_copy(&sport->addr, &mport->addr); /* use primary slave's macaddr for bonding */ + } assert(sport->type == PORT_TYPE_GENERAL); if (sport->socket != mport->socket) { /* FIXME: all slaves share the same socket with master, otherwise kernel crash */ @@ -4900,9 +4900,10 @@ static int set_bond(struct netif_port *port, const netif_bond_set_t *bond_cfg) port->bond->master.slave_nb--; } } - /* ATTENITON: neighbor get macaddr from port->addr, thus it should be updated */ - if (update_bond_macaddr(port) != EDPVS_OK) - RTE_LOG(ERR, NETIF, "%s: fail to update %s's macaddr!\n", __func__, port->name); + if (port->netif_ops->op_update_addr) { + if (port->netif_ops->op_update_addr(port) != EDPVS_OK) + RTE_LOG(ERR, NETIF, "%s: fail to update %s's mac address!\n", __func__, port->name); + } break; } case OPT_PRIMARY: @@ -4916,9 +4917,10 @@ static int set_bond(struct netif_port *port, const netif_bond_set_t *bond_cfg) port->name, port->bond->master.primary->name, primary->name); port->bond->master.primary = primary; } - /* ATTENITON: neighbor get macaddr from port->addr, thus it should be updated */ - if (update_bond_macaddr(port) != EDPVS_OK) - RTE_LOG(ERR, NETIF, "%s: fail to update %s's macaddr!\n", __func__, port->name); + if (port->netif_ops->op_update_addr) { + if (port->netif_ops->op_update_addr(port) != EDPVS_OK) + RTE_LOG(ERR, NETIF, "%s: fail to update %s's mac address!\n", __func__, port->name); + } break; } case 
OPT_XMIT_POLICY: From 1e9d03690bf826b98fc9e4a9bccb2c5cfcdd8629 Mon Sep 17 00:00:00 2001 From: huangyichen Date: Wed, 4 Aug 2021 14:59:08 +0800 Subject: [PATCH 33/41] update bonding mode 4 patch --- ...link-event-for-multicast-driver-part.patch | 2 +- ...dump-change-dpdk-pdump-tool-for-dpvs.patch | 2 +- ...ug-only-enable-dpdk-eal-memory-debug.patch | 2 +- ...w-patch-ixgbe-fdir-rte_flow-for-dpvs.patch | 2 +- ... => 0005-Fix-bonding-mode-4-problem.patch} | 33 +++++++++++++++---- ...low-slaves-from-different-numa-nodes.patch | 14 ++++---- 6 files changed, 37 insertions(+), 18 deletions(-) rename patch/dpdk-stable-20.11.1/{0005-Fix-bonding-mode-4-problem-caused-by-LACP-failure.patch => 0005-Fix-bonding-mode-4-problem.patch} (56%) diff --git a/patch/dpdk-stable-20.11.1/0001-kni-use-netlink-event-for-multicast-driver-part.patch b/patch/dpdk-stable-20.11.1/0001-kni-use-netlink-event-for-multicast-driver-part.patch index 4672afcc1..cc036a39b 100644 --- a/patch/dpdk-stable-20.11.1/0001-kni-use-netlink-event-for-multicast-driver-part.patch +++ b/patch/dpdk-stable-20.11.1/0001-kni-use-netlink-event-for-multicast-driver-part.patch @@ -1,4 +1,4 @@ -From 09760dd24ceaa9a6ce2435cdf9105e1da5d95616 Mon Sep 17 00:00:00 2001 +From 8e66b7600c7d8d0e59dd85565986436c0e03a2c8 Mon Sep 17 00:00:00 2001 From: huangyichen Date: Thu, 1 Jul 2021 21:21:16 +0800 Subject: [PATCH 1/6] kni: use netlink event for multicast (driver part) diff --git a/patch/dpdk-stable-20.11.1/0002-pdump-change-dpdk-pdump-tool-for-dpvs.patch b/patch/dpdk-stable-20.11.1/0002-pdump-change-dpdk-pdump-tool-for-dpvs.patch index 3c87c614a..6d2598728 100644 --- a/patch/dpdk-stable-20.11.1/0002-pdump-change-dpdk-pdump-tool-for-dpvs.patch +++ b/patch/dpdk-stable-20.11.1/0002-pdump-change-dpdk-pdump-tool-for-dpvs.patch @@ -1,4 +1,4 @@ -From 60e1eb0a50fc10ca69bcdc4148fdec35576eeedc Mon Sep 17 00:00:00 2001 +From 0ef2e126c8ca9b4a246f680a2a5110b734e9782b Mon Sep 17 00:00:00 2001 From: huangyichen Date: Thu, 1 Jul 2021 21:23:50 +0800 
Subject: [PATCH 2/6] pdump: change dpdk-pdump tool for dpvs diff --git a/patch/dpdk-stable-20.11.1/0003-for-debug-only-enable-dpdk-eal-memory-debug.patch b/patch/dpdk-stable-20.11.1/0003-for-debug-only-enable-dpdk-eal-memory-debug.patch index 40d1cbc6f..cfd9473b1 100644 --- a/patch/dpdk-stable-20.11.1/0003-for-debug-only-enable-dpdk-eal-memory-debug.patch +++ b/patch/dpdk-stable-20.11.1/0003-for-debug-only-enable-dpdk-eal-memory-debug.patch @@ -1,4 +1,4 @@ -From c9763a0b83bdf0fbf9a5a0ca8c96d999cca62196 Mon Sep 17 00:00:00 2001 +From 446e9d2ae65c25ca382323bdd7cdec765f357886 Mon Sep 17 00:00:00 2001 From: huangyichen Date: Thu, 1 Jul 2021 21:24:47 +0800 Subject: [PATCH 3/6] [for debug only] enable dpdk eal memory debug diff --git a/patch/dpdk-stable-20.11.1/0004-ixgbe_flow-patch-ixgbe-fdir-rte_flow-for-dpvs.patch b/patch/dpdk-stable-20.11.1/0004-ixgbe_flow-patch-ixgbe-fdir-rte_flow-for-dpvs.patch index e77189368..09c791e95 100644 --- a/patch/dpdk-stable-20.11.1/0004-ixgbe_flow-patch-ixgbe-fdir-rte_flow-for-dpvs.patch +++ b/patch/dpdk-stable-20.11.1/0004-ixgbe_flow-patch-ixgbe-fdir-rte_flow-for-dpvs.patch @@ -1,4 +1,4 @@ -From 54aad262b48f5904d7f1688b56526908151f5574 Mon Sep 17 00:00:00 2001 +From fc81d45d6bf23bd6d9b0e7eb9fdc7fe41b559e65 Mon Sep 17 00:00:00 2001 From: huangyichen Date: Fri, 2 Jul 2021 11:55:47 +0800 Subject: [PATCH 4/6] ixgbe_flow: patch ixgbe fdir rte_flow for dpvs diff --git a/patch/dpdk-stable-20.11.1/0005-Fix-bonding-mode-4-problem-caused-by-LACP-failure.patch b/patch/dpdk-stable-20.11.1/0005-Fix-bonding-mode-4-problem.patch similarity index 56% rename from patch/dpdk-stable-20.11.1/0005-Fix-bonding-mode-4-problem-caused-by-LACP-failure.patch rename to patch/dpdk-stable-20.11.1/0005-Fix-bonding-mode-4-problem.patch index da517b839..352059891 100644 --- a/patch/dpdk-stable-20.11.1/0005-Fix-bonding-mode-4-problem-caused-by-LACP-failure.patch +++ b/patch/dpdk-stable-20.11.1/0005-Fix-bonding-mode-4-problem.patch @@ -1,13 +1,14 @@ -From 
0d0ba57681393cd9c11a0ac069c6f676b9f34707 Mon Sep 17 00:00:00 2001 +From b6d30dfa410c5515ecc153dbe68df374f148ee76 Mon Sep 17 00:00:00 2001 From: huangyichen -Date: Fri, 2 Jul 2021 11:57:03 +0800 -Subject: [PATCH 5/6] Fix bonding mode 4 problem caused by LACP failure. +Date: Wed, 4 Aug 2021 14:02:09 +0800 +Subject: [PATCH 5/6] Fix bonding mode 4 problem: -The problem is disscussed in Issue #725 of iqiyi/dpvs in detail. -https://github.com/iqiyi/dpvs/issues/725 +1. Faulted lacp negotiation, The problem is disscussed in Issue [#725](https://github.com/iqiyi/dpvs/issues/725) of iqiyi/dpvs in detail. +2. Multicast packet miss fetch caused by slow protocol process. --- drivers/net/bonding/rte_eth_bond_8023ad.c | 20 +++++++++++--------- - 1 file changed, 11 insertions(+), 9 deletions(-) + drivers/net/bonding/rte_eth_bond_pmd.c | 9 +++++++++ + 2 files changed, 20 insertions(+), 9 deletions(-) diff --git a/drivers/net/bonding/rte_eth_bond_8023ad.c b/drivers/net/bonding/rte_eth_bond_8023ad.c index 5fe004e..52bd960 100644 @@ -55,6 +56,26 @@ index 5fe004e..52bd960 100644 } periodic_machine(internals, slave_id); +diff --git a/drivers/net/bonding/rte_eth_bond_pmd.c b/drivers/net/bonding/rte_eth_bond_pmd.c +index 057b1ad..37973ce 100644 +--- a/drivers/net/bonding/rte_eth_bond_pmd.c ++++ b/drivers/net/bonding/rte_eth_bond_pmd.c +@@ -322,6 +322,15 @@ rx_burst_8023ad(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts, + + /* Handle slow protocol packets. 
*/ + while (j < num_rx_total) { ++ /* If packet is not pure L2 and is known: ++ * Such as OSPF protocol multcast packet, ++ * we want to handle it in user mode by ourselves, ++ * skip slow protocol flow*/ ++ if ((bufs[j]->packet_type & ~RTE_PTYPE_L2_ETHER) != 0) { ++ j++; ++ continue; ++ } ++ + if (j + 3 < num_rx_total) + rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *)); + -- 1.8.3.1 diff --git a/patch/dpdk-stable-20.11.1/0006-bonding-allow-slaves-from-different-numa-nodes.patch b/patch/dpdk-stable-20.11.1/0006-bonding-allow-slaves-from-different-numa-nodes.patch index 805fefca7..3ee872e84 100644 --- a/patch/dpdk-stable-20.11.1/0006-bonding-allow-slaves-from-different-numa-nodes.patch +++ b/patch/dpdk-stable-20.11.1/0006-bonding-allow-slaves-from-different-numa-nodes.patch @@ -1,21 +1,19 @@ -From e6d97675f818b6225209a3cada1e53a0756daedb Mon Sep 17 00:00:00 2001 -From: wencyu -Date: Mon, 2 Aug 2021 13:52:24 +0800 +From 256eb959c5a3ae3661568028271367f2facd709b Mon Sep 17 00:00:00 2001 +From: huangyichen +Date: Wed, 4 Aug 2021 14:27:34 +0800 Subject: [PATCH 6/6] bonding: allow slaves from different numa nodes Note the patch may have a negative influnce on performance. It's not a good practice to bonding slaves across numa nodes. 
- -Signed-off-by: wencyu --- drivers/net/bonding/rte_eth_bond_pmd.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/net/bonding/rte_eth_bond_pmd.c b/drivers/net/bonding/rte_eth_bond_pmd.c -index 057b1ad..53f8ba3 100644 +index 37973ce..f31cd4d 100644 --- a/drivers/net/bonding/rte_eth_bond_pmd.c +++ b/drivers/net/bonding/rte_eth_bond_pmd.c -@@ -1762,7 +1762,14 @@ slave_configure(struct rte_eth_dev *bonded_eth_dev, +@@ -1771,7 +1771,14 @@ slave_configure(struct rte_eth_dev *bonded_eth_dev, errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id, bd_rx_q->nb_rx_desc, @@ -31,7 +29,7 @@ index 057b1ad..53f8ba3 100644 &(bd_rx_q->rx_conf), bd_rx_q->mb_pool); if (errval != 0) { RTE_BOND_LOG(ERR, -@@ -1778,7 +1785,14 @@ slave_configure(struct rte_eth_dev *bonded_eth_dev, +@@ -1787,7 +1794,14 @@ slave_configure(struct rte_eth_dev *bonded_eth_dev, errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id, bd_tx_q->nb_tx_desc, From 7dae11815426a8af66ac32befbdb75d8c41e7e8b Mon Sep 17 00:00:00 2001 From: huangyichen Date: Wed, 4 Aug 2021 15:24:44 +0800 Subject: [PATCH 34/41] clean whitespace --- .../0005-Fix-bonding-mode-4-problem.patch | 14 +++++++------- ...ng-allow-slaves-from-different-numa-nodes.patch | 6 +++--- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/patch/dpdk-stable-20.11.1/0005-Fix-bonding-mode-4-problem.patch b/patch/dpdk-stable-20.11.1/0005-Fix-bonding-mode-4-problem.patch index 352059891..a89e4e57f 100644 --- a/patch/dpdk-stable-20.11.1/0005-Fix-bonding-mode-4-problem.patch +++ b/patch/dpdk-stable-20.11.1/0005-Fix-bonding-mode-4-problem.patch @@ -1,6 +1,6 @@ -From b6d30dfa410c5515ecc153dbe68df374f148ee76 Mon Sep 17 00:00:00 2001 +From 3faf16f377052bb9f40c0d17c2590c1a0884abd1 Mon Sep 17 00:00:00 2001 From: huangyichen -Date: Wed, 4 Aug 2021 14:02:09 +0800 +Date: Wed, 4 Aug 2021 15:14:04 +0800 Subject: [PATCH 5/6] Fix bonding mode 4 problem: 1. 
Faulted lacp negotiation, The problem is disscussed in Issue [#725](https://github.com/iqiyi/dpvs/issues/725) of iqiyi/dpvs in detail. @@ -57,17 +57,17 @@ index 5fe004e..52bd960 100644 periodic_machine(internals, slave_id); diff --git a/drivers/net/bonding/rte_eth_bond_pmd.c b/drivers/net/bonding/rte_eth_bond_pmd.c -index 057b1ad..37973ce 100644 +index 057b1ad..27f35fb 100644 --- a/drivers/net/bonding/rte_eth_bond_pmd.c +++ b/drivers/net/bonding/rte_eth_bond_pmd.c @@ -322,6 +322,15 @@ rx_burst_8023ad(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts, /* Handle slow protocol packets. */ while (j < num_rx_total) { -+ /* If packet is not pure L2 and is known: -+ * Such as OSPF protocol multcast packet, -+ * we want to handle it in user mode by ourselves, -+ * skip slow protocol flow*/ ++ /* If packet is not pure L2 and is known: ++ * Such as OSPF protocol multcast packet, ++ * we want to handle it in user mode by ourselves, ++ * skip slow protocol flow*/ + if ((bufs[j]->packet_type & ~RTE_PTYPE_L2_ETHER) != 0) { + j++; + continue; diff --git a/patch/dpdk-stable-20.11.1/0006-bonding-allow-slaves-from-different-numa-nodes.patch b/patch/dpdk-stable-20.11.1/0006-bonding-allow-slaves-from-different-numa-nodes.patch index 3ee872e84..29a7aa050 100644 --- a/patch/dpdk-stable-20.11.1/0006-bonding-allow-slaves-from-different-numa-nodes.patch +++ b/patch/dpdk-stable-20.11.1/0006-bonding-allow-slaves-from-different-numa-nodes.patch @@ -1,6 +1,6 @@ -From 256eb959c5a3ae3661568028271367f2facd709b Mon Sep 17 00:00:00 2001 +From ca13a127a220fecbcc080f208339c455e12a61a8 Mon Sep 17 00:00:00 2001 From: huangyichen -Date: Wed, 4 Aug 2021 14:27:34 +0800 +Date: Wed, 4 Aug 2021 15:16:04 +0800 Subject: [PATCH 6/6] bonding: allow slaves from different numa nodes Note the patch may have a negative influnce on performance. @@ -10,7 +10,7 @@ It's not a good practice to bonding slaves across numa nodes. 
1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/net/bonding/rte_eth_bond_pmd.c b/drivers/net/bonding/rte_eth_bond_pmd.c -index 37973ce..f31cd4d 100644 +index 27f35fb..7184e0b 100644 --- a/drivers/net/bonding/rte_eth_bond_pmd.c +++ b/drivers/net/bonding/rte_eth_bond_pmd.c @@ -1771,7 +1771,14 @@ slave_configure(struct rte_eth_dev *bonded_eth_dev, From a1fa2a0c7f26f97e0f407ee3a6382a84d3a3c888 Mon Sep 17 00:00:00 2001 From: ywc689 Date: Thu, 5 Aug 2021 11:15:58 +0800 Subject: [PATCH 35/41] neigh: fix -Wpacked-not-aligned error Signed-off-by: ywc689 --- include/conf/neigh.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/conf/neigh.h b/include/conf/neigh.h index afd874d06..618cad2a7 100644 --- a/include/conf/neigh.h +++ b/include/conf/neigh.h @@ -34,7 +34,6 @@ enum { struct dp_vs_neigh_conf { int af; - uint8_t flag; uint32_t state; union inet_addr ip_addr; #ifdef __DPVS__ @@ -44,6 +43,7 @@ struct dp_vs_neigh_conf { #endif uint32_t que_num; char ifname[IFNAMSIZ]; + uint8_t flag; uint8_t cid; }__attribute__((__packed__)); From 2352f549870fbd6092396ae21f357317d41aac5d Mon Sep 17 00:00:00 2001 From: ywc689 Date: Wed, 11 Aug 2021 13:48:50 +0800 Subject: [PATCH 36/41] patch: don't drop multicast/broadcast packets when all-multicast isn't enabled in rx_burst_8023ad Signed-off-by: ywc689 --- ...link-event-for-multicast-driver-part.patch | 2 +- ...dump-change-dpdk-pdump-tool-for-dpvs.patch | 2 +- ...> 0003-enable-dpdk-eal-memory-debug.patch} | 4 +- ...w-patch-ixgbe-fdir-rte_flow-for-dpvs.patch | 2 +- .../0005-Fix-bonding-mode-4-problem.patch | 81 ------------ ...ow-slaves-from-different-numa-nodes.patch} | 10 +- .../0006-fix-bonding-mode-4-problems.patch | 117 ++++++++++++++++++ 7 files changed, 127 insertions(+), 91 deletions(-) rename patch/dpdk-stable-20.11.1/{0003-for-debug-only-enable-dpdk-eal-memory-debug.patch => 0003-enable-dpdk-eal-memory-debug.patch} (93%) delete mode 100644 
patch/dpdk-stable-20.11.1/0005-Fix-bonding-mode-4-problem.patch rename patch/dpdk-stable-20.11.1/{0006-bonding-allow-slaves-from-different-numa-nodes.patch => 0005-bonding-allow-slaves-from-different-numa-nodes.patch} (87%) create mode 100644 patch/dpdk-stable-20.11.1/0006-fix-bonding-mode-4-problems.patch diff --git a/patch/dpdk-stable-20.11.1/0001-kni-use-netlink-event-for-multicast-driver-part.patch b/patch/dpdk-stable-20.11.1/0001-kni-use-netlink-event-for-multicast-driver-part.patch index cc036a39b..b0da7c08b 100644 --- a/patch/dpdk-stable-20.11.1/0001-kni-use-netlink-event-for-multicast-driver-part.patch +++ b/patch/dpdk-stable-20.11.1/0001-kni-use-netlink-event-for-multicast-driver-part.patch @@ -1,4 +1,4 @@ -From 8e66b7600c7d8d0e59dd85565986436c0e03a2c8 Mon Sep 17 00:00:00 2001 +From cf3358c7c85f82fab48c7aef1b9e2ac191e550f9 Mon Sep 17 00:00:00 2001 From: huangyichen Date: Thu, 1 Jul 2021 21:21:16 +0800 Subject: [PATCH 1/6] kni: use netlink event for multicast (driver part) diff --git a/patch/dpdk-stable-20.11.1/0002-pdump-change-dpdk-pdump-tool-for-dpvs.patch b/patch/dpdk-stable-20.11.1/0002-pdump-change-dpdk-pdump-tool-for-dpvs.patch index 6d2598728..17647f9bf 100644 --- a/patch/dpdk-stable-20.11.1/0002-pdump-change-dpdk-pdump-tool-for-dpvs.patch +++ b/patch/dpdk-stable-20.11.1/0002-pdump-change-dpdk-pdump-tool-for-dpvs.patch @@ -1,4 +1,4 @@ -From 0ef2e126c8ca9b4a246f680a2a5110b734e9782b Mon Sep 17 00:00:00 2001 +From 4cf2ce035c095d3f384f4493e27b37e8f76aba7f Mon Sep 17 00:00:00 2001 From: huangyichen Date: Thu, 1 Jul 2021 21:23:50 +0800 Subject: [PATCH 2/6] pdump: change dpdk-pdump tool for dpvs diff --git a/patch/dpdk-stable-20.11.1/0003-for-debug-only-enable-dpdk-eal-memory-debug.patch b/patch/dpdk-stable-20.11.1/0003-enable-dpdk-eal-memory-debug.patch similarity index 93% rename from patch/dpdk-stable-20.11.1/0003-for-debug-only-enable-dpdk-eal-memory-debug.patch rename to patch/dpdk-stable-20.11.1/0003-enable-dpdk-eal-memory-debug.patch index 
cfd9473b1..8c74a40e8 100644 --- a/patch/dpdk-stable-20.11.1/0003-for-debug-only-enable-dpdk-eal-memory-debug.patch +++ b/patch/dpdk-stable-20.11.1/0003-enable-dpdk-eal-memory-debug.patch @@ -1,7 +1,7 @@ -From 446e9d2ae65c25ca382323bdd7cdec765f357886 Mon Sep 17 00:00:00 2001 +From 08c6044b4111087f8aa48df2a56b3b802e2636ee Mon Sep 17 00:00:00 2001 From: huangyichen Date: Thu, 1 Jul 2021 21:24:47 +0800 -Subject: [PATCH 3/6] [for debug only] enable dpdk eal memory debug +Subject: [PATCH 3/6] enable dpdk eal memory debug The patch is used for memory debug. To use the patch, configure meson with option -Dc_args="-DRTE_MALLOC_DEBUG" when building dpdk. For example, diff --git a/patch/dpdk-stable-20.11.1/0004-ixgbe_flow-patch-ixgbe-fdir-rte_flow-for-dpvs.patch b/patch/dpdk-stable-20.11.1/0004-ixgbe_flow-patch-ixgbe-fdir-rte_flow-for-dpvs.patch index 09c791e95..48ea429c9 100644 --- a/patch/dpdk-stable-20.11.1/0004-ixgbe_flow-patch-ixgbe-fdir-rte_flow-for-dpvs.patch +++ b/patch/dpdk-stable-20.11.1/0004-ixgbe_flow-patch-ixgbe-fdir-rte_flow-for-dpvs.patch @@ -1,4 +1,4 @@ -From fc81d45d6bf23bd6d9b0e7eb9fdc7fe41b559e65 Mon Sep 17 00:00:00 2001 +From c33ce99ea12869a590ba2644eeef68c63feae8c5 Mon Sep 17 00:00:00 2001 From: huangyichen Date: Fri, 2 Jul 2021 11:55:47 +0800 Subject: [PATCH 4/6] ixgbe_flow: patch ixgbe fdir rte_flow for dpvs diff --git a/patch/dpdk-stable-20.11.1/0005-Fix-bonding-mode-4-problem.patch b/patch/dpdk-stable-20.11.1/0005-Fix-bonding-mode-4-problem.patch deleted file mode 100644 index a89e4e57f..000000000 --- a/patch/dpdk-stable-20.11.1/0005-Fix-bonding-mode-4-problem.patch +++ /dev/null @@ -1,81 +0,0 @@ -From 3faf16f377052bb9f40c0d17c2590c1a0884abd1 Mon Sep 17 00:00:00 2001 -From: huangyichen -Date: Wed, 4 Aug 2021 15:14:04 +0800 -Subject: [PATCH 5/6] Fix bonding mode 4 problem: - -1. Faulted lacp negotiation, The problem is disscussed in Issue [#725](https://github.com/iqiyi/dpvs/issues/725) of iqiyi/dpvs in detail. -2. 
Multicast packet miss fetch caused by slow protocol process. ---- - drivers/net/bonding/rte_eth_bond_8023ad.c | 20 +++++++++++--------- - drivers/net/bonding/rte_eth_bond_pmd.c | 9 +++++++++ - 2 files changed, 20 insertions(+), 9 deletions(-) - -diff --git a/drivers/net/bonding/rte_eth_bond_8023ad.c b/drivers/net/bonding/rte_eth_bond_8023ad.c -index 5fe004e..52bd960 100644 ---- a/drivers/net/bonding/rte_eth_bond_8023ad.c -+++ b/drivers/net/bonding/rte_eth_bond_8023ad.c -@@ -831,7 +831,6 @@ bond_mode_8023ad_periodic_cb(void *arg) - struct port *port; - struct rte_eth_link link_info; - struct rte_ether_addr slave_addr; -- struct rte_mbuf *lacp_pkt = NULL; - uint16_t slave_id; - uint16_t i; - -@@ -903,6 +902,7 @@ bond_mode_8023ad_periodic_cb(void *arg) - /* Find LACP packet to this port. Do not check subtype, - * it is done in function that queued packet - */ -+ struct rte_mbuf *lacp_pkt = NULL; - int retval = rte_ring_dequeue(port->rx_ring, - (void **)&lacp_pkt); - -@@ -911,15 +911,17 @@ bond_mode_8023ad_periodic_cb(void *arg) - - rx_machine_update(internals, slave_id, lacp_pkt); - } else { -- uint16_t rx_count = rte_eth_rx_burst(slave_id, -- internals->mode4.dedicated_queues.rx_qid, -- &lacp_pkt, 1); -- -- if (rx_count == 1) -- bond_mode_8023ad_handle_slow_pkt(internals, -- slave_id, lacp_pkt); -- else -+ uint16_t rx_count, j; -+ struct rte_mbuf *lacp_pkt[16] = { NULL }; -+ -+ rx_count = rte_eth_rx_burst(slave_id, internals->mode4.dedicated_queues.rx_qid, -+ &lacp_pkt[0], sizeof(lacp_pkt)/sizeof(struct rte_mbuf *)); -+ if (rx_count > 0) { -+ for (j = 0; j < rx_count; j++) -+ bond_mode_8023ad_handle_slow_pkt(internals, slave_id, lacp_pkt[j]); -+ } else { - rx_machine_update(internals, slave_id, NULL); -+ } - } - - periodic_machine(internals, slave_id); -diff --git a/drivers/net/bonding/rte_eth_bond_pmd.c b/drivers/net/bonding/rte_eth_bond_pmd.c -index 057b1ad..27f35fb 100644 ---- a/drivers/net/bonding/rte_eth_bond_pmd.c -+++ b/drivers/net/bonding/rte_eth_bond_pmd.c 
-@@ -322,6 +322,15 @@ rx_burst_8023ad(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts, - - /* Handle slow protocol packets. */ - while (j < num_rx_total) { -+ /* If packet is not pure L2 and is known: -+ * Such as OSPF protocol multcast packet, -+ * we want to handle it in user mode by ourselves, -+ * skip slow protocol flow*/ -+ if ((bufs[j]->packet_type & ~RTE_PTYPE_L2_ETHER) != 0) { -+ j++; -+ continue; -+ } -+ - if (j + 3 < num_rx_total) - rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *)); - --- -1.8.3.1 - diff --git a/patch/dpdk-stable-20.11.1/0006-bonding-allow-slaves-from-different-numa-nodes.patch b/patch/dpdk-stable-20.11.1/0005-bonding-allow-slaves-from-different-numa-nodes.patch similarity index 87% rename from patch/dpdk-stable-20.11.1/0006-bonding-allow-slaves-from-different-numa-nodes.patch rename to patch/dpdk-stable-20.11.1/0005-bonding-allow-slaves-from-different-numa-nodes.patch index 29a7aa050..86ddda763 100644 --- a/patch/dpdk-stable-20.11.1/0006-bonding-allow-slaves-from-different-numa-nodes.patch +++ b/patch/dpdk-stable-20.11.1/0005-bonding-allow-slaves-from-different-numa-nodes.patch @@ -1,7 +1,7 @@ -From ca13a127a220fecbcc080f208339c455e12a61a8 Mon Sep 17 00:00:00 2001 +From fc6f70475f8c4cf6870e0e19d4976aebc4ba1a94 Mon Sep 17 00:00:00 2001 From: huangyichen Date: Wed, 4 Aug 2021 15:16:04 +0800 -Subject: [PATCH 6/6] bonding: allow slaves from different numa nodes +Subject: [PATCH 5/6] bonding: allow slaves from different numa nodes Note the patch may have a negative influnce on performance. It's not a good practice to bonding slaves across numa nodes. @@ -10,10 +10,10 @@ It's not a good practice to bonding slaves across numa nodes. 
1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/net/bonding/rte_eth_bond_pmd.c b/drivers/net/bonding/rte_eth_bond_pmd.c -index 27f35fb..7184e0b 100644 +index 057b1ad..53f8ba3 100644 --- a/drivers/net/bonding/rte_eth_bond_pmd.c +++ b/drivers/net/bonding/rte_eth_bond_pmd.c -@@ -1771,7 +1771,14 @@ slave_configure(struct rte_eth_dev *bonded_eth_dev, +@@ -1762,7 +1762,14 @@ slave_configure(struct rte_eth_dev *bonded_eth_dev, errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id, bd_rx_q->nb_rx_desc, @@ -29,7 +29,7 @@ index 27f35fb..7184e0b 100644 &(bd_rx_q->rx_conf), bd_rx_q->mb_pool); if (errval != 0) { RTE_BOND_LOG(ERR, -@@ -1787,7 +1794,14 @@ slave_configure(struct rte_eth_dev *bonded_eth_dev, +@@ -1778,7 +1785,14 @@ slave_configure(struct rte_eth_dev *bonded_eth_dev, errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id, bd_tx_q->nb_tx_desc, diff --git a/patch/dpdk-stable-20.11.1/0006-fix-bonding-mode-4-problems.patch b/patch/dpdk-stable-20.11.1/0006-fix-bonding-mode-4-problems.patch new file mode 100644 index 000000000..d4bdff534 --- /dev/null +++ b/patch/dpdk-stable-20.11.1/0006-fix-bonding-mode-4-problems.patch @@ -0,0 +1,117 @@ +From 05e8db13b7bb119d99760ae8cac33f8c5543eae7 Mon Sep 17 00:00:00 2001 +From: huangyichen +Date: Wed, 4 Aug 2021 15:14:04 +0800 +Subject: [PATCH 6/6] fix bonding mode 4 problems + +1. Faulted lacp negotiation that is disscussed in Issue [#725](https://github.com/iqiyi/dpvs/issues/725) of iqiyi/dpvs in detail. +2. Don't drop multicast/broadcast packets when all-multicast isn't enabled in rx_burst_8023ad. 
+--- + drivers/net/bonding/rte_eth_bond_8023ad.c | 20 +++++++++++--------- + drivers/net/bonding/rte_eth_bond_pmd.c | 23 +++++++++++++---------- + 2 files changed, 24 insertions(+), 19 deletions(-) + +diff --git a/drivers/net/bonding/rte_eth_bond_8023ad.c b/drivers/net/bonding/rte_eth_bond_8023ad.c +index 5fe004e..52bd960 100644 +--- a/drivers/net/bonding/rte_eth_bond_8023ad.c ++++ b/drivers/net/bonding/rte_eth_bond_8023ad.c +@@ -831,7 +831,6 @@ bond_mode_8023ad_periodic_cb(void *arg) + struct port *port; + struct rte_eth_link link_info; + struct rte_ether_addr slave_addr; +- struct rte_mbuf *lacp_pkt = NULL; + uint16_t slave_id; + uint16_t i; + +@@ -903,6 +902,7 @@ bond_mode_8023ad_periodic_cb(void *arg) + /* Find LACP packet to this port. Do not check subtype, + * it is done in function that queued packet + */ ++ struct rte_mbuf *lacp_pkt = NULL; + int retval = rte_ring_dequeue(port->rx_ring, + (void **)&lacp_pkt); + +@@ -911,15 +911,17 @@ bond_mode_8023ad_periodic_cb(void *arg) + + rx_machine_update(internals, slave_id, lacp_pkt); + } else { +- uint16_t rx_count = rte_eth_rx_burst(slave_id, +- internals->mode4.dedicated_queues.rx_qid, +- &lacp_pkt, 1); +- +- if (rx_count == 1) +- bond_mode_8023ad_handle_slow_pkt(internals, +- slave_id, lacp_pkt); +- else ++ uint16_t rx_count, j; ++ struct rte_mbuf *lacp_pkt[16] = { NULL }; ++ ++ rx_count = rte_eth_rx_burst(slave_id, internals->mode4.dedicated_queues.rx_qid, ++ &lacp_pkt[0], sizeof(lacp_pkt)/sizeof(struct rte_mbuf *)); ++ if (rx_count > 0) { ++ for (j = 0; j < rx_count; j++) ++ bond_mode_8023ad_handle_slow_pkt(internals, slave_id, lacp_pkt[j]); ++ } else { + rx_machine_update(internals, slave_id, NULL); ++ } + } + + periodic_machine(internals, slave_id); +diff --git a/drivers/net/bonding/rte_eth_bond_pmd.c b/drivers/net/bonding/rte_eth_bond_pmd.c +index 53f8ba3..72acb61 100644 +--- a/drivers/net/bonding/rte_eth_bond_pmd.c ++++ b/drivers/net/bonding/rte_eth_bond_pmd.c +@@ -291,7 +291,6 @@ rx_burst_8023ad(void 
*queue, struct rte_mbuf **bufs, uint16_t nb_pkts, + + uint8_t collecting; /* current slave collecting status */ + const uint8_t promisc = rte_eth_promiscuous_get(internals->port_id); +- const uint8_t allmulti = rte_eth_allmulticast_get(internals->port_id); + uint8_t subtype; + uint16_t i; + uint16_t j; +@@ -322,6 +321,15 @@ rx_burst_8023ad(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts, + + /* Handle slow protocol packets. */ + while (j < num_rx_total) { ++ /* If packet is not pure L2 and is known: ++ * Such as OSPF protocol multcast packet, ++ * we want to handle it in user mode by ourselves, ++ * skip slow protocol flow */ ++ if ((bufs[j]->packet_type & ~RTE_PTYPE_L2_ETHER) != 0) { ++ j++; ++ continue; ++ } ++ + if (j + 3 < num_rx_total) + rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *)); + +@@ -331,10 +339,8 @@ rx_burst_8023ad(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts, + /* Remove packet from array if: + * - it is slow packet but no dedicated rxq is present, + * - slave is not in collecting state, +- * - bonding interface is not in promiscuous mode: +- * - packet is unicast and address does not match, +- * - packet is multicast and bonding interface +- * is not in allmulti, ++ * - bonding interface is not in promiscuous mode and ++ * packet is unicast and address does not match, + */ + if (unlikely( + (!dedicated_rxq && +@@ -342,12 +348,9 @@ rx_burst_8023ad(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts, + bufs[j])) || + !collecting || + (!promisc && +- ((rte_is_unicast_ether_addr(&hdr->d_addr) && ++ (rte_is_unicast_ether_addr(&hdr->d_addr) && + !rte_is_same_ether_addr(bond_mac, +- &hdr->d_addr)) || +- (!allmulti && +- rte_is_multicast_ether_addr(&hdr->d_addr)))))) { +- ++ &hdr->d_addr))))) { + if (hdr->ether_type == ether_type_slow_be) { + bond_mode_8023ad_handle_slow_pkt( + internals, slaves[idx], bufs[j]); +-- +1.8.3.1 + From 625286a8dbb9ac321c7cbfffa6105a2d487310dd Mon Sep 17 00:00:00 2001 From: ywc689 Date: Mon, 16 Aug 2021 
14:24:15 +0800 Subject: [PATCH 37/41] netif: fix several logging problem 1. correct log of bonding mode4 dedicated queue enable 2. polish logs in netif_tx_burst and increase error log level Signed-off-by: ywc689 --- src/netif.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/netif.c b/src/netif.c index 4c287bbd8..3a094ca25 100644 --- a/src/netif.c +++ b/src/netif.c @@ -2008,7 +2008,7 @@ static int netif_print_isol_lcore_conf(lcoreid_t cid, char *buf, int *len, bool static inline void netif_tx_burst(lcoreid_t cid, portid_t pid, queueid_t qindex) { - int ntx, ii; + int ntx; struct netif_queue_conf *txq; unsigned i = 0; struct rte_mbuf *mbuf_copied = NULL; @@ -2024,7 +2024,7 @@ static inline void netif_tx_burst(lcoreid_t cid, portid_t pid, queueid_t qindex) for (; i < txq->len; i++) { if (NULL == (mbuf_copied = mbuf_copy(txq->mbufs[i], pktmbuf_pool[dev->socket]))) - RTE_LOG(WARNING, NETIF, "%s: Failed to copy mbuf\n", __func__); + RTE_LOG(WARNING, NETIF, "%s: fail to copy outbound mbuf into kni\n", __func__); else kni_ingress(mbuf_copied, dev); } @@ -2034,10 +2034,12 @@ static inline void netif_tx_burst(lcoreid_t cid, portid_t pid, queueid_t qindex) lcore_stats[cid].opackets += ntx; /* do not calculate obytes here in consideration of efficency */ if (unlikely(ntx < txq->len)) { - RTE_LOG(DEBUG, NETIF, "Fail to send %d packets on dpdk%d tx%d\n", ntx,pid, txq->id); + RTE_LOG(INFO, NETIF, "fail to send %d of %d packets on dpdk port %d txq %d\n", + txq->len - ntx, txq->len, pid, txq->id); lcore_stats[cid].dropped += txq->len - ntx; - for (ii = ntx; ii < txq->len; ii++) - rte_pktmbuf_free(txq->mbufs[ii]); + do { + rte_pktmbuf_free(txq->mbufs[ntx]); + } while (++ntx < txq->len); } } @@ -4068,7 +4070,7 @@ int netif_vdevs_add(void) bond_cfg->name, bond_cfg->mode, bond_cfg->primary, bond_cfg->numa_node); if (bond_cfg->mode == BONDING_MODE_8023AD && bond_cfg->options.dedicated_queues_enable) { - if 
(!rte_eth_bond_8023ad_dedicated_queues_enable(bond_cfg->port_id)) { + if (rte_eth_bond_8023ad_dedicated_queues_enable(bond_cfg->port_id)) { RTE_LOG(INFO, NETIF, "%s: bonding mode4 dedicated queues enable failed!\n", __func__); } } From b5368f7d53d680463fdda1f19f00e1bb856ebb1b Mon Sep 17 00:00:00 2001 From: ywc689 Date: Thu, 26 Aug 2021 21:12:14 +0800 Subject: [PATCH 38/41] patch: don't drop lacp packets received from worker queues when dedicated queue enabled Signed-off-by: ywc689 --- ...link-event-for-multicast-driver-part.patch | 2 +- ...dump-change-dpdk-pdump-tool-for-dpvs.patch | 2 +- ...-debug-enable-dpdk-eal-memory-debug.patch} | 4 +- ...w-patch-ixgbe-fdir-rte_flow-for-dpvs.patch | 2 +- ...low-slaves-from-different-numa-nodes.patch | 2 +- ...bonding-fix-bonding-mode-4-problems.patch} | 59 ++++++++++++++----- 6 files changed, 49 insertions(+), 22 deletions(-) rename patch/dpdk-stable-20.11.1/{0003-enable-dpdk-eal-memory-debug.patch => 0003-debug-enable-dpdk-eal-memory-debug.patch} (93%) rename patch/dpdk-stable-20.11.1/{0006-fix-bonding-mode-4-problems.patch => 0006-bonding-fix-bonding-mode-4-problems.patch} (66%) diff --git a/patch/dpdk-stable-20.11.1/0001-kni-use-netlink-event-for-multicast-driver-part.patch b/patch/dpdk-stable-20.11.1/0001-kni-use-netlink-event-for-multicast-driver-part.patch index b0da7c08b..e39254c24 100644 --- a/patch/dpdk-stable-20.11.1/0001-kni-use-netlink-event-for-multicast-driver-part.patch +++ b/patch/dpdk-stable-20.11.1/0001-kni-use-netlink-event-for-multicast-driver-part.patch @@ -1,4 +1,4 @@ -From cf3358c7c85f82fab48c7aef1b9e2ac191e550f9 Mon Sep 17 00:00:00 2001 +From 5b032cc0d59f9fe2e9607423a92399254e30a8f7 Mon Sep 17 00:00:00 2001 From: huangyichen Date: Thu, 1 Jul 2021 21:21:16 +0800 Subject: [PATCH 1/6] kni: use netlink event for multicast (driver part) diff --git a/patch/dpdk-stable-20.11.1/0002-pdump-change-dpdk-pdump-tool-for-dpvs.patch b/patch/dpdk-stable-20.11.1/0002-pdump-change-dpdk-pdump-tool-for-dpvs.patch index 
17647f9bf..89d3f4c47 100644 --- a/patch/dpdk-stable-20.11.1/0002-pdump-change-dpdk-pdump-tool-for-dpvs.patch +++ b/patch/dpdk-stable-20.11.1/0002-pdump-change-dpdk-pdump-tool-for-dpvs.patch @@ -1,4 +1,4 @@ -From 4cf2ce035c095d3f384f4493e27b37e8f76aba7f Mon Sep 17 00:00:00 2001 +From 8d1dc22740a315d62596445beba8b8737c45ffa4 Mon Sep 17 00:00:00 2001 From: huangyichen Date: Thu, 1 Jul 2021 21:23:50 +0800 Subject: [PATCH 2/6] pdump: change dpdk-pdump tool for dpvs diff --git a/patch/dpdk-stable-20.11.1/0003-enable-dpdk-eal-memory-debug.patch b/patch/dpdk-stable-20.11.1/0003-debug-enable-dpdk-eal-memory-debug.patch similarity index 93% rename from patch/dpdk-stable-20.11.1/0003-enable-dpdk-eal-memory-debug.patch rename to patch/dpdk-stable-20.11.1/0003-debug-enable-dpdk-eal-memory-debug.patch index 8c74a40e8..03ff38ba7 100644 --- a/patch/dpdk-stable-20.11.1/0003-enable-dpdk-eal-memory-debug.patch +++ b/patch/dpdk-stable-20.11.1/0003-debug-enable-dpdk-eal-memory-debug.patch @@ -1,7 +1,7 @@ -From 08c6044b4111087f8aa48df2a56b3b802e2636ee Mon Sep 17 00:00:00 2001 +From e31fd685ced591060571375c70c69cd8ccf8dad9 Mon Sep 17 00:00:00 2001 From: huangyichen Date: Thu, 1 Jul 2021 21:24:47 +0800 -Subject: [PATCH 3/6] enable dpdk eal memory debug +Subject: [PATCH 3/6] debug: enable dpdk eal memory debug The patch is used for memory debug. To use the patch, configure meson with option -Dc_args="-DRTE_MALLOC_DEBUG" when building dpdk. 
For example, diff --git a/patch/dpdk-stable-20.11.1/0004-ixgbe_flow-patch-ixgbe-fdir-rte_flow-for-dpvs.patch b/patch/dpdk-stable-20.11.1/0004-ixgbe_flow-patch-ixgbe-fdir-rte_flow-for-dpvs.patch index 48ea429c9..ef7eda6d4 100644 --- a/patch/dpdk-stable-20.11.1/0004-ixgbe_flow-patch-ixgbe-fdir-rte_flow-for-dpvs.patch +++ b/patch/dpdk-stable-20.11.1/0004-ixgbe_flow-patch-ixgbe-fdir-rte_flow-for-dpvs.patch @@ -1,4 +1,4 @@ -From c33ce99ea12869a590ba2644eeef68c63feae8c5 Mon Sep 17 00:00:00 2001 +From 965c6ebd04d49ba578bab321ea87768669a2c7d1 Mon Sep 17 00:00:00 2001 From: huangyichen Date: Fri, 2 Jul 2021 11:55:47 +0800 Subject: [PATCH 4/6] ixgbe_flow: patch ixgbe fdir rte_flow for dpvs diff --git a/patch/dpdk-stable-20.11.1/0005-bonding-allow-slaves-from-different-numa-nodes.patch b/patch/dpdk-stable-20.11.1/0005-bonding-allow-slaves-from-different-numa-nodes.patch index 86ddda763..473bec74c 100644 --- a/patch/dpdk-stable-20.11.1/0005-bonding-allow-slaves-from-different-numa-nodes.patch +++ b/patch/dpdk-stable-20.11.1/0005-bonding-allow-slaves-from-different-numa-nodes.patch @@ -1,4 +1,4 @@ -From fc6f70475f8c4cf6870e0e19d4976aebc4ba1a94 Mon Sep 17 00:00:00 2001 +From a6393a8d04f1c8a4b324782aa5e242e10043a197 Mon Sep 17 00:00:00 2001 From: huangyichen Date: Wed, 4 Aug 2021 15:16:04 +0800 Subject: [PATCH 5/6] bonding: allow slaves from different numa nodes diff --git a/patch/dpdk-stable-20.11.1/0006-fix-bonding-mode-4-problems.patch b/patch/dpdk-stable-20.11.1/0006-bonding-fix-bonding-mode-4-problems.patch similarity index 66% rename from patch/dpdk-stable-20.11.1/0006-fix-bonding-mode-4-problems.patch rename to patch/dpdk-stable-20.11.1/0006-bonding-fix-bonding-mode-4-problems.patch index d4bdff534..d2e53511a 100644 --- a/patch/dpdk-stable-20.11.1/0006-fix-bonding-mode-4-problems.patch +++ b/patch/dpdk-stable-20.11.1/0006-bonding-fix-bonding-mode-4-problems.patch @@ -1,14 +1,15 @@ -From 05e8db13b7bb119d99760ae8cac33f8c5543eae7 Mon Sep 17 00:00:00 2001 +From 
38db21e38a36527a0e2e26f01a4b1f1bfd10c3d6 Mon Sep 17 00:00:00 2001 From: huangyichen Date: Wed, 4 Aug 2021 15:14:04 +0800 -Subject: [PATCH 6/6] fix bonding mode 4 problems +Subject: [PATCH 6/6] bonding: fix bonding mode 4 problems -1. Faulted lacp negotiation that is disscussed in Issue [#725](https://github.com/iqiyi/dpvs/issues/725) of iqiyi/dpvs in detail. +1. Fix lacp packet receipt problem that is disscussed in issue [#725](https://github.com/iqiyi/dpvs/issues/725) of iqiyi/dpvs in detail. 2. Don't drop multicast/broadcast packets when all-multicast isn't enabled in rx_burst_8023ad. +3. Don't drop lacp packets received from worker queues when dedicated queue enabled. --- - drivers/net/bonding/rte_eth_bond_8023ad.c | 20 +++++++++++--------- - drivers/net/bonding/rte_eth_bond_pmd.c | 23 +++++++++++++---------- - 2 files changed, 24 insertions(+), 19 deletions(-) + drivers/net/bonding/rte_eth_bond_8023ad.c | 20 ++++++++------ + drivers/net/bonding/rte_eth_bond_pmd.c | 46 +++++++++++++++++++------------ + 2 files changed, 40 insertions(+), 26 deletions(-) diff --git a/drivers/net/bonding/rte_eth_bond_8023ad.c b/drivers/net/bonding/rte_eth_bond_8023ad.c index 5fe004e..52bd960 100644 @@ -57,7 +58,7 @@ index 5fe004e..52bd960 100644 periodic_machine(internals, slave_id); diff --git a/drivers/net/bonding/rte_eth_bond_pmd.c b/drivers/net/bonding/rte_eth_bond_pmd.c -index 53f8ba3..72acb61 100644 +index 53f8ba3..42e436c 100644 --- a/drivers/net/bonding/rte_eth_bond_pmd.c +++ b/drivers/net/bonding/rte_eth_bond_pmd.c @@ -291,7 +291,6 @@ rx_burst_8023ad(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts, @@ -84,7 +85,7 @@ index 53f8ba3..72acb61 100644 if (j + 3 < num_rx_total) rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *)); -@@ -331,10 +339,8 @@ rx_burst_8023ad(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts, +@@ -331,24 +339,26 @@ rx_burst_8023ad(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts, /* Remove packet from array if: * - it is slow 
packet but no dedicated rxq is present, * - slave is not in collecting state, @@ -96,22 +97,48 @@ index 53f8ba3..72acb61 100644 + * packet is unicast and address does not match, */ if (unlikely( - (!dedicated_rxq && -@@ -342,12 +348,9 @@ rx_burst_8023ad(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts, - bufs[j])) || - !collecting || - (!promisc && +- (!dedicated_rxq && +- is_lacp_packets(hdr->ether_type, subtype, +- bufs[j])) || +- !collecting || +- (!promisc && - ((rte_is_unicast_ether_addr(&hdr->d_addr) && -+ (rte_is_unicast_ether_addr(&hdr->d_addr) && - !rte_is_same_ether_addr(bond_mac, +- !rte_is_same_ether_addr(bond_mac, - &hdr->d_addr)) || - (!allmulti && - rte_is_multicast_ether_addr(&hdr->d_addr)))))) { - -+ &hdr->d_addr))))) { ++ (is_lacp_packets(hdr->ether_type, subtype, bufs[j])) || ++ !collecting || (!promisc && ++ (rte_is_unicast_ether_addr(&hdr->d_addr) && ++ !rte_is_same_ether_addr(bond_mac, &hdr->d_addr))))) { if (hdr->ether_type == ether_type_slow_be) { ++ if (dedicated_rxq) { ++ /* Error! Lacp packets should never appear here if ++ * dedicated queue enabled. This can be caused by ++ * a lack of support for ethertype rte_flow. Just ++ * issue a warning rather than dropping the packets ++ * so that the lacp state machine can work properly. 
++ */ ++ RTE_BOND_LOG(WARNING, "receive lacp packets from queue %d " ++ "of port %d when dedicated queue enabled", ++ bd_rx_q->queue_id, slaves[idx]); ++ } bond_mode_8023ad_handle_slow_pkt( internals, slaves[idx], bufs[j]); + } else +@@ -1271,8 +1281,10 @@ skip_tx_ring: + slave_port_ids[i]; + } + +- if (unlikely(dist_slave_count < 1)) ++ if (unlikely(dist_slave_count < 1)) { ++ RTE_BOND_LOG(WARNING, "no distributing slaves on bonding port %d", internals->port_id); + return 0; ++ } + + return tx_burst_balance(queue, bufs, nb_bufs, dist_slave_port_ids, + dist_slave_count); -- 1.8.3.1 From 14a9fb7bda323f9474882dcc76538f46d6337fd7 Mon Sep 17 00:00:00 2001 From: ywc689 Date: Thu, 26 Aug 2021 21:14:59 +0800 Subject: [PATCH 39/41] netif: don't flush flow filters after port starting up PMD drivers may preset some flow rules before netif port starts. For example, a ethertype flow is set by bond driver when dedicated queue is enabled with 8023ad mode. Flush flow filters after port starting up would invalidate the preset flow rules, thus we just do nothing and it should be expected the device drivers reset all the flow filters on initial stage of bootup. 
Signed-off-by: ywc689 --- src/netif.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/src/netif.c b/src/netif.c index 3a094ca25..80d9babdd 100644 --- a/src/netif.c +++ b/src/netif.c @@ -3498,6 +3498,7 @@ static int config_fdir_conf(struct rte_fdir_conf *fdir_conf) int netif_port_start(struct netif_port *port) { int ii, ret; + lcoreid_t cid; queueid_t qid; char promisc_on; char buf[512]; @@ -3633,7 +3634,6 @@ int netif_port_start(struct netif_port *port) port->netif_ops->op_update_addr(port); /* add in6_addr multicast address */ - int cid = 0; rte_eal_mp_remote_launch(idev_add_mcast_init, port, CALL_MAIN); RTE_LCORE_FOREACH_WORKER(cid) { if ((ret = rte_eal_wait_lcore(cid)) < 0) { @@ -3643,13 +3643,6 @@ int netif_port_start(struct netif_port *port) } } - /* flush rte_flows */ - ret = netif_flow_flush(port); - if (ret != EDPVS_OK) { - RTE_LOG(WARNING, NETIF, "fail to flush rte_flows on device %s\n", port->name); - return ret; - } - return EDPVS_OK; } From 7ab7052964d0a1729b77a63e5be1a50d4579cba2 Mon Sep 17 00:00:00 2001 From: ywc689 Date: Fri, 23 Jul 2021 13:48:33 +0800 Subject: [PATCH 40/41] bugfix: fix dpvs build problem Signed-off-by: ywc689 --- README.md | 2 +- src/Makefile | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 6bddd6c08..b2bc6ed66 100644 --- a/README.md +++ b/README.md @@ -143,7 +143,7 @@ $ ./usertools/dpdk-devbind.py -b uio_pci_generic 0000:06:00.0 ## Build DPVS -It's simple, just set `RTE_SDK` and build it. +It's simple, just set `PKG_CONFIG_PATH` and build it. ```bash $ export PKG_CONFIG_PATH= # normally located at dpdklib/lib64/pkgconfig/libdpdk.pc diff --git a/src/Makefile b/src/Makefile index 63d5e0646..1ef63e5ea 100644 --- a/src/Makefile +++ b/src/Makefile @@ -35,7 +35,7 @@ DATE_STRING := $(shell date +%Y.%m.%d.%H:%M:%S) SRCDIR := $(dir $(realpath $(firstword $(MAKEFILE_LIST)))) # Addtional libs below are needed when using dynamic link. 
-# LIBS += -lpthread -lnuma -lrt -lm -ldl -lcrypto -lpcap +LIBS += -lpthread -lnuma -lrt -lm -ldl -lcrypto include $(SRCDIR)/config.mk include $(SRCDIR)/dpdk.mk From 06ea842eda4f5c30f579133d3d18f9e6943b0a8b Mon Sep 17 00:00:00 2001 From: ywc689 Date: Wed, 28 Jul 2021 10:11:59 +0800 Subject: [PATCH 41/41] version: release v1.9.0 Signed-off-by: ywc689 --- src/VERSION | 54 ++++++++++++++++++++++++++--------------------------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/src/VERSION b/src/VERSION index 76f9440e2..8b6c402ce 100755 --- a/src/VERSION +++ b/src/VERSION @@ -1,33 +1,33 @@ #!/bin/sh - # program: dpvs -# Apr 26, 2021 +# Jul 28, 2021 +# +# Major changes: +# - Adapt dpvs to dpdk 20.11 (dpdk-stable-20.11.1). +# - Create branch DPVS-1.8-LTS to support dpdk 18.11. +# - Obsolete supports for dpdk 17.11. +# +# Featurs: +# - Dpvs: Add netif_flow module using generic flow api (rte_flow), and replace flow director with rte_flow. +# - Dpvs: Replace mbuf userdata with mbuf dynfields. +# - Dpvs: Adapt dpvs to several renamed type names in dpdk 20.11. +# - Dpvs: Update Makefiles to support dpdk 20.11. +# - Dpvs: Add config option "dedicated_queues" for bonding mode 4 (802.3ad). +# - Dpdk: Add helper script to facilitate dpdk build. +# - Dpdk: Porting patches to dpdk 20.11 and remove patches of previous dpdk versions (18.11, 17.11). +# - Dpdk: Patch dpdk ixgbe pmd driver to support dpvs's flow api. +# - Dpdk: Patch dpdk bonding mode 4 for mlx5 to fix crash problem when debug. +# - Keeaplived: Add UDP_CHECK health checker. +# - Docs: Refine tutorial doc of section 'Full-NAT with Keepalived (one-arm)'. +# - Docs: Update docs for dpvs use with dpdk 20.11. +# - Ci: Update dpvs ci to support dpdk 20.11. +# +# Bugfix: +# - Dpvs: Fix ipvs rr/wrr/wlc problem of uneven load distribution across dests. +# - Dpvs: Fix bonding mode 4 problem caused by LACP failure. # -# Features -# ---------- -# - CI: Enable CI workflow. 
-# - Dpvs: TC stability and performance enhancement. -# - Dpvs: TC supports ipv6 and ingress traffic. -# - Dpvs: Add document and examples for dpvs tc. -# - Dpvs: Add supports for ipvs whitelist. -# - Dpvs: Support icmp forwarding with icmp_fwd_core. -# - Dpvs: Support mtu config. -# - Dpvs: Obsolete dpdk 16.07 and 17.05.02. -# - Patch: Add eal memory debug patch for dpdk-stable-18.11.2. -# -# # Bugfix -# -------- -# - Dpvs: Fix traceroute problem of dpvs ip address. -# - Dpvs: Fix flags conflicts for ipvs conn/service/dest. -# - Dpvs: Reset tcp connection when syn-cookie check fails. -# - Dpvs: Use correct mbuf:l4_len for checkout offload. -# - Dpvs: Fix udp checksum problem for uoa when checksum offload is off. -# - Dpvs: Simplify checksum calculations and remove superfluous checksum functions. -# - Dpvs: Refactor netif recv procedure. -# - Dpvs: Fix debug level log problem. -# - Keepalived: Fix problem that local ip config doesn't take effect when restart. -# - Keepalived: Fix crash problem when tunnel is configured. -export VERSION=1.8 -export RELEASE=10 +export VERSION=1.9 +export RELEASE=0 echo $VERSION-$RELEASE