From 7f157f44066636496be441a938de3bf0b3d426ba Mon Sep 17 00:00:00 2001 From: John Ousterhout Date: Mon, 26 Dec 2022 17:25:27 -0800 Subject: [PATCH] Reworked statistics to reflect bypass properly --- homa_impl.h | 10 ++++++++-- homa_offload.c | 36 ++++++++++++++++++++++++------------ homa_utils.c | 7 ++++++- util/metrics.py | 42 +++++++++++++++++++++++++++++------------- 4 files changed, 67 insertions(+), 28 deletions(-) diff --git a/homa_impl.h b/homa_impl.h index 88bae083..72fbab25 100644 --- a/homa_impl.h +++ b/homa_impl.h @@ -2103,11 +2103,17 @@ struct homa_metrics { __u64 softirq_calls; /** - * @softirq_cycles: total time spent executing homa_softirq, as - * measured with get_cycles(). + * @softirq_cycles: total time spent executing homa_softirq when + * invoked under Linux's SoftIRQ handler, as measured with get_cycles(). */ __u64 softirq_cycles; + /** + * @bypass_softirq_cycles: total time spent executing homa_softirq when + * invoked during GRO, bypassing the SoftIRQ mechanism. + */ + __u64 bypass_softirq_cycles; + /** * @linux_softirq_cycles: total time spent executing all softirq * activities, as measured by the linux softirq module, in get_cycles() diff --git a/homa_offload.c b/homa_offload.c index 32b1a8da..c31d7b7f 100644 --- a/homa_offload.c +++ b/homa_offload.c @@ -137,6 +137,7 @@ struct sk_buff *homa_gro_receive(struct list_head *held_list, struct sk_buff *result = NULL; struct homa_core *core = homa_cores[raw_smp_processor_id()]; __u32 hash; + __u64 saved_softirq_metric, softirq_cycles; struct data_header *h_new = (struct data_header *) skb_transport_header(skb); int priority; @@ -170,12 +171,8 @@ struct sk_buff *homa_gro_receive(struct list_head *held_list, * a significant difference in throughput for large * messages, especially when the system is loaded. */ - if (homa->gro_policy & HOMA_GRO_FAST_GRANTS) { - homa_softirq(skb); - - /* Indicates that we have freed skb. */ - return ERR_PTR(-EINPROGRESS); - } + if (homa->gro_policy & HOMA_GRO_FAST_GRANTS) + goto bypass; } else tt_record4("homa_gro_receive got packet from 0x%x " "id %llu, type 0x%x, priority %d", @@ -186,12 +183,8 @@ struct sk_buff *homa_gro_receive(struct list_head *held_list, if ((homa->gro_policy & HOMA_GRO_BYPASS) || ((homa->gro_policy & HOMA_GRO_SHORT_BYPASS) - && (skb->len < 1400))) { - homa_softirq(skb); - - /* This return value indicates that we have freed skb. */ - return ERR_PTR(-EINPROGRESS); - } + && (skb->len < 1400))) + goto bypass; /* The GRO mechanism tries to separate packets onto different * gro_lists by hash. This is bad for us, because we want to batch @@ -266,6 +259,25 @@ struct sk_buff *homa_gro_receive(struct list_head *held_list, done: homa_check_pacer(homa, 1); return result; + + bypass: + saved_softirq_metric = homa_cores[raw_smp_processor_id()] + ->metrics.softirq_cycles; + homa_softirq(skb); + softirq_cycles = homa_cores[raw_smp_processor_id()] + ->metrics.softirq_cycles - saved_softirq_metric; + homa_cores[raw_smp_processor_id()]->metrics.softirq_cycles + = saved_softirq_metric; + INC_METRIC(bypass_softirq_cycles, softirq_cycles); + + /* Record SoftIRQ cycles in a different metric to reflect that + * they happened during bypass. + */ + + + /* This return value indicates that we have freed skb. */ + return ERR_PTR(-EINPROGRESS); + } /** diff --git a/homa_utils.c b/homa_utils.c index d67b8cbc..a2877082 100644 --- a/homa_utils.c +++ b/homa_utils.c @@ -1312,8 +1312,13 @@ char *homa_print_metrics(struct homa *homa) m->softirq_calls); homa_append_metric(homa, "softirq_cycles %15llu " - "Time spent in homa_softirq\n", + "Time spent in homa_softirq during SoftIRQ\n", m->softirq_cycles); + homa_append_metric(homa, + "bypass_softirq_cycles %15llu " + "Time spent in homa_softirq during bypass " + "from GRO\n", + m->bypass_softirq_cycles); homa_append_metric(homa, "linux_softirq_cycles %15llu " "Time spent in all Linux SoftIRQ\n", diff --git a/util/metrics.py b/util/metrics.py index 0a50b7f2..9a74ccbf 100755 --- a/util/metrics.py +++ b/util/metrics.py @@ -271,7 +271,7 @@ def scale_number(number): us_per = 0 else: us_per = (time/calls)/(cpu_khz/1e03) - print("send syscall %6.2f %7.2f us/syscall" % (cores, us_per)) + print("send syscall %6.2f %7.2f us/syscall" % (cores, us_per)) time = float(deltas["recv_cycles"]) - float(deltas["poll_cycles"]) cores = time/time_delta @@ -282,7 +282,7 @@ def scale_number(number): us_per = 0 else: us_per = (time/calls)/(cpu_khz/1e03) - print("recv syscall (-poll) %6.2f %7.2f us/syscall" % (cores, us_per)) + print("recv syscall (-poll) %6.2f %7.2f us/syscall" % (cores, us_per)) time = float(deltas["reply_cycles"]) cores = time/time_delta @@ -293,31 +293,47 @@ def scale_number(number): us_per = 0 else: us_per = (time/calls)/(cpu_khz/1e03) - print("reply syscall %6.2f %7.2f us/syscall" % (cores, us_per)) + print("reply syscall %6.2f %7.2f us/syscall" % (cores, us_per)) for print_name, symbol in [["NAPI", "napi_cycles"], + [" Bypass homa_softirq", "bypass_softirq_cycles"], ["Linux SoftIRQ", "linux_softirq_cycles"], - [" Homa SoftIRQ", "softirq_cycles"], - [" Sending grants", "grant_cycles"]]: + [" Normal homa_softirq", "softirq_cycles"]]: cpu_time = float(deltas[symbol]) cores = cpu_time/time_delta - if (symbol != "softirq_cycles") and (symbol != "grant_cycles"): - total_cores_used += cores; if packets_received > 0: - print("%s %6.2f %7.2f us/packet" % (print_name.ljust(18), + print("%s %6.2f %7.2f us/packet" % (print_name.ljust(22), cores, (cpu_time/packets_received) / (cpu_khz/1e03))) else: - print("%s %6.2f" % (print_name.ljust(18), cores)) + print("%s %6.2f" % (print_name.ljust(22), cores)) + cpu_time = float(deltas["napi_cycles"]) + if cpu_time == 0: + cpu_time = float(deltas["bypass_softirq_cycles"]) + total_cores_used += cpu_time/time_delta + cpu_time = float(deltas["linux_softirq_cycles"]) + if cpu_time == 0: + cpu_time = float(deltas["softirq_cycles"]) + total_cores_used += cpu_time/time_delta for print_name, symbol in [["Pacer", "pacer_cycles"], ["Timer handler", "timer_cycles"]]: cpu_time = float(deltas[symbol]) cores = cpu_time/time_delta total_cores_used += cores; - print("%s %6.2f" % (print_name.ljust(18), cores)) + print("%s %6.2f" % (print_name.ljust(22), cores)) - print("------------------------------"); - print("Total Core Utilization %6.2f" % (total_cores_used)) + print("----------------------------------"); + print("Total Core Utilization %6.2f" % (total_cores_used)) + + cpu_time = float(deltas["grant_cycles"]) + cores = cpu_time/time_delta + num_grants = deltas["packets_sent_GRANT"] + if num_grants != 0: + per_grant = cpu_time/float(deltas["packets_sent_GRANT"])/(cpu_khz/1e03) + else: + per_grant = 0.0 + print("\nSending grants %6.2f %7.2f us/grant" % (cores, + per_grant)) time = float(deltas["poll_cycles"]) cores = time/time_delta @@ -326,7 +342,7 @@ def scale_number(number): us_per = 0 else: us_per = (time/calls)/(cpu_khz/1e03) - print("\nPolling in recv %6.2f %7.2f us/syscall" % (cores, us_per)) + print("Polling in recv %6.2f %7.2f us/syscall" % (cores, us_per)) print("\nLock Misses:") print("------------")