From 2ff453e3e59cf0f38fbf44458afb318d5e152055 Mon Sep 17 00:00:00 2001
From: John Ousterhout
Date: Thu, 3 Oct 2024 11:28:11 -0700
Subject: [PATCH] Extract homa_offload.h from homa_impl.h

Also create a new per-core struct homa_offload_core, and delete the
homa_core struct: it's no longer needed.
---
 homa_impl.h               | 101 ----------------------
 homa_incoming.c           |   8 +-
 homa_offload.c            |  82 +++++++++++-------
 homa_offload.h            |  94 +++++++++++++++++++++
 homa_plumbing.c           |  22 ++---
 homa_utils.c              |  49 -----------
 test/unit_homa_incoming.c |  17 ++--
 test/unit_homa_offload.c  | 172 ++++++++++++++++++++------------------
 test/utils.h              |   2 -
 9 files changed, 264 insertions(+), 283 deletions(-)
 create mode 100644 homa_offload.h

diff --git a/homa_impl.h b/homa_impl.h
index 43717e7..199b019 100644
--- a/homa_impl.h
+++ b/homa_impl.h
@@ -139,8 +139,6 @@ struct homa;
 /* Declarations used in this file, so they can't be made at the end. */
 extern void homa_throttle_lock_slow(struct homa *homa);
 
-extern struct homa_core *homa_cores[];
-
 #define sizeof32(type) ((int) (sizeof(type)))
 
 /** define CACHE_LINE_SIZE - The number of bytes in a cache line. */
@@ -876,88 +874,6 @@ struct homa {
 	 */
 	int temp[4];
 };
-
-/**
- * struct homa_core - Homa allocates one of these structures for each
- * core, to hold information that needs to be kept on a per-core basis.
- */
-struct homa_core {
-	/** NUMA-specific page pool from which to allocate skb pages. */
-	struct homa_page_pool *pool;
-
-	/**
-	 * @last_active: the last time (in get_cycle() units) that
-	 * there was system activity, such as NAPI or SoftIRQ, on this
-	 * core. Used for load balancing.
-	 */
-	__u64 last_active;
-
-	/**
-	 * @last_gro: the last time (in get_cycle() units) that
-	 * homa_gro_receive returned on this core. Used to determine
-	 * whether GRO is keeping a core busy.
-	 */
-	__u64 last_gro;
-
-	/**
-	 * @softirq_backlog: the number of batches of packets that have
-	 * been queued for SoftIRQ processing on this core but haven't
-	 * yet been processed.
-	 */
-	atomic_t softirq_backlog;
-
-	/**
-	 * @softirq_offset: used when rotating SoftIRQ assignment among
-	 * the next cores; contains an offset to add to the current core
-	 * to produce the core for SoftIRQ.
-	 */
-	int softirq_offset;
-
-	/**
-	 * @gen3_softirq_cores: when the Gen3 load balancer is in use,
-	 * GRO will arrange for SoftIRQ processing to occur on one of
-	 * these cores; -1 values are ignored (see balance.txt for more
-	 * on load balancing). This information is filled in via sysctl.
-	 */
-#define NUM_GEN3_SOFTIRQ_CORES 3
-	int gen3_softirq_cores[NUM_GEN3_SOFTIRQ_CORES];
-
-	/**
-	 * @last_app_active: the most recent time (get_cycles() units)
-	 * when an application was actively using Homa on this core (e.g.,
-	 * by sending or receiving messages). Used for load balancing
-	 * (see balance.txt).
-	 */
-	__u64 last_app_active;
-
-	/**
-	 * @held_skb: last packet buffer known to be available for
-	 * merging other packets into on this core (note: may not still
-	 * be available), or NULL if none.
-	 */
-	struct sk_buff *held_skb;
-
-	/**
-	 * @held_bucket: the index, within napi->gro_hash, of the list
-	 * containing @held_skb; undefined if @held_skb is NULL. Used to
-	 * verify that @held_skb is still available.
-	 */
-	int held_bucket;
-
-	/**
-	 * @thread: the most recent thread to invoke a Homa system call
-	 * on this core, or NULL if none.
-	 */
-	struct task_struct *thread;
-
-	/**
-	 * @syscall_end_time: the time, in get_cycle() units, when the last
-	 * Homa system call completed on this core. Meaningless if thread
-	 * is NULL.
-	 */
-	__u64 syscall_end_time;
-};
-
 /**
  * struct homa_skb_info - Additional information needed by Homa for each
  * outbound DATA packet. Space is allocated for this at the very end of the
@@ -1205,17 +1121,6 @@ extern void homa_gap_retry(struct homa_rpc *rpc);
 extern int homa_get_port(struct sock *sk, unsigned short snum);
 extern int homa_getsockopt(struct sock *sk, int level, int optname,
 		char __user *optval, int __user *option);
-extern int homa_gro_complete(struct sk_buff *skb, int thoff);
-extern void homa_gro_gen2(struct sk_buff *skb);
-extern void homa_gro_gen3(struct sk_buff *skb);
-extern void homa_gro_hook_tcp(void);
-extern void homa_gro_unhook_tcp(void);
-extern struct sk_buff
-		*homa_gro_receive(struct list_head *gro_list,
-		struct sk_buff *skb);
-extern struct sk_buff
-		*homa_gso_segment(struct sk_buff *skb,
-		netdev_features_t features);
 extern int homa_hash(struct sock *sk);
 extern enum hrtimer_restart homa_hrtimer(struct hrtimer *timer);
 
@@ -1235,8 +1140,6 @@ extern struct sk_buff *homa_new_data_packet(struct homa_rpc *rpc,
 		struct iov_iter *iter, int offset,
 		int length, int max_seg_data);
-extern int homa_offload_end(void);
-extern int homa_offload_init(void);
 extern void homa_outgoing_sysctl_changed(struct homa *homa);
 extern int homa_pacer_main(void *transportInfo);
 extern void homa_pacer_stop(struct homa *homa);
@@ -1264,7 +1167,6 @@ extern void homa_rpc_acked(struct homa_sock *hsk,
 		const struct in6_addr *saddr, struct homa_ack *ack);
 extern void homa_rpc_free(struct homa_rpc *rpc);
 extern void homa_rpc_handoff(struct homa_rpc *rpc);
-extern void homa_send_ipis(void);
 extern int homa_sendmsg(struct sock *sk, struct msghdr *msg, size_t len);
 extern int homa_setsockopt(struct sock *sk, int level, int optname,
 		sockptr_t __user optval, unsigned int optlen);
@@ -1276,9 +1178,6 @@ extern void homa_spin(int ns);
 extern char *homa_symbol_for_type(uint8_t type);
 extern int homa_sysctl_softirq_cores(struct ctl_table *table, int write,
 		void __user *buffer, size_t *lenp, loff_t *ppos);
-extern struct sk_buff
-		*homa_tcp_gro_receive(struct list_head *held_list,
-		struct sk_buff *skb);
 extern void homa_timer(struct homa *homa);
 extern int homa_timer_main(void *transportInfo);
 extern void homa_unhash(struct sock *sk);
diff --git a/homa_incoming.c b/homa_incoming.c
index 09b65fc..18e511d 100644
--- a/homa_incoming.c
+++ b/homa_incoming.c
@@ -6,6 +6,7 @@
 
 #include "homa_impl.h"
 #include "homa_grant.h"
+#include "homa_offload.h"
 #include "homa_peer.h"
 #include "homa_pool.h"
 
@@ -1267,7 +1268,7 @@ struct homa_rpc *homa_wait_for_message(struct homa_sock *hsk, int flags,
 			INC_METRIC(poll_cycles, now - poll_start);
 
 			/* Now it's time to sleep. */
-			homa_cores[interest.core]->last_app_active = now;
+			per_cpu(homa_offload_core, interest.core).last_app_active = now;
 			set_current_state(TASK_INTERRUPTIBLE);
 			rpc = (struct homa_rpc *) atomic_long_read(&interest.ready_rpc);
 			if (!rpc && !hsk->shutdown) {
@@ -1381,7 +1382,8 @@ struct homa_interest *homa_choose_interest(struct homa *homa,
 	list_for_each(pos, head) {
 		interest = (struct homa_interest *) (((char *) pos)
 				- offset);
-		if (homa_cores[interest->core]->last_active < busy_time) {
+		if (per_cpu(homa_offload_core, interest->core).last_active
+				< busy_time) {
 			if (backup != NULL)
 				INC_METRIC(handoffs_alt_thread, 1);
 			return interest;
@@ -1463,7 +1465,7 @@ void homa_rpc_handoff(struct homa_rpc *rpc)
 
 	/* Update the last_app_active time for the thread's core, so Homa
	 * will try to avoid doing any work there.
*/ - homa_cores[interest->core]->last_app_active = get_cycles(); + per_cpu(homa_offload_core, interest->core).last_app_active = get_cycles(); /* Clear the interest. This serves two purposes. First, it saves * the waking thread from acquiring the socket lock again, which diff --git a/homa_offload.c b/homa_offload.c index eb3353c..db813f7 100644 --- a/homa_offload.c +++ b/homa_offload.c @@ -5,6 +5,9 @@ */ #include "homa_impl.h" +#include "homa_offload.h" + +DEFINE_PER_CPU(struct homa_offload_core, homa_offload_core); #define CORES_TO_CHECK 4 @@ -38,6 +41,25 @@ static struct net_offload hook_tcp6_net_offload; */ int homa_offload_init(void) { + int i; + + for (i = 0; i < nr_cpu_ids; i++) { + struct homa_offload_core *offload_core; + int j; + + offload_core = &per_cpu(homa_offload_core, i); + offload_core->last_active = 0; + offload_core->last_gro = 0; + atomic_set(&offload_core->softirq_backlog, 0); + offload_core->softirq_offset = 0; + offload_core->gen3_softirq_cores[0] = i^1; + for (j = 1; j < NUM_GEN3_SOFTIRQ_CORES; j++) + offload_core->gen3_softirq_cores[j] = -1; + offload_core->last_app_active = 0; + offload_core->held_skb = NULL; + offload_core->held_bucket = 0; + } + int res1 = inet_add_offload(&homa_offload, IPPROTO_HOMA); int res2 = inet6_add_offload(&homa_offload, IPPROTO_HOMA); @@ -257,9 +279,10 @@ struct sk_buff *homa_gro_receive(struct list_head *held_list, */ struct sk_buff *held_skb; struct sk_buff *result = NULL; - struct homa_core *core = homa_cores[raw_smp_processor_id()]; + struct homa_offload_core *offload_core = &per_cpu(homa_offload_core, + raw_smp_processor_id()); __u64 now = get_cycles(); - int busy = (now - core->last_gro) < homa->gro_busy_cycles; + int busy = (now - offload_core->last_gro) < homa->gro_busy_cycles; __u32 hash; __u64 saved_softirq_metric, softirq_cycles; __u64 *softirq_cycles_metric; @@ -268,7 +291,7 @@ struct sk_buff *homa_gro_receive(struct list_head *held_list, int priority; __u32 saddr; - core->last_active = now; + offload_core->last_active = now; if (skb_is_ipv6(skb)) { priority = ipv6_hdr(skb)->priority; saddr = ntohl(ipv6_hdr(skb)->saddr.in6_u.u6_addr32[3]); @@ -324,7 +347,7 @@ struct sk_buff *homa_gro_receive(struct list_head *held_list, * core added a Homa packet (if there is such a list). */ hash = skb_get_hash_raw(skb) & (GRO_HASH_BUCKETS - 1); - if (core->held_skb) { + if (offload_core->held_skb) { /* Reverse-engineer the location of the napi_struct, so we * can verify that held_skb is still valid. */ @@ -333,18 +356,19 @@ struct sk_buff *homa_gro_receive(struct list_head *held_list, struct napi_struct *napi = container_of(gro_list, struct napi_struct, gro_hash[hash]); - /* Must verify that core->held_skb points to a packet on + /* Must verify that offload_core->held_skb points to a packet on * the list, and that the packet is a Homa packet. * homa_gro_complete isn't always invoked before removing - * packets from the list, so core->held_skb could be a + * packets from the list, so offload_core->held_skb could be a * dangling pointer (or the skb could have been reused for * some other protocol). 
*/ list_for_each_entry(held_skb, - &napi->gro_hash[core->held_bucket].list, list) { + &napi->gro_hash[offload_core->held_bucket].list, + list) { int protocol; - if (held_skb != core->held_skb) + if (held_skb != offload_core->held_skb) continue; if (skb_is_ipv6(held_skb)) protocol = ipv6_hdr(held_skb)->nexthdr; @@ -382,9 +406,9 @@ struct sk_buff *homa_gro_receive(struct list_head *held_list, homa_gro_complete(held_skb, 0); netif_receive_skb(held_skb); homa_send_ipis(); - napi->gro_hash[core->held_bucket].count--; - if (napi->gro_hash[core->held_bucket].count == 0) - __clear_bit(core->held_bucket, + napi->gro_hash[offload_core->held_bucket].count--; + if (napi->gro_hash[offload_core->held_bucket].count == 0) + __clear_bit(offload_core->held_bucket, &napi->gro_bitmask); result = ERR_PTR(-EINPROGRESS); } @@ -400,14 +424,14 @@ struct sk_buff *homa_gro_receive(struct list_head *held_list, * means we aren't heavily loaded; if batching does occur, * homa_gro_complete will pick a different core). */ - core->held_skb = skb; - core->held_bucket = hash; + offload_core->held_skb = skb; + offload_core->held_bucket = hash; if (likely(homa->gro_policy & HOMA_GRO_SAME_CORE)) homa_set_softirq_cpu(skb, raw_smp_processor_id()); done: homa_check_pacer(homa, 1); - core->last_gro = get_cycles(); + offload_core->last_gro = get_cycles(); return result; bypass: @@ -420,7 +444,7 @@ struct sk_buff *homa_gro_receive(struct list_head *held_list, softirq_cycles = *softirq_cycles_metric - saved_softirq_metric; *softirq_cycles_metric = saved_softirq_metric; INC_METRIC(bypass_softirq_cycles, softirq_cycles); - core->last_gro = get_cycles(); + offload_core->last_gro = get_cycles(); /* This return value indicates that we have freed skb. */ return ERR_PTR(-EINPROGRESS); @@ -448,16 +472,16 @@ void homa_gro_gen2(struct sk_buff *skb) int this_core = raw_smp_processor_id(); int candidate = this_core; __u64 now = get_cycles(); - struct homa_core *core; + struct homa_offload_core *offload_core; for (i = CORES_TO_CHECK; i > 0; i--) { candidate++; if (unlikely(candidate >= nr_cpu_ids)) candidate = 0; - core = homa_cores[candidate]; - if (atomic_read(&core->softirq_backlog) > 0) + offload_core = &per_cpu(homa_offload_core, candidate); + if (atomic_read(&offload_core->softirq_backlog) > 0) continue; - if ((core->last_gro + homa->busy_cycles) > now) + if ((offload_core->last_gro + homa->busy_cycles) > now) continue; tt_record3("homa_gro_gen2 chose core %d for id %d offset %d", candidate, homa_local_id(h->common.sender_id), @@ -468,12 +492,12 @@ void homa_gro_gen2(struct sk_buff *skb) /* All of the candidates appear to be busy; just * rotate among them. 
		 */
-		int offset = homa_cores[this_core]->softirq_offset;
+		int offset = per_cpu(homa_offload_core, this_core).softirq_offset;
 
 		offset += 1;
 		if (offset > CORES_TO_CHECK)
 			offset = 1;
-		homa_cores[this_core]->softirq_offset = offset;
+		per_cpu(homa_offload_core, this_core).softirq_offset = offset;
 		candidate = this_core + offset;
 		while (candidate >= nr_cpu_ids)
 			candidate -= nr_cpu_ids;
@@ -481,7 +505,7 @@
 				candidate, homa_local_id(h->common.sender_id),
 				ntohl(h->seg.offset));
 	}
-	atomic_inc(&homa_cores[candidate]->softirq_backlog);
+	atomic_inc(&per_cpu(homa_offload_core, candidate).softirq_backlog);
 	homa_set_softirq_cpu(skb, candidate);
 }
 
@@ -501,7 +525,8 @@ void homa_gro_gen3(struct sk_buff *skb)
 	struct data_header *h = (struct data_header *) skb_transport_header(skb);
 	int i, core;
 	__u64 now, busy_time;
-	int *candidates = homa_cores[raw_smp_processor_id()]->gen3_softirq_cores;
+	int *candidates = per_cpu(homa_offload_core, raw_smp_processor_id())
+			.gen3_softirq_cores;
 
 	now = get_cycles();
 	busy_time = now - homa->busy_cycles;
@@ -512,17 +537,18 @@
 		if (candidate < 0)
 			break;
 
-		if (homa_cores[candidate]->last_app_active < busy_time) {
+		if (per_cpu(homa_offload_core, candidate).last_app_active
+				< busy_time) {
 			core = candidate;
 			break;
 		}
 	}
 	homa_set_softirq_cpu(skb, core);
-	homa_cores[core]->last_active = now;
+	per_cpu(homa_offload_core, core).last_active = now;
 	tt_record4("homa_gro_gen3 chose core %d for id %d, offset %d, delta %d",
 			core, homa_local_id(h->common.sender_id),
 			ntohl(h->seg.offset),
-			now - homa_cores[core]->last_app_active);
+			now - per_cpu(homa_offload_core, core).last_app_active);
 	INC_METRIC(gen3_handoffs, 1);
 	if (core != candidates[0])
 		INC_METRIC(gen3_alt_handoffs, 1);
@@ -546,7 +572,7 @@ int homa_gro_complete(struct sk_buff *skb, int hoffset)
 //			ntohl(h->seg.offset),
 //			NAPI_GRO_CB(skb)->count);
 
-	homa_cores[raw_smp_processor_id()]->held_skb = NULL;
+	per_cpu(homa_offload_core, raw_smp_processor_id()).held_skb = NULL;
 	if (homa->gro_policy & HOMA_GRO_GEN3) {
 		homa_gro_gen3(skb);
 	} else if (homa->gro_policy & HOMA_GRO_GEN2) {
@@ -568,7 +594,7 @@ int homa_gro_complete(struct sk_buff *skb, int hoffset)
 			core++;
 			if (unlikely(core >= nr_cpu_ids))
 				core = 0;
-			last_active = homa_cores[core]->last_active;
+			last_active = per_cpu(homa_offload_core, core).last_active;
 			if (last_active < best_time) {
 				best_time = last_active;
 				best = core;
diff --git a/homa_offload.h b/homa_offload.h
new file mode 100644
index 0000000..c0f3c9b
--- /dev/null
+++ b/homa_offload.h
@@ -0,0 +1,94 @@
+/* SPDX-License-Identifier: BSD-2-Clause */
+
+/* This file contains definitions related to homa_offload.c. */
+
+#ifndef _HOMA_OFFLOAD_H
+#define _HOMA_OFFLOAD_H
+
+#include 
+
+/**
+ * struct homa_offload_core - Stores core-specific information used during
+ * GRO operations.
+ */
+struct homa_offload_core {
+	/**
+	 * @last_active: the last time (in get_cycle() units) that
+	 * there was system activity, such as NAPI or SoftIRQ, on this
+	 * core. Used for load balancing.
+	 */
+	__u64 last_active;
+
+	/**
+	 * @last_gro: the last time (in get_cycle() units) that
+	 * homa_gro_receive returned on this core. Used to determine
+	 * whether GRO is keeping a core busy.
+	 */
+	__u64 last_gro;
+
+	/**
+	 * @softirq_backlog: the number of batches of packets that have
+	 * been queued for SoftIRQ processing on this core but haven't
+	 * yet been processed.
+	 */
+	atomic_t softirq_backlog;
+
+	/**
+	 * @softirq_offset: used when rotating SoftIRQ assignment among
+	 * the next cores; contains an offset to add to the current core
+	 * to produce the core for SoftIRQ.
+	 */
+	int softirq_offset;
+
+	/**
+	 * @gen3_softirq_cores: when the Gen3 load balancer is in use,
+	 * GRO will arrange for SoftIRQ processing to occur on one of
+	 * these cores; -1 values are ignored (see balance.txt for more
+	 * on load balancing). This information is filled in via sysctl.
+	 */
+#define NUM_GEN3_SOFTIRQ_CORES 3
+	int gen3_softirq_cores[NUM_GEN3_SOFTIRQ_CORES];
+
+	/**
+	 * @last_app_active: the most recent time (get_cycles() units)
+	 * when an application was actively using Homa on this core (e.g.,
+	 * by sending or receiving messages). Used for load balancing
+	 * (see balance.txt).
+	 */
+	__u64 last_app_active;
+
+	/**
+	 * @held_skb: last packet buffer known to be available for
+	 * merging other packets into on this core (note: may not still
+	 * be available), or NULL if none.
+	 */
+	struct sk_buff *held_skb;
+
+	/**
+	 * @held_bucket: the index, within napi->gro_hash, of the list
+	 * containing @held_skb; undefined if @held_skb is NULL. Used to
+	 * verify that @held_skb is still available.
+	 */
+	int held_bucket;
+};
+DECLARE_PER_CPU(struct homa_offload_core, homa_offload_core);
+
+extern int homa_gro_complete(struct sk_buff *skb, int thoff);
+extern void homa_gro_gen2(struct sk_buff *skb);
+extern void homa_gro_gen3(struct sk_buff *skb);
+extern void homa_gro_hook_tcp(void);
+extern void homa_gro_unhook_tcp(void);
+extern struct sk_buff
+		*homa_gro_receive(struct list_head *gro_list,
+		struct sk_buff *skb);
+extern struct sk_buff
+		*homa_gso_segment(struct sk_buff *skb,
+		netdev_features_t features);
+extern int homa_offload_end(void);
+extern int homa_offload_init(void);
+extern void homa_send_ipis(void);
+extern struct sk_buff
+		*homa_tcp_gro_receive(struct list_head *held_list,
+		struct sk_buff *skb);
+
+#endif /* _HOMA_OFFLOAD_H */
diff --git a/homa_plumbing.c b/homa_plumbing.c
index e5b1673..bed2453 100644
--- a/homa_plumbing.c
+++ b/homa_plumbing.c
@@ -5,6 +5,7 @@
  */
 
 #include "homa_impl.h"
+#include "homa_offload.h"
 #include "homa_peer.h"
 #include "homa_pool.h"
 
@@ -889,14 +890,13 @@ int homa_sendmsg(struct sock *sk, struct msghdr *msg, size_t length)
 	struct homa_rpc *rpc = NULL;
 	union sockaddr_in_union *addr = (union sockaddr_in_union *) msg->msg_name;
 
-	homa_cores[raw_smp_processor_id()]->last_app_active = start;
+	per_cpu(homa_offload_core, raw_smp_processor_id()).last_app_active = start;
 	if (unlikely(!msg->msg_control_is_user)) {
 		tt_record("homa_sendmsg error: !msg->msg_control_is_user");
 		result = -EINVAL;
 		goto error;
 	}
-	if (unlikely(copy_from_user(&args, msg->msg_control,
-			sizeof(args)))) {
+	if (unlikely(copy_from_user(&args, msg->msg_control, sizeof(args)))) {
 		result = -EFAULT;
 		goto error;
 	}
@@ -1023,7 +1023,7 @@ int homa_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags,
 	int result;
 
 	INC_METRIC(recv_calls, 1);
-	homa_cores[raw_smp_processor_id()]->last_app_active = start;
+	per_cpu(homa_offload_core, raw_smp_processor_id()).last_app_active = start;
 	if (unlikely(!msg->msg_control)) {
 		/* This test isn't strictly necessary, but it provides a
		 * hook for testing kernel call times.
@@ -1245,7 +1245,7 @@ int homa_softirq(struct sk_buff *skb) start = get_cycles(); INC_METRIC(softirq_calls, 1); - homa_cores[raw_smp_processor_id()]->last_active = start; + per_cpu(homa_offload_core, raw_smp_processor_id()).last_active = start; if ((start - last) > 1000000) { int scaled_ms = (int) (10*(start-last)/cpu_khz); @@ -1393,7 +1393,7 @@ int homa_softirq(struct sk_buff *skb) packets = other_pkts; } - atomic_dec(&homa_cores[raw_smp_processor_id()]->softirq_backlog); + atomic_dec(&per_cpu(homa_offload_core, raw_smp_processor_id()).softirq_backlog); INC_METRIC(softirq_cycles, get_cycles() - start); return 0; } @@ -1621,7 +1621,7 @@ int homa_sysctl_softirq_cores(struct ctl_table *table, int write, { int result, i; struct ctl_table table_copy; - struct homa_core *core; + struct homa_offload_core *offload_core; int max_values, *values; max_values = (NUM_GEN3_SOFTIRQ_CORES + 1) * nr_cpu_ids; @@ -1647,9 +1647,9 @@ int homa_sysctl_softirq_cores(struct ctl_table *table, int write, if (values[i] < 0) break; - core = homa_cores[values[i]]; + offload_core = &per_cpu(homa_offload_core, values[i]); for (j = 0; j < NUM_GEN3_SOFTIRQ_CORES; j++) - core->gen3_softirq_cores[j] = values[i+j+1]; + offload_core->gen3_softirq_cores[j] = values[i+j+1]; } } else { /* Read: return values from all of the cores. */ @@ -1663,9 +1663,9 @@ int homa_sysctl_softirq_cores(struct ctl_table *table, int write, *dst = i; dst++; table_copy.maxlen += sizeof(int); - core = homa_cores[i]; + offload_core = &per_cpu(homa_offload_core, i); for (j = 0; j < NUM_GEN3_SOFTIRQ_CORES; j++) { - *dst = core->gen3_softirq_cores[j]; + *dst = offload_core->gen3_softirq_cores[j]; dst++; table_copy.maxlen += sizeof(int); } diff --git a/homa_utils.c b/homa_utils.c index 953dfe4..377411c 100644 --- a/homa_utils.c +++ b/homa_utils.c @@ -9,14 +9,6 @@ #include "homa_rpc.h" #include "homa_skb.h" -/* Core-specific information. NR_CPUS is an overestimate of the actual - * number, but allows us to allocate the array statically. - */ -struct homa_core *homa_cores[NR_CPUS]; - -/* Points to block of memory holding all homa_cores; used to free it. */ -char *core_memory; - struct completion homa_pacer_kthread_done; /** @@ -29,43 +21,11 @@ struct completion homa_pacer_kthread_done; */ int homa_init(struct homa *homa) { - size_t aligned_size; - char *first; int i, err; _Static_assert(HOMA_MAX_PRIORITIES >= 8, "homa_init assumes at least 8 priority levels"); - /* Initialize core-specific info (if no-one else has already done it), - * making sure that each core has private cache lines. 
- */ - if (!core_memory) { - aligned_size = (sizeof(struct homa_core) + 0x3f) & ~0x3f; - core_memory = vmalloc(0x3f + (nr_cpu_ids*aligned_size)); - if (!core_memory) { - pr_err("Homa couldn't allocate memory for core-specific data\n"); - return -ENOMEM; - } - first = (char *) (((__u64) core_memory + 0x3f) & ~0x3f); - for (i = 0; i < nr_cpu_ids; i++) { - struct homa_core *core; - int j; - - core = (struct homa_core *) (first + i*aligned_size); - homa_cores[i] = core; - core->last_active = 0; - core->last_gro = 0; - atomic_set(&core->softirq_backlog, 0); - core->softirq_offset = 0; - core->gen3_softirq_cores[0] = i^1; - for (j = 1; j < NUM_GEN3_SOFTIRQ_CORES; j++) - core->gen3_softirq_cores[j] = -1; - core->last_app_active = 0; - core->held_skb = NULL; - core->held_bucket = 0; - } - } - homa->pacer_kthread = NULL; init_completion(&homa_pacer_kthread_done); atomic64_set(&homa->next_outgoing_id, 2); @@ -180,8 +140,6 @@ int homa_init(struct homa *homa) */ void homa_destroy(struct homa *homa) { - int i; - if (homa->pacer_kthread) { homa_pacer_stop(homa); wait_for_completion(&homa_pacer_kthread_done); @@ -193,13 +151,6 @@ void homa_destroy(struct homa *homa) homa_peertab_destroy(homa->peers); kfree(homa->peers); homa_skb_cleanup(homa); - - if (core_memory) { - vfree(core_memory); - core_memory = NULL; - for (i = 0; i < nr_cpu_ids; i++) - homa_cores[i] = NULL; - } kfree(homa->metrics); } diff --git a/test/unit_homa_incoming.c b/test/unit_homa_incoming.c index ccdfd31..22754cd 100644 --- a/test/unit_homa_incoming.c +++ b/test/unit_homa_incoming.c @@ -3,6 +3,7 @@ */ #include "homa_impl.h" +#include "homa_offload.h" #include "homa_peer.h" #include "homa_pool.h" #define KSELFTEST_NOT_MAIN 1 @@ -2387,9 +2388,9 @@ TEST_F(homa_incoming, homa_choose_interest__find_idle_core) mock_cycles = 5000; self->homa.busy_cycles = 1000; - homa_cores[1]->last_active = 4100; - homa_cores[2]->last_active = 3500; - homa_cores[3]->last_active = 2000; + per_cpu(homa_offload_core, 1).last_active = 4100; + per_cpu(homa_offload_core, 2).last_active = 3500; + per_cpu(homa_offload_core, 3).last_active = 2000; struct homa_interest *result = homa_choose_interest(&self->homa, &self->hsk.request_interests, @@ -2413,9 +2414,9 @@ TEST_F(homa_incoming, homa_choose_interest__all_cores_busy) mock_cycles = 5000; self->homa.busy_cycles = 1000; - homa_cores[1]->last_active = 4100; - homa_cores[2]->last_active = 4001; - homa_cores[3]->last_active = 4800; + per_cpu(homa_offload_core, 1).last_active = 4100; + per_cpu(homa_offload_core, 2).last_active = 4001; + per_cpu(homa_offload_core, 3).last_active = 4800; struct homa_interest *result = homa_choose_interest(&self->homa, &self->hsk.request_interests, @@ -2607,10 +2608,10 @@ TEST_F(homa_incoming, homa_rpc_handoff__update_last_app_active) interest.core = 2; crpc->interest = &interest; mock_cycles = 10000; - homa_cores[2]->last_app_active = 444; + per_cpu(homa_offload_core, 2).last_app_active = 444; homa_rpc_handoff(crpc); EXPECT_STREQ("wake_up_process pid 0", unit_log_get()); - EXPECT_EQ(10000, homa_cores[2]->last_app_active); + EXPECT_EQ(10000, per_cpu(homa_offload_core, 2).last_app_active); atomic_andnot(RPC_HANDING_OFF, &crpc->flags); } diff --git a/test/unit_homa_offload.c b/test/unit_homa_offload.c index 3531fb3..9691bed 100644 --- a/test/unit_homa_offload.c +++ b/test/unit_homa_offload.c @@ -3,6 +3,7 @@ */ #include "homa_impl.h" +#include "homa_offload.h" #include "homa_rpc.h" #define KSELFTEST_NOT_MAIN 1 #include "kselftest_harness.h" @@ -10,6 +11,8 @@ #include "mock.h" #include 
"utils.h" +#define cur_offload_core (&per_cpu(homa_offload_core, raw_smp_processor_id())) + extern struct homa *homa; static struct sk_buff *tcp_gro_receive(struct list_head *held_list, @@ -82,18 +85,20 @@ FIXTURE_SETUP(homa_offload) inet_offloads[IPPROTO_TCP] = &self->tcp_offloads; self->tcp6_offloads.callbacks.gro_receive = tcp6_gro_receive; inet6_offloads[IPPROTO_TCP] = &self->tcp6_offloads; + homa_offload_init(); unit_log_clear(); /* Configure so core isn't considered too busy for bypasses. */ mock_cycles = 1000; self->homa.gro_busy_cycles = 500; - cur_core->last_gro = 400; + cur_offload_core->last_gro = 400; } FIXTURE_TEARDOWN(homa_offload) { struct sk_buff *skb, *tmp; + homa_offload_end(); list_for_each_entry_safe(skb, tmp, &self->napi.gro_hash[2].list, list) kfree_skb(skb); homa_destroy(&self->homa); @@ -160,10 +165,10 @@ TEST_F(homa_offload, homa_tcp_gro_receive__pass_to_homa_ipv6) h->flags = HOMA_TCP_FLAGS; h->urgent = htons(HOMA_TCP_URGENT); NAPI_GRO_CB(skb)->same_flow = 0; - cur_core->held_skb = NULL; - cur_core->held_bucket = 99; + cur_offload_core->held_skb = NULL; + cur_offload_core->held_bucket = 99; EXPECT_EQ(NULL, homa_tcp_gro_receive(&self->empty_list, skb)); - EXPECT_EQ(skb, cur_core->held_skb); + EXPECT_EQ(skb, cur_offload_core->held_skb); EXPECT_STREQ("", unit_log_get()); EXPECT_EQ(IPPROTO_HOMA, ipv6_hdr(skb)->nexthdr); kfree_skb(skb); @@ -182,10 +187,10 @@ TEST_F(homa_offload, homa_tcp_gro_receive__pass_to_homa_ipv4) h->flags = HOMA_TCP_FLAGS; h->urgent = htons(HOMA_TCP_URGENT); NAPI_GRO_CB(skb)->same_flow = 0; - cur_core->held_skb = NULL; - cur_core->held_bucket = 99; + cur_offload_core->held_skb = NULL; + cur_offload_core->held_bucket = 99; EXPECT_EQ(NULL, homa_tcp_gro_receive(&self->empty_list, skb)); - EXPECT_EQ(skb, cur_core->held_skb); + EXPECT_EQ(skb, cur_offload_core->held_skb); EXPECT_STREQ("", unit_log_get()); EXPECT_EQ(IPPROTO_HOMA, ip_hdr(skb)->protocol); EXPECT_EQ(2303, ip_hdr(skb)->check); @@ -221,8 +226,8 @@ TEST_F(homa_offload, homa_gro_receive__update_offset_from_sequence) self->header.seg.offset = -1; skb = mock_skb_new(&self->ip, &self->header.common, 1400, 0); NAPI_GRO_CB(skb)->same_flow = 0; - cur_core->held_skb = NULL; - cur_core->held_bucket = 99; + cur_offload_core->held_skb = NULL; + cur_offload_core->held_bucket = 99; EXPECT_EQ(NULL, homa_gro_receive(&self->empty_list, skb)); h = (struct data_header *) skb_transport_header(skb); EXPECT_EQ(6000, htonl(h->seg.offset)); @@ -274,7 +279,7 @@ TEST_F(homa_offload, homa_gro_receive__HOMA_GRO_SHORT_BYPASS) * than one packet. */ self->homa.gro_policy |= HOMA_GRO_SHORT_BYPASS; - cur_core->last_gro = 400; + cur_offload_core->last_gro = 400; skb2 = mock_skb_new(&self->ip, &h.common, 1400, 2000); result = homa_gro_receive(&self->empty_list, skb2); EXPECT_EQ(0, -PTR_ERR(result)); @@ -283,14 +288,14 @@ TEST_F(homa_offload, homa_gro_receive__HOMA_GRO_SHORT_BYPASS) /* Third attempt: bypass should happen. */ h.message_length = htonl(1400); h.incoming = htonl(1400); - cur_core->last_gro = 400; + cur_offload_core->last_gro = 400; skb3 = mock_skb_new(&self->ip, &h.common, 1400, 4000); result = homa_gro_receive(&self->empty_list, skb3); EXPECT_EQ(EINPROGRESS, -PTR_ERR(result)); EXPECT_EQ(1, homa_metrics_per_cpu()->gro_data_bypasses); /* Third attempt: no bypass because core busy. 
*/ - cur_core->last_gro = 600; + cur_offload_core->last_gro = 600; skb4 = mock_skb_new(&self->ip, &h.common, 1400, 4000); result = homa_gro_receive(&self->empty_list, skb3); EXPECT_EQ(0, -PTR_ERR(result)); @@ -332,7 +337,7 @@ TEST_F(homa_offload, homa_gro_receive__fast_grant_optimization) /* Second attempt: HOMA_FAST_GRANTS is enabled. */ self->homa.gro_policy = HOMA_GRO_FAST_GRANTS; - cur_core->last_gro = 400; + cur_offload_core->last_gro = 400; struct sk_buff *skb2 = mock_skb_new(&client_ip, &h.common, 0, 0); result = homa_gro_receive(&self->empty_list, skb2); EXPECT_EQ(EINPROGRESS, -PTR_ERR(result)); @@ -340,7 +345,7 @@ TEST_F(homa_offload, homa_gro_receive__fast_grant_optimization) EXPECT_SUBSTR("xmit DATA 1400@10000", unit_log_get()); /* Third attempt: core is too busy for fast grants. */ - cur_core->last_gro = 600; + cur_offload_core->last_gro = 600; struct sk_buff *skb3 = mock_skb_new(&client_ip, &h.common, 0, 0); result = homa_gro_receive(&self->empty_list, skb3); EXPECT_EQ(0, -PTR_ERR(result)); @@ -356,13 +361,13 @@ TEST_F(homa_offload, homa_gro_receive__no_held_skb) skb = mock_skb_new(&self->ip, &self->header.common, 1400, 0); skb->hash = 2; NAPI_GRO_CB(skb)->same_flow = 0; - cur_core->held_skb = NULL; - cur_core->held_bucket = 2; + cur_offload_core->held_skb = NULL; + cur_offload_core->held_bucket = 2; EXPECT_EQ(NULL, homa_gro_receive(&self->napi.gro_hash[2].list, skb)); same_flow = NAPI_GRO_CB(skb)->same_flow; EXPECT_EQ(0, same_flow); - EXPECT_EQ(skb, cur_core->held_skb); - EXPECT_EQ(2, cur_core->held_bucket); + EXPECT_EQ(skb, cur_offload_core->held_skb); + EXPECT_EQ(2, cur_offload_core->held_bucket); kfree_skb(skb); } TEST_F(homa_offload, homa_gro_receive__empty_merge_list) @@ -373,13 +378,13 @@ TEST_F(homa_offload, homa_gro_receive__empty_merge_list) skb = mock_skb_new(&self->ip, &self->header.common, 1400, 0); skb->hash = 2; NAPI_GRO_CB(skb)->same_flow = 0; - cur_core->held_skb = self->skb; - cur_core->held_bucket = 3; + cur_offload_core->held_skb = self->skb; + cur_offload_core->held_bucket = 3; EXPECT_EQ(NULL, homa_gro_receive(&self->napi.gro_hash[2].list, skb)); same_flow = NAPI_GRO_CB(skb)->same_flow; EXPECT_EQ(0, same_flow); - EXPECT_EQ(skb, cur_core->held_skb); - EXPECT_EQ(2, cur_core->held_bucket); + EXPECT_EQ(skb, cur_offload_core->held_skb); + EXPECT_EQ(2, cur_offload_core->held_bucket); kfree_skb(skb); } TEST_F(homa_offload, homa_gro_receive__held_skb_not_in_merge_list) @@ -390,13 +395,13 @@ TEST_F(homa_offload, homa_gro_receive__held_skb_not_in_merge_list) skb = mock_skb_new(&self->ip, &self->header.common, 1400, 0); skb->hash = 3; NAPI_GRO_CB(skb)->same_flow = 0; - cur_core->held_skb = skb; - cur_core->held_bucket = 2; + cur_offload_core->held_skb = skb; + cur_offload_core->held_bucket = 2; EXPECT_EQ(NULL, homa_gro_receive(&self->napi.gro_hash[3].list, skb)); same_flow = NAPI_GRO_CB(skb)->same_flow; EXPECT_EQ(0, same_flow); - EXPECT_EQ(skb, cur_core->held_skb); - EXPECT_EQ(3, cur_core->held_bucket); + EXPECT_EQ(skb, cur_offload_core->held_skb); + EXPECT_EQ(3, cur_offload_core->held_bucket); kfree_skb(skb); } TEST_F(homa_offload, homa_gro_receive__held_skb__in_merge_list_but_wrong_proto) @@ -407,25 +412,25 @@ TEST_F(homa_offload, homa_gro_receive__held_skb__in_merge_list_but_wrong_proto) skb = mock_skb_new(&self->ip, &self->header.common, 1400, 0); skb->hash = 3; NAPI_GRO_CB(skb)->same_flow = 0; - cur_core->held_skb = self->skb; + cur_offload_core->held_skb = self->skb; if (skb_is_ipv6(self->skb)) ipv6_hdr(self->skb)->nexthdr = IPPROTO_TCP; else 
ip_hdr(self->skb)->protocol = IPPROTO_TCP; - cur_core->held_bucket = 2; + cur_offload_core->held_bucket = 2; EXPECT_EQ(NULL, homa_gro_receive(&self->napi.gro_hash[3].list, skb)); same_flow = NAPI_GRO_CB(skb)->same_flow; EXPECT_EQ(0, same_flow); - EXPECT_EQ(skb, cur_core->held_skb); - EXPECT_EQ(3, cur_core->held_bucket); + EXPECT_EQ(skb, cur_offload_core->held_skb); + EXPECT_EQ(3, cur_offload_core->held_bucket); kfree_skb(skb); } TEST_F(homa_offload, homa_gro_receive__merge) { struct sk_buff *skb, *skb2; int same_flow; - cur_core->held_skb = self->skb2; - cur_core->held_bucket = 2; + cur_offload_core->held_skb = self->skb2; + cur_offload_core->held_bucket = 2; self->header.seg.offset = htonl(6000); self->header.common.sender_id = cpu_to_be64(1002); @@ -460,8 +465,8 @@ TEST_F(homa_offload, homa_gro_receive__max_gro_skbs) // First packet: fits below the limit. homa->max_gro_skbs = 3; - cur_core->held_skb = self->skb2; - cur_core->held_bucket = 2; + cur_offload_core->held_skb = self->skb2; + cur_offload_core->held_bucket = 2; self->header.seg.offset = htonl(6000); skb = mock_skb_new(&self->ip, &self->header.common, 1400, 0); homa_gro_receive(&self->napi.gro_hash[3].list, skb); @@ -485,7 +490,7 @@ TEST_F(homa_offload, homa_gro_receive__max_gro_skbs) // Third packet also hits the limit for skb, causing the bucket // to become empty. homa->max_gro_skbs = 2; - cur_core->held_skb = self->skb; + cur_offload_core->held_skb = self->skb; skb = mock_skb_new(&self->ip, &self->header.common, 1400, 0); unit_log_clear(); EXPECT_EQ(EINPROGRESS, -PTR_ERR(homa_gro_receive( @@ -504,112 +509,117 @@ TEST_F(homa_offload, homa_gro_gen2) mock_cycles = 1000; homa->busy_cycles = 100; mock_set_core(5); - atomic_set(&homa_cores[6]->softirq_backlog, 1); - homa_cores[6]->last_gro = 0; - atomic_set(&homa_cores[7]->softirq_backlog, 0); - homa_cores[7]->last_gro = 901; - atomic_set(&homa_cores[0]->softirq_backlog, 2); - homa_cores[0]->last_gro = 0; - atomic_set(&homa_cores[1]->softirq_backlog, 0); - homa_cores[1]->last_gro = 899; - atomic_set(&homa_cores[2]->softirq_backlog, 0); - homa_cores[2]->last_gro = 0; + atomic_set(&per_cpu(homa_offload_core, 6).softirq_backlog, 1); + per_cpu(homa_offload_core, 6).last_gro = 0; + atomic_set(&per_cpu(homa_offload_core, 7).softirq_backlog, 0); + per_cpu(homa_offload_core, 7).last_gro = 901; + atomic_set(&per_cpu(homa_offload_core, 0).softirq_backlog, 2); + per_cpu(homa_offload_core, 0).last_gro = 0; + atomic_set(&per_cpu(homa_offload_core, 1).softirq_backlog, 0); + per_cpu(homa_offload_core, 1).last_gro = 899; + atomic_set(&per_cpu(homa_offload_core, 2).softirq_backlog, 0); + per_cpu(homa_offload_core, 2).last_gro = 0; // Avoid busy cores. homa_gro_complete(self->skb, 0); EXPECT_EQ(1, self->skb->hash - 32); - EXPECT_EQ(1, atomic_read(&homa_cores[1]->softirq_backlog)); + EXPECT_EQ(1, atomic_read(&per_cpu(homa_offload_core, 1).softirq_backlog)); // All cores busy; must rotate. 
homa_gro_complete(self->skb, 0); EXPECT_EQ(6, self->skb->hash - 32); homa_gro_complete(self->skb, 0); EXPECT_EQ(7, self->skb->hash - 32); - EXPECT_EQ(2, homa_cores[5]->softirq_offset); + EXPECT_EQ(2, per_cpu(homa_offload_core, 5).softirq_offset); homa_gro_complete(self->skb, 0); EXPECT_EQ(0, self->skb->hash - 32); homa_gro_complete(self->skb, 0); EXPECT_EQ(1, self->skb->hash - 32); homa_gro_complete(self->skb, 0); EXPECT_EQ(6, self->skb->hash - 32); - EXPECT_EQ(1, homa_cores[5]->softirq_offset); + EXPECT_EQ(1, per_cpu(homa_offload_core, 5).softirq_offset); } TEST_F(homa_offload, homa_gro_gen3__basics) { + struct homa_offload_core *offload_core = cur_offload_core; + struct homa_offload_core *offload3 = &per_cpu(homa_offload_core, 3); + struct homa_offload_core *offload5 = &per_cpu(homa_offload_core, 5); + struct homa_offload_core *offload7 = &per_cpu(homa_offload_core, 7); + homa->gro_policy = HOMA_GRO_GEN3; - struct homa_core *core = cur_core; - core->gen3_softirq_cores[0] = 3; - core->gen3_softirq_cores[1] = 7; - core->gen3_softirq_cores[2] = 5; - homa_cores[3]->last_app_active = 4100; - homa_cores[7]->last_app_active = 3900; - homa_cores[5]->last_app_active = 2000; + offload_core->gen3_softirq_cores[0] = 3; + offload_core->gen3_softirq_cores[1] = 7; + offload_core->gen3_softirq_cores[2] = 5; + offload3->last_app_active = 4100; + offload7->last_app_active = 3900; + offload5->last_app_active = 2000; mock_cycles = 5000; self->homa.busy_cycles = 1000; homa_gro_complete(self->skb, 0); EXPECT_EQ(7, self->skb->hash - 32); - EXPECT_EQ(0, homa_cores[3]->last_active); - EXPECT_EQ(5000, homa_cores[7]->last_active); + EXPECT_EQ(0, offload3->last_active); + EXPECT_EQ(5000, offload7->last_active); } TEST_F(homa_offload, homa_gro_gen3__stop_on_negative_core_id) { homa->gro_policy = HOMA_GRO_GEN3; - struct homa_core *core = cur_core; - core->gen3_softirq_cores[0] = 3; - core->gen3_softirq_cores[1] = -1; - core->gen3_softirq_cores[2] = 5; - homa_cores[3]->last_app_active = 4100; - homa_cores[5]->last_app_active = 2000; + struct homa_offload_core *offload_core = cur_offload_core; + offload_core->gen3_softirq_cores[0] = 3; + offload_core->gen3_softirq_cores[1] = -1; + offload_core->gen3_softirq_cores[2] = 5; + per_cpu(homa_offload_core, 3).last_app_active = 4100; + per_cpu(homa_offload_core, 5).last_app_active = 2000; mock_cycles = 5000; self->homa.busy_cycles = 1000; homa_gro_complete(self->skb, 0); EXPECT_EQ(3, self->skb->hash - 32); - EXPECT_EQ(5000, homa_cores[3]->last_active); + EXPECT_EQ(5000, per_cpu(homa_offload_core, 3).last_active); } TEST_F(homa_offload, homa_gro_gen3__all_cores_busy_so_pick_first) { homa->gro_policy = HOMA_GRO_GEN3; - struct homa_core *core = cur_core; - core->gen3_softirq_cores[0] = 3; - core->gen3_softirq_cores[1] = 7; - core->gen3_softirq_cores[2] = 5; - homa_cores[3]->last_app_active = 4100; - homa_cores[7]->last_app_active = 4001; - homa_cores[5]->last_app_active = 4500; + struct homa_offload_core *offload_core = cur_offload_core; + offload_core->gen3_softirq_cores[0] = 3; + offload_core->gen3_softirq_cores[1] = 7; + offload_core->gen3_softirq_cores[2] = 5; + per_cpu(homa_offload_core, 3).last_app_active = 4100; + per_cpu(homa_offload_core, 7).last_app_active = 4001; + per_cpu(homa_offload_core, 5).last_app_active = 4500; mock_cycles = 5000; self->homa.busy_cycles = 1000; homa_gro_complete(self->skb, 0); EXPECT_EQ(3, self->skb->hash - 32); - EXPECT_EQ(5000, homa_cores[3]->last_active); + EXPECT_EQ(5000, per_cpu(homa_offload_core, 3).last_active); } TEST_F(homa_offload, 
homa_gro_complete__clear_held_skb) { - struct homa_core *core = homa_cores[raw_smp_processor_id()]; + struct homa_offload_core *offload_core = &per_cpu(homa_offload_core, + raw_smp_processor_id()); - core->held_skb = self->skb2; + offload_core->held_skb = self->skb2; homa_gro_complete(self->skb, 0); - EXPECT_EQ(NULL, core->held_skb); + EXPECT_EQ(NULL, offload_core->held_skb); } TEST_F(homa_offload, homa_gro_complete__GRO_IDLE) { homa->gro_policy = HOMA_GRO_IDLE; - homa_cores[6]->last_active = 30; - homa_cores[7]->last_active = 25; - homa_cores[0]->last_active = 20; - homa_cores[1]->last_active = 15; - homa_cores[2]->last_active = 10; + per_cpu(homa_offload_core, 6).last_active = 30; + per_cpu(homa_offload_core, 7).last_active = 25; + per_cpu(homa_offload_core, 0).last_active = 20; + per_cpu(homa_offload_core, 1).last_active = 15; + per_cpu(homa_offload_core, 2).last_active = 10; mock_set_core(5); homa_gro_complete(self->skb, 0); EXPECT_EQ(1, self->skb->hash - 32); - homa_cores[6]->last_active = 5; + per_cpu(homa_offload_core, 6).last_active = 5; mock_set_core(5); homa_gro_complete(self->skb, 0); EXPECT_EQ(6, self->skb->hash - 32); diff --git a/test/utils.h b/test/utils.h index 5825bbc..dd741f6 100644 --- a/test/utils.h +++ b/test/utils.h @@ -32,8 +32,6 @@ enum unit_rpc_state { UNIT_IN_SERVICE = 24, }; -#define cur_core homa_cores[raw_smp_processor_id()] - extern char *unit_ack_string(struct homa_ack *ack); extern struct homa_rpc *unit_client_rpc(struct homa_sock *hsk,
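---
Note (not part of the patch): the commit above replaces the vmalloc'ed,
manually cache-aligned homa_cores[] array with the kernel's per-CPU
variable mechanism. For readers unfamiliar with that API, here is a
minimal, self-contained sketch of the same pattern. The demo_* names are
hypothetical; only the kernel APIs (DEFINE_PER_CPU, per_cpu,
this_cpu_ptr, for_each_possible_cpu, raw_smp_processor_id) are real.

#include <linux/atomic.h>
#include <linux/cpumask.h>
#include <linux/percpu.h>
#include <linux/smp.h>
#include <linux/types.h>

/* Stand-in for struct homa_offload_core. */
struct demo_offload_core {
	__u64 last_active;
	atomic_t softirq_backlog;
};

/* One instance per CPU. Each copy lives in that CPU's per-CPU area, so
 * the manual "(size + 0x3f) & ~0x3f" cache-line rounding that the old
 * homa_init() code performed is unnecessary, and there is nothing to
 * vfree() at shutdown (hence the deletions in homa_destroy()).
 */
static DEFINE_PER_CPU(struct demo_offload_core, demo_offload_core);

static void demo_init_all(void)
{
	int cpu;

	/* Cross-CPU initialization, analogous to the loop that this
	 * patch adds to homa_offload_init().
	 */
	for_each_possible_cpu(cpu) {
		struct demo_offload_core *core =
				&per_cpu(demo_offload_core, cpu);

		core->last_active = 0;
		atomic_set(&core->softirq_backlog, 0);
	}
}

static void demo_touch_local(__u64 now)
{
	/* Fast path on the local CPU; equivalent to the patch's
	 * per_cpu(homa_offload_core, raw_smp_processor_id()) accesses.
	 */
	this_cpu_ptr(&demo_offload_core)->last_active = now;
}

One subtlety the sketch mirrors: per_cpu() with an explicit CPU index
works from any core (used above for initialization, and by the patch
when GRO inspects other cores' load), while this_cpu_ptr() is the
idiomatic accessor when a caller only needs its own core's copy.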