From 2ff453e3e59cf0f38fbf44458afb318d5e152055 Mon Sep 17 00:00:00 2001
From: John Ousterhout
Date: Thu, 3 Oct 2024 11:28:11 -0700
Subject: [PATCH] Extract homa_offload.h from homa_impl.h

Also create a new per-core struct homa_offload_core, and delete the
homa_core struct: it's no longer needed.
---
 homa_impl.h               | 101 ----------------------
 homa_incoming.c           |   8 +-
 homa_offload.c            |  82 +++++++++++-------
 homa_offload.h            |  94 +++++++++++++++++++++
 homa_plumbing.c           |  22 ++---
 homa_utils.c              |  49 -----------
 test/unit_homa_incoming.c |  17 ++--
 test/unit_homa_offload.c  | 172 ++++++++++++++++++++------------------
 test/utils.h              |   2 -
 9 files changed, 264 insertions(+), 283 deletions(-)
 create mode 100644 homa_offload.h

diff --git a/homa_impl.h b/homa_impl.h
index 43717e7..199b019 100644
--- a/homa_impl.h
+++ b/homa_impl.h
@@ -139,8 +139,6 @@ struct homa;
 /* Declarations used in this file, so they can't be made at the end. */
 extern void homa_throttle_lock_slow(struct homa *homa);
 
-extern struct homa_core *homa_cores[];
-
 #define sizeof32(type) ((int) (sizeof(type)))
 
 /** define CACHE_LINE_SIZE - The number of bytes in a cache line. */
@@ -876,88 +874,6 @@ struct homa {
 	 */
 	int temp[4];
 };
-
-/**
- * struct homa_core - Homa allocates one of these structures for each
- * core, to hold information that needs to be kept on a per-core basis.
- */
-struct homa_core {
-	/** NUMA-specific page pool from which to allocate skb pages. */
-	struct homa_page_pool *pool;
-
-	/**
-	 * @last_active: the last time (in get_cycle() units) that
-	 * there was system activity, such as NAPI or SoftIRQ, on this
-	 * core. Used for load balancing.
-	 */
-	__u64 last_active;
-
-	/**
-	 * @last_gro: the last time (in get_cycle() units) that
-	 * homa_gro_receive returned on this core. Used to determine
-	 * whether GRO is keeping a core busy.
-	 */
-	__u64 last_gro;
-
-	/**
-	 * @softirq_backlog: the number of batches of packets that have
-	 * been queued for SoftIRQ processing on this core but haven't
-	 * yet been processed.
-	 */
-	atomic_t softirq_backlog;
-
-	/**
-	 * @softirq_offset: used when rotating SoftIRQ assignment among
-	 * the next cores; contains an offset to add to the current core
-	 * to produce the core for SoftIRQ.
-	 */
-	int softirq_offset;
-
-	/**
-	 * @gen3_softirq_cores: when the Gen3 load balancer is in use,
-	 * GRO will arrange for SoftIRQ processing to occur on one of
-	 * these cores; -1 values are ignored (see balance.txt for more
-	 * on load balancing). This information is filled in via sysctl.
-	 */
-#define NUM_GEN3_SOFTIRQ_CORES 3
-	int gen3_softirq_cores[NUM_GEN3_SOFTIRQ_CORES];
-
-	/**
-	 * @last_app_active: the most recent time (get_cycles() units)
-	 * when an application was actively using Homa on this core (e.g.,
-	 * by sending or receiving messages). Used for load balancing
-	 * (see balance.txt).
-	 */
-	__u64 last_app_active;
-
-	/**
-	 * @held_skb: last packet buffer known to be available for
-	 * merging other packets into on this core (note: may not still
-	 * be available), or NULL if none.
-	 */
-	struct sk_buff *held_skb;
-
-	/**
-	 * @held_bucket: the index, within napi->gro_hash, of the list
-	 * containing @held_skb; undefined if @held_skb is NULL. Used to
-	 * verify that @held_skb is still available.
-	 */
-	int held_bucket;
-
-	/**
-	 * @thread: the most recent thread to invoke a Homa system call
-	 * on this core, or NULL if none.
-	 */
-	struct task_struct *thread;
-
-	/**
-	 * @syscall_end_time: the time, in get_cycle() units, when the last
-	 * Homa system call completed on this core. Meaningless if thread
-	 * is NULL.
-	 */
-	__u64 syscall_end_time;
-};
-
 /**
  * struct homa_skb_info - Additional information needed by Homa for each
  * outbound DATA packet. Space is allocated for this at the very end of the
@@ -1205,17 +1121,6 @@ extern void homa_gap_retry(struct homa_rpc *rpc);
 extern int homa_get_port(struct sock *sk, unsigned short snum);
 extern int homa_getsockopt(struct sock *sk, int level, int optname,
 		char __user *optval, int __user *option);
-extern int homa_gro_complete(struct sk_buff *skb, int thoff);
-extern void homa_gro_gen2(struct sk_buff *skb);
-extern void homa_gro_gen3(struct sk_buff *skb);
-extern void homa_gro_hook_tcp(void);
-extern void homa_gro_unhook_tcp(void);
-extern struct sk_buff
-		*homa_gro_receive(struct list_head *gro_list,
-		struct sk_buff *skb);
-extern struct sk_buff
-		*homa_gso_segment(struct sk_buff *skb,
-		netdev_features_t features);
 extern int homa_hash(struct sock *sk);
 extern enum hrtimer_restart homa_hrtimer(struct hrtimer *timer);
 
@@ -1235,8 +1140,6 @@ extern struct sk_buff *homa_new_data_packet(struct homa_rpc *rpc,
 		struct iov_iter *iter, int offset,
 		int length, int max_seg_data);
-extern int homa_offload_end(void);
-extern int homa_offload_init(void);
 extern void homa_outgoing_sysctl_changed(struct homa *homa);
 extern int homa_pacer_main(void *transportInfo);
 extern void homa_pacer_stop(struct homa *homa);
@@ -1264,7 +1167,6 @@ extern void homa_rpc_acked(struct homa_sock *hsk,
 		const struct in6_addr *saddr, struct homa_ack *ack);
 extern void homa_rpc_free(struct homa_rpc *rpc);
 extern void homa_rpc_handoff(struct homa_rpc *rpc);
-extern void homa_send_ipis(void);
 extern int homa_sendmsg(struct sock *sk, struct msghdr *msg, size_t len);
 extern int homa_setsockopt(struct sock *sk, int level, int optname,
 		sockptr_t __user optval, unsigned int optlen);
@@ -1276,9 +1178,6 @@ extern void homa_spin(int ns);
 extern char *homa_symbol_for_type(uint8_t type);
 extern int homa_sysctl_softirq_cores(struct ctl_table *table, int write,
 		void __user *buffer, size_t *lenp, loff_t *ppos);
-extern struct sk_buff
-		*homa_tcp_gro_receive(struct list_head *held_list,
-		struct sk_buff *skb);
 extern void homa_timer(struct homa *homa);
 extern int homa_timer_main(void *transportInfo);
 extern void homa_unhash(struct sock *sk);
diff --git a/homa_incoming.c b/homa_incoming.c
index 09b65fc..18e511d 100644
--- a/homa_incoming.c
+++ b/homa_incoming.c
@@ -6,6 +6,7 @@
 
 #include "homa_impl.h"
 #include "homa_grant.h"
+#include "homa_offload.h"
 #include "homa_peer.h"
 #include "homa_pool.h"
 
@@ -1267,7 +1268,7 @@ struct homa_rpc *homa_wait_for_message(struct homa_sock *hsk, int flags,
 			INC_METRIC(poll_cycles, now - poll_start);
 
 			/* Now it's time to sleep. */
-			homa_cores[interest.core]->last_app_active = now;
+			per_cpu(homa_offload_core, interest.core).last_app_active = now;
 			set_current_state(TASK_INTERRUPTIBLE);
 			rpc = (struct homa_rpc *) atomic_long_read(&interest.ready_rpc);
 			if (!rpc && !hsk->shutdown) {
@@ -1381,7 +1382,8 @@ struct homa_interest *homa_choose_interest(struct homa *homa,
 	list_for_each(pos, head) {
 		interest = (struct homa_interest *) (((char *) pos)
 				- offset);
-		if (homa_cores[interest->core]->last_active < busy_time) {
+		if (per_cpu(homa_offload_core, interest->core).last_active
+				< busy_time) {
 			if (backup != NULL)
 				INC_METRIC(handoffs_alt_thread, 1);
 			return interest;
@@ -1463,7 +1465,7 @@ void homa_rpc_handoff(struct homa_rpc *rpc)
 
 	/* Update the last_app_active time for the thread's core, so Homa
	 * will try to avoid doing any work there.
*/ - homa_cores[interest->core]->last_app_active = get_cycles(); + per_cpu(homa_offload_core, interest->core).last_app_active = get_cycles(); /* Clear the interest. This serves two purposes. First, it saves * the waking thread from acquiring the socket lock again, which diff --git a/homa_offload.c b/homa_offload.c index eb3353c..db813f7 100644 --- a/homa_offload.c +++ b/homa_offload.c @@ -5,6 +5,9 @@ */ #include "homa_impl.h" +#include "homa_offload.h" + +DEFINE_PER_CPU(struct homa_offload_core, homa_offload_core); #define CORES_TO_CHECK 4 @@ -38,6 +41,25 @@ static struct net_offload hook_tcp6_net_offload; */ int homa_offload_init(void) { + int i; + + for (i = 0; i < nr_cpu_ids; i++) { + struct homa_offload_core *offload_core; + int j; + + offload_core = &per_cpu(homa_offload_core, i); + offload_core->last_active = 0; + offload_core->last_gro = 0; + atomic_set(&offload_core->softirq_backlog, 0); + offload_core->softirq_offset = 0; + offload_core->gen3_softirq_cores[0] = i^1; + for (j = 1; j < NUM_GEN3_SOFTIRQ_CORES; j++) + offload_core->gen3_softirq_cores[j] = -1; + offload_core->last_app_active = 0; + offload_core->held_skb = NULL; + offload_core->held_bucket = 0; + } + int res1 = inet_add_offload(&homa_offload, IPPROTO_HOMA); int res2 = inet6_add_offload(&homa_offload, IPPROTO_HOMA); @@ -257,9 +279,10 @@ struct sk_buff *homa_gro_receive(struct list_head *held_list, */ struct sk_buff *held_skb; struct sk_buff *result = NULL; - struct homa_core *core = homa_cores[raw_smp_processor_id()]; + struct homa_offload_core *offload_core = &per_cpu(homa_offload_core, + raw_smp_processor_id()); __u64 now = get_cycles(); - int busy = (now - core->last_gro) < homa->gro_busy_cycles; + int busy = (now - offload_core->last_gro) < homa->gro_busy_cycles; __u32 hash; __u64 saved_softirq_metric, softirq_cycles; __u64 *softirq_cycles_metric; @@ -268,7 +291,7 @@ struct sk_buff *homa_gro_receive(struct list_head *held_list, int priority; __u32 saddr; - core->last_active = now; + offload_core->last_active = now; if (skb_is_ipv6(skb)) { priority = ipv6_hdr(skb)->priority; saddr = ntohl(ipv6_hdr(skb)->saddr.in6_u.u6_addr32[3]); @@ -324,7 +347,7 @@ struct sk_buff *homa_gro_receive(struct list_head *held_list, * core added a Homa packet (if there is such a list). */ hash = skb_get_hash_raw(skb) & (GRO_HASH_BUCKETS - 1); - if (core->held_skb) { + if (offload_core->held_skb) { /* Reverse-engineer the location of the napi_struct, so we * can verify that held_skb is still valid. */ @@ -333,18 +356,19 @@ struct sk_buff *homa_gro_receive(struct list_head *held_list, struct napi_struct *napi = container_of(gro_list, struct napi_struct, gro_hash[hash]); - /* Must verify that core->held_skb points to a packet on + /* Must verify that offload_core->held_skb points to a packet on * the list, and that the packet is a Homa packet. * homa_gro_complete isn't always invoked before removing - * packets from the list, so core->held_skb could be a + * packets from the list, so offload_core->held_skb could be a * dangling pointer (or the skb could have been reused for * some other protocol). 
*/ list_for_each_entry(held_skb, - &napi->gro_hash[core->held_bucket].list, list) { + &napi->gro_hash[offload_core->held_bucket].list, + list) { int protocol; - if (held_skb != core->held_skb) + if (held_skb != offload_core->held_skb) continue; if (skb_is_ipv6(held_skb)) protocol = ipv6_hdr(held_skb)->nexthdr; @@ -382,9 +406,9 @@ struct sk_buff *homa_gro_receive(struct list_head *held_list, homa_gro_complete(held_skb, 0); netif_receive_skb(held_skb); homa_send_ipis(); - napi->gro_hash[core->held_bucket].count--; - if (napi->gro_hash[core->held_bucket].count == 0) - __clear_bit(core->held_bucket, + napi->gro_hash[offload_core->held_bucket].count--; + if (napi->gro_hash[offload_core->held_bucket].count == 0) + __clear_bit(offload_core->held_bucket, &napi->gro_bitmask); result = ERR_PTR(-EINPROGRESS); } @@ -400,14 +424,14 @@ struct sk_buff *homa_gro_receive(struct list_head *held_list, * means we aren't heavily loaded; if batching does occur, * homa_gro_complete will pick a different core). */ - core->held_skb = skb; - core->held_bucket = hash; + offload_core->held_skb = skb; + offload_core->held_bucket = hash; if (likely(homa->gro_policy & HOMA_GRO_SAME_CORE)) homa_set_softirq_cpu(skb, raw_smp_processor_id()); done: homa_check_pacer(homa, 1); - core->last_gro = get_cycles(); + offload_core->last_gro = get_cycles(); return result; bypass: @@ -420,7 +444,7 @@ struct sk_buff *homa_gro_receive(struct list_head *held_list, softirq_cycles = *softirq_cycles_metric - saved_softirq_metric; *softirq_cycles_metric = saved_softirq_metric; INC_METRIC(bypass_softirq_cycles, softirq_cycles); - core->last_gro = get_cycles(); + offload_core->last_gro = get_cycles(); /* This return value indicates that we have freed skb. */ return ERR_PTR(-EINPROGRESS); @@ -448,16 +472,16 @@ void homa_gro_gen2(struct sk_buff *skb) int this_core = raw_smp_processor_id(); int candidate = this_core; __u64 now = get_cycles(); - struct homa_core *core; + struct homa_offload_core *offload_core; for (i = CORES_TO_CHECK; i > 0; i--) { candidate++; if (unlikely(candidate >= nr_cpu_ids)) candidate = 0; - core = homa_cores[candidate]; - if (atomic_read(&core->softirq_backlog) > 0) + offload_core = &per_cpu(homa_offload_core, candidate); + if (atomic_read(&offload_core->softirq_backlog) > 0) continue; - if ((core->last_gro + homa->busy_cycles) > now) + if ((offload_core->last_gro + homa->busy_cycles) > now) continue; tt_record3("homa_gro_gen2 chose core %d for id %d offset %d", candidate, homa_local_id(h->common.sender_id), @@ -468,12 +492,12 @@ void homa_gro_gen2(struct sk_buff *skb) /* All of the candidates appear to be busy; just * rotate among them. 
		 */
-		int offset = homa_cores[this_core]->softirq_offset;
+		int offset = per_cpu(homa_offload_core, this_core).softirq_offset;
 
 		offset += 1;
 		if (offset > CORES_TO_CHECK)
 			offset = 1;
-		homa_cores[this_core]->softirq_offset = offset;
+		per_cpu(homa_offload_core, this_core).softirq_offset = offset;
 		candidate = this_core + offset;
 		while (candidate >= nr_cpu_ids)
 			candidate -= nr_cpu_ids;
@@ -481,7 +505,7 @@
 				candidate, homa_local_id(h->common.sender_id),
 				ntohl(h->seg.offset));
 	}
-	atomic_inc(&homa_cores[candidate]->softirq_backlog);
+	atomic_inc(&per_cpu(homa_offload_core, candidate).softirq_backlog);
 	homa_set_softirq_cpu(skb, candidate);
 }
 
@@ -501,7 +525,8 @@ void homa_gro_gen3(struct sk_buff *skb)
 	struct data_header *h = (struct data_header *) skb_transport_header(skb);
 	int i, core;
 	__u64 now, busy_time;
-	int *candidates = homa_cores[raw_smp_processor_id()]->gen3_softirq_cores;
+	int *candidates = per_cpu(homa_offload_core, raw_smp_processor_id())
+			.gen3_softirq_cores;
 
 	now = get_cycles();
 	busy_time = now - homa->busy_cycles;
@@ -512,17 +537,18 @@
 		if (candidate < 0)
 			break;
 
-		if (homa_cores[candidate]->last_app_active < busy_time) {
+		if (per_cpu(homa_offload_core, candidate).last_app_active
+				< busy_time) {
 			core = candidate;
 			break;
 		}
 	}
 	homa_set_softirq_cpu(skb, core);
-	homa_cores[core]->last_active = now;
+	per_cpu(homa_offload_core, core).last_active = now;
 	tt_record4("homa_gro_gen3 chose core %d for id %d, offset %d, delta %d",
 			core, homa_local_id(h->common.sender_id),
 			ntohl(h->seg.offset),
-			now - homa_cores[core]->last_app_active);
+			now - per_cpu(homa_offload_core, core).last_app_active);
 	INC_METRIC(gen3_handoffs, 1);
 	if (core != candidates[0])
 		INC_METRIC(gen3_alt_handoffs, 1);
@@ -546,7 +572,7 @@ int homa_gro_complete(struct sk_buff *skb, int hoffset)
 //			ntohl(h->seg.offset),
 //			NAPI_GRO_CB(skb)->count);
 
-	homa_cores[raw_smp_processor_id()]->held_skb = NULL;
+	per_cpu(homa_offload_core, raw_smp_processor_id()).held_skb = NULL;
 	if (homa->gro_policy & HOMA_GRO_GEN3) {
 		homa_gro_gen3(skb);
 	} else if (homa->gro_policy & HOMA_GRO_GEN2) {
@@ -568,7 +594,7 @@ int homa_gro_complete(struct sk_buff *skb, int hoffset)
 			core++;
 			if (unlikely(core >= nr_cpu_ids))
 				core = 0;
-			last_active = homa_cores[core]->last_active;
+			last_active = per_cpu(homa_offload_core, core).last_active;
 			if (last_active < best_time) {
 				best_time = last_active;
 				best = core;
diff --git a/homa_offload.h b/homa_offload.h
new file mode 100644
index 0000000..c0f3c9b
--- /dev/null
+++ b/homa_offload.h
@@ -0,0 +1,94 @@
+/* SPDX-License-Identifier: BSD-2-Clause */
+
+/* This file contains definitions related to homa_offload.c. */
+
+#ifndef _HOMA_OFFLOAD_H
+#define _HOMA_OFFLOAD_H
+
+#include 
+
+/**
+ * struct homa_offload_core - Stores core-specific information used during
+ * GRO operations.
+ */
+struct homa_offload_core {
+	/**
+	 * @last_active: the last time (in get_cycle() units) that
+	 * there was system activity, such as NAPI or SoftIRQ, on this
+	 * core. Used for load balancing.
+	 */
+	__u64 last_active;
+
+	/**
+	 * @last_gro: the last time (in get_cycle() units) that
+	 * homa_gro_receive returned on this core. Used to determine
+	 * whether GRO is keeping a core busy.
+	 */
+	__u64 last_gro;
+
+	/**
+	 * @softirq_backlog: the number of batches of packets that have
+	 * been queued for SoftIRQ processing on this core but haven't
+	 * yet been processed.
+	 */
+	atomic_t softirq_backlog;
+
+	/**
+	 * @softirq_offset: used when rotating SoftIRQ assignment among
+	 * the next cores; contains an offset to add to the current core
+	 * to produce the core for SoftIRQ.
+	 */
+	int softirq_offset;
+
+	/**
+	 * @gen3_softirq_cores: when the Gen3 load balancer is in use,
+	 * GRO will arrange for SoftIRQ processing to occur on one of
+	 * these cores; -1 values are ignored (see balance.txt for more
+	 * on load balancing). This information is filled in via sysctl.
+	 */
+#define NUM_GEN3_SOFTIRQ_CORES 3
+	int gen3_softirq_cores[NUM_GEN3_SOFTIRQ_CORES];
+
+	/**
+	 * @last_app_active: the most recent time (get_cycles() units)
+	 * when an application was actively using Homa on this core (e.g.,
+	 * by sending or receiving messages). Used for load balancing
+	 * (see balance.txt).
+	 */
+	__u64 last_app_active;
+
+	/**
+	 * @held_skb: last packet buffer known to be available for
+	 * merging other packets into on this core (note: may not still
+	 * be available), or NULL if none.
+	 */
+	struct sk_buff *held_skb;
+
+	/**
+	 * @held_bucket: the index, within napi->gro_hash, of the list
+	 * containing @held_skb; undefined if @held_skb is NULL. Used to
+	 * verify that @held_skb is still available.
+	 */
+	int held_bucket;
+};
+DECLARE_PER_CPU(struct homa_offload_core, homa_offload_core);
+
+extern int homa_gro_complete(struct sk_buff *skb, int thoff);
+extern void homa_gro_gen2(struct sk_buff *skb);
+extern void homa_gro_gen3(struct sk_buff *skb);
+extern void homa_gro_hook_tcp(void);
+extern void homa_gro_unhook_tcp(void);
+extern struct sk_buff
+		*homa_gro_receive(struct list_head *gro_list,
+		struct sk_buff *skb);
+extern struct sk_buff
+		*homa_gso_segment(struct sk_buff *skb,
+		netdev_features_t features);
+extern int homa_offload_end(void);
+extern int homa_offload_init(void);
+extern void homa_send_ipis(void);
+extern struct sk_buff
+		*homa_tcp_gro_receive(struct list_head *held_list,
+		struct sk_buff *skb);
+
+#endif /* _HOMA_OFFLOAD_H */
diff --git a/homa_plumbing.c b/homa_plumbing.c
index e5b1673..bed2453 100644
--- a/homa_plumbing.c
+++ b/homa_plumbing.c
@@ -5,6 +5,7 @@
  */
 
 #include "homa_impl.h"
+#include "homa_offload.h"
 #include "homa_peer.h"
 #include "homa_pool.h"
 
@@ -889,14 +890,13 @@ int homa_sendmsg(struct sock *sk, struct msghdr *msg, size_t length)
 	struct homa_rpc *rpc = NULL;
 	union sockaddr_in_union *addr = (union sockaddr_in_union *) msg->msg_name;
 
-	homa_cores[raw_smp_processor_id()]->last_app_active = start;
+	per_cpu(homa_offload_core, raw_smp_processor_id()).last_app_active = start;
 	if (unlikely(!msg->msg_control_is_user)) {
 		tt_record("homa_sendmsg error: !msg->msg_control_is_user");
 		result = -EINVAL;
 		goto error;
 	}
-	if (unlikely(copy_from_user(&args, msg->msg_control,
-			sizeof(args)))) {
+	if (unlikely(copy_from_user(&args, msg->msg_control, sizeof(args)))) {
 		result = -EFAULT;
 		goto error;
 	}
@@ -1023,7 +1023,7 @@ int homa_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags,
 	int result;
 
 	INC_METRIC(recv_calls, 1);
-	homa_cores[raw_smp_processor_id()]->last_app_active = start;
+	per_cpu(homa_offload_core, raw_smp_processor_id()).last_app_active = start;
 	if (unlikely(!msg->msg_control)) {
 		/* This test isn't strictly necessary, but it provides a
		 * hook for testing kernel call times.
@@ -1245,7 +1245,7 @@ int homa_softirq(struct sk_buff *skb) start = get_cycles(); INC_METRIC(softirq_calls, 1); - homa_cores[raw_smp_processor_id()]->last_active = start; + per_cpu(homa_offload_core, raw_smp_processor_id()).last_active = start; if ((start - last) > 1000000) { int scaled_ms = (int) (10*(start-last)/cpu_khz); @@ -1393,7 +1393,7 @@ int homa_softirq(struct sk_buff *skb) packets = other_pkts; } - atomic_dec(&homa_cores[raw_smp_processor_id()]->softirq_backlog); + atomic_dec(&per_cpu(homa_offload_core, raw_smp_processor_id()).softirq_backlog); INC_METRIC(softirq_cycles, get_cycles() - start); return 0; } @@ -1621,7 +1621,7 @@ int homa_sysctl_softirq_cores(struct ctl_table *table, int write, { int result, i; struct ctl_table table_copy; - struct homa_core *core; + struct homa_offload_core *offload_core; int max_values, *values; max_values = (NUM_GEN3_SOFTIRQ_CORES + 1) * nr_cpu_ids; @@ -1647,9 +1647,9 @@ int homa_sysctl_softirq_cores(struct ctl_table *table, int write, if (values[i] < 0) break; - core = homa_cores[values[i]]; + offload_core = &per_cpu(homa_offload_core, values[i]); for (j = 0; j < NUM_GEN3_SOFTIRQ_CORES; j++) - core->gen3_softirq_cores[j] = values[i+j+1]; + offload_core->gen3_softirq_cores[j] = values[i+j+1]; } } else { /* Read: return values from all of the cores. */ @@ -1663,9 +1663,9 @@ int homa_sysctl_softirq_cores(struct ctl_table *table, int write, *dst = i; dst++; table_copy.maxlen += sizeof(int); - core = homa_cores[i]; + offload_core = &per_cpu(homa_offload_core, i); for (j = 0; j < NUM_GEN3_SOFTIRQ_CORES; j++) { - *dst = core->gen3_softirq_cores[j]; + *dst = offload_core->gen3_softirq_cores[j]; dst++; table_copy.maxlen += sizeof(int); } diff --git a/homa_utils.c b/homa_utils.c index 953dfe4..377411c 100644 --- a/homa_utils.c +++ b/homa_utils.c @@ -9,14 +9,6 @@ #include "homa_rpc.h" #include "homa_skb.h" -/* Core-specific information. NR_CPUS is an overestimate of the actual - * number, but allows us to allocate the array statically. - */ -struct homa_core *homa_cores[NR_CPUS]; - -/* Points to block of memory holding all homa_cores; used to free it. */ -char *core_memory; - struct completion homa_pacer_kthread_done; /** @@ -29,43 +21,11 @@ struct completion homa_pacer_kthread_done; */ int homa_init(struct homa *homa) { - size_t aligned_size; - char *first; int i, err; _Static_assert(HOMA_MAX_PRIORITIES >= 8, "homa_init assumes at least 8 priority levels"); - /* Initialize core-specific info (if no-one else has already done it), - * making sure that each core has private cache lines. 
- */ - if (!core_memory) { - aligned_size = (sizeof(struct homa_core) + 0x3f) & ~0x3f; - core_memory = vmalloc(0x3f + (nr_cpu_ids*aligned_size)); - if (!core_memory) { - pr_err("Homa couldn't allocate memory for core-specific data\n"); - return -ENOMEM; - } - first = (char *) (((__u64) core_memory + 0x3f) & ~0x3f); - for (i = 0; i < nr_cpu_ids; i++) { - struct homa_core *core; - int j; - - core = (struct homa_core *) (first + i*aligned_size); - homa_cores[i] = core; - core->last_active = 0; - core->last_gro = 0; - atomic_set(&core->softirq_backlog, 0); - core->softirq_offset = 0; - core->gen3_softirq_cores[0] = i^1; - for (j = 1; j < NUM_GEN3_SOFTIRQ_CORES; j++) - core->gen3_softirq_cores[j] = -1; - core->last_app_active = 0; - core->held_skb = NULL; - core->held_bucket = 0; - } - } - homa->pacer_kthread = NULL; init_completion(&homa_pacer_kthread_done); atomic64_set(&homa->next_outgoing_id, 2); @@ -180,8 +140,6 @@ int homa_init(struct homa *homa) */ void homa_destroy(struct homa *homa) { - int i; - if (homa->pacer_kthread) { homa_pacer_stop(homa); wait_for_completion(&homa_pacer_kthread_done); @@ -193,13 +151,6 @@ void homa_destroy(struct homa *homa) homa_peertab_destroy(homa->peers); kfree(homa->peers); homa_skb_cleanup(homa); - - if (core_memory) { - vfree(core_memory); - core_memory = NULL; - for (i = 0; i < nr_cpu_ids; i++) - homa_cores[i] = NULL; - } kfree(homa->metrics); } diff --git a/test/unit_homa_incoming.c b/test/unit_homa_incoming.c index ccdfd31..22754cd 100644 --- a/test/unit_homa_incoming.c +++ b/test/unit_homa_incoming.c @@ -3,6 +3,7 @@ */ #include "homa_impl.h" +#include "homa_offload.h" #include "homa_peer.h" #include "homa_pool.h" #define KSELFTEST_NOT_MAIN 1 @@ -2387,9 +2388,9 @@ TEST_F(homa_incoming, homa_choose_interest__find_idle_core) mock_cycles = 5000; self->homa.busy_cycles = 1000; - homa_cores[1]->last_active = 4100; - homa_cores[2]->last_active = 3500; - homa_cores[3]->last_active = 2000; + per_cpu(homa_offload_core, 1).last_active = 4100; + per_cpu(homa_offload_core, 2).last_active = 3500; + per_cpu(homa_offload_core, 3).last_active = 2000; struct homa_interest *result = homa_choose_interest(&self->homa, &self->hsk.request_interests, @@ -2413,9 +2414,9 @@ TEST_F(homa_incoming, homa_choose_interest__all_cores_busy) mock_cycles = 5000; self->homa.busy_cycles = 1000; - homa_cores[1]->last_active = 4100; - homa_cores[2]->last_active = 4001; - homa_cores[3]->last_active = 4800; + per_cpu(homa_offload_core, 1).last_active = 4100; + per_cpu(homa_offload_core, 2).last_active = 4001; + per_cpu(homa_offload_core, 3).last_active = 4800; struct homa_interest *result = homa_choose_interest(&self->homa, &self->hsk.request_interests, @@ -2607,10 +2608,10 @@ TEST_F(homa_incoming, homa_rpc_handoff__update_last_app_active) interest.core = 2; crpc->interest = &interest; mock_cycles = 10000; - homa_cores[2]->last_app_active = 444; + per_cpu(homa_offload_core, 2).last_app_active = 444; homa_rpc_handoff(crpc); EXPECT_STREQ("wake_up_process pid 0", unit_log_get()); - EXPECT_EQ(10000, homa_cores[2]->last_app_active); + EXPECT_EQ(10000, per_cpu(homa_offload_core, 2).last_app_active); atomic_andnot(RPC_HANDING_OFF, &crpc->flags); } diff --git a/test/unit_homa_offload.c b/test/unit_homa_offload.c index 3531fb3..9691bed 100644 --- a/test/unit_homa_offload.c +++ b/test/unit_homa_offload.c @@ -3,6 +3,7 @@ */ #include "homa_impl.h" +#include "homa_offload.h" #include "homa_rpc.h" #define KSELFTEST_NOT_MAIN 1 #include "kselftest_harness.h" @@ -10,6 +11,8 @@ #include "mock.h" #include 
"utils.h" +#define cur_offload_core (&per_cpu(homa_offload_core, raw_smp_processor_id())) + extern struct homa *homa; static struct sk_buff *tcp_gro_receive(struct list_head *held_list, @@ -82,18 +85,20 @@ FIXTURE_SETUP(homa_offload) inet_offloads[IPPROTO_TCP] = &self->tcp_offloads; self->tcp6_offloads.callbacks.gro_receive = tcp6_gro_receive; inet6_offloads[IPPROTO_TCP] = &self->tcp6_offloads; + homa_offload_init(); unit_log_clear(); /* Configure so core isn't considered too busy for bypasses. */ mock_cycles = 1000; self->homa.gro_busy_cycles = 500; - cur_core->last_gro = 400; + cur_offload_core->last_gro = 400; } FIXTURE_TEARDOWN(homa_offload) { struct sk_buff *skb, *tmp; + homa_offload_end(); list_for_each_entry_safe(skb, tmp, &self->napi.gro_hash[2].list, list) kfree_skb(skb); homa_destroy(&self->homa); @@ -160,10 +165,10 @@ TEST_F(homa_offload, homa_tcp_gro_receive__pass_to_homa_ipv6) h->flags = HOMA_TCP_FLAGS; h->urgent = htons(HOMA_TCP_URGENT); NAPI_GRO_CB(skb)->same_flow = 0; - cur_core->held_skb = NULL; - cur_core->held_bucket = 99; + cur_offload_core->held_skb = NULL; + cur_offload_core->held_bucket = 99; EXPECT_EQ(NULL, homa_tcp_gro_receive(&self->empty_list, skb)); - EXPECT_EQ(skb, cur_core->held_skb); + EXPECT_EQ(skb, cur_offload_core->held_skb); EXPECT_STREQ("", unit_log_get()); EXPECT_EQ(IPPROTO_HOMA, ipv6_hdr(skb)->nexthdr); kfree_skb(skb); @@ -182,10 +187,10 @@ TEST_F(homa_offload, homa_tcp_gro_receive__pass_to_homa_ipv4) h->flags = HOMA_TCP_FLAGS; h->urgent = htons(HOMA_TCP_URGENT); NAPI_GRO_CB(skb)->same_flow = 0; - cur_core->held_skb = NULL; - cur_core->held_bucket = 99; + cur_offload_core->held_skb = NULL; + cur_offload_core->held_bucket = 99; EXPECT_EQ(NULL, homa_tcp_gro_receive(&self->empty_list, skb)); - EXPECT_EQ(skb, cur_core->held_skb); + EXPECT_EQ(skb, cur_offload_core->held_skb); EXPECT_STREQ("", unit_log_get()); EXPECT_EQ(IPPROTO_HOMA, ip_hdr(skb)->protocol); EXPECT_EQ(2303, ip_hdr(skb)->check); @@ -221,8 +226,8 @@ TEST_F(homa_offload, homa_gro_receive__update_offset_from_sequence) self->header.seg.offset = -1; skb = mock_skb_new(&self->ip, &self->header.common, 1400, 0); NAPI_GRO_CB(skb)->same_flow = 0; - cur_core->held_skb = NULL; - cur_core->held_bucket = 99; + cur_offload_core->held_skb = NULL; + cur_offload_core->held_bucket = 99; EXPECT_EQ(NULL, homa_gro_receive(&self->empty_list, skb)); h = (struct data_header *) skb_transport_header(skb); EXPECT_EQ(6000, htonl(h->seg.offset)); @@ -274,7 +279,7 @@ TEST_F(homa_offload, homa_gro_receive__HOMA_GRO_SHORT_BYPASS) * than one packet. */ self->homa.gro_policy |= HOMA_GRO_SHORT_BYPASS; - cur_core->last_gro = 400; + cur_offload_core->last_gro = 400; skb2 = mock_skb_new(&self->ip, &h.common, 1400, 2000); result = homa_gro_receive(&self->empty_list, skb2); EXPECT_EQ(0, -PTR_ERR(result)); @@ -283,14 +288,14 @@ TEST_F(homa_offload, homa_gro_receive__HOMA_GRO_SHORT_BYPASS) /* Third attempt: bypass should happen. */ h.message_length = htonl(1400); h.incoming = htonl(1400); - cur_core->last_gro = 400; + cur_offload_core->last_gro = 400; skb3 = mock_skb_new(&self->ip, &h.common, 1400, 4000); result = homa_gro_receive(&self->empty_list, skb3); EXPECT_EQ(EINPROGRESS, -PTR_ERR(result)); EXPECT_EQ(1, homa_metrics_per_cpu()->gro_data_bypasses); /* Third attempt: no bypass because core busy. 
*/ - cur_core->last_gro = 600; + cur_offload_core->last_gro = 600; skb4 = mock_skb_new(&self->ip, &h.common, 1400, 4000); result = homa_gro_receive(&self->empty_list, skb3); EXPECT_EQ(0, -PTR_ERR(result)); @@ -332,7 +337,7 @@ TEST_F(homa_offload, homa_gro_receive__fast_grant_optimization) /* Second attempt: HOMA_FAST_GRANTS is enabled. */ self->homa.gro_policy = HOMA_GRO_FAST_GRANTS; - cur_core->last_gro = 400; + cur_offload_core->last_gro = 400; struct sk_buff *skb2 = mock_skb_new(&client_ip, &h.common, 0, 0); result = homa_gro_receive(&self->empty_list, skb2); EXPECT_EQ(EINPROGRESS, -PTR_ERR(result)); @@ -340,7 +345,7 @@ TEST_F(homa_offload, homa_gro_receive__fast_grant_optimization) EXPECT_SUBSTR("xmit DATA 1400@10000", unit_log_get()); /* Third attempt: core is too busy for fast grants. */ - cur_core->last_gro = 600; + cur_offload_core->last_gro = 600; struct sk_buff *skb3 = mock_skb_new(&client_ip, &h.common, 0, 0); result = homa_gro_receive(&self->empty_list, skb3); EXPECT_EQ(0, -PTR_ERR(result)); @@ -356,13 +361,13 @@ TEST_F(homa_offload, homa_gro_receive__no_held_skb) skb = mock_skb_new(&self->ip, &self->header.common, 1400, 0); skb->hash = 2; NAPI_GRO_CB(skb)->same_flow = 0; - cur_core->held_skb = NULL; - cur_core->held_bucket = 2; + cur_offload_core->held_skb = NULL; + cur_offload_core->held_bucket = 2; EXPECT_EQ(NULL, homa_gro_receive(&self->napi.gro_hash[2].list, skb)); same_flow = NAPI_GRO_CB(skb)->same_flow; EXPECT_EQ(0, same_flow); - EXPECT_EQ(skb, cur_core->held_skb); - EXPECT_EQ(2, cur_core->held_bucket); + EXPECT_EQ(skb, cur_offload_core->held_skb); + EXPECT_EQ(2, cur_offload_core->held_bucket); kfree_skb(skb); } TEST_F(homa_offload, homa_gro_receive__empty_merge_list) @@ -373,13 +378,13 @@ TEST_F(homa_offload, homa_gro_receive__empty_merge_list) skb = mock_skb_new(&self->ip, &self->header.common, 1400, 0); skb->hash = 2; NAPI_GRO_CB(skb)->same_flow = 0; - cur_core->held_skb = self->skb; - cur_core->held_bucket = 3; + cur_offload_core->held_skb = self->skb; + cur_offload_core->held_bucket = 3; EXPECT_EQ(NULL, homa_gro_receive(&self->napi.gro_hash[2].list, skb)); same_flow = NAPI_GRO_CB(skb)->same_flow; EXPECT_EQ(0, same_flow); - EXPECT_EQ(skb, cur_core->held_skb); - EXPECT_EQ(2, cur_core->held_bucket); + EXPECT_EQ(skb, cur_offload_core->held_skb); + EXPECT_EQ(2, cur_offload_core->held_bucket); kfree_skb(skb); } TEST_F(homa_offload, homa_gro_receive__held_skb_not_in_merge_list) @@ -390,13 +395,13 @@ TEST_F(homa_offload, homa_gro_receive__held_skb_not_in_merge_list) skb = mock_skb_new(&self->ip, &self->header.common, 1400, 0); skb->hash = 3; NAPI_GRO_CB(skb)->same_flow = 0; - cur_core->held_skb = skb; - cur_core->held_bucket = 2; + cur_offload_core->held_skb = skb; + cur_offload_core->held_bucket = 2; EXPECT_EQ(NULL, homa_gro_receive(&self->napi.gro_hash[3].list, skb)); same_flow = NAPI_GRO_CB(skb)->same_flow; EXPECT_EQ(0, same_flow); - EXPECT_EQ(skb, cur_core->held_skb); - EXPECT_EQ(3, cur_core->held_bucket); + EXPECT_EQ(skb, cur_offload_core->held_skb); + EXPECT_EQ(3, cur_offload_core->held_bucket); kfree_skb(skb); } TEST_F(homa_offload, homa_gro_receive__held_skb__in_merge_list_but_wrong_proto) @@ -407,25 +412,25 @@ TEST_F(homa_offload, homa_gro_receive__held_skb__in_merge_list_but_wrong_proto) skb = mock_skb_new(&self->ip, &self->header.common, 1400, 0); skb->hash = 3; NAPI_GRO_CB(skb)->same_flow = 0; - cur_core->held_skb = self->skb; + cur_offload_core->held_skb = self->skb; if (skb_is_ipv6(self->skb)) ipv6_hdr(self->skb)->nexthdr = IPPROTO_TCP; else 
ip_hdr(self->skb)->protocol = IPPROTO_TCP; - cur_core->held_bucket = 2; + cur_offload_core->held_bucket = 2; EXPECT_EQ(NULL, homa_gro_receive(&self->napi.gro_hash[3].list, skb)); same_flow = NAPI_GRO_CB(skb)->same_flow; EXPECT_EQ(0, same_flow); - EXPECT_EQ(skb, cur_core->held_skb); - EXPECT_EQ(3, cur_core->held_bucket); + EXPECT_EQ(skb, cur_offload_core->held_skb); + EXPECT_EQ(3, cur_offload_core->held_bucket); kfree_skb(skb); } TEST_F(homa_offload, homa_gro_receive__merge) { struct sk_buff *skb, *skb2; int same_flow; - cur_core->held_skb = self->skb2; - cur_core->held_bucket = 2; + cur_offload_core->held_skb = self->skb2; + cur_offload_core->held_bucket = 2; self->header.seg.offset = htonl(6000); self->header.common.sender_id = cpu_to_be64(1002); @@ -460,8 +465,8 @@ TEST_F(homa_offload, homa_gro_receive__max_gro_skbs) // First packet: fits below the limit. homa->max_gro_skbs = 3; - cur_core->held_skb = self->skb2; - cur_core->held_bucket = 2; + cur_offload_core->held_skb = self->skb2; + cur_offload_core->held_bucket = 2; self->header.seg.offset = htonl(6000); skb = mock_skb_new(&self->ip, &self->header.common, 1400, 0); homa_gro_receive(&self->napi.gro_hash[3].list, skb); @@ -485,7 +490,7 @@ TEST_F(homa_offload, homa_gro_receive__max_gro_skbs) // Third packet also hits the limit for skb, causing the bucket // to become empty. homa->max_gro_skbs = 2; - cur_core->held_skb = self->skb; + cur_offload_core->held_skb = self->skb; skb = mock_skb_new(&self->ip, &self->header.common, 1400, 0); unit_log_clear(); EXPECT_EQ(EINPROGRESS, -PTR_ERR(homa_gro_receive( @@ -504,112 +509,117 @@ TEST_F(homa_offload, homa_gro_gen2) mock_cycles = 1000; homa->busy_cycles = 100; mock_set_core(5); - atomic_set(&homa_cores[6]->softirq_backlog, 1); - homa_cores[6]->last_gro = 0; - atomic_set(&homa_cores[7]->softirq_backlog, 0); - homa_cores[7]->last_gro = 901; - atomic_set(&homa_cores[0]->softirq_backlog, 2); - homa_cores[0]->last_gro = 0; - atomic_set(&homa_cores[1]->softirq_backlog, 0); - homa_cores[1]->last_gro = 899; - atomic_set(&homa_cores[2]->softirq_backlog, 0); - homa_cores[2]->last_gro = 0; + atomic_set(&per_cpu(homa_offload_core, 6).softirq_backlog, 1); + per_cpu(homa_offload_core, 6).last_gro = 0; + atomic_set(&per_cpu(homa_offload_core, 7).softirq_backlog, 0); + per_cpu(homa_offload_core, 7).last_gro = 901; + atomic_set(&per_cpu(homa_offload_core, 0).softirq_backlog, 2); + per_cpu(homa_offload_core, 0).last_gro = 0; + atomic_set(&per_cpu(homa_offload_core, 1).softirq_backlog, 0); + per_cpu(homa_offload_core, 1).last_gro = 899; + atomic_set(&per_cpu(homa_offload_core, 2).softirq_backlog, 0); + per_cpu(homa_offload_core, 2).last_gro = 0; // Avoid busy cores. homa_gro_complete(self->skb, 0); EXPECT_EQ(1, self->skb->hash - 32); - EXPECT_EQ(1, atomic_read(&homa_cores[1]->softirq_backlog)); + EXPECT_EQ(1, atomic_read(&per_cpu(homa_offload_core, 1).softirq_backlog)); // All cores busy; must rotate. 
homa_gro_complete(self->skb, 0); EXPECT_EQ(6, self->skb->hash - 32); homa_gro_complete(self->skb, 0); EXPECT_EQ(7, self->skb->hash - 32); - EXPECT_EQ(2, homa_cores[5]->softirq_offset); + EXPECT_EQ(2, per_cpu(homa_offload_core, 5).softirq_offset); homa_gro_complete(self->skb, 0); EXPECT_EQ(0, self->skb->hash - 32); homa_gro_complete(self->skb, 0); EXPECT_EQ(1, self->skb->hash - 32); homa_gro_complete(self->skb, 0); EXPECT_EQ(6, self->skb->hash - 32); - EXPECT_EQ(1, homa_cores[5]->softirq_offset); + EXPECT_EQ(1, per_cpu(homa_offload_core, 5).softirq_offset); } TEST_F(homa_offload, homa_gro_gen3__basics) { + struct homa_offload_core *offload_core = cur_offload_core; + struct homa_offload_core *offload3 = &per_cpu(homa_offload_core, 3); + struct homa_offload_core *offload5 = &per_cpu(homa_offload_core, 5); + struct homa_offload_core *offload7 = &per_cpu(homa_offload_core, 7); + homa->gro_policy = HOMA_GRO_GEN3; - struct homa_core *core = cur_core; - core->gen3_softirq_cores[0] = 3; - core->gen3_softirq_cores[1] = 7; - core->gen3_softirq_cores[2] = 5; - homa_cores[3]->last_app_active = 4100; - homa_cores[7]->last_app_active = 3900; - homa_cores[5]->last_app_active = 2000; + offload_core->gen3_softirq_cores[0] = 3; + offload_core->gen3_softirq_cores[1] = 7; + offload_core->gen3_softirq_cores[2] = 5; + offload3->last_app_active = 4100; + offload7->last_app_active = 3900; + offload5->last_app_active = 2000; mock_cycles = 5000; self->homa.busy_cycles = 1000; homa_gro_complete(self->skb, 0); EXPECT_EQ(7, self->skb->hash - 32); - EXPECT_EQ(0, homa_cores[3]->last_active); - EXPECT_EQ(5000, homa_cores[7]->last_active); + EXPECT_EQ(0, offload3->last_active); + EXPECT_EQ(5000, offload7->last_active); } TEST_F(homa_offload, homa_gro_gen3__stop_on_negative_core_id) { homa->gro_policy = HOMA_GRO_GEN3; - struct homa_core *core = cur_core; - core->gen3_softirq_cores[0] = 3; - core->gen3_softirq_cores[1] = -1; - core->gen3_softirq_cores[2] = 5; - homa_cores[3]->last_app_active = 4100; - homa_cores[5]->last_app_active = 2000; + struct homa_offload_core *offload_core = cur_offload_core; + offload_core->gen3_softirq_cores[0] = 3; + offload_core->gen3_softirq_cores[1] = -1; + offload_core->gen3_softirq_cores[2] = 5; + per_cpu(homa_offload_core, 3).last_app_active = 4100; + per_cpu(homa_offload_core, 5).last_app_active = 2000; mock_cycles = 5000; self->homa.busy_cycles = 1000; homa_gro_complete(self->skb, 0); EXPECT_EQ(3, self->skb->hash - 32); - EXPECT_EQ(5000, homa_cores[3]->last_active); + EXPECT_EQ(5000, per_cpu(homa_offload_core, 3).last_active); } TEST_F(homa_offload, homa_gro_gen3__all_cores_busy_so_pick_first) { homa->gro_policy = HOMA_GRO_GEN3; - struct homa_core *core = cur_core; - core->gen3_softirq_cores[0] = 3; - core->gen3_softirq_cores[1] = 7; - core->gen3_softirq_cores[2] = 5; - homa_cores[3]->last_app_active = 4100; - homa_cores[7]->last_app_active = 4001; - homa_cores[5]->last_app_active = 4500; + struct homa_offload_core *offload_core = cur_offload_core; + offload_core->gen3_softirq_cores[0] = 3; + offload_core->gen3_softirq_cores[1] = 7; + offload_core->gen3_softirq_cores[2] = 5; + per_cpu(homa_offload_core, 3).last_app_active = 4100; + per_cpu(homa_offload_core, 7).last_app_active = 4001; + per_cpu(homa_offload_core, 5).last_app_active = 4500; mock_cycles = 5000; self->homa.busy_cycles = 1000; homa_gro_complete(self->skb, 0); EXPECT_EQ(3, self->skb->hash - 32); - EXPECT_EQ(5000, homa_cores[3]->last_active); + EXPECT_EQ(5000, per_cpu(homa_offload_core, 3).last_active); } TEST_F(homa_offload, 
homa_gro_complete__clear_held_skb) { - struct homa_core *core = homa_cores[raw_smp_processor_id()]; + struct homa_offload_core *offload_core = &per_cpu(homa_offload_core, + raw_smp_processor_id()); - core->held_skb = self->skb2; + offload_core->held_skb = self->skb2; homa_gro_complete(self->skb, 0); - EXPECT_EQ(NULL, core->held_skb); + EXPECT_EQ(NULL, offload_core->held_skb); } TEST_F(homa_offload, homa_gro_complete__GRO_IDLE) { homa->gro_policy = HOMA_GRO_IDLE; - homa_cores[6]->last_active = 30; - homa_cores[7]->last_active = 25; - homa_cores[0]->last_active = 20; - homa_cores[1]->last_active = 15; - homa_cores[2]->last_active = 10; + per_cpu(homa_offload_core, 6).last_active = 30; + per_cpu(homa_offload_core, 7).last_active = 25; + per_cpu(homa_offload_core, 0).last_active = 20; + per_cpu(homa_offload_core, 1).last_active = 15; + per_cpu(homa_offload_core, 2).last_active = 10; mock_set_core(5); homa_gro_complete(self->skb, 0); EXPECT_EQ(1, self->skb->hash - 32); - homa_cores[6]->last_active = 5; + per_cpu(homa_offload_core, 6).last_active = 5; mock_set_core(5); homa_gro_complete(self->skb, 0); EXPECT_EQ(6, self->skb->hash - 32); diff --git a/test/utils.h b/test/utils.h index 5825bbc..dd741f6 100644 --- a/test/utils.h +++ b/test/utils.h @@ -32,8 +32,6 @@ enum unit_rpc_state { UNIT_IN_SERVICE = 24, }; -#define cur_core homa_cores[raw_smp_processor_id()] - extern char *unit_ack_string(struct homa_ack *ack); extern struct homa_rpc *unit_client_rpc(struct homa_sock *hsk,
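---
Note (not part of the patch): the commit above replaces the vmalloc'ed,
manually cache-aligned homa_cores[] array with the kernel's per-CPU
variable mechanism. For readers unfamiliar with that API, here is a
minimal, self-contained sketch of the same pattern. The demo_* names are
hypothetical; only the kernel APIs (DEFINE_PER_CPU, per_cpu,
this_cpu_ptr, for_each_possible_cpu, raw_smp_processor_id) are real.

#include <linux/atomic.h>
#include <linux/cpumask.h>
#include <linux/percpu.h>
#include <linux/smp.h>
#include <linux/types.h>

/* Stand-in for struct homa_offload_core. */
struct demo_offload_core {
	__u64 last_active;
	atomic_t softirq_backlog;
};

/* One instance per CPU. Each copy lives in that CPU's per-CPU area, so
 * the manual "(size + 0x3f) & ~0x3f" cache-line rounding that the old
 * homa_init() code performed is unnecessary, and there is nothing to
 * vfree() at shutdown (hence the deletions in homa_destroy()).
 */
static DEFINE_PER_CPU(struct demo_offload_core, demo_offload_core);

static void demo_init_all(void)
{
	int cpu;

	/* Cross-CPU initialization, analogous to the loop that this
	 * patch adds to homa_offload_init().
	 */
	for_each_possible_cpu(cpu) {
		struct demo_offload_core *core =
				&per_cpu(demo_offload_core, cpu);

		core->last_active = 0;
		atomic_set(&core->softirq_backlog, 0);
	}
}

static void demo_touch_local(__u64 now)
{
	/* Fast path on the local CPU; equivalent to the patch's
	 * per_cpu(homa_offload_core, raw_smp_processor_id()) accesses.
	 */
	this_cpu_ptr(&demo_offload_core)->last_active = now;
}

One subtlety the sketch mirrors: per_cpu() with an explicit CPU index
works from any core (used above for initialization, and by the patch
when GRO inspects other cores' load), while this_cpu_ptr() is the
idiomatic accessor when a caller only needs its own core's copy.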