From 3a1961981acd4575ea890263f97c67f36fdd6635 Mon Sep 17 00:00:00 2001 From: John Ousterhout Date: Fri, 27 Sep 2024 11:51:57 -0700 Subject: [PATCH] Extract homa_metrics.h from homa_impl.h, create homa_metrics.c --- Makefile | 1 + homa_impl.h | 686 +------------------------------------ homa_incoming.c | 1 - homa_metrics.c | 427 +++++++++++++++++++++++ homa_metrics.h | 692 ++++++++++++++++++++++++++++++++++++++ homa_offload.c | 8 +- homa_plumbing.c | 83 ----- homa_timer.c | 2 +- homa_utils.c | 441 ------------------------ homa_wire.h | 2 + test/Makefile | 2 + test/mock.c | 2 + test/unit_homa_grant.c | 22 +- test/unit_homa_incoming.c | 46 +-- test/unit_homa_metrics.c | 98 ++++++ test/unit_homa_offload.c | 14 +- test/unit_homa_outgoing.c | 26 +- test/unit_homa_peertab.c | 16 +- test/unit_homa_plumbing.c | 60 +--- test/unit_homa_pool.c | 4 +- test/unit_homa_skb.c | 8 +- test/unit_homa_socktab.c | 8 +- test/unit_homa_timer.c | 6 +- test/unit_homa_utils.c | 36 +- test/utils.h | 2 - timetrace.c | 3 +- 26 files changed, 1331 insertions(+), 1365 deletions(-) create mode 100644 homa_metrics.c create mode 100644 homa_metrics.h create mode 100644 test/unit_homa_metrics.c diff --git a/Makefile b/Makefile index 5af80ea..fb277f1 100644 --- a/Makefile +++ b/Makefile @@ -5,6 +5,7 @@ ifneq ($(KERNELRELEASE),) obj-m += homa.o homa-y = homa_grant.o \ homa_incoming.o \ + homa_metrics.o \ homa_offload.o \ homa_outgoing.o \ homa_peertab.o \ diff --git a/homa_impl.h b/homa_impl.h index e91aaa7..856279f 100644 --- a/homa_impl.h +++ b/homa_impl.h @@ -48,8 +48,6 @@ #pragma GCC diagnostic warning "-Wpointer-sign" #pragma GCC diagnostic warning "-Wunused-variable" -#include "homa_wire.h" - #ifdef __UNIT_TEST__ #undef alloc_pages #define alloc_pages mock_alloc_pages @@ -115,6 +113,15 @@ extern void mock_spin_unlock(spinlock_t *lock); #undef vmalloc #define vmalloc mock_vmalloc extern void *mock_vmalloc(size_t size); + +#undef DECLARE_PER_CPU +#define DECLARE_PER_CPU(type, name) extern type 
name[10]; + +#undef DEFINE_PER_CPU +#define DEFINE_PER_CPU(type, name) type name[10]; + +#undef per_cpu +#define per_cpu(name, core) (name[core]) #endif /* __UNIT_TEST__ */ /* Null out things that confuse VSCode Intellisense */ @@ -125,9 +132,6 @@ extern void *mock_vmalloc(size_t size); #define set_current_state(...) #endif -#include "homa.h" -#include "timetrace.h" - /* Forward declarations. */ struct homa_sock; struct homa_rpc; @@ -135,6 +139,11 @@ struct homa_rpc_bucket; struct homa; struct homa_peer; +#include "homa.h" +#include "timetrace.h" +#include "homa_wire.h" +#include "homa_metrics.h" + /* Declarations used in this file, so they can't be made at the end. */ extern void homa_bucket_lock_slow(struct homa_rpc_bucket *bucket, __u64 id); extern int homa_grantable_lock_slow(struct homa *homa, int recalc); @@ -1870,657 +1879,6 @@ struct homa { int temp[4]; }; -/** - * struct homa_metrics - various performance counters kept by Homa. - * - * There is one of these structures for each core, so counters can - * be updated without worrying about synchronization or extra cache - * misses. This isn't quite perfect (it's conceivable that a process - * could move from one CPU to another in the middle of updating a counter), - * but this is unlikely, and we can tolerate the occasional miscounts - * that might result. - * - * All counters are free-running: they never reset. - */ -#define HOMA_NUM_SMALL_COUNTS 64 -#define HOMA_NUM_MEDIUM_COUNTS 128 -struct homa_metrics { - /** - * @small_msg_bytes: entry i holds the total number of bytes - * received in messages whose length is between 64*i and 64*i + 63, - * inclusive. - */ - __u64 small_msg_bytes[HOMA_NUM_SMALL_COUNTS]; - - /** - * @medium_msg_bytes: entry i holds the total number of bytes - * received in messages whose length is between 1024*i and - * 1024*i + 1023, inclusive. The first four entries are always 0 - * (small_msg_counts covers this range). 
- */ - __u64 medium_msg_bytes[HOMA_NUM_MEDIUM_COUNTS]; - - /** - * @large_msg_count: the total number of messages received whose - * length is too large to appear in medium_msg_bytes. - */ - __u64 large_msg_count; - - /** - * @large_msg_bytes: the total number of bytes received in - * messages too large to be counted by medium_msg_bytes. - */ - __u64 large_msg_bytes; - - /** - * @sent_msg_bytes: The total number of bytes in outbound - * messages. - */ - __u64 sent_msg_bytes; - - /** - * @packets_sent: total number of packets sent for each packet type - * (entry 0 corresponds to DATA, and so on). - */ - __u64 packets_sent[BOGUS-DATA]; - - /** - * @packets_received: total number of packets received for each - * packet type (entry 0 corresponds to DATA, and so on). - */ - __u64 packets_received[BOGUS-DATA]; - - /** @priority_bytes: total bytes sent at each priority level. */ - __u64 priority_bytes[HOMA_MAX_PRIORITIES]; - - /** @priority_packets: total packets sent at each priority level. */ - __u64 priority_packets[HOMA_MAX_PRIORITIES]; - - /** - * @skb_allocs: total number of calls to homa_skb_new_tx. - */ - __u64 skb_allocs; - - /** - * @skb_alloc_cycles: total time spent in homa_skb_new_tx, as - * measured with get_cycles(). - */ - __u64 skb_alloc_cycles; - - /** - * @skb_frees: total number of sk_buffs for data packets that have - * been freed (counts normal paths only). - */ - __u64 skb_frees; - - /** - * @skb_free_cycles: total time spent freeing sk_buffs, as - * measured with get_cycles(). - */ - __u64 skb_free_cycles; - - /** - * @skb_page_allocs: total number of calls to homa_skb_page_alloc. - */ - __u64 skb_page_allocs; - - /** - * @skb_page_alloc_cycles: total time spent in homa_skb_page_alloc, as - * measured with get_cycles(). - */ - __u64 skb_page_alloc_cycles; - - /** - * @requests_received: total number of request messages received. 
- */ - __u64 requests_received; - - /** - * @requests_queued: total number of requests that were added to - * @homa->ready_requests (no thread was waiting). - */ - __u64 requests_queued; - - /** - * @responses_received: total number of response messages received. - */ - __u64 responses_received; - - /** - * @responses_queued: total number of responses that were added to - * @homa->ready_responses (no thread was waiting). - */ - __u64 responses_queued; - - /** - * @fast_wakeups: total number of times that a message arrived for - * a receiving thread that was polling in homa_wait_for_message. - */ - __u64 fast_wakeups; - - /** - * @slow_wakeups: total number of times that a receiving thread - * had to be put to sleep (no message arrived while it was polling). - */ - __u64 slow_wakeups; - - /** - * @handoffs_thread_waiting: total number of times that an RPC - * was handed off to a waiting thread (vs. being queued). - */ - __u64 handoffs_thread_waiting; - - /** - * @handoffs_alt_thread: total number of times that a thread other - * than the first on the list was chosen for a handoff (because the - * first thread was on a busy core). - */ - __u64 handoffs_alt_thread; - - /** - * @poll_cycles: total time spent in the polling loop in - * homa_wait_for_message, as measured with get_cycles(). - */ - __u64 poll_cycles; - - /** - * @softirq_calls: total number of calls to homa_softirq (i.e., - * total number of GRO packets processed, each of which could contain - * multiple Homa packets. - */ - __u64 softirq_calls; - - /** - * @softirq_cycles: total time spent executing homa_softirq when - * invoked under Linux's SoftIRQ handler, as measured with get_cycles(). - */ - __u64 softirq_cycles; - - /** - * @bypass_softirq_cycles: total time spent executing homa_softirq when - * invoked during GRO, bypassing the SoftIRQ mechanism. 
- */ - __u64 bypass_softirq_cycles; - - /** - * @linux_softirq_cycles: total time spent executing all softirq - * activities, as measured by the linux softirq module, in get_cycles() - * units. Only available with modified Linux kernels. - */ - __u64 linux_softirq_cycles; - - /** - * @napi_cycles: total time spent executing all NAPI activities, - * as measured by the linux softirq module, in get_cycles() units. - * Only available with modified Linux kernels. - */ - __u64 napi_cycles; - - /** - * @send_cycles: total time spent executing the homa_sendmsg kernel - * call handler to send requests, as measured with get_cycles(). - */ - __u64 send_cycles; - - /** @send_calls: total number of invocations of homa_semdmsg - * for requests. - */ - __u64 send_calls; - - /** - * @recv_cycles: total time spent executing homa_recvmsg (including - * time when the thread is blocked), as measured with get_cycles(). - */ - __u64 recv_cycles; - - /** @recv_calls: total number of invocations of homa_recvmsg. */ - __u64 recv_calls; - - /** - * @blocked_cycles: total time threads spend in blocked state - * while executing the homa_recvmsg kernel call handler. - */ - __u64 blocked_cycles; - - /** - * @reply_cycles: total time spent executing the homa_sendmsg kernel - * call handler to send responses, as measured with get_cycles(). - */ - __u64 reply_cycles; - - /** - * @reply_calls: total number of invocations of homa_semdmsg - * for responses. - */ - __u64 reply_calls; - - /** - * @abort_cycles: total time spent executing the homa_ioc_abort - * kernel call handler, as measured with get_cycles(). - */ - __u64 abort_cycles; - - /** - * @abort_calls: total number of invocations of the homa_ioc_abort - * kernel call. - */ - __u64 abort_calls; - - /** - * @so_set_buf_cycles: total time spent executing the homa_ioc_set_buf - * kernel call handler, as measured with get_cycles(). 
- */ - __u64 so_set_buf_cycles; - - /** - * @so_set_buf_calls: total number of invocations of the homa_ioc_set_buf - * kernel call. - */ - __u64 so_set_buf_calls; - - /** - * @grantable_lock_cycles: total time spent with homa->grantable_lock - * locked. - */ - __u64 grantable_lock_cycles; - - /** - * @timer_cycles: total time spent in homa_timer, as measured with - * get_cycles(). - */ - __u64 timer_cycles; - - /** - * @timer_reap_cycles: total time spent by homa_timer to reap dead - * RPCs, as measured with get_cycles(). This time is included in - * @timer_cycles. - */ - __u64 timer_reap_cycles; - - /** - * @data_pkt_reap_cycles: total time spent by homa_data_pkt to reap - * dead RPCs, as measured with get_cycles(). - */ - __u64 data_pkt_reap_cycles; - - /** - * @pacer_cycles: total time spent executing in homa_pacer_main - * (not including blocked time), as measured with get_cycles(). - */ - __u64 pacer_cycles; - - /** - * @pacer_lost_cycles: unnecessary delays in transmitting packets - * (i.e. wasted output bandwidth) because the pacer was slow or got - * descheduled. - */ - __u64 pacer_lost_cycles; - - /** - * @pacer_bytes: total number of bytes transmitted when - * @homa->throttled_rpcs is nonempty. - */ - __u64 pacer_bytes; - - /** - * @pacer_skipped_rpcs: total number of times that the pacer had to - * abort because it couldn't lock an RPC. - */ - __u64 pacer_skipped_rpcs; - - /** - * @pacer_needed_help: total number of times that homa_check_pacer - * found that the pacer was running behind, so it actually invoked - * homa_pacer_xmit. - */ - __u64 pacer_needed_help; - - /** - * @throttled_cycles: total amount of time that @homa->throttled_rpcs - * is nonempty, as measured with get_cycles(). - */ - __u64 throttled_cycles; - - /** - * @resent_packets: total number of data packets issued in response to - * RESEND packets. - */ - __u64 resent_packets; - - /** - * @peer_hash_links: total # of link traversals in homa_peer_find. 
- */ - __u64 peer_hash_links; - - /** - * @peer_new_entries: total # of new entries created in Homa's - * peer table (this value doesn't increment if the desired peer is - * found in the entry in its hash chain). - */ - __u64 peer_new_entries; - - /** - * @peer_kmalloc errors: total number of times homa_peer_find - * returned an error because it couldn't allocate memory for a new - * peer. - */ - __u64 peer_kmalloc_errors; - - /** - * @peer_route errors: total number of times homa_peer_find - * returned an error because it couldn't create a route to the peer. - */ - __u64 peer_route_errors; - - /** - * @control_xmit_errors errors: total number of times ip_queue_xmit - * failed when transmitting a control packet. - */ - __u64 control_xmit_errors; - - /** - * @data_xmit_errors errors: total number of times ip_queue_xmit - * failed when transmitting a data packet. - */ - __u64 data_xmit_errors; - - /** - * @unknown_rpc: total number of times an incoming packet was - * discarded because it referred to a nonexistent RPC. Doesn't - * count grant packets received by servers (since these are - * fairly common). - */ - __u64 unknown_rpcs; - - /** - * @cant_create_server_rpc: total number of times a server discarded - * an incoming packet because it couldn't create a homa_rpc object. - */ - __u64 server_cant_create_rpcs; - - /** - * @unknown_packet_type: total number of times a packet was discarded - * because its type wasn't one of the supported values. - */ - __u64 unknown_packet_types; - - /** - * @short_packets: total number of times a packet was discarded - * because it was too short to hold all the required information. - */ - __u64 short_packets; - - /** - * @packet_discards: total number of times a normal (non-retransmitted) - * packet was discarded because all its data had already been received. - */ - __u64 packet_discards; - - /** - * @resent_discards: total number of times a retransmitted packet - * was discarded because its data had already been received. 
- */ - __u64 resent_discards; - - /** - * @resent_packets_used: total number of times a resent packet was - * actually incorporated into the message at the target (i.e. it - * wasn't redundant). - */ - __u64 resent_packets_used; - - /** - * @rpc_timeouts: total number of times an RPC (either client or - * server) was aborted because the peer was nonresponsive. - */ - __u64 rpc_timeouts; - - /** - * @server_rpc_discards: total number of times an RPC was aborted on - * the server side because of a timeout. - */ - __u64 server_rpc_discards; - - /** - * @server_rpcs_unknown: total number of times an RPC was aborted on - * the server side because it is no longer known to the client. - */ - __u64 server_rpcs_unknown; - - /** - * @client_lock_misses: total number of times that Homa had to wait - * to acquire a client bucket lock. - */ - __u64 client_lock_misses; - - /** - * @client_lock_miss_cycles: total time spent waiting for client - * bucket lock misses, measured by get_cycles(). - */ - __u64 client_lock_miss_cycles; - - /** - * @server_lock_misses: total number of times that Homa had to wait - * to acquire a server bucket lock. - */ - __u64 server_lock_misses; - - /** - * @server_lock_miss_cycles: total time spent waiting for server - * bucket lock misses, measured by get_cycles(). - */ - __u64 server_lock_miss_cycles; - - /** - * @socket_lock_miss_cycles: total time spent waiting for socket - * lock misses, measured by get_cycles(). - */ - __u64 socket_lock_miss_cycles; - - /** - * @socket_lock_misses: total number of times that Homa had to wait - * to acquire a socket lock. - */ - __u64 socket_lock_misses; - - /** - * @throttle_lock_miss_cycles: total time spent waiting for throttle - * lock misses, measured by get_cycles(). - */ - __u64 throttle_lock_miss_cycles; - - /** - * @throttle_lock_misses: total number of times that Homa had to wait - * to acquire the throttle lock. 
- */ - __u64 throttle_lock_misses; - - /** - * @peer_acklock_miss_cycles: total time spent waiting for peer - * lock misses, measured by get_cycles(). - */ - __u64 peer_ack_lock_miss_cycles; - - /** - * @peer_ack_lock_misses: total number of times that Homa had to wait - * to acquire the lock used for managing acks for a peer. - */ - __u64 peer_ack_lock_misses; - - /** - * @grantable_lock_miss_cycles: total time spent waiting for grantable - * lock misses, measured by get_cycles(). - */ - __u64 grantable_lock_miss_cycles; - - /** - * @grantable_lock_misses: total number of times that Homa had to wait - * to acquire the grantable lock. - */ - __u64 grantable_lock_misses; - - /** - * @grantable_rpcs_integral: cumulative sum of time_delta*grantable, - * where time_delta is a get_cycles time and grantable is the - * value of homa->num_grantable_rpcs over that time period. - */ - __u64 grantable_rpcs_integral; - - /** - * @grant_recalc_calls: cumulative number of times homa_grant_recalc - * has been invoked. - */ - __u64 grant_recalc_calls; - - /** - * @grant_recalc_cycles: total time spent in homa_grant_recalc, - * in get_cycles() units. - */ - __u64 grant_recalc_cycles; - - /** - * @grant_recalc_loops: cumulative number of times homa_grant_recalc - * has looped back to recalculate again. - */ - __u64 grant_recalc_loops; - - /** - * @grant_recalc_skips: cumulative number of times that - * homa_grant_recalc skipped its work because in other thread - * already did it. - */ - __u64 grant_recalc_skips; - - /** - * @grant_priority_bumps: cumulative number of times the grant priority - * of an RPC has increased above its next-higher-priority neighbor. - */ - __u64 grant_priority_bumps; - - /** - * @fifo_grants: total number of times that grants were sent to - * the oldest message. 
- */ - __u64 fifo_grants; - - /** - * @fifo_grants_no_incoming: total number of times that, when a - * FIFO grant was issued, the message had no outstanding grants - * (everything granted had been received). - */ - __u64 fifo_grants_no_incoming; - - /** - * @disabled_reaps: total number of times that the reaper couldn't - * run at all because it was disabled. - */ - __u64 disabled_reaps; - - /** - * @disabled_rpc_reaps: total number of times that the reaper skipped - * an RPC because reaping was disabled for that particular RPC - */ - __u64 disabled_rpc_reaps; - - /** - * @reaper_runs: total number of times that the reaper was invoked - * and was not disabled. - */ - __u64 reaper_calls; - - /** - * @reaper_dead_skbs: incremented by hsk->dead_skbs each time that - * reaper_calls is incremented. - */ - __u64 reaper_dead_skbs; - - /** - * @forced_reaps: total number of times that homa_wait_for_message - * invoked the reaper because dead_skbs was too high. - */ - __u64 forced_reaps; - - /** - * @throttle_list_adds: total number of calls to homa_add_to_throttled. - */ - __u64 throttle_list_adds; - - /** - * @throttle_list_checks: number of list elements examined in - * calls to homa_add_to_throttled. - */ - __u64 throttle_list_checks; - - /** - * @unacked_overflows: total number of times that homa_peer_add_ack - * found insufficient space for the new id and hence had to send an - * ACK message. - */ - __u64 ack_overflows; - - /** - * @ignored_need_acks: total number of times that a NEED_ACK packet - * was ignored because the RPC's result hadn't been fully received. - */ - __u64 ignored_need_acks; - - /** - * @bpage_resuses: total number of times that, when an owned page - * reached the end, it could be reused because all existing - * allocations had been released. - */ - __u64 bpage_reuses; - - /** - * @buffer_alloc_failures: total number of times that - * homa_pool_allocate was unable to allocate buffer space for - * an incoming message. 
- */ - __u64 buffer_alloc_failures; - - /** - * @linux_pkt_alloc_bytes: total bytes allocated in new packet buffers - * by the NIC driver because of packet cache underflows. - */ - __u64 linux_pkt_alloc_bytes; - - /** - * @dropped_data_no_bufs: total bytes of incoming data dropped because - * there was no application buffer space available. - */ - __u64 dropped_data_no_bufs; - - /** - * @gen3_handoffs: total number of handoffs from GRO to SoftIRQ made - * by Gen3 load balancer. - */ - __u64 gen3_handoffs; - - /** - * @gen3_alt_handoffs: total number of GRO->SoftIRQ handoffs that - * didn't choose the primary SoftIRQ core because it was busy with - * app threads. - */ - __u64 gen3_alt_handoffs; - - /** - * @gro_grant_bypasses: total number of GRANT packets passed directly - * to homa_softirq by homa_gro_receive, bypassing the normal SoftIRQ - * mechanism (triggered by HOMA_GRO_FAST_GRANTS). - */ - __u64 gro_grant_bypasses; - - /** - * @gro_data_bypasses: total number of DATA packets passed directly - * to homa_softirq by homa_gro_receive, bypassing the normal SoftIRQ - * mechanism (triggered by HOMA_GRO_SHORT_BYPASS). - */ - __u64 gro_data_bypasses; - - /** @temp: For temporary use during testing. */ -#define NUM_TEMP_METRICS 10 - __u64 temp[NUM_TEMP_METRICS]; -}; /** * struct homa_numa - Homa allocates one of these structures for each @@ -2654,9 +2012,6 @@ struct homa_core { * HOMA_SKB_PAGE_SIZE in length. */ struct page *stashed_pages[HOMA_MAX_STASHED(HOMA_MAX_MESSAGE_LENGTH)]; - - /** @metrics: performance statistics for this core. */ - struct homa_metrics metrics; }; /** @@ -2695,9 +2050,6 @@ struct homa_skb_info { int offset; }; -#define INC_METRIC(metric, count) \ - (homa_cores[raw_smp_processor_id()]->metrics.metric) += (count) - /** * homa_get_skb_info() - Return the address of Homa's private information * for an sk_buff. 
@@ -3146,7 +2498,6 @@ extern void homa_ack_pkt(struct sk_buff *skb, struct homa_sock *hsk, struct homa_rpc *rpc); extern void homa_add_packet(struct homa_rpc *rpc, struct sk_buff *skb); extern void homa_add_to_throttled(struct homa_rpc *rpc); -extern void homa_append_metric(struct homa *homa, const char *format, ...); extern int homa_backlog_rcv(struct sock *sk, struct sk_buff *skb); extern int homa_bind(struct socket *sk, struct sockaddr *addr, int addr_len); @@ -3232,12 +2583,6 @@ extern int homa_message_in_init(struct homa_rpc *rpc, int length, extern int homa_message_out_fill(struct homa_rpc *rpc, struct iov_iter *iter, int xmit); extern void homa_message_out_init(struct homa_rpc *rpc, int length); -extern loff_t homa_metrics_lseek(struct file *file, loff_t offset, - int whence); -extern int homa_metrics_open(struct inode *inode, struct file *file); -extern ssize_t homa_metrics_read(struct file *file, char __user *buffer, - size_t length, loff_t *offset); -extern int homa_metrics_release(struct inode *inode, struct file *file); extern void homa_need_ack_pkt(struct sk_buff *skb, struct homa_sock *hsk, struct homa_rpc *rpc); extern struct sk_buff @@ -3282,13 +2627,10 @@ extern void homa_pool_release_buffers(struct homa_pool *pool, int num_buffers, __u32 *buffers); extern char *homa_print_ipv4_addr(__be32 addr); extern char *homa_print_ipv6_addr(const struct in6_addr *addr); -extern char *homa_print_metrics(struct homa *homa); extern char *homa_print_packet(struct sk_buff *skb, char *buffer, int buf_len); extern char *homa_print_packet_short(struct sk_buff *skb, char *buffer, int buf_len); extern void homa_prios_changed(struct homa *homa); -extern int homa_proc_read_metrics(char *buffer, char **start, off_t offset, - int count, int *eof, void *data); extern int homa_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags, int *addr_len); extern int homa_register_interests(struct homa_interest *interest, diff --git a/homa_incoming.c b/homa_incoming.c 
index b774db2..3221b61 100644 --- a/homa_incoming.c +++ b/homa_incoming.c @@ -5,7 +5,6 @@ */ #include "homa_impl.h" -#include "homa_wire.h" /** * homa_message_in_init() - Constructor for homa_message_in. diff --git a/homa_metrics.c b/homa_metrics.c new file mode 100644 index 0000000..42f3b7b --- /dev/null +++ b/homa_metrics.c @@ -0,0 +1,427 @@ +// SPDX-License-Identifier: BSD-2-Clause + +/* This file contains various functions for managing Homa's performance + * counters. + */ + +#include "homa_impl.h" + +DEFINE_PER_CPU(struct homa_metrics, homa_metrics); + +/* For functions that are invoked directly by Linux, so they can't be + * passed a struct homa arguments. + */ +extern struct homa *homa; + +/** + * homa_metric_append() - Formats a new metric and appends it to homa->metrics. + * @homa: The new data will appended to the @metrics field of + * this structure. + * @format: Standard printf-style format string describing the + * new metric. Arguments after this provide the usual + * values expected for printf-like functions. + */ +void homa_metric_append(struct homa *homa, const char *format, ...) +{ + char *new_buffer; + size_t new_chars; + va_list ap; + + if (!homa->metrics) { +#ifdef __UNIT_TEST__ + homa->metrics_capacity = 30; +#else + homa->metrics_capacity = 4096; +#endif + homa->metrics = kmalloc(homa->metrics_capacity, GFP_KERNEL); + if (!homa->metrics) { + pr_warn("%s couldn't allocate memory\n", __func__); + return; + } + homa->metrics_length = 0; + } + + /* May have to execute this loop multiple times if we run out + * of space in homa->metrics; each iteration expands the storage, + * until eventually it is large enough. + */ + while (true) { + va_start(ap, format); + new_chars = vsnprintf(homa->metrics + homa->metrics_length, + homa->metrics_capacity - homa->metrics_length, + format, ap); + va_end(ap); + if ((homa->metrics_length + new_chars) < homa->metrics_capacity) + break; + + /* Not enough room; expand buffer capacity. 
*/ + homa->metrics_capacity *= 2; + new_buffer = kmalloc(homa->metrics_capacity, GFP_KERNEL); + if (!new_buffer) { + pr_warn("%s couldn't allocate memory\n", __func__); + return; + } + memcpy(new_buffer, homa->metrics, homa->metrics_length); + kfree(homa->metrics); + homa->metrics = new_buffer; + } + homa->metrics_length += new_chars; +} + +/** + * homa_metrics_print() - Sample all of the Homa performance metrics and + * generate a human-readable string describing all of them. + * @homa: Overall data about the Homa protocol implementation; + * the formatted string will be stored in homa->metrics. + * + * Return: The formatted string. + */ +char *homa_metrics_print(struct homa *homa) +{ + int core, i, lower = 0; + + homa->metrics_length = 0; +#define M(...) homa_metric_append(homa, __VA_ARGS__) + M("rdtsc_cycles %20llu RDTSC cycle counter when metrics were gathered\n", + get_cycles()); + M("cpu_khz %15llu Clock rate for RDTSC counter, in khz\n", + cpu_khz); + for (core = 0; core < nr_cpu_ids; core++) { + struct homa_metrics *m = &per_cpu(homa_metrics, core); + __s64 delta; + + M("core %15d Core id for following metrics\n", + core); + for (i = 0; i < HOMA_NUM_SMALL_COUNTS; i++) { + M("msg_bytes_%-9d %15llu Bytes in incoming messages containing %d-%d bytes\n", + (i+1)*64, m->small_msg_bytes[i], lower, + (i+1)*64); + lower = (i+1)*64 + 1; + } + for (i = (HOMA_NUM_SMALL_COUNTS*64)/1024; + i < HOMA_NUM_MEDIUM_COUNTS; i++) { + M("msg_bytes_%-9d %15llu Bytes in incoming messages containing %d-%d bytes\n", + (i+1)*1024, m->medium_msg_bytes[i], lower, + (i+1)*1024); + lower = (i+1)*1024 + 1; + } + M("large_msg_count %15llu # of incoming messages >= %d bytes\n", + m->large_msg_count, lower); + M("large_msg_bytes %15llu Bytes in incoming messages >= %d bytes\n", + m->large_msg_bytes, lower); + M("sent_msg_bytes %15llu Total bytes in all outgoing messages\n", + m->sent_msg_bytes); + for (i = DATA; i < BOGUS; i++) { + char *symbol = homa_symbol_for_type(i); + + 
M("packets_sent_%-7s %15llu %s packets sent\n", + symbol, m->packets_sent[i-DATA], + symbol); + } + for (i = DATA; i < BOGUS; i++) { + char *symbol = homa_symbol_for_type(i); + + M("packets_rcvd_%-7s %15llu %s packets received\n", + symbol, m->packets_received[i-DATA], + symbol); + } + for (i = 0; i < HOMA_MAX_PRIORITIES; i++) { + M("priority%d_bytes %15llu Bytes sent at priority %d (including headers)\n", + i, m->priority_bytes[i], i); + } + for (i = 0; i < HOMA_MAX_PRIORITIES; i++) { + M("priority%d_packets %15llu Packets sent at priority %d\n", + i, m->priority_packets[i], i); + } + M("skb_allocs %15llu sk_buffs allocated\n", + m->skb_allocs); + M("skb_alloc_cycles %15llu Time spent allocating sk_buffs\n", + m->skb_alloc_cycles); + M("skb_frees %15llu Data sk_buffs freed in normal paths\n", + m->skb_frees); + M("skb_free_cycles %15llu Time spent freeing data sk_buffs\n", + m->skb_free_cycles); + M("skb_page_allocs %15llu Pages allocated for sk_buff frags\n", + m->skb_page_allocs); + M("skb_page_alloc_cycles %15llu Time spent allocating pages for sk_buff frags\n", + m->skb_page_alloc_cycles); + M("requests_received %15llu Incoming request messages\n", + m->requests_received); + M("requests_queued %15llu Requests for which no thread was waiting\n", + m->requests_queued); + M("responses_received %15llu Incoming response messages\n", + m->responses_received); + M("responses_queued %15llu Responses for which no thread was waiting\n", + m->responses_queued); + M("fast_wakeups %15llu Messages received while polling\n", + m->fast_wakeups); + M("slow_wakeups %15llu Messages received after thread went to sleep\n", + m->slow_wakeups); + M("handoffs_thread_waiting %15llu RPC handoffs to waiting threads (vs. 
queue)\n", + m->handoffs_thread_waiting); + M("handoffs_alt_thread %15llu RPC handoffs not to first on list (avoid busy core)\n", + m->handoffs_alt_thread); + M("poll_cycles %15llu Time spent polling for incoming messages\n", + m->poll_cycles); + M("softirq_calls %15llu Calls to homa_softirq (i.e. # GRO pkts received)\n", + m->softirq_calls); + M("softirq_cycles %15llu Time spent in homa_softirq during SoftIRQ\n", + m->softirq_cycles); + M("bypass_softirq_cycles %15llu Time spent in homa_softirq during bypass from GRO\n", + m->bypass_softirq_cycles); + M("linux_softirq_cycles %15llu Time spent in all Linux SoftIRQ\n", + m->linux_softirq_cycles); + M("napi_cycles %15llu Time spent in NAPI-level packet handling\n", + m->napi_cycles); + M("send_cycles %15llu Time spent in homa_sendmsg for requests\n", + m->send_cycles); + M("send_calls %15llu Total invocations of homa_sendmsg for requests\n", + m->send_calls); + // It is possible for us to get here at a time when a + // thread has been blocked for a long time and has + // recorded blocked_cycles, but hasn't finished the + // system call so recv_cycles hasn't been incremented + // yet. If that happens, just record 0 to prevent + // underflow errors. 
+ delta = m->recv_cycles - m->blocked_cycles; + if (delta < 0) + delta = 0; + M("recv_cycles %15llu Unblocked time spent in recvmsg kernel call\n", + delta); + M("recv_calls %15llu Total invocations of recvmsg kernel call\n", + m->recv_calls); + M("blocked_cycles %15llu Time spent blocked in homa_recvmsg\n", + m->blocked_cycles); + M("reply_cycles %15llu Time spent in homa_sendmsg for responses\n", + m->reply_cycles); + M("reply_calls %15llu Total invocations of homa_sendmsg for responses\n", + m->reply_calls); + M("abort_cycles %15llu Time spent in homa_ioc_abort kernel call\n", + m->abort_cycles); + M("abort_calls %15llu Total invocations of abort kernel call\n", + m->abort_calls); + M("so_set_buf_cycles %15llu Time spent in setsockopt SO_HOMA_SET_BUF\n", + m->so_set_buf_cycles); + M("so_set_buf_calls %15llu Total invocations of setsockopt SO_HOMA_SET_BUF\n", + m->so_set_buf_calls); + M("grantable_lock_cycles %15llu Time spent with homa->grantable_lock locked\n", + m->grantable_lock_cycles); + M("timer_cycles %15llu Time spent in homa_timer\n", + m->timer_cycles); + M("timer_reap_cycles %15llu Time in homa_timer spent reaping RPCs\n", + m->timer_reap_cycles); + M("data_pkt_reap_cycles %15llu Time in homa_data_pkt spent reaping RPCs\n", + m->data_pkt_reap_cycles); + M("pacer_cycles %15llu Time spent in homa_pacer_main\n", + m->pacer_cycles); + M("homa_cycles %15llu Total time in all Homa-related functions\n", + m->softirq_cycles + m->napi_cycles + + m->send_cycles + m->recv_cycles + + m->reply_cycles - m->blocked_cycles + + m->timer_cycles + m->pacer_cycles); + M("pacer_lost_cycles %15llu Lost transmission time because pacer was slow\n", + m->pacer_lost_cycles); + M("pacer_bytes %15llu Bytes transmitted when the pacer was active\n", + m->pacer_bytes); + M("pacer_skipped_rpcs %15llu Pacer aborts because of locked RPCs\n", + m->pacer_skipped_rpcs); + M("pacer_needed_help %15llu homa_pacer_xmit invocations from homa_check_pacer\n", + m->pacer_needed_help); + 
M("throttled_cycles %15llu Time when the throttled queue was nonempty\n", + m->throttled_cycles); + M("resent_packets %15llu DATA packets sent in response to RESENDs\n", + m->resent_packets); + M("peer_hash_links %15llu Hash chain link traversals in peer table\n", + m->peer_hash_links); + M("peer_new_entries %15llu New entries created in peer table\n", + m->peer_new_entries); + M("peer_kmalloc_errors %15llu kmalloc failures creating peer table entries\n", + m->peer_kmalloc_errors); + M("peer_route_errors %15llu Routing failures creating peer table entries\n", + m->peer_route_errors); + M("control_xmit_errors %15llu Errors sending control packets\n", + m->control_xmit_errors); + M("data_xmit_errors %15llu Errors sending data packets\n", + m->data_xmit_errors); + M("unknown_rpcs %15llu Non-grant packets discarded because RPC unknown\n", + m->unknown_rpcs); + M("server_cant_create_rpcs %15llu Packets discarded because server couldn't create RPC\n", + m->server_cant_create_rpcs); + M("unknown_packet_types %15llu Packets discarded because of unsupported type\n", + m->unknown_packet_types); + M("short_packets %15llu Packets discarded because too short\n", + m->short_packets); + M("packet_discards %15llu Non-resent packets discarded because data already received\n", + m->packet_discards); + M("resent_discards %15llu Resent packets discarded because data already received\n", + m->resent_discards); + M("resent_packets_used %15llu Retransmitted packets that were actually used\n", + m->resent_packets_used); + M("rpc_timeouts %15llu RPCs aborted because peer was nonresponsive\n", + m->rpc_timeouts); + M("server_rpc_discards %15llu RPCs discarded by server because of errors\n", + m->server_rpc_discards); + M("server_rpcs_unknown %15llu RPCs aborted by server because unknown to client\n", + m->server_rpcs_unknown); + M("client_lock_misses %15llu Bucket lock misses for client RPCs\n", + m->client_lock_misses); + M("client_lock_miss_cycles %15llu Time lost waiting for client 
bucket locks\n", + m->client_lock_miss_cycles); + M("server_lock_misses %15llu Bucket lock misses for server RPCs\n", + m->server_lock_misses); + M("server_lock_miss_cycles %15llu Time lost waiting for server bucket locks\n", + m->server_lock_miss_cycles); + M("socket_lock_misses %15llu Socket lock misses\n", + m->socket_lock_misses); + M("socket_lock_miss_cycles %15llu Time lost waiting for socket locks\n", + m->socket_lock_miss_cycles); + M("throttle_lock_misses %15llu Throttle lock misses\n", + m->throttle_lock_misses); + M("throttle_lock_miss_cycles %15llu Time lost waiting for throttle locks\n", + m->throttle_lock_miss_cycles); + M("peer_ack_lock_misses %15llu Misses on peer ack locks\n", + m->peer_ack_lock_misses); + M("peer_ack_lock_miss_cycles %15llu Time lost waiting for peer ack locks\n", + m->peer_ack_lock_miss_cycles); + M("grantable_lock_misses %15llu Grantable lock misses\n", + m->grantable_lock_misses); + M("grantable_lock_miss_cycles%15llu Time lost waiting for grantable lock\n", + m->grantable_lock_miss_cycles); + M("grantable_rpcs_integral %15llu Integral of homa->num_grantable_rpcs*dt\n", + m->grantable_rpcs_integral); + M("grant_recalc_calls %15llu Number of calls to homa_grant_recalc\n", + m->grant_recalc_calls); + M("grant_recalc_cycles %15llu Time spent in homa_grant_recalc\n", + m->grant_recalc_cycles); + M("grant_recalc_skips %15llu Number of times homa_grant_recalc skipped redundant work\n", + m->grant_recalc_skips); + M("grant_recalc_loops %15llu Number of times homa_grant_recalc looped back\n", + m->grant_recalc_loops); + M("grant_priority_bumps %15llu Number of times an RPC moved up in the grant priority order\n", + m->grant_priority_bumps); + M("fifo_grants %15llu Grants issued using FIFO priority\n", + m->fifo_grants); + M("fifo_grants_no_incoming %15llu FIFO grants to messages with no outstanding grants\n", + m->fifo_grants_no_incoming); + M("disabled_reaps %15llu Reaper invocations that were disabled\n", + m->disabled_reaps); + 
M("disabled_rpc_reaps %15llu Disabled RPCs skipped by reaper\n", + m->disabled_rpc_reaps); + M("reaper_calls %15llu Reaper invocations that were not disabled\n", + m->reaper_calls); + M("reaper_dead_skbs %15llu Sum of hsk->dead_skbs across all reaper alls\n", + m->reaper_dead_skbs); + M("forced_reaps %15llu Reaps forced by accumulation of dead RPCs\n", + m->forced_reaps); + M("throttle_list_adds %15llu Calls to homa_add_to_throttled\n", + m->throttle_list_adds); + M("throttle_list_checks %15llu List elements checked in homa_add_to_throttled\n", + m->throttle_list_checks); + M("ack_overflows %15llu Explicit ACKs sent because peer->acks was full\n", + m->ack_overflows); + M("ignored_need_acks %15llu NEED_ACKs ignored because RPC result not yet received\n", + m->ignored_need_acks); + M("bpage_reuses %15llu Buffer page could be reused because ref count was zero\n", + m->bpage_reuses); + M("buffer_alloc_failures %15llu homa_pool_allocate didn't find enough buffer space for an RPC\n", + m->buffer_alloc_failures); + M("linux_pkt_alloc_bytes %15llu Bytes allocated in new packets by NIC driver due to cache overflows\n", + m->linux_pkt_alloc_bytes); + M("dropped_data_no_bufs %15llu Data bytes dropped because app buffers full\n", + m->dropped_data_no_bufs); + M("gen3_handoffs %15llu GRO->SoftIRQ handoffs made by Gen3 balancer\n", + m->gen3_handoffs); + M("gen3_alt_handoffs %15llu Gen3 handoffs to secondary core (primary was busy)\n", + m->gen3_alt_handoffs); + M("gro_grant_bypasses %15llu Grant packets passed directly to homa_softirq by homa_gro_receive\n", + m->gro_grant_bypasses); + M("gro_data_bypasses %15llu Data packets passed directly to homa_softirq by homa_gro_receive\n", + m->gro_data_bypasses); + for (i = 0; i < NUM_TEMP_METRICS; i++) + M("temp%-2d %15llu Temporary use in testing\n", + i, m->temp[i]); + } + + return homa->metrics; +} +/** + * homa_metrics_open() - This function is invoked when /proc/net/homa_metrics is + * opened. 
+ * @inode: The inode corresponding to the file. + * @file: Information about the open file. + * + * Return: always 0. + */ +int homa_metrics_open(struct inode *inode, struct file *file) +{ + /* Collect all of the metrics when the file is opened, and save + * these for use by subsequent reads (don't want the metrics to + * change between reads). If there are concurrent opens on the + * file, only read the metrics once, during the first open, and + * use this copy for subsequent opens, until the file has been + * completely closed. + */ + spin_lock(&homa->metrics_lock); + if (homa->metrics_active_opens == 0) + homa_metrics_print(homa); + homa->metrics_active_opens++; + spin_unlock(&homa->metrics_lock); + return 0; +} + +/** + * homa_metrics_read() - This function is invoked to handle read kernel calls on + * /proc/net/homa_metrics. + * @file: Information about the file being read. + * @buffer: Address in user space of the buffer in which data from the file + * should be returned. + * @length: Number of bytes available at @buffer. + * @offset: Current read offset within the file. + * + * Return: the number of bytes returned at @buffer. 0 means the end of the + * file was reached, and a negative number indicates an error (-errno). + */ +ssize_t homa_metrics_read(struct file *file, char __user *buffer, + size_t length, loff_t *offset) +{ + size_t copied; + + if (*offset >= homa->metrics_length) + return 0; + copied = homa->metrics_length - *offset; + if (copied > length) + copied = length; + if (copy_to_user(buffer, homa->metrics + *offset, copied)) + return -EFAULT; + *offset += copied; + return copied; +} + +/** + * homa_metrics_lseek() - This function is invoked to handle seeks on + * /proc/net/homa_metrics. Right now seeks are ignored: the file must be + * read sequentially. + * @file: Information about the file being read. + * @offset: Distance to seek, in bytes + * @whence: Starting point from which to measure the distance to seek. 
+ */ +loff_t homa_metrics_lseek(struct file *file, loff_t offset, int whence) +{ + return 0; +} + +/** + * homa_metrics_release() - This function is invoked when the last reference to + * an open /proc/net/homa_metrics is closed. It performs cleanup. + * @inode: The inode corresponding to the file. + * @file: Information about the open file. + * + * Return: always 0. + */ +int homa_metrics_release(struct inode *inode, struct file *file) +{ + spin_lock(&homa->metrics_lock); + homa->metrics_active_opens--; + spin_unlock(&homa->metrics_lock); + return 0; +} diff --git a/homa_metrics.h b/homa_metrics.h new file mode 100644 index 0000000..a7ddcb0 --- /dev/null +++ b/homa_metrics.h @@ -0,0 +1,692 @@ +/* SPDX-License-Identifier: BSD-2-Clause */ + +/* This file contains declarations related to Homa's performance metrics. */ + +#ifndef _HOMA_METRICS_H +#define _HOMA_METRICS_H + +#include +#include + +#include "homa_wire.h" + +/** + * struct homa_metrics - various performance counters kept by Homa. + * + * There is one of these structures for each core, so counters can + * be updated without worrying about synchronization or extra cache + * misses. + * + * All counters are free-running: they never reset. + */ +#define HOMA_NUM_SMALL_COUNTS 64 +#define HOMA_NUM_MEDIUM_COUNTS 128 +struct homa_metrics { + /** + * @small_msg_bytes: entry i holds the total number of bytes + * received in messages whose length is between 64*i and 64*i + 63, + * inclusive. + */ + __u64 small_msg_bytes[HOMA_NUM_SMALL_COUNTS]; + + /** + * @medium_msg_bytes: entry i holds the total number of bytes + * received in messages whose length is between 1024*i and + * 1024*i + 1023, inclusive. The first four entries are always 0 + * (small_msg_counts covers this range). + */ + __u64 medium_msg_bytes[HOMA_NUM_MEDIUM_COUNTS]; + + /** + * @large_msg_count: the total number of messages received whose + * length is too large to appear in medium_msg_bytes. 
+ */ + __u64 large_msg_count; + + /** + * @large_msg_bytes: the total number of bytes received in + * messages too large to be counted by medium_msg_bytes. + */ + __u64 large_msg_bytes; + + /** + * @sent_msg_bytes: The total number of bytes in outbound + * messages. + */ + __u64 sent_msg_bytes; + + /** + * @packets_sent: total number of packets sent for each packet type + * (entry 0 corresponds to DATA, and so on). + */ + __u64 packets_sent[BOGUS-DATA]; + + /** + * @packets_received: total number of packets received for each + * packet type (entry 0 corresponds to DATA, and so on). + */ + __u64 packets_received[BOGUS-DATA]; + + /** @priority_bytes: total bytes sent at each priority level. */ + __u64 priority_bytes[HOMA_MAX_PRIORITIES]; + + /** @priority_packets: total packets sent at each priority level. */ + __u64 priority_packets[HOMA_MAX_PRIORITIES]; + + /** + * @skb_allocs: total number of calls to homa_skb_new_tx. + */ + __u64 skb_allocs; + + /** + * @skb_alloc_cycles: total time spent in homa_skb_new_tx, as + * measured with get_cycles(). + */ + __u64 skb_alloc_cycles; + + /** + * @skb_frees: total number of sk_buffs for data packets that have + * been freed (counts normal paths only). + */ + __u64 skb_frees; + + /** + * @skb_free_cycles: total time spent freeing sk_buffs, as + * measured with get_cycles(). + */ + __u64 skb_free_cycles; + + /** + * @skb_page_allocs: total number of calls to homa_skb_page_alloc. + */ + __u64 skb_page_allocs; + + /** + * @skb_page_alloc_cycles: total time spent in homa_skb_page_alloc, as + * measured with get_cycles(). + */ + __u64 skb_page_alloc_cycles; + + /** + * @requests_received: total number of request messages received. + */ + __u64 requests_received; + + /** + * @requests_queued: total number of requests that were added to + * @homa->ready_requests (no thread was waiting). + */ + __u64 requests_queued; + + /** + * @responses_received: total number of response messages received. 
+ */ + __u64 responses_received; + + /** + * @responses_queued: total number of responses that were added to + * @homa->ready_responses (no thread was waiting). + */ + __u64 responses_queued; + + /** + * @fast_wakeups: total number of times that a message arrived for + * a receiving thread that was polling in homa_wait_for_message. + */ + __u64 fast_wakeups; + + /** + * @slow_wakeups: total number of times that a receiving thread + * had to be put to sleep (no message arrived while it was polling). + */ + __u64 slow_wakeups; + + /** + * @handoffs_thread_waiting: total number of times that an RPC + * was handed off to a waiting thread (vs. being queued). + */ + __u64 handoffs_thread_waiting; + + /** + * @handoffs_alt_thread: total number of times that a thread other + * than the first on the list was chosen for a handoff (because the + * first thread was on a busy core). + */ + __u64 handoffs_alt_thread; + + /** + * @poll_cycles: total time spent in the polling loop in + * homa_wait_for_message, as measured with get_cycles(). + */ + __u64 poll_cycles; + + /** + * @softirq_calls: total number of calls to homa_softirq (i.e., + * total number of GRO packets processed, each of which could contain + * multiple Homa packets. + */ + __u64 softirq_calls; + + /** + * @softirq_cycles: total time spent executing homa_softirq when + * invoked under Linux's SoftIRQ handler, as measured with get_cycles(). + */ + __u64 softirq_cycles; + + /** + * @bypass_softirq_cycles: total time spent executing homa_softirq when + * invoked during GRO, bypassing the SoftIRQ mechanism. + */ + __u64 bypass_softirq_cycles; + + /** + * @linux_softirq_cycles: total time spent executing all softirq + * activities, as measured by the linux softirq module, in get_cycles() + * units. Only available with modified Linux kernels. + */ + __u64 linux_softirq_cycles; + + /** + * @napi_cycles: total time spent executing all NAPI activities, + * as measured by the linux softirq module, in get_cycles() units. 
+ * Only available with modified Linux kernels. + */ + __u64 napi_cycles; + + /** + * @send_cycles: total time spent executing the homa_sendmsg kernel + * call handler to send requests, as measured with get_cycles(). + */ + __u64 send_cycles; + + /** @send_calls: total number of invocations of homa_semdmsg + * for requests. + */ + __u64 send_calls; + + /** + * @recv_cycles: total time spent executing homa_recvmsg (including + * time when the thread is blocked), as measured with get_cycles(). + */ + __u64 recv_cycles; + + /** @recv_calls: total number of invocations of homa_recvmsg. */ + __u64 recv_calls; + + /** + * @blocked_cycles: total time threads spend in blocked state + * while executing the homa_recvmsg kernel call handler. + */ + __u64 blocked_cycles; + + /** + * @reply_cycles: total time spent executing the homa_sendmsg kernel + * call handler to send responses, as measured with get_cycles(). + */ + __u64 reply_cycles; + + /** + * @reply_calls: total number of invocations of homa_semdmsg + * for responses. + */ + __u64 reply_calls; + + /** + * @abort_cycles: total time spent executing the homa_ioc_abort + * kernel call handler, as measured with get_cycles(). + */ + __u64 abort_cycles; + + /** + * @abort_calls: total number of invocations of the homa_ioc_abort + * kernel call. + */ + __u64 abort_calls; + + /** + * @so_set_buf_cycles: total time spent executing the homa_ioc_set_buf + * kernel call handler, as measured with get_cycles(). + */ + __u64 so_set_buf_cycles; + + /** + * @so_set_buf_calls: total number of invocations of the homa_ioc_set_buf + * kernel call. + */ + __u64 so_set_buf_calls; + + /** + * @grantable_lock_cycles: total time spent with homa->grantable_lock + * locked. + */ + __u64 grantable_lock_cycles; + + /** + * @timer_cycles: total time spent in homa_timer, as measured with + * get_cycles(). + */ + __u64 timer_cycles; + + /** + * @timer_reap_cycles: total time spent by homa_timer to reap dead + * RPCs, as measured with get_cycles(). 
This time is included in + * @timer_cycles. + */ + __u64 timer_reap_cycles; + + /** + * @data_pkt_reap_cycles: total time spent by homa_data_pkt to reap + * dead RPCs, as measured with get_cycles(). + */ + __u64 data_pkt_reap_cycles; + + /** + * @pacer_cycles: total time spent executing in homa_pacer_main + * (not including blocked time), as measured with get_cycles(). + */ + __u64 pacer_cycles; + + /** + * @pacer_lost_cycles: unnecessary delays in transmitting packets + * (i.e. wasted output bandwidth) because the pacer was slow or got + * descheduled. + */ + __u64 pacer_lost_cycles; + + /** + * @pacer_bytes: total number of bytes transmitted when + * @homa->throttled_rpcs is nonempty. + */ + __u64 pacer_bytes; + + /** + * @pacer_skipped_rpcs: total number of times that the pacer had to + * abort because it couldn't lock an RPC. + */ + __u64 pacer_skipped_rpcs; + + /** + * @pacer_needed_help: total number of times that homa_check_pacer + * found that the pacer was running behind, so it actually invoked + * homa_pacer_xmit. + */ + __u64 pacer_needed_help; + + /** + * @throttled_cycles: total amount of time that @homa->throttled_rpcs + * is nonempty, as measured with get_cycles(). + */ + __u64 throttled_cycles; + + /** + * @resent_packets: total number of data packets issued in response to + * RESEND packets. + */ + __u64 resent_packets; + + /** + * @peer_hash_links: total # of link traversals in homa_peer_find. + */ + __u64 peer_hash_links; + + /** + * @peer_new_entries: total # of new entries created in Homa's + * peer table (this value doesn't increment if the desired peer is + * found in the entry in its hash chain). + */ + __u64 peer_new_entries; + + /** + * @peer_kmalloc errors: total number of times homa_peer_find + * returned an error because it couldn't allocate memory for a new + * peer. 
+ */ + __u64 peer_kmalloc_errors; + + /** + * @peer_route errors: total number of times homa_peer_find + * returned an error because it couldn't create a route to the peer. + */ + __u64 peer_route_errors; + + /** + * @control_xmit_errors errors: total number of times ip_queue_xmit + * failed when transmitting a control packet. + */ + __u64 control_xmit_errors; + + /** + * @data_xmit_errors errors: total number of times ip_queue_xmit + * failed when transmitting a data packet. + */ + __u64 data_xmit_errors; + + /** + * @unknown_rpc: total number of times an incoming packet was + * discarded because it referred to a nonexistent RPC. Doesn't + * count grant packets received by servers (since these are + * fairly common). + */ + __u64 unknown_rpcs; + + /** + * @cant_create_server_rpc: total number of times a server discarded + * an incoming packet because it couldn't create a homa_rpc object. + */ + __u64 server_cant_create_rpcs; + + /** + * @unknown_packet_type: total number of times a packet was discarded + * because its type wasn't one of the supported values. + */ + __u64 unknown_packet_types; + + /** + * @short_packets: total number of times a packet was discarded + * because it was too short to hold all the required information. + */ + __u64 short_packets; + + /** + * @packet_discards: total number of times a normal (non-retransmitted) + * packet was discarded because all its data had already been received. + */ + __u64 packet_discards; + + /** + * @resent_discards: total number of times a retransmitted packet + * was discarded because its data had already been received. + */ + __u64 resent_discards; + + /** + * @resent_packets_used: total number of times a resent packet was + * actually incorporated into the message at the target (i.e. it + * wasn't redundant). + */ + __u64 resent_packets_used; + + /** + * @rpc_timeouts: total number of times an RPC (either client or + * server) was aborted because the peer was nonresponsive. 
+ */ + __u64 rpc_timeouts; + + /** + * @server_rpc_discards: total number of times an RPC was aborted on + * the server side because of a timeout. + */ + __u64 server_rpc_discards; + + /** + * @server_rpcs_unknown: total number of times an RPC was aborted on + * the server side because it is no longer known to the client. + */ + __u64 server_rpcs_unknown; + + /** + * @client_lock_misses: total number of times that Homa had to wait + * to acquire a client bucket lock. + */ + __u64 client_lock_misses; + + /** + * @client_lock_miss_cycles: total time spent waiting for client + * bucket lock misses, measured by get_cycles(). + */ + __u64 client_lock_miss_cycles; + + /** + * @server_lock_misses: total number of times that Homa had to wait + * to acquire a server bucket lock. + */ + __u64 server_lock_misses; + + /** + * @server_lock_miss_cycles: total time spent waiting for server + * bucket lock misses, measured by get_cycles(). + */ + __u64 server_lock_miss_cycles; + + /** + * @socket_lock_miss_cycles: total time spent waiting for socket + * lock misses, measured by get_cycles(). + */ + __u64 socket_lock_miss_cycles; + + /** + * @socket_lock_misses: total number of times that Homa had to wait + * to acquire a socket lock. + */ + __u64 socket_lock_misses; + + /** + * @throttle_lock_miss_cycles: total time spent waiting for throttle + * lock misses, measured by get_cycles(). + */ + __u64 throttle_lock_miss_cycles; + + /** + * @throttle_lock_misses: total number of times that Homa had to wait + * to acquire the throttle lock. + */ + __u64 throttle_lock_misses; + + /** + * @peer_acklock_miss_cycles: total time spent waiting for peer + * lock misses, measured by get_cycles(). + */ + __u64 peer_ack_lock_miss_cycles; + + /** + * @peer_ack_lock_misses: total number of times that Homa had to wait + * to acquire the lock used for managing acks for a peer. 
+ */ + __u64 peer_ack_lock_misses; + + /** + * @grantable_lock_miss_cycles: total time spent waiting for grantable + * lock misses, measured by get_cycles(). + */ + __u64 grantable_lock_miss_cycles; + + /** + * @grantable_lock_misses: total number of times that Homa had to wait + * to acquire the grantable lock. + */ + __u64 grantable_lock_misses; + + /** + * @grantable_rpcs_integral: cumulative sum of time_delta*grantable, + * where time_delta is a get_cycles time and grantable is the + * value of homa->num_grantable_rpcs over that time period. + */ + __u64 grantable_rpcs_integral; + + /** + * @grant_recalc_calls: cumulative number of times homa_grant_recalc + * has been invoked. + */ + __u64 grant_recalc_calls; + + /** + * @grant_recalc_cycles: total time spent in homa_grant_recalc, + * in get_cycles() units. + */ + __u64 grant_recalc_cycles; + + /** + * @grant_recalc_loops: cumulative number of times homa_grant_recalc + * has looped back to recalculate again. + */ + __u64 grant_recalc_loops; + + /** + * @grant_recalc_skips: cumulative number of times that + * homa_grant_recalc skipped its work because in other thread + * already did it. + */ + __u64 grant_recalc_skips; + + /** + * @grant_priority_bumps: cumulative number of times the grant priority + * of an RPC has increased above its next-higher-priority neighbor. + */ + __u64 grant_priority_bumps; + + /** + * @fifo_grants: total number of times that grants were sent to + * the oldest message. + */ + __u64 fifo_grants; + + /** + * @fifo_grants_no_incoming: total number of times that, when a + * FIFO grant was issued, the message had no outstanding grants + * (everything granted had been received). + */ + __u64 fifo_grants_no_incoming; + + /** + * @disabled_reaps: total number of times that the reaper couldn't + * run at all because it was disabled. 
+ */ + __u64 disabled_reaps; + + /** + * @disabled_rpc_reaps: total number of times that the reaper skipped + * an RPC because reaping was disabled for that particular RPC + */ + __u64 disabled_rpc_reaps; + + /** + * @reaper_runs: total number of times that the reaper was invoked + * and was not disabled. + */ + __u64 reaper_calls; + + /** + * @reaper_dead_skbs: incremented by hsk->dead_skbs each time that + * reaper_calls is incremented. + */ + __u64 reaper_dead_skbs; + + /** + * @forced_reaps: total number of times that homa_wait_for_message + * invoked the reaper because dead_skbs was too high. + */ + __u64 forced_reaps; + + /** + * @throttle_list_adds: total number of calls to homa_add_to_throttled. + */ + __u64 throttle_list_adds; + + /** + * @throttle_list_checks: number of list elements examined in + * calls to homa_add_to_throttled. + */ + __u64 throttle_list_checks; + + /** + * @unacked_overflows: total number of times that homa_peer_add_ack + * found insufficient space for the new id and hence had to send an + * ACK message. + */ + __u64 ack_overflows; + + /** + * @ignored_need_acks: total number of times that a NEED_ACK packet + * was ignored because the RPC's result hadn't been fully received. + */ + __u64 ignored_need_acks; + + /** + * @bpage_resuses: total number of times that, when an owned page + * reached the end, it could be reused because all existing + * allocations had been released. + */ + __u64 bpage_reuses; + + /** + * @buffer_alloc_failures: total number of times that + * homa_pool_allocate was unable to allocate buffer space for + * an incoming message. + */ + __u64 buffer_alloc_failures; + + /** + * @linux_pkt_alloc_bytes: total bytes allocated in new packet buffers + * by the NIC driver because of packet cache underflows. + */ + __u64 linux_pkt_alloc_bytes; + + /** + * @dropped_data_no_bufs: total bytes of incoming data dropped because + * there was no application buffer space available. 
+ */ + __u64 dropped_data_no_bufs; + + /** + * @gen3_handoffs: total number of handoffs from GRO to SoftIRQ made + * by Gen3 load balancer. + */ + __u64 gen3_handoffs; + + /** + * @gen3_alt_handoffs: total number of GRO->SoftIRQ handoffs that + * didn't choose the primary SoftIRQ core because it was busy with + * app threads. + */ + __u64 gen3_alt_handoffs; + + /** + * @gro_grant_bypasses: total number of GRANT packets passed directly + * to homa_softirq by homa_gro_receive, bypassing the normal SoftIRQ + * mechanism (triggered by HOMA_GRO_FAST_GRANTS). + */ + __u64 gro_grant_bypasses; + + /** + * @gro_data_bypasses: total number of DATA packets passed directly + * to homa_softirq by homa_gro_receive, bypassing the normal SoftIRQ + * mechanism (triggered by HOMA_GRO_SHORT_BYPASS). + */ + __u64 gro_data_bypasses; + + /** @temp: For temporary use during testing. */ +#define NUM_TEMP_METRICS 10 + __u64 temp[NUM_TEMP_METRICS]; +}; + +DECLARE_PER_CPU(struct homa_metrics, homa_metrics); + +/** + * per_cpu_metrics() - Return the metrics structure for the current core. + * This is unsynchronized and doesn't guarantee non-preemption. + */ +static inline struct homa_metrics *homa_metrics_per_cpu(void) +{ + return &per_cpu(homa_metrics, raw_smp_processor_id()); +} + +/* It isn't necessary to disable preemption here, because we don't need + * perfect synchronization: if the invoking thread is moved to a + * different core and races with an INC_METRIC there, the worst that + * happens is that one of the INC_METRICs is lost, which isn't a big deal. 
+ */ +#define INC_METRIC(metric, count) per_cpu(homa_metrics, \ + raw_smp_processor_id()).metric+= (count) + +extern void homa_metric_append(struct homa *homa, const char *format, ...); +extern loff_t homa_metrics_lseek(struct file *file, loff_t offset, + int whence); +extern int homa_metrics_open(struct inode *inode, struct file *file); +extern char *homa_metrics_print(struct homa *homa); +extern ssize_t homa_metrics_read(struct file *file, char __user *buffer, + size_t length, loff_t *offset); +extern int homa_metrics_release(struct inode *inode, struct file *file); +extern int homa_proc_read_metrics(char *buffer, char **start, off_t offset, + int count, int *eof, void *data); + +#endif /* _HOMA_METRICS_H */ \ No newline at end of file diff --git a/homa_offload.c b/homa_offload.c index 55cb8a8..eb3353c 100644 --- a/homa_offload.c +++ b/homa_offload.c @@ -262,6 +262,7 @@ struct sk_buff *homa_gro_receive(struct list_head *held_list, int busy = (now - core->last_gro) < homa->gro_busy_cycles; __u32 hash; __u64 saved_softirq_metric, softirq_cycles; + __u64 *softirq_cycles_metric; struct data_header *h_new = (struct data_header *) skb_transport_header(skb); int priority; @@ -413,10 +414,11 @@ struct sk_buff *homa_gro_receive(struct list_head *held_list, /* Record SoftIRQ cycles in a different metric to reflect that * they happened during bypass. 
*/ - saved_softirq_metric = core->metrics.softirq_cycles; + softirq_cycles_metric = &homa_metrics_per_cpu()->softirq_cycles; + saved_softirq_metric = *softirq_cycles_metric; homa_softirq(skb); - softirq_cycles = core->metrics.softirq_cycles - saved_softirq_metric; - core->metrics.softirq_cycles = saved_softirq_metric; + softirq_cycles = *softirq_cycles_metric - saved_softirq_metric; + *softirq_cycles_metric = saved_softirq_metric; INC_METRIC(bypass_softirq_cycles, softirq_cycles); core->last_gro = get_cycles(); diff --git a/homa_plumbing.c b/homa_plumbing.c index 743cb8a..a4d38eb 100644 --- a/homa_plumbing.c +++ b/homa_plumbing.c @@ -1526,89 +1526,6 @@ __poll_t homa_poll(struct file *file, struct socket *sock, return mask; } -/** - * homa_metrics_open() - This function is invoked when /proc/net/homa_metrics is - * opened. - * @inode: The inode corresponding to the file. - * @file: Information about the open file. - * - * Return: always 0. - */ -int homa_metrics_open(struct inode *inode, struct file *file) -{ - /* Collect all of the metrics when the file is opened, and save - * these for use by subsequent reads (don't want the metrics to - * change between reads). If there are concurrent opens on the - * file, only read the metrics once, during the first open, and - * use this copy for subsequent opens, until the file has been - * completely closed. - */ - spin_lock(&homa->metrics_lock); - if (homa->metrics_active_opens == 0) - homa_print_metrics(homa); - homa->metrics_active_opens++; - spin_unlock(&homa->metrics_lock); - return 0; -} - -/** - * homa_metrics_read() - This function is invoked to handle read kernel calls on - * /proc/net/homa_metrics. - * @file: Information about the file being read. - * @buffer: Address in user space of the buffer in which data from the file - * should be returned. - * @length: Number of bytes available at @buffer. - * @offset: Current read offset within the file. - * - * Return: the number of bytes returned at @buffer. 
0 means the end of the - * file was reached, and a negative number indicates an error (-errno). - */ -ssize_t homa_metrics_read(struct file *file, char __user *buffer, - size_t length, loff_t *offset) -{ - size_t copied; - - if (*offset >= homa->metrics_length) - return 0; - copied = homa->metrics_length - *offset; - if (copied > length) - copied = length; - if (copy_to_user(buffer, homa->metrics + *offset, copied)) - return -EFAULT; - *offset += copied; - return copied; -} - - -/** - * homa_metrics_lseek() - This function is invoked to handle seeks on - * /proc/net/homa_metrics. Right now seeks are ignored: the file must be - * read sequentially. - * @file: Information about the file being read. - * @offset: Distance to seek, in bytes - * @whence: Starting point from which to measure the distance to seek. - */ -loff_t homa_metrics_lseek(struct file *file, loff_t offset, int whence) -{ - return 0; -} - -/** - * homa_metrics_release() - This function is invoked when the last reference to - * an open /proc/net/homa_metrics is closed. It performs cleanup. - * @inode: The inode corresponding to the file. - * @file: Information about the open file. - * - * Return: always 0. - */ -int homa_metrics_release(struct inode *inode, struct file *file) -{ - spin_lock(&homa->metrics_lock); - homa->metrics_active_opens--; - spin_unlock(&homa->metrics_lock); - return 0; -} - /** * homa_dointvec() - This function is a wrapper around proc_dointvec. 
It is * invoked to read and write sysctl values and also update other values diff --git a/homa_timer.c b/homa_timer.c index f6f4b55..aefd658 100644 --- a/homa_timer.c +++ b/homa_timer.c @@ -169,7 +169,7 @@ void homa_timer(struct homa *homa) total_grants = 0; for (core = 0; core < nr_cpu_ids; core++) { - struct homa_metrics *m = &homa_cores[core]->metrics; + struct homa_metrics *m = homa_metrics_per_cpu(); total_grants += m->packets_sent[GRANT-DATA]; } diff --git a/homa_utils.c b/homa_utils.c index df9cd0b..afecba5 100644 --- a/homa_utils.c +++ b/homa_utils.c @@ -88,7 +88,6 @@ int homa_init(struct homa *homa) core->page_inuse = 0; core->page_size = 0; core->num_stashed_pages = 0; - memset(&core->metrics, 0, sizeof(core->metrics)); } } @@ -1477,446 +1476,6 @@ char *homa_symbol_for_type(uint8_t type) return buffer; } -/** - * homa_append_metric() - Formats a new metric and appends it to homa->metrics. - * @homa: The new data will appended to the @metrics field of - * this structure. - * @format: Standard printf-style format string describing the - * new metric. Arguments after this provide the usual - * values expected for printf-like functions. - */ -void homa_append_metric(struct homa *homa, const char *format, ...) -{ - char *new_buffer; - size_t new_chars; - va_list ap; - - if (!homa->metrics) { -#ifdef __UNIT_TEST__ - homa->metrics_capacity = 30; -#else - homa->metrics_capacity = 4096; -#endif - homa->metrics = kmalloc(homa->metrics_capacity, GFP_KERNEL); - if (!homa->metrics) { - pr_warn("%s couldn't allocate memory\n", __func__); - return; - } - homa->metrics_length = 0; - } - - /* May have to execute this loop multiple times if we run out - * of space in homa->metrics; each iteration expands the storage, - * until eventually it is large enough. 
- */ - while (true) { - va_start(ap, format); - new_chars = vsnprintf(homa->metrics + homa->metrics_length, - homa->metrics_capacity - homa->metrics_length, - format, ap); - va_end(ap); - if ((homa->metrics_length + new_chars) < homa->metrics_capacity) - break; - - /* Not enough room; expand buffer capacity. */ - homa->metrics_capacity *= 2; - new_buffer = kmalloc(homa->metrics_capacity, GFP_KERNEL); - if (!new_buffer) { - pr_warn("%s couldn't allocate memory\n", __func__); - return; - } - memcpy(new_buffer, homa->metrics, homa->metrics_length); - kfree(homa->metrics); - homa->metrics = new_buffer; - } - homa->metrics_length += new_chars; -} - -/** - * homa_print_metrics() - Sample all of the Homa performance metrics and - * generate a human-readable string describing all of them. - * @homa: Overall data about the Homa protocol implementation; - * the formatted string will be stored in homa->metrics. - * - * Return: The formatted string. - */ -char *homa_print_metrics(struct homa *homa) -{ - int core, i, lower = 0; - - homa->metrics_length = 0; - homa_append_metric(homa, - "rdtsc_cycles %20llu RDTSC cycle counter when metrics were gathered\n", - get_cycles()); - homa_append_metric(homa, - "cpu_khz %15llu Clock rate for RDTSC counter, in khz\n", - cpu_khz); - for (core = 0; core < nr_cpu_ids; core++) { - struct homa_metrics *m = &homa_cores[core]->metrics; - __s64 delta; - - homa_append_metric(homa, - "core %15d Core id for following metrics\n", - core); - for (i = 0; i < HOMA_NUM_SMALL_COUNTS; i++) { - homa_append_metric(homa, - "msg_bytes_%-9d %15llu Bytes in incoming messages containing %d-%d bytes\n", - (i+1)*64, m->small_msg_bytes[i], lower, - (i+1)*64); - lower = (i+1)*64 + 1; - } - for (i = (HOMA_NUM_SMALL_COUNTS*64)/1024; - i < HOMA_NUM_MEDIUM_COUNTS; i++) { - homa_append_metric(homa, - "msg_bytes_%-9d %15llu Bytes in incoming messages containing %d-%d bytes\n", - (i+1)*1024, m->medium_msg_bytes[i], lower, - (i+1)*1024); - lower = (i+1)*1024 + 1; - } - 
homa_append_metric(homa, - "large_msg_count %15llu # of incoming messages >= %d bytes\n", - m->large_msg_count, lower); - homa_append_metric(homa, - "large_msg_bytes %15llu Bytes in incoming messages >= %d bytes\n", - m->large_msg_bytes, lower); - homa_append_metric(homa, - "sent_msg_bytes %15llu otal bytes in all outgoing messages\n", - m->sent_msg_bytes); - for (i = DATA; i < BOGUS; i++) { - char *symbol = homa_symbol_for_type(i); - - homa_append_metric(homa, - "packets_sent_%-7s %15llu %s packets sent\n", - symbol, m->packets_sent[i-DATA], - symbol); - } - for (i = DATA; i < BOGUS; i++) { - char *symbol = homa_symbol_for_type(i); - - homa_append_metric(homa, - "packets_rcvd_%-7s %15llu %s packets received\n", - symbol, m->packets_received[i-DATA], - symbol); - } - for (i = 0; i < HOMA_MAX_PRIORITIES; i++) { - homa_append_metric(homa, - "priority%d_bytes %15llu Bytes sent at priority %d (including headers)\n", - i, m->priority_bytes[i], i); - } - for (i = 0; i < HOMA_MAX_PRIORITIES; i++) { - homa_append_metric(homa, - "priority%d_packets %15llu Packets sent at priority %d\n", - i, m->priority_packets[i], i); - } - homa_append_metric(homa, - "skb_allocs %15llu sk_buffs allocated\n", - m->skb_allocs); - homa_append_metric(homa, - "skb_alloc_cycles %15llu Time spent allocating sk_buffs\n", - m->skb_alloc_cycles); - homa_append_metric(homa, - "skb_frees %15llu Data sk_buffs freed in normal paths\n", - m->skb_frees); - homa_append_metric(homa, - "skb_free_cycles %15llu Time spent freeing data sk_buffs\n", - m->skb_free_cycles); - homa_append_metric(homa, - "skb_page_allocs %15llu Pages allocated for sk_buff frags\n", - m->skb_page_allocs); - homa_append_metric(homa, - "skb_page_alloc_cycles %15llu Time spent allocating pages for sk_buff frags\n", - m->skb_page_alloc_cycles); - homa_append_metric(homa, - "requests_received %15llu Incoming request messages\n", - m->requests_received); - homa_append_metric(homa, - "requests_queued %15llu Requests for which no thread was 
waiting\n", - m->requests_queued); - homa_append_metric(homa, - "responses_received %15llu Incoming response messages\n", - m->responses_received); - homa_append_metric(homa, - "responses_queued %15llu Responses for which no thread was waiting\n", - m->responses_queued); - homa_append_metric(homa, - "fast_wakeups %15llu Messages received while polling\n", - m->fast_wakeups); - homa_append_metric(homa, - "slow_wakeups %15llu Messages received after thread went to sleep\n", - m->slow_wakeups); - homa_append_metric(homa, - "handoffs_thread_waiting %15llu RPC handoffs to waiting threads (vs. queue)\n", - m->handoffs_thread_waiting); - homa_append_metric(homa, - "handoffs_alt_thread %15llu RPC handoffs not to first on list (avoid busy core)\n", - m->handoffs_alt_thread); - homa_append_metric(homa, - "poll_cycles %15llu Time spent polling for incoming messages\n", - m->poll_cycles); - homa_append_metric(homa, - "softirq_calls %15llu Calls to homa_softirq (i.e. # GRO pkts received)\n", - m->softirq_calls); - homa_append_metric(homa, - "softirq_cycles %15llu Time spent in homa_softirq during SoftIRQ\n", - m->softirq_cycles); - homa_append_metric(homa, - "bypass_softirq_cycles %15llu Time spent in homa_softirq during bypass from GRO\n", - m->bypass_softirq_cycles); - homa_append_metric(homa, - "linux_softirq_cycles %15llu Time spent in all Linux SoftIRQ\n", - m->linux_softirq_cycles); - homa_append_metric(homa, - "napi_cycles %15llu Time spent in NAPI-level packet handling\n", - m->napi_cycles); - homa_append_metric(homa, - "send_cycles %15llu Time spent in homa_sendmsg for requests\n", - m->send_cycles); - homa_append_metric(homa, - "send_calls %15llu Total invocations of homa_sendmsg for equests\n", - m->send_calls); - // It is possible for us to get here at a time when a - // thread has been blocked for a long time and has - // recorded blocked_cycles, but hasn't finished the - // system call so recv_cycles hasn't been incremented - // yet. 
If that happens, just record 0 to prevent - // underflow errors. - delta = m->recv_cycles - m->blocked_cycles; - if (delta < 0) - delta = 0; - homa_append_metric(homa, - "recv_cycles %15llu Unblocked time spent in recvmsg kernel call\n", - delta); - homa_append_metric(homa, - "recv_calls %15llu Total invocations of recvmsg kernel call\n", - m->recv_calls); - homa_append_metric(homa, - "blocked_cycles %15llu Time spent blocked in homa_recvmsg\n", - m->blocked_cycles); - homa_append_metric(homa, - "reply_cycles %15llu Time spent in homa_sendmsg for responses\n", - m->reply_cycles); - homa_append_metric(homa, - "reply_calls %15llu Total invocations of homa_sendmsg for responses\n", - m->reply_calls); - homa_append_metric(homa, - "abort_cycles %15llu Time spent in homa_ioc_abort kernel call\n", - m->reply_cycles); - homa_append_metric(homa, - "abort_calls %15llu Total invocations of abort kernel call\n", - m->reply_calls); - homa_append_metric(homa, - "so_set_buf_cycles %15llu Time spent in setsockopt SO_HOMA_SET_BUF\n", - m->so_set_buf_cycles); - homa_append_metric(homa, - "so_set_buf_calls %15llu Total invocations of setsockopt SO_HOMA_SET_BUF\n", - m->so_set_buf_calls); - homa_append_metric(homa, - "grantable_lock_cycles %15llu Time spent with homa->grantable_lock locked\n", - m->grantable_lock_cycles); - homa_append_metric(homa, - "timer_cycles %15llu Time spent in homa_timer\n", - m->timer_cycles); - homa_append_metric(homa, - "timer_reap_cycles %15llu Time in homa_timer spent reaping RPCs\n", - m->timer_reap_cycles); - homa_append_metric(homa, - "data_pkt_reap_cycles %15llu Time in homa_data_pkt spent reaping RPCs\n", - m->data_pkt_reap_cycles); - homa_append_metric(homa, - "pacer_cycles %15llu Time spent in homa_pacer_main\n", - m->pacer_cycles); - homa_append_metric(homa, - "homa_cycles %15llu Total time in all Homa-related functions\n", - m->softirq_cycles + m->napi_cycles + - m->send_cycles + m->recv_cycles + - m->reply_cycles - m->blocked_cycles + - 
m->timer_cycles + m->pacer_cycles); - homa_append_metric(homa, - "pacer_lost_cycles %15llu Lost transmission time because pacer was slow\n", - m->pacer_lost_cycles); - homa_append_metric(homa, - "pacer_bytes %15llu Bytes transmitted when the pacer was active\n", - m->pacer_bytes); - homa_append_metric(homa, - "pacer_skipped_rpcs %15llu Pacer aborts because of locked RPCs\n", - m->pacer_skipped_rpcs); - homa_append_metric(homa, - "pacer_needed_help %15llu homa_pacer_xmit invocations from homa_check_pacer\n", - m->pacer_needed_help); - homa_append_metric(homa, - "throttled_cycles %15llu Time when the throttled queue was nonempty\n", - m->throttled_cycles); - homa_append_metric(homa, - "resent_packets %15llu DATA packets sent in response to RESENDs\n", - m->resent_packets); - homa_append_metric(homa, - "peer_hash_links %15llu Hash chain link traversals in peer table\n", - m->peer_hash_links); - homa_append_metric(homa, - "peer_new_entries %15llu New entries created in peer table\n", - m->peer_new_entries); - homa_append_metric(homa, - "peer_kmalloc_errors %15llu kmalloc failures creating peer table entries\n", - m->peer_kmalloc_errors); - homa_append_metric(homa, - "peer_route_errors %15llu Routing failures creating peer table entries\n", - m->peer_route_errors); - homa_append_metric(homa, - "control_xmit_errors %15llu Errors sending control packets\n", - m->control_xmit_errors); - homa_append_metric(homa, - "data_xmit_errors %15llu Errors sending data packets\n", - m->data_xmit_errors); - homa_append_metric(homa, - "unknown_rpcs %15llu Non-grant packets discarded because RPC unknown\n", - m->unknown_rpcs); - homa_append_metric(homa, - "server_cant_create_rpcs %15llu Packets discarded because server couldn't create RPC\n", - m->server_cant_create_rpcs); - homa_append_metric(homa, - "unknown_packet_types %15llu Packets discarded because of unsupported type\n", - m->unknown_packet_types); - homa_append_metric(homa, - "short_packets %15llu Packets discarded because too 
short\n", - m->short_packets); - homa_append_metric(homa, - "packet_discards %15llu Non-resent packets discarded because data already received\n", - m->packet_discards); - homa_append_metric(homa, - "resent_discards %15llu Resent packets discarded because data already received\n", - m->resent_discards); - homa_append_metric(homa, - "resent_packets_used %15llu Retransmitted packets that were actually used\n", - m->resent_packets_used); - homa_append_metric(homa, - "rpc_timeouts %15llu RPCs aborted because peer was nonresponsive\n", - m->rpc_timeouts); - homa_append_metric(homa, - "server_rpc_discards %15llu RPCs discarded by server because of errors\n", - m->server_rpc_discards); - homa_append_metric(homa, - "server_rpcs_unknown %15llu RPCs aborted by server because unknown to client\n", - m->server_rpcs_unknown); - homa_append_metric(homa, - "client_lock_misses %15llu Bucket lock misses for client RPCs\n", - m->client_lock_misses); - homa_append_metric(homa, - "client_lock_miss_cycles %15llu Time lost waiting for client bucket locks\n", - m->client_lock_miss_cycles); - homa_append_metric(homa, - "server_lock_misses %15llu Bucket lock misses for server RPCs\n", - m->server_lock_misses); - homa_append_metric(homa, - "server_lock_miss_cycles %15llu Time lost waiting for server bucket locks\n", - m->server_lock_miss_cycles); - homa_append_metric(homa, - "socket_lock_misses %15llu Socket lock misses\n", - m->socket_lock_misses); - homa_append_metric(homa, - "socket_lock_miss_cycles %15llu Time lost waiting for socket locks\n", - m->socket_lock_miss_cycles); - homa_append_metric(homa, - "throttle_lock_misses %15llu Throttle lock misses\n", - m->throttle_lock_misses); - homa_append_metric(homa, - "throttle_lock_miss_cycles %15llu Time lost waiting for throttle locks\n", - m->throttle_lock_miss_cycles); - homa_append_metric(homa, - "peer_ack_lock_misses %15llu Misses on peer ack locks\n", - m->peer_ack_lock_misses); - homa_append_metric(homa, - "peer_ack_lock_miss_cycles 
%15llu Time lost waiting for peer ack locks\n", - m->peer_ack_lock_miss_cycles); - homa_append_metric(homa, - "grantable_lock_misses %15llu Grantable lock misses\n", - m->grantable_lock_misses); - homa_append_metric(homa, - "grantable_lock_miss_cycles%15llu Time lost waiting for grantable lock\n", - m->grantable_lock_miss_cycles); - homa_append_metric(homa, - "grantable_rpcs_integral %15llu Integral of homa->num_grantable_rpcs*dt\n", - m->grantable_rpcs_integral); - homa_append_metric(homa, - "grant_recalc_calls %15llu Number of calls to homa_grant_recalc\n", - m->grant_recalc_calls); - homa_append_metric(homa, - "grant_recalc_cycles %15llu Time spent in homa_grant_recalc\n", - m->grant_recalc_cycles); - homa_append_metric(homa, - "grant_recalc_skips %15llu Number of times homa_grant_recalc skipped redundant work\n", - m->grant_recalc_skips); - homa_append_metric(homa, - "grant_recalc_loops %15llu Number of times homa_grant_recalc looped back\n", - m->grant_recalc_loops); - homa_append_metric(homa, - "grant_priority_bumps %15llu Number of times an RPC moved up in the grant priority order\n", - m->grant_priority_bumps); - homa_append_metric(homa, - "fifo_grants %15llu Grants issued using FIFO priority\n", - m->fifo_grants); - homa_append_metric(homa, - "fifo_grants_no_incoming %15llu FIFO grants to messages with no outstanding grants\n", - m->fifo_grants_no_incoming); - homa_append_metric(homa, - "disabled_reaps %15llu Reaper invocations that were disabled\n", - m->disabled_reaps); - homa_append_metric(homa, - "disabled_rpc_reaps %15llu Disabled RPCs skipped by reaper\n", - m->disabled_rpc_reaps); - homa_append_metric(homa, - "reaper_calls %15llu Reaper invocations that were not disabled\n", - m->reaper_calls); - homa_append_metric(homa, - "reaper_dead_skbs %15llu Sum of hsk->dead_skbs across all reaper alls\n", - m->reaper_dead_skbs); - homa_append_metric(homa, - "forced_reaps %15llu Reaps forced by accumulation of dead RPCs\n", - m->forced_reaps); - 
homa_append_metric(homa, - "throttle_list_adds %15llu Calls to homa_add_to_throttled\n", - m->throttle_list_adds); - homa_append_metric(homa, - "throttle_list_checks %15llu List elements checked in homa_add_to_throttled\n", - m->throttle_list_checks); - homa_append_metric(homa, - "ack_overflows %15llu Explicit ACKs sent because peer->acks was full\n", - m->ack_overflows); - homa_append_metric(homa, - "ignored_need_acks %15llu NEED_ACKs ignored because RPC result not yet received\n", - m->ignored_need_acks); - homa_append_metric(homa, - "bpage_reuses %15llu Buffer page could be reused because ref count was zero\n", - m->bpage_reuses); - homa_append_metric(homa, - "buffer_alloc_failures %15llu homa_pool_allocate didn't find enough buffer space for an RPC\n", - m->buffer_alloc_failures); - homa_append_metric(homa, - "linux_pkt_alloc_bytes %15llu Bytes allocated in new packets by NIC driver due to cache overflows\n", - m->linux_pkt_alloc_bytes); - homa_append_metric(homa, - "dropped_data_no_bufs %15llu Data bytes dropped because app buffers full\n", - m->dropped_data_no_bufs); - homa_append_metric(homa, - "gen3_handoffs %15llu GRO->SoftIRQ handoffs made by Gen3 balancer\n", - m->gen3_handoffs); - homa_append_metric(homa, - "gen3_alt_handoffs %15llu Gen3 handoffs to secondary core (primary was busy)\n", - m->gen3_alt_handoffs); - homa_append_metric(homa, - "gro_grant_bypasses %15llu Grant packets passed directly to homa_softirq by homa_gro_receive\n", - m->gro_grant_bypasses); - homa_append_metric(homa, - "gro_data_bypasses %15llu Data packets passed directly to homa_softirq by homa_gro_receive\n", - m->gro_data_bypasses); - for (i = 0; i < NUM_TEMP_METRICS; i++) - homa_append_metric(homa, - "temp%-2d %15llu Temporary use in testing\n", - i, m->temp[i]); - } - - return homa->metrics; -} - /** * homa_prios_changed() - This function is called whenever configuration * information related to priorities, such as @homa->unsched_cutoffs or diff --git a/homa_wire.h 
b/homa_wire.h index a6314db..da9c41b 100644 --- a/homa_wire.h +++ b/homa_wire.h @@ -5,6 +5,8 @@ #ifndef _HOMA_WIRE_H #define _HOMA_WIRE_H +#include <linux/types.h> + /** * enum homa_packet_type - Defines the possible types of Homa packets. * diff --git a/test/Makefile b/test/Makefile index 8571b6b..bf03aa9 100644 --- a/test/Makefile +++ b/test/Makefile @@ -40,6 +40,7 @@ CCFLAGS := -std=c++11 $(WARNS) -MD -g $(CCINCLUDES) $(DEFS) -fsanitize=address TEST_SRCS := unit_homa_grant.c \ unit_homa_incoming.c \ unit_homa_offload.c \ + unit_homa_metrics.c \ unit_homa_outgoing.c \ unit_homa_peertab.c \ unit_homa_pool.c \ @@ -53,6 +54,7 @@ TEST_OBJS := $(patsubst %.c,%.o,$(TEST_SRCS)) HOMA_SRCS := homa_grant.c \ homa_incoming.c \ + homa_metrics.c \ homa_offload.c \ homa_outgoing.c \ homa_peertab.c \ diff --git a/test/mock.c b/test/mock.c index 686e902..8850941 100644 --- a/test/mock.c +++ b/test/mock.c @@ -1542,6 +1542,8 @@ void mock_teardown(void) mock_active_rcu_locks); mock_active_rcu_locks = 0; + memset(homa_metrics, 0, sizeof(homa_metrics)); + unit_hook_clear(); } diff --git a/test/unit_homa_grant.c b/test/unit_homa_grant.c index ce97e7d..6e8d51a 100644 --- a/test/unit_homa_grant.c +++ b/test/unit_homa_grant.c @@ -192,7 +192,7 @@ TEST_F(homa_grant, homa_grant_add_rpc__update_metrics) mock_cycles = 200; test_rpc(self, 100, self->server_ip, 100000); EXPECT_EQ(4, self->homa.num_grantable_rpcs); - EXPECT_EQ(300, core_metrics.grantable_rpcs_integral); + EXPECT_EQ(300, homa_metrics_per_cpu()->grantable_rpcs_integral); EXPECT_EQ(200, self->homa.last_grantable_change); } TEST_F(homa_grant, homa_grant_add_rpc__insert_in_peer_list) @@ -335,7 +335,7 @@ TEST_F(homa_grant, homa_grant_remove_rpc__update_metrics) homa_grant_remove_rpc(rpc); EXPECT_EQ(2, self->homa.num_grantable_rpcs); - EXPECT_EQ(300, core_metrics.grantable_rpcs_integral); + EXPECT_EQ(300, homa_metrics_per_cpu()->grantable_rpcs_integral); EXPECT_EQ(200, self->homa.last_grantable_change); } TEST_F(homa_grant, 
homa_grant_remove_rpc__not_first_in_peer_list) @@ -739,7 +739,7 @@ TEST_F(homa_grant, homa_grant_recalc__basics) EXPECT_EQ(2, atomic_read(&rpc2->msgin.rank)); EXPECT_EQ(-1, atomic_read(&rpc4->msgin.rank)); - EXPECT_NE(0, core_metrics.grant_recalc_cycles); + EXPECT_NE(0, homa_metrics_per_cpu()->grant_recalc_cycles); } TEST_F(homa_grant, homa_grant_recalc__already_locked) { @@ -763,7 +763,7 @@ TEST_F(homa_grant, homa_grant_recalc__skip_recalc) EXPECT_STREQ("", unit_log_get()); EXPECT_EQ(0, rpc->msgin.granted); EXPECT_EQ(2, atomic_read(&self->homa.grant_recalc_count)); - EXPECT_EQ(1, core_metrics.grant_recalc_skips); + EXPECT_EQ(1, homa_metrics_per_cpu()->grant_recalc_skips); } TEST_F(homa_grant, homa_grant_recalc__clear_existing_active_rpcs) { @@ -871,14 +871,14 @@ TEST_F(homa_grant, homa_grant_recalc__rpc_fully_granted_but_skip_recalc) unit_hook_register(grantable_spinlock_hook); hook_homa = &self->homa; mock_trylock_errors = 0xfe0; - EXPECT_EQ(0, core_metrics.grant_recalc_skips); + EXPECT_EQ(0, homa_metrics_per_cpu()->grant_recalc_skips); homa_grant_recalc(&self->homa, 0); EXPECT_EQ(10000, rpc1->msgin.granted); EXPECT_EQ(10000, rpc2->msgin.granted); EXPECT_EQ(0, rpc3->msgin.granted); EXPECT_EQ(0, rpc4->msgin.granted); - EXPECT_EQ(1, core_metrics.grant_recalc_skips); + EXPECT_EQ(1, homa_metrics_per_cpu()->grant_recalc_skips); } TEST_F(homa_grant, homa_grant_pick_rpcs__basics) @@ -1073,8 +1073,8 @@ TEST_F(homa_grant, homa_grantable_lock_slow__basics) EXPECT_EQ(1, homa_grantable_lock_slow(&self->homa, 0)); homa_grantable_unlock(&self->homa); - EXPECT_EQ(1, core_metrics.grantable_lock_misses); - EXPECT_EQ(500, core_metrics.grantable_lock_miss_cycles); + EXPECT_EQ(1, homa_metrics_per_cpu()->grantable_lock_misses); + EXPECT_EQ(500, homa_metrics_per_cpu()->grantable_lock_miss_cycles); } TEST_F(homa_grant, homa_grantable_lock_slow__recalc_count) { @@ -1086,12 +1086,12 @@ TEST_F(homa_grant, homa_grantable_lock_slow__recalc_count) EXPECT_EQ(0, 
homa_grantable_lock_slow(&self->homa, 1)); hook_homa = NULL; - EXPECT_EQ(1, core_metrics.grantable_lock_misses); - EXPECT_EQ(500, core_metrics.grantable_lock_miss_cycles); + EXPECT_EQ(1, homa_metrics_per_cpu()->grantable_lock_misses); + EXPECT_EQ(500, homa_metrics_per_cpu()->grantable_lock_miss_cycles); /* Make sure the check only occurs if the recalc argument is set. */ mock_trylock_errors = 0xff; EXPECT_EQ(1, homa_grantable_lock_slow(&self->homa, 0)); - EXPECT_EQ(2, core_metrics.grantable_lock_misses); + EXPECT_EQ(2, homa_metrics_per_cpu()->grantable_lock_misses); homa_grantable_unlock(&self->homa); } \ No newline at end of file diff --git a/test/unit_homa_incoming.c b/test/unit_homa_incoming.c index 4b7f306..dbfc6f0 100644 --- a/test/unit_homa_incoming.c +++ b/test/unit_homa_incoming.c @@ -222,11 +222,11 @@ TEST_F(homa_incoming, homa_message_in_init__update_metrics) EXPECT_EQ(0, homa_message_in_init(crpc, 0x3000, 0)); EXPECT_EQ(0, homa_message_in_init(crpc, 1000000, 0)); EXPECT_EQ(0, homa_message_in_init(crpc, 900000, 0)); - EXPECT_EQ(270, core_metrics.small_msg_bytes[2]); - EXPECT_EQ(0xfff, core_metrics.small_msg_bytes[63]); - EXPECT_EQ(0x3000, core_metrics.medium_msg_bytes[11]); - EXPECT_EQ(0, core_metrics.medium_msg_bytes[15]); - EXPECT_EQ(1900000, core_metrics.large_msg_bytes); + EXPECT_EQ(270, homa_metrics_per_cpu()->small_msg_bytes[2]); + EXPECT_EQ(0xfff, homa_metrics_per_cpu()->small_msg_bytes[63]); + EXPECT_EQ(0x3000, homa_metrics_per_cpu()->medium_msg_bytes[11]); + EXPECT_EQ(0, homa_metrics_per_cpu()->medium_msg_bytes[15]); + EXPECT_EQ(1900000, homa_metrics_per_cpu()->large_msg_bytes); } TEST_F(homa_incoming, homa_gap_retry) @@ -567,21 +567,21 @@ TEST_F(homa_incoming, homa_add_packet__metrics) homa_add_packet(crpc, mock_skb_new(self->client_ip, &self->data.common, 1400, 0)); EXPECT_EQ(0, skb_queue_len(&crpc->msgin.packets)); - EXPECT_EQ(0, core_metrics.resent_discards); - EXPECT_EQ(1, core_metrics.packet_discards); + EXPECT_EQ(0, 
homa_metrics_per_cpu()->resent_discards); + EXPECT_EQ(1, homa_metrics_per_cpu()->packet_discards); self->data.retransmit = 1; homa_add_packet(crpc, mock_skb_new(self->client_ip, &self->data.common, 1400, 0)); EXPECT_EQ(0, skb_queue_len(&crpc->msgin.packets)); - EXPECT_EQ(1, core_metrics.resent_discards); - EXPECT_EQ(1, core_metrics.packet_discards); + EXPECT_EQ(1, homa_metrics_per_cpu()->resent_discards); + EXPECT_EQ(1, homa_metrics_per_cpu()->packet_discards); self->data.seg.offset = htonl(4200); homa_add_packet(crpc, mock_skb_new(self->client_ip, &self->data.common, 1400, 4200)); EXPECT_EQ(1, skb_queue_len(&crpc->msgin.packets)); - EXPECT_EQ(1, core_metrics.resent_packets_used); + EXPECT_EQ(1, homa_metrics_per_cpu()->resent_packets_used); } TEST_F(homa_incoming, homa_copy_to_user__basics) @@ -859,7 +859,7 @@ TEST_F(homa_incoming, homa_dispatch_pkts__cant_create_server_rpc) 1400, 0), &self->homa); EXPECT_EQ(0, unit_list_length(&self->hsk.active_rpcs)); EXPECT_EQ(0, mock_skb_count()); - EXPECT_EQ(1, core_metrics.server_cant_create_rpcs); + EXPECT_EQ(1, homa_metrics_per_cpu()->server_cant_create_rpcs); } TEST_F(homa_incoming, homa_dispatch_pkts__existing_server_rpc) { @@ -922,7 +922,7 @@ TEST_F(homa_incoming, homa_dispatch_pkts__unknown_client_rpc) mock_xmit_log_verbose = 1; homa_dispatch_pkts(mock_skb_new(self->client_ip, &h.common, 0, 0), &self->homa); - EXPECT_EQ(1, core_metrics.unknown_rpcs); + EXPECT_EQ(1, homa_metrics_per_cpu()->unknown_rpcs); } TEST_F(homa_incoming, homa_dispatch_pkts__unknown_server_rpc) { @@ -933,7 +933,7 @@ TEST_F(homa_incoming, homa_dispatch_pkts__unknown_server_rpc) mock_xmit_log_verbose = 1; homa_dispatch_pkts(mock_skb_new(self->client_ip, &h.common, 0, 0), &self->homa); - EXPECT_EQ(0, core_metrics.unknown_rpcs); + EXPECT_EQ(0, homa_metrics_per_cpu()->unknown_rpcs); } TEST_F(homa_incoming, homa_dispatch_pkts__cutoffs_for_unknown_client_rpc) { @@ -1009,7 +1009,7 @@ TEST_F(homa_incoming, homa_dispatch_pkts__unknown_type) .dport = 
htons(self->hsk.port), .sender_id = cpu_to_be64(self->server_id), .type = 99}; homa_dispatch_pkts(mock_skb_new(self->client_ip, &h, 0, 0), &self->homa); - EXPECT_EQ(1, core_metrics.unknown_packet_types); + EXPECT_EQ(1, homa_metrics_per_cpu()->unknown_packet_types); } TEST_F(homa_incoming, homa_dispatch_pkts__handle_ack) { @@ -1077,7 +1077,7 @@ TEST_F(homa_incoming, homa_dispatch_pkts__forced_reap) homa_dispatch_pkts(mock_skb_new(self->client_ip, &self->data.common, 1400, 0), &self->homa); EXPECT_EQ(31, self->hsk.dead_skbs); - EXPECT_EQ(0, core_metrics.data_pkt_reap_cycles); + EXPECT_EQ(0, homa_metrics_per_cpu()->data_pkt_reap_cycles); /* Second packet: must reap. */ self->homa.dead_buffs_limit = 15; @@ -1085,7 +1085,7 @@ TEST_F(homa_incoming, homa_dispatch_pkts__forced_reap) homa_dispatch_pkts(mock_skb_new(self->client_ip, &self->data.common, 1400, 0), &self->homa); EXPECT_EQ(21, self->hsk.dead_skbs); - EXPECT_NE(0, core_metrics.data_pkt_reap_cycles); + EXPECT_NE(0, homa_metrics_per_cpu()->data_pkt_reap_cycles); } TEST_F(homa_incoming, homa_data_pkt__basics) @@ -1104,7 +1104,7 @@ TEST_F(homa_incoming, homa_data_pkt__basics) EXPECT_EQ(200, crpc->msgin.bytes_remaining); EXPECT_EQ(1, skb_queue_len(&crpc->msgin.packets)); EXPECT_EQ(1600, crpc->msgin.granted); - EXPECT_EQ(1, core_metrics.responses_received); + EXPECT_EQ(1, homa_metrics_per_cpu()->responses_received); } TEST_F(homa_incoming, homa_data_pkt__wrong_client_rpc_state) { @@ -1169,7 +1169,7 @@ TEST_F(homa_incoming, homa_data_pkt__no_buffers) atomic_set(&self->hsk.buffer_pool.free_bpages, 0); homa_data_pkt(mock_skb_new(self->server_ip, &self->data.common, 1400, 0), crpc); - EXPECT_EQ(1400, core_metrics.dropped_data_no_bufs); + EXPECT_EQ(1400, homa_metrics_per_cpu()->dropped_data_no_bufs); EXPECT_EQ(0, skb_queue_len(&crpc->msgin.packets)); } TEST_F(homa_incoming, homa_data_pkt__update_delta) @@ -1598,7 +1598,7 @@ TEST_F(homa_incoming, homa_cutoffs__cant_find_peer) struct sk_buff *skb = 
mock_skb_new(self->server_ip, &h.common, 0, 0); mock_kmalloc_errors = 1; homa_cutoffs_pkt(skb, &self->hsk); - EXPECT_EQ(1, core_metrics.peer_kmalloc_errors); + EXPECT_EQ(1, homa_metrics_per_cpu()->peer_kmalloc_errors); peer = homa_peer_find(&self->homa.peers, self->server_ip, &self->hsk.inet); ASSERT_FALSE(IS_ERR(peer)); @@ -1622,7 +1622,7 @@ TEST_F(homa_incoming, homa_need_ack_pkt__rpc_response_fully_received) &self->homa); EXPECT_STREQ("xmit ACK from 0.0.0.0:32768, dport 99, id 1234, acks", unit_log_get()); - EXPECT_EQ(1, core_metrics.packets_received[ + EXPECT_EQ(1, homa_metrics_per_cpu()->packets_received[ NEED_ACK - DATA]); } TEST_F(homa_incoming, homa_need_ack_pkt__rpc_response_not_fully_received) @@ -1641,7 +1641,7 @@ TEST_F(homa_incoming, homa_need_ack_pkt__rpc_response_not_fully_received) homa_dispatch_pkts(mock_skb_new(self->server_ip, &h.common, 0, 0), &self->homa); EXPECT_STREQ("", unit_log_get()); - EXPECT_EQ(1, core_metrics.packets_received[ + EXPECT_EQ(1, homa_metrics_per_cpu()->packets_received[ NEED_ACK - DATA]); } TEST_F(homa_incoming, homa_need_ack_pkt__rpc_not_incoming) @@ -1660,7 +1660,7 @@ TEST_F(homa_incoming, homa_need_ack_pkt__rpc_not_incoming) homa_dispatch_pkts(mock_skb_new(self->server_ip, &h.common, 0, 0), &self->homa); EXPECT_STREQ("", unit_log_get()); - EXPECT_EQ(1, core_metrics.packets_received[ + EXPECT_EQ(1, homa_metrics_per_cpu()->packets_received[ NEED_ACK - DATA]); } TEST_F(homa_incoming, homa_need_ack_pkt__rpc_doesnt_exist) @@ -1701,7 +1701,7 @@ TEST_F(homa_incoming, homa_ack_pkt__target_rpc_exists) homa_dispatch_pkts(mock_skb_new(self->client_ip, &h.common, 0, 0), &self->homa); EXPECT_EQ(0, unit_list_length(&self->hsk2.active_rpcs)); - EXPECT_EQ(1, core_metrics.packets_received[ACK - DATA]); + EXPECT_EQ(1, homa_metrics_per_cpu()->packets_received[ACK - DATA]); } TEST_F(homa_incoming, homa_ack_pkt__target_rpc_doesnt_exist) { diff --git a/test/unit_homa_metrics.c b/test/unit_homa_metrics.c new file mode 100644 index 
0000000..dc5d504 --- /dev/null +++ b/test/unit_homa_metrics.c @@ -0,0 +1,98 @@ +// SPDX-License-Identifier: BSD-2-Clause + +#include "homa_impl.h" +#define KSELFTEST_NOT_MAIN 1 +#include "kselftest_harness.h" +#include "ccutils.h" +#include "mock.h" +#include "utils.h" + +extern struct homa *homa; + +FIXTURE(homa_metrics) { + struct homa homa; +}; +FIXTURE_SETUP(homa_metrics) +{ + homa_init(&self->homa); + homa = &self->homa; +} +FIXTURE_TEARDOWN(homa_metrics) +{ + homa = NULL; + homa_destroy(&self->homa); + unit_teardown(); +} + +TEST_F(homa_metrics, homa_metric_append) +{ + self->homa.metrics_length = 0; + homa_metric_append(&self->homa, "x: %d, y: %d", 10, 20); + EXPECT_EQ(12, self->homa.metrics_length); + EXPECT_STREQ("x: 10, y: 20", self->homa.metrics); + + homa_metric_append(&self->homa, ", z: %d", 12345); + EXPECT_EQ(22, self->homa.metrics_length); + EXPECT_STREQ("x: 10, y: 20, z: 12345", self->homa.metrics); + EXPECT_EQ(30, self->homa.metrics_capacity); + + homa_metric_append(&self->homa, ", q: %050d", 88); + EXPECT_EQ(77, self->homa.metrics_length); + EXPECT_STREQ("x: 10, y: 20, z: 12345, " + "q: 00000000000000000000000000000000000000000000000088", + self->homa.metrics); + EXPECT_EQ(120, self->homa.metrics_capacity); +} +TEST_F(homa_metrics, homa_metrics_open) +{ + EXPECT_EQ(0, homa_metrics_open(NULL, NULL)); + EXPECT_NE(NULL, self->homa.metrics); + + strcpy(self->homa.metrics, "12345"); + EXPECT_EQ(0, homa_metrics_open(NULL, NULL)); + EXPECT_EQ(5, strlen(self->homa.metrics)); + EXPECT_EQ(2, self->homa.metrics_active_opens); +} +TEST_F(homa_metrics, homa_metrics_read__basics) +{ + char buffer[1000]; + loff_t offset = 10; + self->homa.metrics = kmalloc(100, GFP_KERNEL); + self->homa.metrics_capacity = 100; + strcpy(self->homa.metrics, "0123456789abcdefghijklmnop"); + self->homa.metrics_length = 26; + EXPECT_EQ(5, homa_metrics_read(NULL, buffer, 5, &offset)); + EXPECT_SUBSTR("_copy_to_user copied 5 bytes", unit_log_get()); + EXPECT_EQ(15, offset); + + 
unit_log_clear(); + EXPECT_EQ(11, homa_metrics_read(NULL, buffer, 1000, &offset)); + EXPECT_SUBSTR("_copy_to_user copied 11 bytes", unit_log_get()); + EXPECT_EQ(26, offset); + + unit_log_clear(); + EXPECT_EQ(0, homa_metrics_read(NULL, buffer, 1000, &offset)); + EXPECT_STREQ("", unit_log_get()); + EXPECT_EQ(26, offset); +} +TEST_F(homa_metrics, homa_metrics_read__error_copying_to_user) +{ + char buffer[1000]; + loff_t offset = 10; + self->homa.metrics = kmalloc(100, GFP_KERNEL); + self->homa.metrics_capacity = 100; + strcpy(self->homa.metrics, "0123456789abcdefghijklmnop"); + self->homa.metrics_length = 26; + mock_copy_to_user_errors = 1; + EXPECT_EQ(EFAULT, -homa_metrics_read(NULL, buffer, 5, &offset)); +} + +TEST_F(homa_metrics, homa_metrics_release) +{ + self->homa.metrics_active_opens = 2; + EXPECT_EQ(0, homa_metrics_release(NULL, NULL)); + EXPECT_EQ(1, self->homa.metrics_active_opens); + + EXPECT_EQ(0, homa_metrics_release(NULL, NULL)); + EXPECT_EQ(0, self->homa.metrics_active_opens); +} \ No newline at end of file diff --git a/test/unit_homa_offload.c b/test/unit_homa_offload.c index 59deb72..3b90788 100644 --- a/test/unit_homa_offload.c +++ b/test/unit_homa_offload.c @@ -267,7 +267,7 @@ TEST_F(homa_offload, homa_gro_receive__HOMA_GRO_SHORT_BYPASS) skb = mock_skb_new(&self->ip, &h.common, 1400, 2000); struct sk_buff *result = homa_gro_receive(&self->empty_list, skb); EXPECT_EQ(0, -PTR_ERR(result)); - EXPECT_EQ(0, core_metrics.gro_data_bypasses); + EXPECT_EQ(0, homa_metrics_per_cpu()->gro_data_bypasses); /* Second attempt: HOMA_GRO_SHORT_BYPASS enabled but message longer * than one packet. @@ -277,7 +277,7 @@ TEST_F(homa_offload, homa_gro_receive__HOMA_GRO_SHORT_BYPASS) skb2 = mock_skb_new(&self->ip, &h.common, 1400, 2000); result = homa_gro_receive(&self->empty_list, skb2); EXPECT_EQ(0, -PTR_ERR(result)); - EXPECT_EQ(0, core_metrics.gro_data_bypasses); + EXPECT_EQ(0, homa_metrics_per_cpu()->gro_data_bypasses); /* Third attempt: bypass should happen. 
*/ h.message_length = htonl(1400); @@ -286,14 +286,14 @@ TEST_F(homa_offload, homa_gro_receive__HOMA_GRO_SHORT_BYPASS) skb3 = mock_skb_new(&self->ip, &h.common, 1400, 4000); result = homa_gro_receive(&self->empty_list, skb3); EXPECT_EQ(EINPROGRESS, -PTR_ERR(result)); - EXPECT_EQ(1, core_metrics.gro_data_bypasses); + EXPECT_EQ(1, homa_metrics_per_cpu()->gro_data_bypasses); /* Third attempt: no bypass because core busy. */ cur_core->last_gro = 600; skb4 = mock_skb_new(&self->ip, &h.common, 1400, 4000); result = homa_gro_receive(&self->empty_list, skb3); EXPECT_EQ(0, -PTR_ERR(result)); - EXPECT_EQ(1, core_metrics.gro_data_bypasses); + EXPECT_EQ(1, homa_metrics_per_cpu()->gro_data_bypasses); kfree_skb(skb); kfree_skb(skb2); @@ -326,7 +326,7 @@ TEST_F(homa_offload, homa_gro_receive__fast_grant_optimization) struct sk_buff *skb = mock_skb_new(&client_ip, &h.common, 0, 0); struct sk_buff *result = homa_gro_receive(&self->empty_list, skb); EXPECT_EQ(0, -PTR_ERR(result)); - EXPECT_EQ(0, core_metrics.gro_grant_bypasses); + EXPECT_EQ(0, homa_metrics_per_cpu()->gro_grant_bypasses); EXPECT_STREQ("", unit_log_get()); /* Second attempt: HOMA_FAST_GRANTS is enabled. */ @@ -335,7 +335,7 @@ TEST_F(homa_offload, homa_gro_receive__fast_grant_optimization) struct sk_buff *skb2 = mock_skb_new(&client_ip, &h.common, 0, 0); result = homa_gro_receive(&self->empty_list, skb2); EXPECT_EQ(EINPROGRESS, -PTR_ERR(result)); - EXPECT_EQ(1, core_metrics.gro_grant_bypasses); + EXPECT_EQ(1, homa_metrics_per_cpu()->gro_grant_bypasses); EXPECT_SUBSTR("xmit DATA 1400@10000", unit_log_get()); /* Third attempt: core is too busy for fast grants. 
*/ @@ -343,7 +343,7 @@ TEST_F(homa_offload, homa_gro_receive__fast_grant_optimization) struct sk_buff *skb3 = mock_skb_new(&client_ip, &h.common, 0, 0); result = homa_gro_receive(&self->empty_list, skb3); EXPECT_EQ(0, -PTR_ERR(result)); - EXPECT_EQ(1, core_metrics.gro_grant_bypasses); + EXPECT_EQ(1, homa_metrics_per_cpu()->gro_grant_bypasses); kfree_skb(skb); kfree_skb(skb3); } diff --git a/test/unit_homa_outgoing.c b/test/unit_homa_outgoing.c index 83002cb..8ff9143 100644 --- a/test/unit_homa_outgoing.c +++ b/test/unit_homa_outgoing.c @@ -559,7 +559,7 @@ TEST_F(homa_outgoing, __homa_xmit_control__ipv4_error) mock_ip_queue_xmit_errors = 1; EXPECT_EQ(ENETDOWN, -homa_xmit_control(GRANT, &h, sizeof(h), srpc)); EXPECT_STREQ("", unit_log_get()); - EXPECT_EQ(1, core_metrics.control_xmit_errors); + EXPECT_EQ(1, homa_metrics_per_cpu()->control_xmit_errors); } TEST_F(homa_outgoing, __homa_xmit_control__ipv6_error) { @@ -583,7 +583,7 @@ TEST_F(homa_outgoing, __homa_xmit_control__ipv6_error) mock_ip6_xmit_errors = 1; EXPECT_EQ(ENETDOWN, -homa_xmit_control(GRANT, &h, sizeof(h), srpc)); EXPECT_STREQ("", unit_log_get()); - EXPECT_EQ(1, core_metrics.control_xmit_errors); + EXPECT_EQ(1, homa_metrics_per_cpu()->control_xmit_errors); } TEST_F(homa_outgoing, homa_xmit_unknown) @@ -763,7 +763,7 @@ TEST_F(homa_outgoing, __homa_xmit_data__ipv4_transmit_error) mock_ip_queue_xmit_errors = 1; skb_get(crpc->msgout.packets); __homa_xmit_data(crpc->msgout.packets, crpc, 5); - EXPECT_EQ(1, core_metrics.data_xmit_errors); + EXPECT_EQ(1, homa_metrics_per_cpu()->data_xmit_errors); } TEST_F(homa_outgoing, __homa_xmit_data__ipv6_transmit_error) { @@ -779,7 +779,7 @@ TEST_F(homa_outgoing, __homa_xmit_data__ipv6_transmit_error) mock_ip6_xmit_errors = 1; skb_get(crpc->msgout.packets); __homa_xmit_data(crpc->msgout.packets, crpc, 5); - EXPECT_EQ(1, core_metrics.data_xmit_errors); + EXPECT_EQ(1, homa_metrics_per_cpu()->data_xmit_errors); } TEST_F(homa_outgoing, homa_resend_data__basics) @@ -984,8 +984,8 
@@ TEST_F(homa_outgoing, homa_check_nic_queue__pacer_metrics) EXPECT_EQ(1, homa_check_nic_queue(&self->homa, crpc->msgout.packets, true)); EXPECT_EQ(10500, atomic64_read(&self->homa.link_idle_time)); - EXPECT_EQ(500, core_metrics.pacer_bytes); - EXPECT_EQ(200, core_metrics.pacer_lost_cycles); + EXPECT_EQ(500, homa_metrics_per_cpu()->pacer_bytes); + EXPECT_EQ(200, homa_metrics_per_cpu()->pacer_lost_cycles); } TEST_F(homa_outgoing, homa_check_nic_queue__queue_empty) { @@ -1141,7 +1141,7 @@ TEST_F(homa_outgoing, homa_pacer_xmit__rpc_locked) mock_trylock_errors = ~1; homa_pacer_xmit(&self->homa); EXPECT_STREQ("", unit_log_get()); - EXPECT_EQ(1, core_metrics.pacer_skipped_rpcs); + EXPECT_EQ(1, homa_metrics_per_cpu()->pacer_skipped_rpcs); unit_log_clear(); mock_trylock_errors = 0; homa_pacer_xmit(&self->homa); @@ -1234,16 +1234,16 @@ TEST_F(homa_outgoing, homa_add_to_throttled__inc_metrics) self->server_port, self->client_id+4, 15000, 1000); homa_add_to_throttled(crpc1); - EXPECT_EQ(1, core_metrics.throttle_list_adds); - EXPECT_EQ(0, core_metrics.throttle_list_checks); + EXPECT_EQ(1, homa_metrics_per_cpu()->throttle_list_adds); + EXPECT_EQ(0, homa_metrics_per_cpu()->throttle_list_checks); homa_add_to_throttled(crpc2); - EXPECT_EQ(2, core_metrics.throttle_list_adds); - EXPECT_EQ(1, core_metrics.throttle_list_checks); + EXPECT_EQ(2, homa_metrics_per_cpu()->throttle_list_adds); + EXPECT_EQ(1, homa_metrics_per_cpu()->throttle_list_checks); homa_add_to_throttled(crpc3); - EXPECT_EQ(3, core_metrics.throttle_list_adds); - EXPECT_EQ(3, core_metrics.throttle_list_checks); + EXPECT_EQ(3, homa_metrics_per_cpu()->throttle_list_adds); + EXPECT_EQ(3, homa_metrics_per_cpu()->throttle_list_checks); } TEST_F(homa_outgoing, homa_remove_from_throttled) diff --git a/test/unit_homa_peertab.c b/test/unit_homa_peertab.c index d1645ca..92c352e 100644 --- a/test/unit_homa_peertab.c +++ b/test/unit_homa_peertab.c @@ -72,7 +72,7 @@ TEST_F(homa_peertab, homa_peer_find__basics) peer2 = 
homa_peer_find(&self->peertab, ip2222, &self->hsk.inet); EXPECT_NE(peer, peer2); - EXPECT_EQ(2, core_metrics.peer_new_entries); + EXPECT_EQ(2, homa_metrics_per_cpu()->peer_new_entries); } static struct _test_data_homa_peertab *test_data; @@ -191,7 +191,7 @@ TEST_F(homa_peertab, homa_peer_find__kmalloc_error) peer = homa_peer_find(&self->peertab, ip3333, &self->hsk.inet); EXPECT_EQ(ENOMEM, -PTR_ERR(peer)); - EXPECT_EQ(1, core_metrics.peer_kmalloc_errors); + EXPECT_EQ(1, homa_metrics_per_cpu()->peer_kmalloc_errors); } TEST_F(homa_peertab, homa_peer_find__route_error) { @@ -201,7 +201,7 @@ TEST_F(homa_peertab, homa_peer_find__route_error) peer = homa_peer_find(&self->peertab, ip3333, &self->hsk.inet); EXPECT_EQ(EHOSTUNREACH, -PTR_ERR(peer)); - EXPECT_EQ(1, core_metrics.peer_route_errors); + EXPECT_EQ(1, homa_metrics_per_cpu()->peer_route_errors); } TEST_F(homa_peertab, homa_dst_refresh__basics) @@ -229,7 +229,7 @@ TEST_F(homa_peertab, homa_dst_refresh__routing_error) mock_route_errors = 1; homa_dst_refresh(&self->homa.peers, peer, &self->hsk); EXPECT_EQ(old_dst, peer->dst); - EXPECT_EQ(1, core_metrics.peer_route_errors); + EXPECT_EQ(1, homa_metrics_per_cpu()->peer_route_errors); EXPECT_EQ(0, dead_count(&self->homa.peers)); } TEST_F(homa_peertab, homa_dst_refresh__malloc_error) @@ -324,15 +324,15 @@ TEST_F(homa_peertab, homa_peer_lock_slow) ASSERT_NE(NULL, peer); homa_peer_lock(peer); - EXPECT_EQ(0, core_metrics.peer_ack_lock_misses); - EXPECT_EQ(0, core_metrics.peer_ack_lock_miss_cycles); + EXPECT_EQ(0, homa_metrics_per_cpu()->peer_ack_lock_misses); + EXPECT_EQ(0, homa_metrics_per_cpu()->peer_ack_lock_miss_cycles); homa_peer_unlock(peer); mock_trylock_errors = 1; unit_hook_register(peer_spinlock_hook); homa_peer_lock(peer); - EXPECT_EQ(1, core_metrics.peer_ack_lock_misses); - EXPECT_EQ(1000, core_metrics.peer_ack_lock_miss_cycles); + EXPECT_EQ(1, homa_metrics_per_cpu()->peer_ack_lock_misses); + EXPECT_EQ(1000, homa_metrics_per_cpu()->peer_ack_lock_miss_cycles); 
homa_peer_unlock(peer); } diff --git a/test/unit_homa_plumbing.c b/test/unit_homa_plumbing.c index 6dbf40e..ad47ca1 100644 --- a/test/unit_homa_plumbing.c +++ b/test/unit_homa_plumbing.c @@ -268,7 +268,7 @@ TEST_F(homa_plumbing, homa_set_sock_opt__success) sizeof(struct homa_set_buf_args))); EXPECT_EQ(args.start, self->hsk.buffer_pool.region); EXPECT_EQ(64, self->hsk.buffer_pool.num_bpages); - EXPECT_EQ(1, core_metrics.so_set_buf_calls); + EXPECT_EQ(1, homa_metrics_per_cpu()->so_set_buf_calls); } TEST_F(homa_plumbing, homa_sendmsg__args_not_in_user_space) @@ -686,7 +686,7 @@ TEST_F(homa_plumbing, homa_softirq__packet_too_short) skb->len -= 1; homa_softirq(skb); EXPECT_EQ(0, unit_list_length(&self->hsk.active_rpcs)); - EXPECT_EQ(1, core_metrics.short_packets); + EXPECT_EQ(1, homa_metrics_per_cpu()->short_packets); } TEST_F(homa_plumbing, homa_softirq__bogus_packet_type) { @@ -695,7 +695,7 @@ TEST_F(homa_plumbing, homa_softirq__bogus_packet_type) skb = mock_skb_new(self->client_ip, &self->data.common, 1400, 1400); homa_softirq(skb); EXPECT_EQ(0, unit_list_length(&self->hsk.active_rpcs)); - EXPECT_EQ(1, core_metrics.short_packets); + EXPECT_EQ(1, homa_metrics_per_cpu()->short_packets); } TEST_F(homa_plumbing, homa_softirq__process_short_messages_first) { @@ -824,57 +824,3 @@ TEST_F(homa_plumbing, homa_softirq__per_rpc_batching) "sk->sk_data_ready invoked", unit_log_get()); } - -TEST_F(homa_plumbing, homa_metrics_open) -{ - EXPECT_EQ(0, homa_metrics_open(NULL, NULL)); - EXPECT_NE(NULL, self->homa.metrics); - - strcpy(self->homa.metrics, "12345"); - EXPECT_EQ(0, homa_metrics_open(NULL, NULL)); - EXPECT_EQ(5, strlen(self->homa.metrics)); - EXPECT_EQ(2, self->homa.metrics_active_opens); -} -TEST_F(homa_plumbing, homa_metrics_read__basics) -{ - char buffer[1000]; - loff_t offset = 10; - self->homa.metrics = kmalloc(100, GFP_KERNEL); - self->homa.metrics_capacity = 100; - strcpy(self->homa.metrics, "0123456789abcdefghijklmnop"); - self->homa.metrics_length = 26; - 
EXPECT_EQ(5, homa_metrics_read(NULL, buffer, 5, &offset)); - EXPECT_SUBSTR("_copy_to_user copied 5 bytes", unit_log_get()); - EXPECT_EQ(15, offset); - - unit_log_clear(); - EXPECT_EQ(11, homa_metrics_read(NULL, buffer, 1000, &offset)); - EXPECT_SUBSTR("_copy_to_user copied 11 bytes", unit_log_get()); - EXPECT_EQ(26, offset); - - unit_log_clear(); - EXPECT_EQ(0, homa_metrics_read(NULL, buffer, 1000, &offset)); - EXPECT_STREQ("", unit_log_get()); - EXPECT_EQ(26, offset); -} -TEST_F(homa_plumbing, homa_metrics_read__error_copying_to_user) -{ - char buffer[1000]; - loff_t offset = 10; - self->homa.metrics = kmalloc(100, GFP_KERNEL); - self->homa.metrics_capacity = 100; - strcpy(self->homa.metrics, "0123456789abcdefghijklmnop"); - self->homa.metrics_length = 26; - mock_copy_to_user_errors = 1; - EXPECT_EQ(EFAULT, -homa_metrics_read(NULL, buffer, 5, &offset)); -} - -TEST_F(homa_plumbing, homa_metrics_release) -{ - self->homa.metrics_active_opens = 2; - EXPECT_EQ(0, homa_metrics_release(NULL, NULL)); - EXPECT_EQ(1, self->homa.metrics_active_opens); - - EXPECT_EQ(0, homa_metrics_release(NULL, NULL)); - EXPECT_EQ(0, self->homa.metrics_active_opens); -} diff --git a/test/unit_homa_pool.c b/test/unit_homa_pool.c index 43ea267..9649671 100644 --- a/test/unit_homa_pool.c +++ b/test/unit_homa_pool.c @@ -320,7 +320,7 @@ TEST_F(homa_pool, homa_pool_allocate__page_wrap_around) EXPECT_EQ(2*HOMA_BPAGE_SIZE, crpc->msgin.bpage_offsets[0]); EXPECT_EQ(2000, pool->cores[raw_smp_processor_id()].allocated); EXPECT_EQ(raw_smp_processor_id(), pool->descriptors[2].owner); - EXPECT_EQ(1, core_metrics.bpage_reuses); + EXPECT_EQ(1, homa_metrics_per_cpu()->bpage_reuses); } TEST_F(homa_pool, homa_pool_allocate__owned_page_overflow) { @@ -405,7 +405,7 @@ TEST_F(homa_pool, homa_pool_allocate__out_of_space) rpc = list_next_entry(rpc, buf_links); EXPECT_EQ(100, rpc->id); EXPECT_TRUE(list_is_last(&rpc->buf_links, &self->hsk.waiting_for_bufs)); - EXPECT_EQ(3, core_metrics.buffer_alloc_failures); + 
EXPECT_EQ(3, homa_metrics_per_cpu()->buffer_alloc_failures); EXPECT_EQ(1, pool->bpages_needed); } diff --git a/test/unit_homa_skb.c b/test/unit_homa_skb.c index 6de0ef1..1995913 100644 --- a/test/unit_homa_skb.c +++ b/test/unit_homa_skb.c @@ -304,8 +304,8 @@ TEST_F(homa_skb, homa_skb_page_alloc__new_large_page) EXPECT_TRUE(homa_skb_page_alloc(&self->homa, core)); EXPECT_NE(NULL, core->skb_page); EXPECT_EQ(HOMA_SKB_PAGE_SIZE, core->page_size); - EXPECT_EQ(1, core_metrics.skb_page_allocs); - EXPECT_NE(0, core_metrics.skb_page_alloc_cycles); + EXPECT_EQ(1, homa_metrics_per_cpu()->skb_page_allocs); + EXPECT_NE(0, homa_metrics_per_cpu()->skb_page_alloc_cycles); } TEST_F(homa_skb, homa_skb_page_alloc__high_order_page_not_available) { @@ -317,8 +317,8 @@ TEST_F(homa_skb, homa_skb_page_alloc__high_order_page_not_available) EXPECT_NE(NULL, core->skb_page); EXPECT_EQ(PAGE_SIZE, core->page_size); EXPECT_EQ(0, core->page_inuse); - EXPECT_EQ(1, core_metrics.skb_page_allocs); - EXPECT_NE(0, core_metrics.skb_page_alloc_cycles); + EXPECT_EQ(1, homa_metrics_per_cpu()->skb_page_allocs); + EXPECT_NE(0, homa_metrics_per_cpu()->skb_page_alloc_cycles); } TEST_F(homa_skb, homa_skb_page_alloc__no_pages_available) { diff --git a/test/unit_homa_socktab.c b/test/unit_homa_socktab.c index 646182a..ec55140 100644 --- a/test/unit_homa_socktab.c +++ b/test/unit_homa_socktab.c @@ -294,13 +294,13 @@ TEST_F(homa_socktab, homa_sock_lock_slow) mock_cycles = ~0; homa_sock_lock(&self->hsk, "unit test"); - EXPECT_EQ(0, core_metrics.socket_lock_misses); - EXPECT_EQ(0, core_metrics.socket_lock_miss_cycles); + EXPECT_EQ(0, homa_metrics_per_cpu()->socket_lock_misses); + EXPECT_EQ(0, homa_metrics_per_cpu()->socket_lock_miss_cycles); homa_sock_unlock(&self->hsk); mock_trylock_errors = 1; homa_sock_lock(&self->hsk, "unit test"); - EXPECT_EQ(1, core_metrics.socket_lock_misses); - EXPECT_NE(0, core_metrics.socket_lock_miss_cycles); + EXPECT_EQ(1, homa_metrics_per_cpu()->socket_lock_misses); + EXPECT_NE(0, 
homa_metrics_per_cpu()->socket_lock_miss_cycles); homa_sock_unlock(&self->hsk); } \ No newline at end of file diff --git a/test/unit_homa_timer.c b/test/unit_homa_timer.c index 33f00a8..c816798 100644 --- a/test/unit_homa_timer.c +++ b/test/unit_homa_timer.c @@ -136,11 +136,11 @@ TEST_F(homa_timer, homa_check_rpc__timeout) unit_log_clear(); crpc->silent_ticks = self->homa.timeout_ticks-1; homa_check_rpc(crpc); - EXPECT_EQ(0, core_metrics.rpc_timeouts); + EXPECT_EQ(0, homa_metrics_per_cpu()->rpc_timeouts); EXPECT_EQ(0, crpc->error); crpc->silent_ticks = self->homa.timeout_ticks; homa_check_rpc(crpc); - EXPECT_EQ(1, core_metrics.rpc_timeouts); + EXPECT_EQ(1, homa_metrics_per_cpu()->rpc_timeouts); EXPECT_EQ(ETIMEDOUT, -crpc->error); } TEST_F(homa_timer, homa_check_rpc__issue_resend) @@ -250,7 +250,7 @@ TEST_F(homa_timer, homa_timer__basics) unit_log_clear(); crpc->peer->outstanding_resends = self->homa.timeout_resends; homa_timer(&self->homa); - EXPECT_EQ(1, core_metrics.rpc_timeouts); + EXPECT_EQ(1, homa_metrics_per_cpu()->rpc_timeouts); EXPECT_EQ(ETIMEDOUT, -crpc->error); } TEST_F(homa_timer, homa_timer__reap_dead_rpcs) diff --git a/test/unit_homa_utils.c b/test/unit_homa_utils.c index aa03f90..dfd9b9e 100644 --- a/test/unit_homa_utils.c +++ b/test/unit_homa_utils.c @@ -280,18 +280,18 @@ TEST_F(homa_utils, homa_bucket_lock_slow) ASSERT_FALSE(IS_ERR(srpc)); homa_rpc_unlock(srpc); - EXPECT_EQ(0, core_metrics.client_lock_misses); - EXPECT_EQ(0, core_metrics.client_lock_miss_cycles); + EXPECT_EQ(0, homa_metrics_per_cpu()->client_lock_misses); + EXPECT_EQ(0, homa_metrics_per_cpu()->client_lock_miss_cycles); homa_bucket_lock_slow(crpc->bucket, crpc->id); homa_rpc_unlock(crpc); - EXPECT_EQ(1, core_metrics.client_lock_misses); - EXPECT_NE(0, core_metrics.client_lock_miss_cycles); - EXPECT_EQ(0, core_metrics.server_lock_misses); - EXPECT_EQ(0, core_metrics.server_lock_miss_cycles); + EXPECT_EQ(1, homa_metrics_per_cpu()->client_lock_misses); + EXPECT_NE(0, 
homa_metrics_per_cpu()->client_lock_miss_cycles); + EXPECT_EQ(0, homa_metrics_per_cpu()->server_lock_misses); + EXPECT_EQ(0, homa_metrics_per_cpu()->server_lock_miss_cycles); homa_bucket_lock_slow(srpc->bucket, srpc->id); homa_rpc_unlock(srpc); - EXPECT_EQ(1, core_metrics.server_lock_misses); - EXPECT_NE(0, core_metrics.server_lock_miss_cycles); + EXPECT_EQ(1, homa_metrics_per_cpu()->server_lock_misses); + EXPECT_NE(0, homa_metrics_per_cpu()->server_lock_miss_cycles); } TEST_F(homa_utils, homa_rpc_acked__basics) @@ -741,26 +741,6 @@ TEST_F(homa_utils, homa_snprintf) buffer); } -TEST_F(homa_utils, homa_append_metric) -{ - self->homa.metrics_length = 0; - homa_append_metric(&self->homa, "x: %d, y: %d", 10, 20); - EXPECT_EQ(12, self->homa.metrics_length); - EXPECT_STREQ("x: 10, y: 20", self->homa.metrics); - - homa_append_metric(&self->homa, ", z: %d", 12345); - EXPECT_EQ(22, self->homa.metrics_length); - EXPECT_STREQ("x: 10, y: 20, z: 12345", self->homa.metrics); - EXPECT_EQ(30, self->homa.metrics_capacity); - - homa_append_metric(&self->homa, ", q: %050d", 88); - EXPECT_EQ(77, self->homa.metrics_length); - EXPECT_STREQ("x: 10, y: 20, z: 12345, " - "q: 00000000000000000000000000000000000000000000000088", - self->homa.metrics); - EXPECT_EQ(120, self->homa.metrics_capacity); -} - TEST_F(homa_utils, homa_prios_changed__basics) { set_cutoffs(&self->homa, 90, 80, HOMA_MAX_MESSAGE_LENGTH*2, 60, 50, diff --git a/test/utils.h b/test/utils.h index f782266..988c778 100644 --- a/test/utils.h +++ b/test/utils.h @@ -30,8 +30,6 @@ enum unit_rpc_state { UNIT_IN_SERVICE = 24, }; -#define core_metrics homa_cores[raw_smp_processor_id()]->metrics - #define cur_core homa_cores[raw_smp_processor_id()] extern char *unit_ack_string(struct homa_ack *ack); diff --git a/timetrace.c b/timetrace.c index 7c44cdd..fdac629 100644 --- a/timetrace.c +++ b/timetrace.c @@ -845,8 +845,7 @@ void tt_inc_metric(int metric, __u64 count) offsetof(struct homa_metrics, linux_softirq_cycles), offsetof(struct 
homa_metrics, linux_pkt_alloc_bytes), }; - __u64 *metric_addr = (__u64 *)(((char *) - &homa_cores[raw_smp_processor_id()]->metrics) + __u64 *metric_addr = (__u64 *)(((char *) homa_metrics_per_cpu()) + offsets[metric]); *metric_addr += count; }