Improved GRO_IDLE_NEW offload policy

(Keep an atomic count of pending SoftIRQ batches for each core, rather than just a single bit. The bit got confused.)
PlatformLab · Jul 19, 2022 · 6f73438 · 6f73438
1 parent 79df0f4
commit 6f73438
Show file tree

Hide file tree

Showing 6 changed files with 53 additions and 47 deletions.
diff --git a/homa_impl.h b/homa_impl.h
@@ -2306,18 +2306,18 @@ struct homa_core {
 	__u64 last_gro;
 
 	/**
-	 * @softirq_busy: nonzero means that packets have been assigned
-	 * to this core for SoftIRQ processing, but the processing is not
-	 * yet complete.
+	 * @softirq_backlog: the number of batches of packets that have
+	 * been queued for SoftIRQ processing on this core but haven't
+	 * yet been processed.
 	 */
-	__s8 softirq_busy;
+	atomic_t softirq_backlog;
 
 	/**
 	 * @softirq_offset: used when rotating SoftIRQ assignment among
 	 * the next cores; contains an offset to add to the current core
 	 * to produce the core for SoftIRQ.
 	 */
-	__s8 softirq_offset;
+	int softirq_offset;
 
         /**
          * held_skb: last packet buffer known to be available for

diff --git a/homa_offload.c b/homa_offload.c
@@ -228,11 +228,11 @@ struct sk_buff *homa_gro_receive(struct list_head *held_list,
  */
 int homa_gro_complete(struct sk_buff *skb, int hoffset)
 {	
-//	struct common_header *h = (struct common_header *)
-//			skb_transport_header(skb);
-//	struct data_header *d = (struct data_header *) h;
+	struct common_header *h = (struct common_header *)
+			skb_transport_header(skb);
+	struct data_header *d = (struct data_header *) h;
 //	tt_record4("homa_gro_complete type %d, id %d, offset %d, count %d",
-//			h->type, h->sender_id, ntohl(d->seg.offset),
+//			h->type, homa_local_id(h->sender_id), ntohl(d->seg.offset),
 //			NAPI_GRO_CB(skb)->count);
 
 #define CORES_TO_CHECK 4
@@ -245,42 +245,45 @@ int homa_gro_complete(struct sk_buff *skb, int hoffset)
 		 * is no such core, just rotate among the next cores.
 		 */
 		int i;
-		int candidate = raw_smp_processor_id();
-		int this_core = candidate;
+		int this_core = raw_smp_processor_id();
+		int candidate = this_core;
 		__u64 now = get_cycles();
 		struct homa_core *core;
 		for (i = CORES_TO_CHECK; i > 0; i--) {
 			candidate++;
 			if (unlikely(candidate >= nr_cpu_ids))
 				candidate = 0;
 			core = homa_cores[candidate];
-			if (!core->softirq_busy && ((core->last_gro
-					+ homa->gro_busy_cycles) < now)) {
-				tt_record1("homa_gro_complete chose core %d "
-						"with IDLE_NEW policy",
-						candidate);
-				break;
-			}
+			if (atomic_read(&core->softirq_backlog)  > 0)
+				continue;
+			if ((core->last_gro + homa->gro_busy_cycles) > now)
+				continue;
+			tt_record3("homa_gro_complete chose core %d for id %d "
+					"offset %d with IDLE_NEW policy",
+					candidate, homa_local_id(h->sender_id),
+					ntohl(d->seg.offset));
+			break;
 		}
 		if (i <= 0) {
 			/* All of the candidates appear to be busy; just
 			 * rotate among them.
 			 */
-			int offset = homa_cores[candidate]->softirq_offset;
+			int offset = homa_cores[this_core]->softirq_offset;
 			offset += 1;
 			if (offset > CORES_TO_CHECK)
 				offset = 1;
-			homa_cores[candidate]->softirq_offset = offset;
-			candidate = this_core
-					+ homa_cores[candidate]->softirq_offset;
+			homa_cores[this_core]->softirq_offset = offset;
+			candidate = this_core + offset;
 			while (candidate >= nr_cpu_ids) {
 				candidate -= nr_cpu_ids;
 			}
-			tt_record1("homa_gro_complete chose core %d with "
-					"IDLE_NEW policy (all cores busy)",
-					candidate);
+			tt_record3("homa_gro_complete chose core %d for id %d "
+					"offset %d with IDLE_NEW policy "
+					"(all cores busy)",
+					candidate, homa_local_id(h->sender_id),
+					ntohl(d->seg.offset));
 		}
-		homa_cores[candidate]->softirq_busy = 1;
+		atomic_inc(&homa_cores[candidate]->softirq_backlog);
 		homa_cores[this_core]->last_gro = now;
 		homa_set_softirq_cpu(skb, candidate);
 	} else if (homa->gro_policy & HOMA_GRO_IDLE) {
@@ -307,8 +310,10 @@ int homa_gro_complete(struct sk_buff *skb, int hoffset)
 			}
 		}
 		homa_set_softirq_cpu(skb, best);
-		tt_record1("homa_gro_complete chose core %d with IDLE policy",
-				best);
+		tt_record3("homa_gro_complete chose core %d for id %d "
+				"offset %d with IDLE policy",
+				best, homa_local_id(h->sender_id),
+				ntohl(d->seg.offset));
 	} else if (homa->gro_policy & HOMA_GRO_NEXT) {
 		/* Use the next core (in circular order) to handle the
 		 * SoftIRQ processing.
@@ -317,8 +322,10 @@ int homa_gro_complete(struct sk_buff *skb, int hoffset)
 		if (unlikely(target >= nr_cpu_ids))
 			target = 0;
 		homa_set_softirq_cpu(skb, target);
-		tt_record1("homa_gro_complete chose core %d with NEXT policy",
-				target);
+		tt_record3("homa_gro_complete chose core %d for id %d "
+				"offset %d with NEXT policy",
+				target, homa_local_id(h->sender_id),
+				ntohl(d->seg.offset));
 	}
 
 	return 0;

diff --git a/homa_plumbing.c b/homa_plumbing.c
@@ -18,6 +18,7 @@
  */
 
 #include "homa_impl.h"
+#include "homa_lcache.h"
 
 #ifndef __UNIT_TEST__
 MODULE_LICENSE("Dual MIT/GPL");
@@ -1119,10 +1120,12 @@ int homa_softirq(struct sk_buff *skb) {
 	struct homa_sock *hsk;
 	int num_packets = 0;
 	int pull_length;
+	struct homa_lcache lcache;
 
 	start = get_cycles();
 	INC_METRIC(softirq_calls, 1);
 	homa_cores[raw_smp_processor_id()]->last_active = start;
+	homa_lcache_init(&lcache);
 	if ((start - last) > 1000000) {
 		int scaled_ms = (int) (10*(start-last)/cpu_khz);
 		if ((scaled_ms >= 50) && (scaled_ms < 10000)) {
@@ -1163,13 +1166,6 @@ int homa_softirq(struct sk_buff *skb) {
 
 	for (skb = packets; skb != NULL; skb = next) {
 		next = skb->next;
-		if (next == NULL) {
-			/* Once we're down to a single packet to process,
-			 * it's OK for GRO to start assigning us more
-			 * work.
-			 */
-			homa_cores[raw_smp_processor_id()]->softirq_busy = 0;
-		}
 		saddr = ip_hdr(skb)->saddr;
 		num_packets++;
 
@@ -1246,14 +1242,16 @@ int homa_softirq(struct sk_buff *skb) {
 			goto discard;
 		}
 
-		homa_pkt_dispatch(skb, hsk);
+		homa_pkt_dispatch(skb, hsk, &lcache);
 		continue;
 
 discard:
 		kfree_skb(skb);
 	}
 
+	homa_lcache_release(&lcache);
 	homa_send_grants(homa);
+	atomic_dec(&homa_cores[raw_smp_processor_id()]->softirq_backlog);
 	INC_METRIC(softirq_cycles, get_cycles() - start);
 	return 0;
 }

diff --git a/homa_utils.c b/homa_utils.c
@@ -61,7 +61,7 @@ int homa_init(struct homa *homa)
 			homa_cores[i] = core;
 			core->last_active = 0;
 			core->last_gro = 0;
-			core->softirq_busy = 0;
+			atomic_set(&core->softirq_backlog, 0);
 			core->softirq_offset = 0;
 			core->held_skb = NULL;
 			core->held_bucket = 0;

diff --git a/test/unit_homa_offload.c b/test/unit_homa_offload.c
@@ -198,35 +198,35 @@ TEST_F(homa_offload, homa_gro_complete__GRO_IDLE_NEW)
 	mock_cycles = 1000;
 	homa->gro_busy_cycles = 100;
 	cpu_number = 5;
-	homa_cores[6]->softirq_busy = 1;
+	atomic_set(&homa_cores[6]->softirq_backlog, 1);
 	homa_cores[6]->last_gro = 0;
-	homa_cores[7]->softirq_busy = 0;
+	atomic_set(&homa_cores[7]->softirq_backlog, 0);
 	homa_cores[7]->last_gro = 901;
-	homa_cores[0]->softirq_busy = 1;
+	atomic_set(&homa_cores[0]->softirq_backlog, 2);
 	homa_cores[0]->last_gro = 0;
-	homa_cores[1]->softirq_busy = 0;
+	atomic_set(&homa_cores[1]->softirq_backlog, 0);
 	homa_cores[1]->last_gro = 899;
-	homa_cores[2]->softirq_busy = 0;
+	atomic_set(&homa_cores[2]->softirq_backlog, 0);
 	homa_cores[2]->last_gro = 0;
 
 	// Avoid busy cores.
 	homa_gro_complete(self->skb, 0);
 	EXPECT_EQ(1, self->skb->hash - 32);
-	EXPECT_EQ(1, homa_cores[1]->softirq_busy);
+	EXPECT_EQ(1, atomic_read(&homa_cores[1]->softirq_backlog));
 
 	// All cores busy; must rotate.
 	homa_gro_complete(self->skb, 0);
 	EXPECT_EQ(6, self->skb->hash - 32);
-	EXPECT_EQ(1, homa_cores[1]->softirq_offset);
 	homa_gro_complete(self->skb, 0);
 	EXPECT_EQ(7, self->skb->hash - 32);
+	EXPECT_EQ(2, homa_cores[5]->softirq_offset);
 	homa_gro_complete(self->skb, 0);
 	EXPECT_EQ(0, self->skb->hash - 32);
 	homa_gro_complete(self->skb, 0);
 	EXPECT_EQ(1, self->skb->hash - 32);
 	homa_gro_complete(self->skb, 0);
 	EXPECT_EQ(6, self->skb->hash - 32);
-	EXPECT_EQ(1, homa_cores[1]->softirq_offset);
+	EXPECT_EQ(1, homa_cores[5]->softirq_offset);
 }
 
 TEST_F(homa_offload, homa_gro_complete__GRO_IDLE)

diff --git a/test/unit_homa_plumbing.c b/test/unit_homa_plumbing.c
@@ -521,6 +521,7 @@ TEST_F(homa_plumbing, homa_softirq__multiple_packets_different_sockets)
 	homa_sock_bind(&self->homa.port_map, &sock2, self->server_port+1);
 
 	skb = mock_skb_new(self->client_ip, &self->data.common, 1400, 1400);
+	self->data.common.sender_id += 2;
 	self->data.common.dport = htons(self->server_port+1);
 	skb2 = mock_skb_new(self->client_ip, &self->data.common, 1400, 1400);
 	skb_shinfo(skb)->frag_list = skb2;