diff --git a/homa_impl.h b/homa_impl.h
index b0d9c4d..2c517af 100644
--- a/homa_impl.h
+++ b/homa_impl.h
@@ -3456,6 +3456,7 @@ extern struct homa_rpc
 		const struct in6_addr *source, struct data_header *h,
 		int *created);
 extern int homa_rpc_reap(struct homa_sock *hsk, int count);
+extern void homa_send_ipis(void);
 extern int homa_sendmsg(struct sock *sk, struct msghdr *msg, size_t len);
 extern int homa_sendpage(struct sock *sk, struct page *page, int offset,
 		size_t size, int flags);
diff --git a/homa_offload.c b/homa_offload.c
index 18307f8..9d5904a 100644
--- a/homa_offload.c
+++ b/homa_offload.c
@@ -71,6 +71,38 @@ static inline void homa_set_softirq_cpu(struct sk_buff *skb, int cpu)
 	__skb_set_sw_hash(skb, hash, false);
 }
 
+/**
+ * homa_send_ipis() - If there are any interprocessor interrupts pending
+ * from this core to others (for packets queued for SoftIRQ processing),
+ * issue those interrupts now. This function is needed because calling
+ * netif_receive_skb doesn't actually issue IPIs; it queues them until
+ * all NAPI processing is finished, and this could be a long time if a
+ * lot more packets are available for processing.
+ */
+void homa_send_ipis(void)
+{
+#if defined(CONFIG_RPS) && !defined(__UNIT_TEST__)
+	/* This function duplicates the code from net_rps_send_ipi because
+	 * we can't call that function from here.
+	 */
+	struct softnet_data *sd = this_cpu_ptr(&softnet_data);
+	struct softnet_data *remsd;
+
+	local_irq_disable();
+	remsd = sd->rps_ipi_list;
+	sd->rps_ipi_list = NULL;
+	local_irq_enable();
+
+	while (remsd) {
+		struct softnet_data *next = remsd->rps_ipi_next;
+
+		if (cpu_online(remsd->cpu))
+			smp_call_function_single_async(remsd->cpu, &remsd->csd);
+		remsd = next;
+	}
+#endif
+}
+
 /**
  * homa_gso_segment() - Split up a large outgoing Homa packet (larger than MTU)
  * into multiple smaller packets.
@@ -242,6 +274,7 @@ struct sk_buff *homa_gro_receive(struct list_head *held_list,
 			skb_list_del_init(held_skb);
 			homa_gro_complete(held_skb, 0);
 			netif_receive_skb(held_skb);
+			homa_send_ipis();
 			napi->gro_hash[core->held_bucket].count--;
 			if (napi->gro_hash[core->held_bucket].count == 0)
 				__clear_bit(core->held_bucket,
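
Note (reviewer sketch, not part of the patch): the pattern the homa_offload.c hunks rely on. On kernels with CONFIG_RPS, a netif_receive_skb() call for a packet steered to another core only links an IPI request onto this core's softnet_data.rps_ipi_list; the kernel normally drains that list only after NAPI polling finishes. The hypothetical helper below (example_deliver_now is not in the patch; homa_set_softirq_cpu and homa_send_ipis are) shows how a GRO-level caller would combine the two so the target core starts SoftIRQ processing immediately.

/* Hypothetical caller, for illustration only: deliver one packet to a
 * chosen core and kick that core's SoftIRQ processing right away,
 * instead of waiting for the NAPI poll loop to finish.
 */
static void example_deliver_now(struct sk_buff *skb, int softirq_cpu)
{
	/* Steer the packet to softirq_cpu via RPS (sets the skb's sw hash). */
	homa_set_softirq_cpu(skb, softirq_cpu);

	/* Queues the packet for the remote core's backlog; under RPS the
	 * IPI that wakes that core is deferred, not sent here.
	 */
	netif_receive_skb(skb);

	/* Flush the deferred IPI(s) immediately (the new function above). */
	homa_send_ipis();
}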