Skip to content

Commit

Permalink
lib: clean up nexthop hashing mess
Browse files Browse the repository at this point in the history
We were hashing 4 bytes of the address.  Even for IPv6 addresses.

Oops.

The reason this was done was to try to make it faster, but made a
complex maze out of everything.  Time for a refactor.

Signed-off-by: David Lamparter <equinox@opensourcerouting.org>
  • Loading branch information
eqvinox committed Jan 29, 2025
1 parent 4a0e141 commit 001fcfa
Show file tree
Hide file tree
Showing 2 changed files with 80 additions and 111 deletions.
87 changes: 12 additions & 75 deletions lib/nexthop.c
Original file line number Diff line number Diff line change
Expand Up @@ -772,68 +772,30 @@ unsigned int nexthop_level(const struct nexthop *nexthop)
return rv;
}

/* Only hash word-sized things, let cmp do the rest. */
uint32_t nexthop_hash_quick(const struct nexthop *nexthop)
uint32_t nexthop_hash(const struct nexthop *nexthop)
{
uint32_t key = 0x45afe398;
int i;

key = jhash_3words(nexthop->type, nexthop->vrf_id,
nexthop->nh_label_type, key);

if (nexthop->nh_label) {
int labels = nexthop->nh_label->num_labels;
/* type, vrf, ifindex, ip addresses - see nexthop.h */
key = _nexthop_hash_bytes(nexthop, key);

i = 0;
key = jhash_1word(nexthop->flags & NEXTHOP_FLAGS_HASHED, key);

while (labels >= 3) {
key = jhash_3words(nexthop->nh_label->label[i],
nexthop->nh_label->label[i + 1],
nexthop->nh_label->label[i + 2],
key);
labels -= 3;
i += 3;
}

if (labels >= 2) {
key = jhash_2words(nexthop->nh_label->label[i],
nexthop->nh_label->label[i + 1],
key);
labels -= 2;
i += 2;
}
if (nexthop->nh_label) {
const struct mpls_label_stack *ls = nexthop->nh_label;

if (labels >= 1)
key = jhash_1word(nexthop->nh_label->label[i], key);
/* num_labels itself isn't useful to hash, if the number of
* labels is different, the hash value will change just due to
* that already.
*/
key = jhash(ls->label, sizeof(ls->label[0]) * ls->num_labels, key);
}

key = jhash_2words(nexthop->ifindex,
CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ONLINK),
key);

/* Include backup nexthops, if present */
if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_HAS_BACKUP)) {
int backups = nexthop->backup_num;

i = 0;

while (backups >= 3) {
key = jhash_3words(nexthop->backup_idx[i],
nexthop->backup_idx[i + 1],
nexthop->backup_idx[i + 2], key);
backups -= 3;
i += 3;
}

while (backups >= 2) {
key = jhash_2words(nexthop->backup_idx[i],
nexthop->backup_idx[i + 1], key);
backups -= 2;
i += 2;
}

if (backups >= 1)
key = jhash_1word(nexthop->backup_idx[i], key);
key = jhash(nexthop->backup_idx, sizeof(nexthop->backup_idx[0]) * backups, key);
}

if (nexthop->nh_srv6) {
Expand Down Expand Up @@ -868,31 +830,6 @@ uint32_t nexthop_hash_quick(const struct nexthop *nexthop)
return key;
}


#define GATE_SIZE 4 /* Number of uint32_t words in struct g_addr */

/* For a more granular hash */
uint32_t nexthop_hash(const struct nexthop *nexthop)
{
uint32_t gate_src_rmap_raw[GATE_SIZE * 3] = {};
/* Get all the quick stuff */
uint32_t key = nexthop_hash_quick(nexthop);

assert(((sizeof(nexthop->gate) + sizeof(nexthop->src)
+ sizeof(nexthop->rmap_src))
/ 3)
== (GATE_SIZE * sizeof(uint32_t)));

memcpy(gate_src_rmap_raw, &nexthop->gate, GATE_SIZE);
memcpy(gate_src_rmap_raw + GATE_SIZE, &nexthop->src, GATE_SIZE);
memcpy(gate_src_rmap_raw + (2 * GATE_SIZE), &nexthop->rmap_src,
GATE_SIZE);

key = jhash2(gate_src_rmap_raw, (GATE_SIZE * 3), key);

return key;
}

void nexthop_copy_no_recurse(struct nexthop *copy,
const struct nexthop *nexthop,
struct nexthop *rparent)
Expand Down
104 changes: 68 additions & 36 deletions lib/nexthop.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#ifndef _LIB_NEXTHOP_H
#define _LIB_NEXTHOP_H

#include "jhash.h"
#include "prefix.h"
#include "mpls.h"
#include "vxlan.h"
Expand Down Expand Up @@ -56,15 +57,48 @@ struct nexthop {
struct nexthop *next;
struct nexthop *prev;

/*
* What vrf is this nexthop associated with?

/* begin of hashed data - all fields from here onwards are given to
* jhash() as one consecutive chunk. DO NOT create "padding holes".
* DO NOT insert pointers that need to be deep-hashed.
*
* static_assert() below needs to be updated when fields are added
*/
char _hash_begin[0];

/* see above */
enum nexthop_types_t type;

/* What vrf is this nexthop associated with? */
vrf_id_t vrf_id;

/* Interface index. */
ifindex_t ifindex;

enum nexthop_types_t type;
/* Type of label(s), if any */
enum lsp_types_t nh_label_type;

/* padding: keep 16 byte alignment here */

/* Nexthop address
* make sure all 16 bytes for IPv6 are zeroed when putting in an IPv4
* address since the entire thing is hashed as-is
*/
union {
union g_addr gate;
enum blackhole_type bh_type;
};
union g_addr src;
union g_addr rmap_src; /* Src is set via routemap */

/* end of hashed data - remaining fields in this struct are not
* directly fed into jhash(). Most of them are actually part of the
* hash but have special rules or handling attached.
*/
char _hash_end[0];

/* Weight of the nexthop ( for unequal cost ECMP ) */
uint8_t weight;

uint16_t flags;
#define NEXTHOP_FLAG_ACTIVE (1 << 0) /* This nexthop is alive. */
Expand All @@ -82,18 +116,15 @@ struct nexthop {
#define NEXTHOP_FLAG_EVPN (1 << 8) /* nexthop is EVPN */
#define NEXTHOP_FLAG_LINKDOWN (1 << 9) /* is not removed on link down */

/* which flags are part of nexthop_hash(). Should probably be split
* off into a separate field...
*/
#define NEXTHOP_FLAGS_HASHED NEXTHOP_FLAG_ONLINK

#define NEXTHOP_IS_ACTIVE(flags) \
(CHECK_FLAG(flags, NEXTHOP_FLAG_ACTIVE) \
&& !CHECK_FLAG(flags, NEXTHOP_FLAG_DUPLICATE))

/* Nexthop address */
union {
union g_addr gate;
enum blackhole_type bh_type;
};
union g_addr src;
union g_addr rmap_src; /* Src is set via routemap */

/* Nexthops obtained by recursive resolution.
*
* If the nexthop struct needs to be resolved recursively,
Expand All @@ -104,15 +135,9 @@ struct nexthop {
/* Recursive parent */
struct nexthop *rparent;

/* Type of label(s), if any */
enum lsp_types_t nh_label_type;

/* Label(s) associated with this nexthop. */
struct mpls_label_stack *nh_label;

/* Weight of the nexthop ( for unequal cost ECMP ) */
uint8_t weight;

/* Count and index of corresponding backup nexthop(s) in a backup list;
* only meaningful if the HAS_BACKUP flag is set.
*/
Expand All @@ -138,6 +163,29 @@ struct nexthop {
struct nexthop_srv6 *nh_srv6;
};

/* all hashed fields (including padding, if it is necessary to add) need to
* be listed in the static_assert below
*/

#define S(field) sizeof(((struct nexthop *)NULL)->field)

static_assert(
offsetof(struct nexthop, _hash_end) - offsetof(struct nexthop, _hash_begin) ==
S(type) + S(vrf_id) + S(ifindex) + S(nh_label_type) + S(gate) + S(src) + S(rmap_src),
"struct nexthop contains padding, this can break things. insert _pad fields at appropriate places");

#undef S

/* this is here to show exactly what is meant by the comments above about
* the hashing
*/
static inline uint32_t _nexthop_hash_bytes(const struct nexthop *nh, uint32_t seed)
{
return jhash(&nh->_hash_begin,
offsetof(struct nexthop, _hash_end) - offsetof(struct nexthop, _hash_begin),
seed);
}

/* Utility to append one nexthop to another. */
#define NEXTHOP_APPEND(to, new) \
do { \
Expand Down Expand Up @@ -183,27 +231,11 @@ struct nexthop *nexthop_from_blackhole(enum blackhole_type bh_type,
/*
* Hash a nexthop. Suitable for use with hash tables.
*
* This function uses the following values when computing the hash:
* - vrf_id
* - ifindex
* - type
* - gate
*
* nexthop
* The nexthop to hash
*
* Returns:
* 32-bit hash of nexthop
* Please double check the code on what is included in the hash, there was
* documentation here but it got outdated and the only thing worse than no
* doc is incorrect doc.
*/
uint32_t nexthop_hash(const struct nexthop *nexthop);
/*
* Hash a nexthop only on word-sized attributes:
* - vrf_id
* - ifindex
* - type
* - (some) flags
*/
uint32_t nexthop_hash_quick(const struct nexthop *nexthop);

extern bool nexthop_same(const struct nexthop *nh1, const struct nexthop *nh2);
extern bool nexthop_same_no_labels(const struct nexthop *nh1,
Expand Down

0 comments on commit 001fcfa

Please sign in to comment.