diff --git a/README.md b/README.md index 4e835e5..71fce62 100644 --- a/README.md +++ b/README.md @@ -48,18 +48,16 @@ the backtracking algorithm is as fast as possible. func (t *Table[V]) Lookup(ip netip.Addr) (lpm netip.Prefix, val V, ok bool) func (t *Table[V]) LookupShortest(ip netip.Addr) (spm netip.Prefix, val V, ok bool) + func (t *Table[V]) Overlaps(o *Table[V]) bool func (t *Table[V]) OverlapsPrefix(pfx netip.Prefix) bool func (t *Table[V]) String() string func (t *Table[V]) Fprint(w io.Writer) error - func (t *Table[V]) DumpList(is4 bool) []DumpListNode[V] - func (t *Table[V]) MarshalJSON() ([]byte, error) func (t *Table[V]) MarshalText() ([]byte, error) -``` - -# TODO + func (t *Table[V]) MarshalJSON() ([]byte, error) -- [ ] implement Overlaps ... + func (t *Table[V]) DumpList(is4 bool) []DumpListNode[V] +``` # CONTRIBUTION diff --git a/fulltable_test.go b/fulltable_test.go index f9f452a..8df0705 100644 --- a/fulltable_test.go +++ b/fulltable_test.go @@ -41,10 +41,10 @@ func TestFullNew(t *testing.T) { runtime.ReadMemStats(&endMem) rawBytes := endMem.TotalAlloc - startMem.TotalAlloc - rtBart := bart.Table[any]{} + rt := bart.Table[any]{} runtime.ReadMemStats(&startMem) for _, route := range nRoutes { - rtBart.Insert(route.CIDR, nil) + rt.Insert(route.CIDR, nil) } runtime.ReadMemStats(&endMem) bartBytes := endMem.TotalAlloc - startMem.TotalAlloc @@ -64,10 +64,10 @@ func TestFullNewV4(t *testing.T) { runtime.ReadMemStats(&endMem) rawBytes := endMem.TotalAlloc - startMem.TotalAlloc - rtBart := bart.Table[any]{} + rt := bart.Table[any]{} runtime.ReadMemStats(&startMem) for _, route := range nRoutes { - rtBart.Insert(route.CIDR, nil) + rt.Insert(route.CIDR, nil) } runtime.ReadMemStats(&endMem) bartBytes := endMem.TotalAlloc - startMem.TotalAlloc @@ -87,10 +87,10 @@ func TestFullNewV6(t *testing.T) { runtime.ReadMemStats(&endMem) rawBytes := endMem.TotalAlloc - startMem.TotalAlloc - rtBart := bart.Table[any]{} + rt := bart.Table[any]{} runtime.ReadMemStats(&startMem) for _, route := range nRoutes { - rtBart.Insert(route.CIDR, nil) + rt.Insert(route.CIDR, nil) } runtime.ReadMemStats(&endMem) bartBytes := endMem.TotalAlloc - startMem.TotalAlloc @@ -108,10 +108,10 @@ var ( ) func BenchmarkFullMatchV4(b *testing.B) { - var rtBart bart.Table[int] + var rt bart.Table[int] for i, route := range routes { - rtBart.Insert(route.CIDR, i) + rt.Insert(route.CIDR, i) } var ip netip.Addr @@ -119,7 +119,7 @@ func BenchmarkFullMatchV4(b *testing.B) { // find a random match for { ip = randomIP4() - _, ok := rtBart.Get(ip) + _, ok := rt.Get(ip) if ok { break } @@ -128,38 +128,38 @@ func BenchmarkFullMatchV4(b *testing.B) { b.Run("Get", func(b *testing.B) { b.ResetTimer() for k := 0; k < b.N; k++ { - intSink, okSink = rtBart.Get(ip) + intSink, okSink = rt.Get(ip) } }) b.Run("Lookup", func(b *testing.B) { b.ResetTimer() for k := 0; k < b.N; k++ { - _, intSink, okSink = rtBart.Lookup(ip) + _, intSink, okSink = rt.Lookup(ip) } }) b.Run("LookupSCP", func(b *testing.B) { b.ResetTimer() for k := 0; k < b.N; k++ { - _, _, okSink = rtBart.LookupShortest(ip) + _, _, okSink = rt.LookupShortest(ip) } }) pfx := randomPrefix4() - b.Run("OverlapsBart", func(b *testing.B) { + b.Run("Overlaps", func(b *testing.B) { b.ResetTimer() for k := 0; k < b.N; k++ { - okSink = rtBart.OverlapsPrefix(pfx) + okSink = rt.OverlapsPrefix(pfx) } }) } func BenchmarkFullMatchV6(b *testing.B) { - var rtBart bart.Table[int] + var rt bart.Table[int] for i, route := range routes { - rtBart.Insert(route.CIDR, i) + rt.Insert(route.CIDR, i) } var ip netip.Addr @@ -167,7 +167,7 @@ func BenchmarkFullMatchV6(b *testing.B) { // find a random match for { ip = randomIP6() - _, ok := rtBart.Get(ip) + _, ok := rt.Get(ip) if ok { break } @@ -176,45 +176,45 @@ func BenchmarkFullMatchV6(b *testing.B) { b.Run("Get", func(b *testing.B) { b.ResetTimer() for k := 0; k < b.N; k++ { - intSink, okSink = rtBart.Get(ip) + intSink, okSink = rt.Get(ip) } }) b.Run("Lookup", func(b *testing.B) { b.ResetTimer() for k := 0; k < b.N; k++ { - _, intSink, okSink = rtBart.Lookup(ip) + _, intSink, okSink = rt.Lookup(ip) } }) b.Run("LookupSCP", func(b *testing.B) { b.ResetTimer() for k := 0; k < b.N; k++ { - _, _, okSink = rtBart.LookupShortest(ip) + _, _, okSink = rt.LookupShortest(ip) } }) pfx := randomPrefix6() - b.Run("OverlapsBart", func(b *testing.B) { + b.Run("Overlaps", func(b *testing.B) { b.ResetTimer() for k := 0; k < b.N; k++ { - okSink = rtBart.OverlapsPrefix(pfx) + okSink = rt.OverlapsPrefix(pfx) } }) } func BenchmarkFullMissV4(b *testing.B) { - var rtBart bart.Table[int] + var rt bart.Table[int] for i, route := range routes { - rtBart.Insert(route.CIDR, i) + rt.Insert(route.CIDR, i) } var ip netip.Addr for { ip = randomIP4() - _, ok := rtBart.Get(ip) + _, ok := rt.Get(ip) if !ok { break } @@ -223,37 +223,37 @@ func BenchmarkFullMissV4(b *testing.B) { b.Run("Get", func(b *testing.B) { b.ResetTimer() for k := 0; k < b.N; k++ { - intSink, okSink = rtBart.Get(ip) + intSink, okSink = rt.Get(ip) } }) b.Run("Lookup", func(b *testing.B) { b.ResetTimer() for k := 0; k < b.N; k++ { - _, intSink, okSink = rtBart.Lookup(ip) + _, intSink, okSink = rt.Lookup(ip) } }) b.Run("LookupSCP", func(b *testing.B) { b.ResetTimer() for k := 0; k < b.N; k++ { - _, _, okSink = rtBart.LookupShortest(ip) + _, _, okSink = rt.LookupShortest(ip) } }) } func BenchmarkFullMissV6(b *testing.B) { - var rtBart bart.Table[int] + var rt bart.Table[int] for i, route := range routes { - rtBart.Insert(route.CIDR, i) + rt.Insert(route.CIDR, i) } var ip netip.Addr for { ip = randomIP6() - _, ok := rtBart.Get(ip) + _, ok := rt.Get(ip) if !ok { break } @@ -262,21 +262,21 @@ func BenchmarkFullMissV6(b *testing.B) { b.Run("Get", func(b *testing.B) { b.ResetTimer() for k := 0; k < b.N; k++ { - intSink, okSink = rtBart.Get(ip) + intSink, okSink = rt.Get(ip) } }) b.Run("Lookup", func(b *testing.B) { b.ResetTimer() for k := 0; k < b.N; k++ { - _, intSink, okSink = rtBart.Lookup(ip) + _, intSink, okSink = rt.Lookup(ip) } }) b.Run("LookupSCP", func(b *testing.B) { b.ResetTimer() for k := 0; k < b.N; k++ { - _, _, okSink = rtBart.LookupShortest(ip) + _, _, okSink = rt.LookupShortest(ip) } }) } diff --git a/index_funcs.go b/index_funcs.go new file mode 100644 index 0000000..565fa7e --- /dev/null +++ b/index_funcs.go @@ -0,0 +1,612 @@ +package bart + +import ( + "math/bits" + "strconv" +) + +// Please read the ART paper to understand the algorithm. +// https://cseweb.ucsd.edu//~varghese/TEACH/cs228/artlookup.pdf + +const ( + // firstHostIndex is the baseIndex of the first host route. This is hostIndex(0/8). + firstHostIndex = 0b1_0000_0000 // 256 + + // lastHostIndex is the baseIndex of the last host route. This is hostIndex(0xFF/8). + lastHostIndex = 0b1_1111_1111 // 511 +) + +var hostMasks = []uint8{ + /* bits = 0 */ 0b1111_1111, + /* bits = 1 */ 0b0111_1111, + /* bits = 2 */ 0b0011_1111, + /* bits = 3 */ 0b0001_1111, + /* bits = 4 */ 0b0000_1111, + /* bits = 5 */ 0b0000_0111, + /* bits = 6 */ 0b0000_0011, + /* bits = 7 */ 0b0000_0001, + /* bits = 8 */ 0b0000_0000, +} + +// prefixToBaseIndex, maps a prefix table as a 'complete binary tree'. +// This is the so-called baseIndex a.k.a heapFunc: +func prefixToBaseIndex(addr uint, prefixLen int) uint { + return (addr >> (stride - prefixLen)) + (1 << prefixLen) +} + +// addrToBaseIndex, just prefixToBaseIndex(addr, 8), a.k.a host routes +// but faster, use it for host routes in Get and Lookup. +func addrToBaseIndex(addr uint) uint { + return addr + firstHostIndex +} + +// parentIndex returns the index of idx's parent prefix, or 0 if idx +// is the index of 0/0. +func parentIndex(idx uint) uint { + return idx >> 1 +} + +// baseIndexToPrefixLen, just an adapter +func baseIndexToPrefixLen(baseIdx uint) int { + _, bits := baseIndexToPrefix(baseIdx) + return bits +} + +// lastHostIndexOfPrefix returns the bitset index of the last address in addr/len. +func lastHostIndexOfPrefix(addr uint, bits int) uint { + return addrToBaseIndex(addr | uint(hostMasks[bits])) +} + +func lowerUpperBound(idx uint) (uint, uint) { + addr, bits := baseIndexToPrefix(idx) + return addrToBaseIndex(addr), lastHostIndexOfPrefix(addr, bits) +} + +// baseIndexToPrefix returns the address and prefix len of baseIdx. +// It's the inverse to prefixToBaseIndex. +// +// Use the precalc lookup table for speed. +func baseIndexToPrefix(baseIdx uint) (addr uint, pfxLen int) { + pfx := baseIdx2Pfx[baseIdx] + return pfx.addr, pfx.bits +} + +// baseIdx2Pfx, address and CIDR bits of baseIdx. +// It's the inverse to prefixToBaseIndex. +var baseIdx2Pfx [512]struct { + addr uint + bits int +} + +// baseIndexToPrefixPrecalc, inverse baseIndex mapping, +// precalc the lookup table since bits.LeadingZeros is slow. +func baseIndexToPrefixPrecalc() { + var idx uint + + // idx == 0 is invalid for baseIndex, start with 1 + for idx = 1; idx < maxNodePrefixes; idx++ { + lz := bits.LeadingZeros(idx) + bits := strconv.IntSize - lz - 1 + addr := idx & (0xFF >> (stride - bits)) << (stride - bits) + + baseIdx2Pfx[idx].addr = addr + baseIdx2Pfx[idx].bits = bits + } +} + +/* + { + {-, -}, // idx == 0 invalid! + {0, 0}, // idx == 1 + {0, 1}, // idx == 2 + {128, 1}, // idx == 3 + {0, 2}, // idx == 4 + {64, 2}, // idx == 5 + {128, 2}, // idx == 6 + {192, 2}, // idx == 7 + {0, 3}, // idx == 8 + {32, 3}, // idx == 9 + {64, 3}, // idx == 10 + {96, 3}, // idx == 11 + {128, 3}, // idx == 12 + {160, 3}, // idx == 13 + {192, 3}, // idx == 14 + {224, 3}, // idx == 15 + {0, 4}, // idx == 16 + {16, 4}, // idx == 17 + {32, 4}, // idx == 18 + {48, 4}, // idx == 19 + {64, 4}, // idx == 20 + {80, 4}, // idx == 21 + {96, 4}, // idx == 22 + {112, 4}, // idx == 23 + {128, 4}, // idx == 24 + {144, 4}, // idx == 25 + {160, 4}, // idx == 26 + {176, 4}, // idx == 27 + {192, 4}, // idx == 28 + {208, 4}, // idx == 29 + {224, 4}, // idx == 30 + {240, 4}, // idx == 31 + {0, 5}, // idx == 32 + {8, 5}, // idx == 33 + {16, 5}, // idx == 34 + {24, 5}, // idx == 35 + {32, 5}, // idx == 36 + {40, 5}, // idx == 37 + {48, 5}, // idx == 38 + {56, 5}, // idx == 39 + {64, 5}, // idx == 40 + {72, 5}, // idx == 41 + {80, 5}, // idx == 42 + {88, 5}, // idx == 43 + {96, 5}, // idx == 44 + {104, 5}, // idx == 45 + {112, 5}, // idx == 46 + {120, 5}, // idx == 47 + {128, 5}, // idx == 48 + {136, 5}, // idx == 49 + {144, 5}, // idx == 50 + {152, 5}, // idx == 51 + {160, 5}, // idx == 52 + {168, 5}, // idx == 53 + {176, 5}, // idx == 54 + {184, 5}, // idx == 55 + {192, 5}, // idx == 56 + {200, 5}, // idx == 57 + {208, 5}, // idx == 58 + {216, 5}, // idx == 59 + {224, 5}, // idx == 60 + {232, 5}, // idx == 61 + {240, 5}, // idx == 62 + {248, 5}, // idx == 63 + {0, 6}, // idx == 64 + {4, 6}, // idx == 65 + {8, 6}, // idx == 66 + {12, 6}, // idx == 67 + {16, 6}, // idx == 68 + {20, 6}, // idx == 69 + {24, 6}, // idx == 70 + {28, 6}, // idx == 71 + {32, 6}, // idx == 72 + {36, 6}, // idx == 73 + {40, 6}, // idx == 74 + {44, 6}, // idx == 75 + {48, 6}, // idx == 76 + {52, 6}, // idx == 77 + {56, 6}, // idx == 78 + {60, 6}, // idx == 79 + {64, 6}, // idx == 80 + {68, 6}, // idx == 81 + {72, 6}, // idx == 82 + {76, 6}, // idx == 83 + {80, 6}, // idx == 84 + {84, 6}, // idx == 85 + {88, 6}, // idx == 86 + {92, 6}, // idx == 87 + {96, 6}, // idx == 88 + {100, 6}, // idx == 89 + {104, 6}, // idx == 90 + {108, 6}, // idx == 91 + {112, 6}, // idx == 92 + {116, 6}, // idx == 93 + {120, 6}, // idx == 94 + {124, 6}, // idx == 95 + {128, 6}, // idx == 96 + {132, 6}, // idx == 97 + {136, 6}, // idx == 98 + {140, 6}, // idx == 99 + {144, 6}, // idx == 100 + {148, 6}, // idx == 101 + {152, 6}, // idx == 102 + {156, 6}, // idx == 103 + {160, 6}, // idx == 104 + {164, 6}, // idx == 105 + {168, 6}, // idx == 106 + {172, 6}, // idx == 107 + {176, 6}, // idx == 108 + {180, 6}, // idx == 109 + {184, 6}, // idx == 110 + {188, 6}, // idx == 111 + {192, 6}, // idx == 112 + {196, 6}, // idx == 113 + {200, 6}, // idx == 114 + {204, 6}, // idx == 115 + {208, 6}, // idx == 116 + {212, 6}, // idx == 117 + {216, 6}, // idx == 118 + {220, 6}, // idx == 119 + {224, 6}, // idx == 120 + {228, 6}, // idx == 121 + {232, 6}, // idx == 122 + {236, 6}, // idx == 123 + {240, 6}, // idx == 124 + {244, 6}, // idx == 125 + {248, 6}, // idx == 126 + {252, 6}, // idx == 127 + {0, 7}, // idx == 128 + {2, 7}, // idx == 129 + {4, 7}, // idx == 130 + {6, 7}, // idx == 131 + {8, 7}, // idx == 132 + {10, 7}, // idx == 133 + {12, 7}, // idx == 134 + {14, 7}, // idx == 135 + {16, 7}, // idx == 136 + {18, 7}, // idx == 137 + {20, 7}, // idx == 138 + {22, 7}, // idx == 139 + {24, 7}, // idx == 140 + {26, 7}, // idx == 141 + {28, 7}, // idx == 142 + {30, 7}, // idx == 143 + {32, 7}, // idx == 144 + {34, 7}, // idx == 145 + {36, 7}, // idx == 146 + {38, 7}, // idx == 147 + {40, 7}, // idx == 148 + {42, 7}, // idx == 149 + {44, 7}, // idx == 150 + {46, 7}, // idx == 151 + {48, 7}, // idx == 152 + {50, 7}, // idx == 153 + {52, 7}, // idx == 154 + {54, 7}, // idx == 155 + {56, 7}, // idx == 156 + {58, 7}, // idx == 157 + {60, 7}, // idx == 158 + {62, 7}, // idx == 159 + {64, 7}, // idx == 160 + {66, 7}, // idx == 161 + {68, 7}, // idx == 162 + {70, 7}, // idx == 163 + {72, 7}, // idx == 164 + {74, 7}, // idx == 165 + {76, 7}, // idx == 166 + {78, 7}, // idx == 167 + {80, 7}, // idx == 168 + {82, 7}, // idx == 169 + {84, 7}, // idx == 170 + {86, 7}, // idx == 171 + {88, 7}, // idx == 172 + {90, 7}, // idx == 173 + {92, 7}, // idx == 174 + {94, 7}, // idx == 175 + {96, 7}, // idx == 176 + {98, 7}, // idx == 177 + {100, 7}, // idx == 178 + {102, 7}, // idx == 179 + {104, 7}, // idx == 180 + {106, 7}, // idx == 181 + {108, 7}, // idx == 182 + {110, 7}, // idx == 183 + {112, 7}, // idx == 184 + {114, 7}, // idx == 185 + {116, 7}, // idx == 186 + {118, 7}, // idx == 187 + {120, 7}, // idx == 188 + {122, 7}, // idx == 189 + {124, 7}, // idx == 190 + {126, 7}, // idx == 191 + {128, 7}, // idx == 192 + {130, 7}, // idx == 193 + {132, 7}, // idx == 194 + {134, 7}, // idx == 195 + {136, 7}, // idx == 196 + {138, 7}, // idx == 197 + {140, 7}, // idx == 198 + {142, 7}, // idx == 199 + {144, 7}, // idx == 200 + {146, 7}, // idx == 201 + {148, 7}, // idx == 202 + {150, 7}, // idx == 203 + {152, 7}, // idx == 204 + {154, 7}, // idx == 205 + {156, 7}, // idx == 206 + {158, 7}, // idx == 207 + {160, 7}, // idx == 208 + {162, 7}, // idx == 209 + {164, 7}, // idx == 210 + {166, 7}, // idx == 211 + {168, 7}, // idx == 212 + {170, 7}, // idx == 213 + {172, 7}, // idx == 214 + {174, 7}, // idx == 215 + {176, 7}, // idx == 216 + {178, 7}, // idx == 217 + {180, 7}, // idx == 218 + {182, 7}, // idx == 219 + {184, 7}, // idx == 220 + {186, 7}, // idx == 221 + {188, 7}, // idx == 222 + {190, 7}, // idx == 223 + {192, 7}, // idx == 224 + {194, 7}, // idx == 225 + {196, 7}, // idx == 226 + {198, 7}, // idx == 227 + {200, 7}, // idx == 228 + {202, 7}, // idx == 229 + {204, 7}, // idx == 230 + {206, 7}, // idx == 231 + {208, 7}, // idx == 232 + {210, 7}, // idx == 233 + {212, 7}, // idx == 234 + {214, 7}, // idx == 235 + {216, 7}, // idx == 236 + {218, 7}, // idx == 237 + {220, 7}, // idx == 238 + {222, 7}, // idx == 239 + {224, 7}, // idx == 240 + {226, 7}, // idx == 241 + {228, 7}, // idx == 242 + {230, 7}, // idx == 243 + {232, 7}, // idx == 244 + {234, 7}, // idx == 245 + {236, 7}, // idx == 246 + {238, 7}, // idx == 247 + {240, 7}, // idx == 248 + {242, 7}, // idx == 249 + {244, 7}, // idx == 250 + {246, 7}, // idx == 251 + {248, 7}, // idx == 252 + {250, 7}, // idx == 253 + {252, 7}, // idx == 254 + {254, 7}, // idx == 255 + {0, 8}, // first// idx == 256 hostIndex + {1, 8}, // idx == 257 + {2, 8}, // idx == 258 + {3, 8}, // idx == 259 + {4, 8}, // idx == 260 + {5, 8}, // idx == 261 + {6, 8}, // idx == 262 + {7, 8}, // idx == 263 + {8, 8}, // idx == 264 + {9, 8}, // idx == 265 + {10, 8}, // idx == 266 + {11, 8}, // idx == 267 + {12, 8}, // idx == 268 + {13, 8}, // idx == 269 + {14, 8}, // idx == 270 + {15, 8}, // idx == 271 + {16, 8}, // idx == 272 + {17, 8}, // idx == 273 + {18, 8}, // idx == 274 + {19, 8}, // idx == 275 + {20, 8}, // idx == 276 + {21, 8}, // idx == 277 + {22, 8}, // idx == 278 + {23, 8}, // idx == 279 + {24, 8}, // idx == 280 + {25, 8}, // idx == 281 + {26, 8}, // idx == 282 + {27, 8}, // idx == 283 + {28, 8}, // idx == 284 + {29, 8}, // idx == 285 + {30, 8}, // idx == 286 + {31, 8}, // idx == 287 + {32, 8}, // idx == 288 + {33, 8}, // idx == 289 + {34, 8}, // idx == 290 + {35, 8}, // idx == 291 + {36, 8}, // idx == 292 + {37, 8}, // idx == 293 + {38, 8}, // idx == 294 + {39, 8}, // idx == 295 + {40, 8}, // idx == 296 + {41, 8}, // idx == 297 + {42, 8}, // idx == 298 + {43, 8}, // idx == 299 + {44, 8}, // idx == 300 + {45, 8}, // idx == 301 + {46, 8}, // idx == 302 + {47, 8}, // idx == 303 + {48, 8}, // idx == 304 + {49, 8}, // idx == 305 + {50, 8}, // idx == 306 + {51, 8}, // idx == 307 + {52, 8}, // idx == 308 + {53, 8}, // idx == 309 + {54, 8}, // idx == 310 + {55, 8}, // idx == 311 + {56, 8}, // idx == 312 + {57, 8}, // idx == 313 + {58, 8}, // idx == 314 + {59, 8}, // idx == 315 + {60, 8}, // idx == 316 + {61, 8}, // idx == 317 + {62, 8}, // idx == 318 + {63, 8}, // idx == 319 + {64, 8}, // idx == 320 + {65, 8}, // idx == 321 + {66, 8}, // idx == 322 + {67, 8}, // idx == 323 + {68, 8}, // idx == 324 + {69, 8}, // idx == 325 + {70, 8}, // idx == 326 + {71, 8}, // idx == 327 + {72, 8}, // idx == 328 + {73, 8}, // idx == 329 + {74, 8}, // idx == 330 + {75, 8}, // idx == 331 + {76, 8}, // idx == 332 + {77, 8}, // idx == 333 + {78, 8}, // idx == 334 + {79, 8}, // idx == 335 + {80, 8}, // idx == 336 + {81, 8}, // idx == 337 + {82, 8}, // idx == 338 + {83, 8}, // idx == 339 + {84, 8}, // idx == 340 + {85, 8}, // idx == 341 + {86, 8}, // idx == 342 + {87, 8}, // idx == 343 + {88, 8}, // idx == 344 + {89, 8}, // idx == 345 + {90, 8}, // idx == 346 + {91, 8}, // idx == 347 + {92, 8}, // idx == 348 + {93, 8}, // idx == 349 + {94, 8}, // idx == 350 + {95, 8}, // idx == 351 + {96, 8}, // idx == 352 + {97, 8}, // idx == 353 + {98, 8}, // idx == 354 + {99, 8}, // idx == 355 + {100, 8}, // idx == 356 + {101, 8}, // idx == 357 + {102, 8}, // idx == 358 + {103, 8}, // idx == 359 + {104, 8}, // idx == 360 + {105, 8}, // idx == 361 + {106, 8}, // idx == 362 + {107, 8}, // idx == 363 + {108, 8}, // idx == 364 + {109, 8}, // idx == 365 + {110, 8}, // idx == 366 + {111, 8}, // idx == 367 + {112, 8}, // idx == 368 + {113, 8}, // idx == 369 + {114, 8}, // idx == 370 + {115, 8}, // idx == 371 + {116, 8}, // idx == 372 + {117, 8}, // idx == 373 + {118, 8}, // idx == 374 + {119, 8}, // idx == 375 + {120, 8}, // idx == 376 + {121, 8}, // idx == 377 + {122, 8}, // idx == 378 + {123, 8}, // idx == 379 + {124, 8}, // idx == 380 + {125, 8}, // idx == 381 + {126, 8}, // idx == 382 + {127, 8}, // idx == 383 + {128, 8}, // idx == 384 + {129, 8}, // idx == 385 + {130, 8}, // idx == 386 + {131, 8}, // idx == 387 + {132, 8}, // idx == 388 + {133, 8}, // idx == 389 + {134, 8}, // idx == 390 + {135, 8}, // idx == 391 + {136, 8}, // idx == 392 + {137, 8}, // idx == 393 + {138, 8}, // idx == 394 + {139, 8}, // idx == 395 + {140, 8}, // idx == 396 + {141, 8}, // idx == 397 + {142, 8}, // idx == 398 + {143, 8}, // idx == 399 + {144, 8}, // idx == 400 + {145, 8}, // idx == 401 + {146, 8}, // idx == 402 + {147, 8}, // idx == 403 + {148, 8}, // idx == 404 + {149, 8}, // idx == 405 + {150, 8}, // idx == 406 + {151, 8}, // idx == 407 + {152, 8}, // idx == 408 + {153, 8}, // idx == 409 + {154, 8}, // idx == 410 + {155, 8}, // idx == 411 + {156, 8}, // idx == 412 + {157, 8}, // idx == 413 + {158, 8}, // idx == 414 + {159, 8}, // idx == 415 + {160, 8}, // idx == 416 + {161, 8}, // idx == 417 + {162, 8}, // idx == 418 + {163, 8}, // idx == 419 + {164, 8}, // idx == 420 + {165, 8}, // idx == 421 + {166, 8}, // idx == 422 + {167, 8}, // idx == 423 + {168, 8}, // idx == 424 + {169, 8}, // idx == 425 + {170, 8}, // idx == 426 + {171, 8}, // idx == 427 + {172, 8}, // idx == 428 + {173, 8}, // idx == 429 + {174, 8}, // idx == 430 + {175, 8}, // idx == 431 + {176, 8}, // idx == 432 + {177, 8}, // idx == 433 + {178, 8}, // idx == 434 + {179, 8}, // idx == 435 + {180, 8}, // idx == 436 + {181, 8}, // idx == 437 + {182, 8}, // idx == 438 + {183, 8}, // idx == 439 + {184, 8}, // idx == 440 + {185, 8}, // idx == 441 + {186, 8}, // idx == 442 + {187, 8}, // idx == 443 + {188, 8}, // idx == 444 + {189, 8}, // idx == 445 + {190, 8}, // idx == 446 + {191, 8}, // idx == 447 + {192, 8}, // idx == 448 + {193, 8}, // idx == 449 + {194, 8}, // idx == 450 + {195, 8}, // idx == 451 + {196, 8}, // idx == 452 + {197, 8}, // idx == 453 + {198, 8}, // idx == 454 + {199, 8}, // idx == 455 + {200, 8}, // idx == 456 + {201, 8}, // idx == 457 + {202, 8}, // idx == 458 + {203, 8}, // idx == 459 + {204, 8}, // idx == 460 + {205, 8}, // idx == 461 + {206, 8}, // idx == 462 + {207, 8}, // idx == 463 + {208, 8}, // idx == 464 + {209, 8}, // idx == 465 + {210, 8}, // idx == 466 + {211, 8}, // idx == 467 + {212, 8}, // idx == 468 + {213, 8}, // idx == 469 + {214, 8}, // idx == 470 + {215, 8}, // idx == 471 + {216, 8}, // idx == 472 + {217, 8}, // idx == 473 + {218, 8}, // idx == 474 + {219, 8}, // idx == 475 + {220, 8}, // idx == 476 + {221, 8}, // idx == 477 + {222, 8}, // idx == 478 + {223, 8}, // idx == 479 + {224, 8}, // idx == 480 + {225, 8}, // idx == 481 + {226, 8}, // idx == 482 + {227, 8}, // idx == 483 + {228, 8}, // idx == 484 + {229, 8}, // idx == 485 + {230, 8}, // idx == 486 + {231, 8}, // idx == 487 + {232, 8}, // idx == 488 + {233, 8}, // idx == 489 + {234, 8}, // idx == 490 + {235, 8}, // idx == 491 + {236, 8}, // idx == 492 + {237, 8}, // idx == 493 + {238, 8}, // idx == 494 + {239, 8}, // idx == 495 + {240, 8}, // idx == 496 + {241, 8}, // idx == 497 + {242, 8}, // idx == 498 + {243, 8}, // idx == 499 + {244, 8}, // idx == 500 + {245, 8}, // idx == 501 + {246, 8}, // idx == 502 + {247, 8}, // idx == 503 + {248, 8}, // idx == 504 + {249, 8}, // idx == 505 + {250, 8}, // idx == 506 + {251, 8}, // idx == 507 + {252, 8}, // idx == 508 + {253, 8}, // idx == 509 + {254, 8}, // idx == 510 + {255, 8}, // idx == 511 + } +*/ diff --git a/metrics.go b/metrics.go index 8bc2f54..61047af 100644 --- a/metrics.go +++ b/metrics.go @@ -64,7 +64,7 @@ func (t *Table[V]) readTableStats() map[string]any { stats.types4[n.hasType().String()]++ for _, idx := range n.prefixes.allIndexes() { - pfxLen := baseIndexToPrefixLen(idx) + _, pfxLen := baseIndexToPrefix(idx) stats.prefixlen4[stride*depth+pfxLen]++ } case false: @@ -74,7 +74,7 @@ func (t *Table[V]) readTableStats() map[string]any { stats.types6[n.hasType().String()]++ for _, idx := range n.prefixes.allIndexes() { - pfxLen := baseIndexToPrefixLen(idx) + _, pfxLen := baseIndexToPrefix(idx) stats.prefixlen6[stride*depth+pfxLen]++ } } diff --git a/node.go b/node.go index e135627..92af47b 100644 --- a/node.go +++ b/node.go @@ -4,9 +4,7 @@ package bart import ( - "math/bits" "slices" - "strconv" "github.com/bits-and-blooms/bitset" ) @@ -67,11 +65,6 @@ func newNode[V any]() *node[V] { } } -// isEmpty returns true if node has neither prefixes nor children. -func (n *node[V]) isEmpty() bool { - return len(n.prefixes.values) == 0 && len(n.children.nodes) == 0 -} - // ################## prefixes ################################## // rank is the key of the popcount compression algorithm, @@ -195,49 +188,6 @@ func (p *prefixCBTree[V]) spmByAddr(addr uint) (baseIdx uint, val V, ok bool) { return p.spmByIndex(addrToBaseIndex(addr)) } -// overlaps reports whether the route addr/prefixLen overlaps -// with any prefix in this node.. -func (p *prefixCBTree[V]) overlaps(addr uint, pfxLen int) bool { - baseIdx := prefixToBaseIndex(addr, pfxLen) - - // any route in this node overlaps prefix? - if _, _, ok := p.lpmByIndex(baseIdx); ok { - return true - } - - // from here on: reverse direction, - // test if prefix overlaps any route in this node. - - // lower boundary, idx == baseIdx alreday tested with lpm above, - // increase it - idx := baseIdx << 1 - - // upper boundary for addr/pfxLen - lastHostIdx := lastHostIndex(addr, pfxLen) - - var ok bool - for { - if idx, ok = p.indexes.NextSet(idx); !ok { - return false - } - - // out of addr/pfxLen - if idx > lastHostIdx { - return false - } - - // e.g.: 365 -> 182 -> 91 -> 45 -> 22 -> baseIdx(11) STOP - // - for j := idx; j >= baseIdx; j = parentIndex(j) { - if j == baseIdx { - return true - } - } - // next round - idx++ - } -} - // getVal for baseIdx. func (p *prefixCBTree[V]) getVal(baseIdx uint) *V { if p.indexes.Test(baseIdx) { @@ -294,101 +244,197 @@ func (c *childTree[V]) get(addr uint) *node[V] { return c.nodes[c.rank(addr)] } -// overlaps reports whether the prefix addr/pfxLen overlaps -// with any child in this node.. -func (c *childTree[V]) overlaps(addr uint, pfxLen int) bool { - // lower boundary for addr/pfxLen - baseIdx := prefixToBaseIndex(addr, pfxLen) +// allAddrs returns the addrs of all child nodes in ascending order. +func (c *childTree[V]) allAddrs() []uint { + all := make([]uint, maxNodeChildren) + _, all = c.addrs.NextSetMany(0, all) + return all +} - // upper boundary for addr/pfxLen - lastHostIdx := lastHostIndex(addr, pfxLen) +// ################## node ################################### - var ok bool +// isEmpty returns true if node has neither prefixes nor children. +func (n *node[V]) isEmpty() bool { + return len(n.prefixes.values) == 0 && len(n.children.nodes) == 0 +} + +// overlapsRec returns true if any IP in the nodes n or o overlaps. +// First test the routes, then the children and if no match rec-descent +// for child nodes with same addr. +func (n *node[V]) overlapsRec(o *node[V]) bool { + // dynamically allot the host routes from prefixes + nAllotIndex := [maxNodePrefixes]bool{} + oAllotIndex := [maxNodePrefixes]bool{} + + // 1. test if any routes overlaps? + + nOk := len(n.prefixes.values) > 0 + oOk := len(o.prefixes.values) > 0 + var nIdx, oIdx uint + // zig-zag, for all routes in both nodes ... for { - if addr, ok = c.addrs.NextSet(addr); !ok { - return false + if nOk { + // range over bitset, node n + if nIdx, nOk = n.prefixes.indexes.NextSet(nIdx); nOk { + // get range of host routes for this prefix + lowerBound, upperBound := lowerUpperBound(nIdx) + + // insert host routes (addr/8) for this prefix, + // some sort of allotment + for i := lowerBound; i <= upperBound; i++ { + // zig-zag, fast return + if oAllotIndex[i] { + return true + } + nAllotIndex[i] = true + } + nIdx++ + } } - // this addrs baseIdx - hostIdx := addrToBaseIndex(addr) - - // out of addr/pfxLen - if hostIdx > lastHostIdx { - return false + if oOk { + // range over bitset, node o + if oIdx, oOk = o.prefixes.indexes.NextSet(oIdx); oOk { + // get range of host routes for this prefix + lowerBound, upperBound := lowerUpperBound(oIdx) + + // insert host routes (addr/8) for this prefix, + // some sort of allotment + for i := lowerBound; i <= upperBound; i++ { + // zig-zag, fast return + if nAllotIndex[i] { + return true + } + oAllotIndex[i] = true + } + oIdx++ + } + } + if !nOk && !oOk { + break } + } - // check if prefix overlaps this child or any of his parents - // within the limits of addr/pfxLen - for idx := hostIdx; idx >= baseIdx; idx = parentIndex(idx) { - if idx == baseIdx { + // full run, zig-zag didn't already match + if len(n.prefixes.values) > 0 && len(o.prefixes.values) > 0 { + for i := firstHostIndex; i <= lastHostIndex; i++ { + if nAllotIndex[i] && oAllotIndex[i] { return true } } - // next round - addr++ } -} -// allAddrs returns the addrs of all child nodes in ascending order. -func (c *childTree[V]) allAddrs() []uint { - all := make([]uint, maxNodeChildren) - _, all = c.addrs.NextSetMany(0, all) - return all -} + // 2. test if routes overlaps any child -// ################## helpers ################################### + nAddresses := [maxNodeChildren]bool{} + oAddresses := [maxNodeChildren]bool{} -// prefixToBaseIndex, maps a prefix table as a 'complete binary tree'. -// This is the so-called baseIndex a.k.a heapFunc: -// -// https://cseweb.ucsd.edu//~varghese/TEACH/cs228/artlookup.pdf -func prefixToBaseIndex(addr uint, prefixLen int) uint { - return (addr >> (stride - prefixLen)) + (1 << prefixLen) -} + nOk = len(n.children.nodes) > 0 + oOk = len(o.children.nodes) > 0 + var nAddr, oAddr uint + // zig-zag, for all addrs in both nodes ... + for { + // range over bitset, node n + if nOk { + if nAddr, nOk = n.children.addrs.NextSet(nAddr); nOk { + if oAllotIndex[nAddr+firstHostIndex] { + return true + } + nAddresses[nAddr] = true + nAddr++ + } + } -// addrToBaseIndex, just prefixToBaseIndex(addr, 8), a.k.a host routes -// but faster, use it for host routes in Get and Lookup. -func addrToBaseIndex(addr uint) uint { - return addr + 1<> 1 -} + if !nOk && !oOk { + break + } + } -// baseIndexToPrefix returns the address and prefix len of baseIdx. -// It's the inverse to prefixToBaseIndex. -func baseIndexToPrefix(baseIdx uint) (addr uint, pfxLen int) { - nlz := bits.LeadingZeros(baseIdx) - pfxLen = strconv.IntSize - nlz - 1 - addr = baseIdx & (0xFF >> (stride - pfxLen)) << (stride - pfxLen) - return addr, pfxLen -} + // 3. rec-descent call for childs with same addr -// baseIndexToPrefixLen returns the prefix len of baseIdx, partly -// the inverse to prefixToBaseIndex. -// Needed for Lookup, it's faster than: -// -// _, pfxLen := baseIndexToPrefix(idx) -func baseIndexToPrefixLen(baseIdx uint) int { - return strconv.IntSize - bits.LeadingZeros(baseIdx) - 1 -} + if len(n.children.nodes) > 0 && len(o.children.nodes) > 0 { + for i := 0; i < len(nAddresses); i++ { + if nAddresses[i] && oAddresses[i] { + // get next child node for this addr + nc := n.children.get(uint(i)) + oc := o.children.get(uint(i)) -var addrMaskTable = []uint{ - 0b1111_1111, - 0b0111_1111, - 0b0011_1111, - 0b0001_1111, - 0b0000_1111, - 0b0000_0111, - 0b0000_0011, - 0b0000_0001, - 0b0000_0000, + // rec-descent + if nc.overlapsRec(oc) { + return true + } + } + } + } + + return false } -// lastHostIndex returns the array index of the last address in addr/len. -func lastHostIndex(addr uint, bits int) uint { - return addrToBaseIndex(addr | addrMaskTable[bits]) +// overlapsPrefix returns true if node overlaps with prefix. +func (n *node[V]) overlapsPrefix(addr uint, pfxLen int) bool { + // lower/upper boundary for addr/pfxLen + pfxLowerBound := addrToBaseIndex(addr) + pfxUpperBound := lastHostIndexOfPrefix(addr, pfxLen) + + // ################################################# + // 1. test if prefix overlaps any child in this node + + // set start address in bitset search with prefix addr + childAddr := addr + var ok bool + for { + if childAddr, ok = n.children.addrs.NextSet(childAddr); !ok { + break + } + + childIdx := addrToBaseIndex(childAddr) + + if childIdx >= pfxLowerBound && childIdx <= pfxUpperBound { + return true + } + + // next round + childAddr++ + } + + // ################################################## + // 2. test if any route in this node overlaps prefix? + + pfxIdx := prefixToBaseIndex(addr, pfxLen) + if _, _, ok := n.prefixes.lpmByIndex(pfxIdx); ok { + return true + } + + // ################################################# + // 3. test if prefix overlaps any route in this node + + // increment to 'next' routeIdx for start in bitset search + // since routeIdx already testet by lpm in other direction + routeIdx := pfxIdx << 1 + for { + if routeIdx, ok = n.prefixes.indexes.NextSet(routeIdx); !ok { + break + } + + lowerBound, upperBound := lowerUpperBound(routeIdx) + if lowerBound >= pfxLowerBound && upperBound <= pfxUpperBound { + return true + } + + // next route + routeIdx++ + } + + return false } diff --git a/node_test.go b/node_test.go index 497acd8..27f4b22 100644 --- a/node_test.go +++ b/node_test.go @@ -18,6 +18,10 @@ import ( "testing" ) +func init() { + baseIndexToPrefixPrecalc() +} + func TestInverseIndex(t *testing.T) { t.Parallel() for i := 0; i < maxNodeChildren; i++ { @@ -50,7 +54,6 @@ func TestPrefixInsert(t *testing.T) { // every lookup. The naive implementation is very slow, but its behavior is // easy to verify by inspection. - // pfxs := shufflePrefixes(allPrefixes())[:100] pfxs := shufflePrefixes(allPrefixes())[:100] slow := slowTable[int]{pfxs} fast := newNode[int]() @@ -72,14 +75,6 @@ func TestPrefixInsert(t *testing.T) { if !getsEqual(fastVal, fastOK, slowVal, slowOK) { t.Fatalf("spm(%d) = (%v, %v), want (%v, %v)", addr, fastVal, fastOK, slowVal, slowOK) } - - for j := 0; j <= 8; j++ { - slowOK = slow.overlapsPrefix(addr, j) - fastOK = fast.prefixes.overlaps(addr, j) - if !getsEqual(fastVal, fastOK, slowVal, slowOK) { - t.Fatalf("spm(%d/%d) = %v, want %v", addr, j, fastOK, slowOK) - } - } } } @@ -121,7 +116,66 @@ func TestPrefixDelete(t *testing.T) { } } -var prefixRouteCount = []int{10, 50, 100, 200} +func TestPrefixOverlaps(t *testing.T) { + t.Parallel() + + pfxs := shufflePrefixes(allPrefixes())[:100] + slow := slowTable[int]{pfxs} + fast := newNode[int]() + + for _, pfx := range pfxs { + fast.prefixes.insert(pfx.addr, pfx.bits, pfx.val) + } + + for _, tt := range allPrefixes() { + slowOK := slow.overlapsPrefix(uint8(tt.addr), tt.bits) + fastOK := fast.overlapsPrefix(tt.addr, tt.bits) + if slowOK != fastOK { + t.Fatalf("overlapsPrefix(%d, %d) = %v, want %v", tt.addr, tt.bits, fastOK, slowOK) + } + } +} + +func TestNodeOverlaps(t *testing.T) { + t.Parallel() + + // Empirically, between 5 and 6 routes per table results in ~50% + // of random pairs overlapping. Cool example of the birthday + // paradox! + const numEntries = 6 + all := allPrefixes() + + seenResult := map[bool]int{} + for i := 0; i < 100_000; i++ { + shufflePrefixes(all) + pfxs := all[:numEntries] + slow := slowTable[int]{pfxs} + fast := newNode[int]() + for _, pfx := range pfxs { + fast.prefixes.insert(pfx.addr, pfx.bits, pfx.val) + } + + inter := all[numEntries : 2*numEntries] + slowInter := slowTable[int]{inter} + fastInter := newNode[int]() + for _, pfx := range inter { + fastInter.prefixes.insert(pfx.addr, pfx.bits, pfx.val) + } + + gotSlow := slow.overlaps(&slowInter) + gotFast := fast.overlapsRec(fastInter) + if gotSlow != gotFast { + t.Fatalf("node.overlaps = %v, want %v", gotFast, gotSlow) + } + seenResult[gotFast]++ + } + t.Log(seenResult) + if len(seenResult) != 2 { // saw both intersections and non-intersections + t.Fatalf("didn't see both intersections and non-intersections\nIntersects: %d\nNon-intersects: %d", seenResult[true], seenResult[false]) + } +} + +var prefixRouteCount = []int{10, 20, 50, 100, 200, 500} // forPrefixCount runs the benchmark fn with different sets of routes. func forPrefixCount(b *testing.B, fn func(b *testing.B, routes []slowEntry[int])) { @@ -195,40 +249,42 @@ func BenchmarkPrefixDeletion(b *testing.B) { var writeSink int -func BenchmarkPrefixGet(b *testing.B) { - // No need to forCountAndOrdering here, route lookup time is independent of - // the route count. - routes := shufflePrefixes(allPrefixes())[:100] - rt := newNode[int]() - for _, route := range routes { - rt.prefixes.insert(route.addr, route.bits, route.val) - } +func BenchmarkPrefixLPM(b *testing.B) { + forPrefixCount(b, func(b *testing.B, routes []slowEntry[int]) { + val := 0 + rt := newNode[int]() + for _, route := range routes { + rt.prefixes.insert(route.addr, route.bits, val) + } - b.ResetTimer() - for i := 0; i < b.N; i++ { - _, writeSink, _ = rt.prefixes.lpmByAddr(uint(i)) - } - gets := float64(b.N) - elapsedSec := b.Elapsed().Seconds() - b.ReportMetric(gets/elapsedSec, "routes/s") + b.ResetTimer() + for i := 0; i < b.N; i++ { + _, writeSink, _ = rt.prefixes.lpmByAddr(uint(uint8(i))) + } + + lpm := float64(b.N) + elapsed := float64(b.Elapsed().Nanoseconds()) + b.ReportMetric(elapsed/lpm, "ns/op") + }) } func BenchmarkPrefixSPM(b *testing.B) { - // No need to forCountAndOrdering here, route lookup time is independent of - // the route count. - routes := shufflePrefixes(allPrefixes())[:100] - rt := newNode[int]() - for _, route := range routes { - rt.prefixes.insert(route.addr, route.bits, route.val) - } + forPrefixCount(b, func(b *testing.B, routes []slowEntry[int]) { + val := 0 + rt := newNode[int]() + for _, route := range routes { + rt.prefixes.insert(route.addr, route.bits, val) + } - b.ResetTimer() - for i := 0; i < b.N; i++ { - _, writeSink, _ = rt.prefixes.spmByAddr(uint(i)) - } - gets := float64(b.N) - elapsedSec := b.Elapsed().Seconds() - b.ReportMetric(gets/elapsedSec, "routes/s") + b.ResetTimer() + for i := 0; i < b.N; i++ { + _, writeSink, _ = rt.prefixes.spmByAddr(uint(uint8(i))) + } + + spm := float64(b.N) + elapsed := float64(b.Elapsed().Nanoseconds()) + b.ReportMetric(elapsed/spm, "ns/op") + }) } // slowTable is an 8-bit routing table implemented as a set of prefixes that are @@ -245,8 +301,8 @@ type slowEntry[V any] struct { val V } -func (stbl *slowTable[V]) String() string { - pfxs := append([]slowEntry[V](nil), stbl.prefixes...) +func (st *slowTable[V]) String() string { + pfxs := append([]slowEntry[V](nil), st.prefixes...) sort.Slice(pfxs, func(i, j int) bool { if pfxs[i].bits != pfxs[j].bits { return pfxs[i].bits < pfxs[j].bits @@ -260,22 +316,22 @@ func (stbl *slowTable[V]) String() string { return ret.String() } -func (stbl *slowTable[V]) delete(addr uint, prefixLen int) { - pfx := make([]slowEntry[V], 0, len(stbl.prefixes)) - for _, e := range stbl.prefixes { +func (st *slowTable[V]) delete(addr uint, prefixLen int) { + pfx := make([]slowEntry[V], 0, len(st.prefixes)) + for _, e := range st.prefixes { if e.addr == addr && e.bits == prefixLen { continue } pfx = append(pfx, e) } - stbl.prefixes = pfx + st.prefixes = pfx } // get, longest-prefix-match -func (stbl *slowTable[V]) get(addr uint) (ret V, ok bool) { +func (st *slowTable[V]) get(addr uint) (ret V, ok bool) { const noMatch = -1 longest := noMatch - for _, e := range stbl.prefixes { + for _, e := range st.prefixes { if addr&pfxMask(e.bits) == e.addr && e.bits >= longest { ret = e.val longest = e.bits @@ -285,10 +341,10 @@ func (stbl *slowTable[V]) get(addr uint) (ret V, ok bool) { } // spm, shortest-prefix-match -func (stbl *slowTable[V]) spm(addr uint) (ret V, ok bool) { +func (st *slowTable[V]) spm(addr uint) (ret V, ok bool) { const noMatch = 9 shortest := noMatch - for _, e := range stbl.prefixes { + for _, e := range st.prefixes { if addr&pfxMask(e.bits) == e.addr && e.bits <= shortest { ret = e.val shortest = e.bits @@ -297,20 +353,35 @@ func (stbl *slowTable[V]) spm(addr uint) (ret V, ok bool) { return ret, shortest != noMatch } -func (stbl *slowTable[T]) overlapsPrefix(addr uint, prefixLen int) bool { - for _, e := range stbl.prefixes { +func (st *slowTable[T]) overlapsPrefix(addr uint8, prefixLen int) bool { + for _, e := range st.prefixes { minBits := prefixLen if e.bits < minBits { minBits = e.bits } - mask := ^addrMaskTable[minBits] - if addr&mask == e.addr&mask { + mask := ^hostMasks[minBits] + if addr&mask == uint8(e.addr)&mask { return true } } return false } +func (st *slowTable[T]) overlaps(so *slowTable[T]) bool { + for _, tp := range st.prefixes { + for _, op := range so.prefixes { + minBits := tp.bits + if op.bits < minBits { + minBits = op.bits + } + if tp.addr&pfxMask(minBits) == op.addr&pfxMask(minBits) { + return true + } + } + } + return false +} + func pfxMask(pfxLen int) uint { return 0xFF << (stride - pfxLen) } diff --git a/table.go b/table.go index 3c3184b..2372da7 100644 --- a/table.go +++ b/table.go @@ -19,9 +19,12 @@ type Table[V any] struct { } // init once, so no constructor is needed. -// BitSets have to be initialized. func (t *Table[V]) init() { t.initOnce.Do(func() { + // precalc lookup table + baseIndexToPrefixPrecalc() + + // BitSets have to be initialized. t.rootV4 = newNode[V]() t.rootV6 = newNode[V]() }) @@ -356,10 +359,7 @@ func (t *Table[V]) OverlapsPrefix(pfx netip.Prefix) bool { // last prefix chunk reached if bits <= stride { - // prefix overlaps any route in this node and vice versa OR - // prefix overlaps any child in this node? - return n.prefixes.overlaps(addr, bits) || - n.children.overlaps(addr, bits) + return n.overlapsPrefix(addr, bits) } // still in the middle of prefix chunks @@ -387,3 +387,11 @@ func (t *Table[V]) OverlapsPrefix(pfx netip.Prefix) bool { n = child } } + +// Overlaps reports whether any IP in the table matches a route in the +// other table. +func (t *Table[V]) Overlaps(o *Table[V]) bool { + t.init() + o.init() + return t.rootV4.overlapsRec(o.rootV4) || t.rootV6.overlapsRec(o.rootV6) +} diff --git a/table_test.go b/table_test.go index 8b1d739..7d4b8c9 100644 --- a/table_test.go +++ b/table_test.go @@ -20,288 +20,92 @@ import ( "time" ) -func TestInsert(t *testing.T) { - tbl := &Table[int]{} - p := func(s string) netip.Prefix { - pfx := netip.MustParsePrefix(s) - if pfx.Addr() != pfx.Masked().Addr() { - panic(fmt.Sprintf("%s is not normalized", s)) +func TestRegression(t *testing.T) { + // original comment by tailscale for ART, + // but the BART implementation is different and has other edge cases. + // + // These tests are specific triggers for subtle correctness issues + // that came up during initial implementation. Even if they seem + // arbitrary, please do not clean them up. They are checking edge + // cases that are very easy to get wrong, and quite difficult for + // the other statistical tests to trigger promptly. + + t.Run("prefixes_aligned_on_stride_boundary", func(t *testing.T) { + tbl := &Table[int]{} + slow := slowPrefixTable[int]{} + p := netip.MustParsePrefix + + tbl.Insert(p("226.205.197.0/24"), 1) + slow.insert(p("226.205.197.0/24"), 1) + tbl.Insert(p("226.205.0.0/16"), 2) + slow.insert(p("226.205.0.0/16"), 2) + + probe := netip.MustParseAddr("226.205.121.152") + got, gotOK := tbl.Get(probe) + want, wantOK := slow.get(probe) + if !getsEqual(got, gotOK, want, wantOK) { + t.Fatalf("got (%v, %v), want (%v, %v)", got, gotOK, want, wantOK) } - return pfx - } - - // Create a new leaf node. - tbl.Insert(p("192.168.0.1/32"), 1) - tbl.Insert(p("192.168.0.1/32"), 1) // no-op - checkRoutes(t, tbl, []tableTest{ - {"192.168.0.1", 1, 1}, - {"192.168.0.2", -1, -1}, - {"192.168.0.3", -1, -1}, - {"192.168.0.255", -1, -1}, - {"192.168.1.1", -1, -1}, - {"192.170.1.1", -1, -1}, - {"192.180.0.1", -1, -1}, - {"192.180.3.5", -1, -1}, - {"10.0.0.5", -1, -1}, - {"10.0.0.15", -1, -1}, - }) - - // Insert into previous leaf, no tree changes - tbl.Insert(p("192.168.0.2/32"), 2) - checkRoutes(t, tbl, []tableTest{ - {"192.168.0.1", 1, 1}, - {"192.168.0.2", 2, 2}, - {"192.168.0.3", -1, -1}, - {"192.168.0.255", -1, -1}, - {"192.168.1.1", -1, -1}, - {"192.170.1.1", -1, -1}, - {"192.180.0.1", -1, -1}, - {"192.180.3.5", -1, -1}, - {"10.0.0.5", -1, -1}, - {"10.0.0.15", -1, -1}, }) - // Insert into previous leaf, unaligned prefix covering the /32s - tbl.Insert(p("192.168.0.0/26"), 7) - checkRoutes(t, tbl, []tableTest{ - {"192.168.0.1", 1, 7}, - {"192.168.0.2", 2, 7}, - {"192.168.0.3", 7, 7}, - {"192.168.0.255", -1, -1}, - {"192.168.1.1", -1, -1}, - {"192.170.1.1", -1, -1}, - {"192.180.0.1", -1, -1}, - {"192.180.3.5", -1, -1}, - {"10.0.0.5", -1, -1}, - {"10.0.0.15", -1, -1}, - }) + t.Run("parent_prefix_inserted_in_different_orders", func(t *testing.T) { + t1, t2 := &Table[int]{}, &Table[int]{} + p := netip.MustParsePrefix - // Create a different leaf elsewhere - tbl.Insert(p("10.0.0.0/27"), 3) - checkRoutes(t, tbl, []tableTest{ - {"192.168.0.1", 1, 7}, - {"192.168.0.2", 2, 7}, - {"192.168.0.3", 7, 7}, - {"192.168.0.255", -1, -1}, - {"192.168.1.1", -1, -1}, - {"192.170.1.1", -1, -1}, - {"192.180.0.1", -1, -1}, - {"192.180.3.5", -1, -1}, - {"10.0.0.5", 3, 3}, - {"10.0.0.15", 3, 3}, - }) - // Insert that creates a new intermediate table and a new child - tbl.Insert(p("192.168.1.1/32"), 4) - checkRoutes(t, tbl, []tableTest{ - {"192.168.0.1", 1, 7}, - {"192.168.0.2", 2, 7}, - {"192.168.0.3", 7, 7}, - {"192.168.0.255", -1, -1}, - {"192.168.1.1", 4, 4}, - {"192.170.1.1", -1, -1}, - {"192.180.0.1", -1, -1}, - {"192.180.3.5", -1, -1}, - {"10.0.0.5", 3, 3}, - {"10.0.0.15", 3, 3}, - }) - // Insert that creates a new intermediate table but no new child - tbl.Insert(p("192.170.0.0/16"), 5) - checkRoutes(t, tbl, []tableTest{ - {"192.168.0.1", 1, 7}, - {"192.168.0.2", 2, 7}, - {"192.168.0.3", 7, 7}, - {"192.168.0.255", -1, -1}, - {"192.168.1.1", 4, 4}, - {"192.170.1.1", 5, 5}, - {"192.180.0.1", -1, -1}, - {"192.180.3.5", -1, -1}, - {"10.0.0.5", 3, 3}, - {"10.0.0.15", 3, 3}, - }) - // New leaf in a different subtree. - tbl.Insert(p("192.180.0.1/32"), 8) - checkRoutes(t, tbl, []tableTest{ - {"192.168.0.1", 1, 7}, - {"192.168.0.2", 2, 7}, - {"192.168.0.3", 7, 7}, - {"192.168.0.255", -1, -1}, - {"192.168.1.1", 4, 4}, - {"192.170.1.1", 5, 5}, - {"192.180.0.1", 8, 8}, - {"192.180.3.5", -1, -1}, - {"10.0.0.5", 3, 3}, - {"10.0.0.15", 3, 3}, - }) + t1.Insert(p("136.20.0.0/16"), 1) + t1.Insert(p("136.20.201.62/32"), 2) - // Insert that creates a new intermediate table but no new child, - // with an unaligned intermediate - tbl.Insert(p("192.180.0.0/21"), 9) - checkRoutes(t, tbl, []tableTest{ - {"192.168.0.1", 1, 7}, - {"192.168.0.2", 2, 7}, - {"192.168.0.3", 7, 7}, - {"192.168.0.255", -1, -1}, - {"192.168.1.1", 4, 4}, - {"192.170.1.1", 5, 5}, - {"192.180.0.1", 8, 9}, - {"192.180.3.5", 9, 9}, - {"10.0.0.5", 3, 3}, - {"10.0.0.15", 3, 3}, - }) + t2.Insert(p("136.20.201.62/32"), 2) + t2.Insert(p("136.20.0.0/16"), 1) - // Insert a default route, those have their own codepath. - tbl.Insert(p("0.0.0.0/0"), 6) - checkRoutes(t, tbl, []tableTest{ - {"192.168.0.1", 1, 6}, - {"192.168.0.2", 2, 6}, - {"192.168.0.3", 7, 6}, - {"192.168.0.255", 6, 6}, - {"192.168.1.1", 4, 6}, - {"192.170.1.1", 5, 6}, - {"192.180.0.1", 8, 6}, - {"192.180.3.5", 9, 6}, - {"10.0.0.5", 3, 6}, - {"10.0.0.15", 3, 6}, + a := netip.MustParseAddr("136.20.54.139") + got1, ok1 := t1.Get(a) + got2, ok2 := t2.Get(a) + if !getsEqual(got1, ok1, got2, ok2) { + t.Errorf("Get(%q) is insertion order dependent: t1=(%v, %v), t2=(%v, %v)", a, got1, ok1, got2, ok2) + } }) - // Now all of the above again, but for IPv6. - - // Create a new leaf node. - tbl.Insert(p("ff:aaaa::1/128"), 1) - checkRoutes(t, tbl, []tableTest{ - {"ff:aaaa::1", 1, 1}, - {"ff:aaaa::2", -1, -1}, - {"ff:aaaa::3", -1, -1}, - {"ff:aaaa::255", -1, -1}, - {"ff:aaaa:aaaa::1", -1, -1}, - {"ff:aaaa:aaaa:bbbb::1", -1, -1}, - {"ff:cccc::1", -1, -1}, - {"ff:cccc::ff", -1, -1}, - {"ffff:bbbb::5", -1, -1}, - {"ffff:bbbb::15", -1, -1}, - }) + t.Run("overlaps_divergent_children_with_parent_route_entry", func(t *testing.T) { + t1, t2 := Table[int]{}, Table[int]{} + p := netip.MustParsePrefix - // Insert into previous leaf, no tree changes - tbl.Insert(p("ff:aaaa::2/128"), 2) - checkRoutes(t, tbl, []tableTest{ - {"ff:aaaa::1", 1, 1}, - {"ff:aaaa::2", 2, 2}, - {"ff:aaaa::3", -1, -1}, - {"ff:aaaa::255", -1, -1}, - {"ff:aaaa:aaaa::1", -1, -1}, - {"ff:aaaa:aaaa:bbbb::1", -1, -1}, - {"ff:cccc::1", -1, -1}, - {"ff:cccc::ff", -1, -1}, - {"ffff:bbbb::5", -1, -1}, - {"ffff:bbbb::15", -1, -1}, - }) - - // Insert into previous leaf, unaligned prefix covering the /128s - tbl.Insert(p("ff:aaaa::/125"), 7) - checkRoutes(t, tbl, []tableTest{ - {"ff:aaaa::1", 1, 7}, - {"ff:aaaa::2", 2, 7}, - {"ff:aaaa::3", 7, 7}, - {"ff:aaaa::255", -1, -1}, - {"ff:aaaa:aaaa::1", -1, -1}, - {"ff:aaaa:aaaa:bbbb::1", -1, -1}, - {"ff:cccc::1", -1, -1}, - {"ff:cccc::ff", -1, -1}, - {"ffff:bbbb::5", -1, -1}, - {"ffff:bbbb::15", -1, -1}, - }) + t1.Insert(p("128.0.0.0/2"), 1) + t1.Insert(p("99.173.128.0/17"), 1) + t1.Insert(p("219.150.142.0/23"), 1) + t1.Insert(p("164.148.190.250/31"), 1) + t1.Insert(p("48.136.229.233/32"), 1) - // Create a different leaf elsewhere - tbl.Insert(p("ffff:bbbb::/120"), 3) - checkRoutes(t, tbl, []tableTest{ - {"ff:aaaa::1", 1, 7}, - {"ff:aaaa::2", 2, 7}, - {"ff:aaaa::3", 7, 7}, - {"ff:aaaa::255", -1, -1}, - {"ff:aaaa:aaaa::1", -1, -1}, - {"ff:aaaa:aaaa:bbbb::1", -1, -1}, - {"ff:cccc::1", -1, -1}, - {"ff:cccc::ff", -1, -1}, - {"ffff:bbbb::5", 3, 3}, - {"ffff:bbbb::15", 3, 3}, - }) + t2.Insert(p("217.32.0.0/11"), 1) + t2.Insert(p("38.176.0.0/12"), 1) + t2.Insert(p("106.16.0.0/13"), 1) + t2.Insert(p("164.85.192.0/23"), 1) + t2.Insert(p("225.71.164.112/31"), 1) - // Insert that creates a new intermediate table and a new child - tbl.Insert(p("ff:aaaa:aaaa::1/128"), 4) - checkRoutes(t, tbl, []tableTest{ - {"ff:aaaa::1", 1, 7}, - {"ff:aaaa::2", 2, 7}, - {"ff:aaaa::3", 7, 7}, - {"ff:aaaa::255", -1, -1}, - {"ff:aaaa:aaaa::1", 4, 4}, - {"ff:aaaa:aaaa:bbbb::1", -1, -1}, - {"ff:cccc::1", -1, -1}, - {"ff:cccc::ff", -1, -1}, - {"ffff:bbbb::5", 3, 3}, - {"ffff:bbbb::15", 3, 3}, + if !t1.Overlaps(&t2) { + t.Fatalf("tables unexpectedly do not overlap") + } }) - // Insert that creates a new intermediate table but no new child - tbl.Insert(p("ff:aaaa:aaaa:bb00::/56"), 5) - checkRoutes(t, tbl, []tableTest{ - {"ff:aaaa::1", 1, 7}, - {"ff:aaaa::2", 2, 7}, - {"ff:aaaa::3", 7, 7}, - {"ff:aaaa::255", -1, -1}, - {"ff:aaaa:aaaa::1", 4, 4}, - {"ff:aaaa:aaaa:bbbb::1", 5, 5}, - {"ff:cccc::1", -1, -1}, - {"ff:cccc::ff", -1, -1}, - {"ffff:bbbb::5", 3, 3}, - {"ffff:bbbb::15", 3, 3}, - }) + t.Run("overlaps_parent_child_comparison_with_route_in_parent", func(t *testing.T) { + t1, t2 := Table[int]{}, Table[int]{} + p := netip.MustParsePrefix - // New leaf in a different subtree. - tbl.Insert(p("ff:cccc::1/128"), 8) - checkRoutes(t, tbl, []tableTest{ - {"ff:aaaa::1", 1, 7}, - {"ff:aaaa::2", 2, 7}, - {"ff:aaaa::3", 7, 7}, - {"ff:aaaa::255", -1, -1}, - {"ff:aaaa:aaaa::1", 4, 4}, - {"ff:aaaa:aaaa:bbbb::1", 5, 5}, - {"ff:cccc::1", 8, 8}, - {"ff:cccc::ff", -1, -1}, - {"ffff:bbbb::5", 3, 3}, - {"ffff:bbbb::15", 3, 3}, - }) + t1.Insert(p("226.0.0.0/8"), 1) + t1.Insert(p("81.128.0.0/9"), 1) + t1.Insert(p("152.0.0.0/9"), 1) + t1.Insert(p("151.220.0.0/16"), 1) + t1.Insert(p("89.162.61.0/24"), 1) - // Insert that creates a new intermediate table but no new child, - // with an unaligned intermediate - tbl.Insert(p("ff:cccc::/37"), 9) - tbl.Insert(p("ff:cccc::/37"), 9) // no-op - checkRoutes(t, tbl, []tableTest{ - {"ff:aaaa::1", 1, 7}, - {"ff:aaaa::2", 2, 7}, - {"ff:aaaa::3", 7, 7}, - {"ff:aaaa::255", -1, -1}, - {"ff:aaaa:aaaa::1", 4, 4}, - {"ff:aaaa:aaaa:bbbb::1", 5, 5}, - {"ff:cccc::1", 8, 9}, - {"ff:cccc::ff", 9, 9}, - {"ffff:bbbb::5", 3, 3}, - {"ffff:bbbb::15", 3, 3}, - }) + t2.Insert(p("54.0.0.0/9"), 1) + t2.Insert(p("35.89.128.0/19"), 1) + t2.Insert(p("72.33.53.0/24"), 1) + t2.Insert(p("2.233.60.32/27"), 1) + t2.Insert(p("152.42.142.160/28"), 1) - // Insert a default route, those have their own codepath. - tbl.Insert(p("::/0"), 6) - tbl.Insert(p("::/0"), 6) // no-op - checkRoutes(t, tbl, []tableTest{ - {"ff:aaaa::1", 1, 6}, - {"ff:aaaa::2", 2, 6}, - {"ff:aaaa::3", 7, 6}, - {"ff:aaaa::255", 6, 6}, - {"ff:aaaa:aaaa::1", 4, 6}, - {"ff:aaaa:aaaa:bbbb::1", 5, 6}, - {"ff:cccc::1", 8, 6}, - {"ff:cccc::ff", 9, 6}, - {"ffff:bbbb::5", 3, 6}, - {"ffff:bbbb::15", 3, 6}, + if !t1.Overlaps(&t2) { + t.Fatalf("tables unexpectedly do not overlap") + } }) } @@ -787,6 +591,44 @@ func TestShortestCompare(t *testing.T) { } } +func TestOverlapsCompare(t *testing.T) { + t.Parallel() + + // Empirically, between 5 and 6 routes per table results in ~50% + // of random pairs overlapping. Cool example of the birthday + // paradox! + const numEntries = 6 + + seen := map[bool]int{} + for i := 0; i < 10000; i++ { + pfxs := randomPrefixes(numEntries) + slow := slowPrefixTable[int]{pfxs} + fast := Table[int]{} + for _, pfx := range pfxs { + fast.Insert(pfx.pfx, pfx.val) + } + + inter := randomPrefixes(numEntries) + slowInter := slowPrefixTable[int]{inter} + fastInter := Table[int]{} + for _, pfx := range inter { + fastInter.Insert(pfx.pfx, pfx.val) + } + + gotSlow := slow.overlaps(&slowInter) + gotFast := fast.Overlaps(&fastInter) + + if gotSlow != gotFast { + t.Fatalf("Overlaps(...) = %v, want %v\nTable1:\n%s\nTable2:\n%v", + gotFast, gotSlow, fast.String(), fastInter.String()) + } + + seen[gotFast]++ + } + + t.Log(seen) +} + func TestOverlapsPrefixCompare(t *testing.T) { t.Parallel() pfxs := randomPrefixes(100_000) @@ -809,7 +651,7 @@ func TestOverlapsPrefixCompare(t *testing.T) { } // test some edge cases -func TestOverlapsEdgeCases(t *testing.T) { +func TestOverlapsPrefixEdgeCases(t *testing.T) { t.Parallel() p := func(s string) netip.Prefix { @@ -1196,6 +1038,52 @@ func BenchmarkTableOverlapsPrefix(b *testing.B) { }) } +func BenchmarkTableOverlaps(b *testing.B) { + forFamilyAndCount(b, func(b *testing.B, routes []slowPrefixEntry[int]) { + var rt Table[int] + for _, route := range routes { + rt.Insert(route.pfx, route.val) + } + + genPfxs := randomPrefixes4 + if routes[0].pfx.Addr().Is6() { + genPfxs = randomPrefixes6 + } + + const ( + intersectSize = 10 + numIntersects = 1_000 + ) + + intersects := make([]*Table[int], numIntersects) + for i := range intersects { + inter := &Table[int]{} + for _, route := range genPfxs(intersectSize) { + inter.Insert(route.pfx, route.val) + } + intersects[i] = inter + } + + var t runningTimer + allocs, bytes := getMemCost(func() { + for i := 0; i < b.N; i++ { + t.Start() + boolSink = rt.Overlaps(intersects[i%numIntersects]) + t.Stop() + } + }) + + b.ReportAllocs() // Enables the output, but we report manually below + lookups := float64(b.N) + elapsed := float64(t.Elapsed().Nanoseconds()) + elapsedSec := t.Elapsed().Seconds() + b.ReportMetric(elapsed/lookups, "ns/op") + b.ReportMetric(lookups/elapsedSec, "tables/s") + b.ReportMetric(allocs/lookups, "allocs/op") + b.ReportMetric(bytes/lookups, "B/op") + }) +} + // getMemCost runs fn 100 times and returns the number of allocations and bytes // allocated by each call to fn. // @@ -1289,15 +1177,26 @@ type slowPrefixEntry[V any] struct { val V } -func (ts *slowPrefixTable[V]) get(addr netip.Addr) (val V, ok bool) { - _, val, ok = ts.lpm(addr) +func (st *slowPrefixTable[T]) insert(pfx netip.Prefix, val T) { + pfx = pfx.Masked() + for i, ent := range st.prefixes { + if ent.pfx == pfx { + st.prefixes[i].val = val + return + } + } + st.prefixes = append(st.prefixes, slowPrefixEntry[T]{pfx, val}) +} + +func (st *slowPrefixTable[V]) get(addr netip.Addr) (val V, ok bool) { + _, val, ok = st.lpm(addr) return } -func (ts *slowPrefixTable[V]) lpm(addr netip.Addr) (lpm netip.Prefix, val V, ok bool) { +func (st *slowPrefixTable[V]) lpm(addr netip.Addr) (lpm netip.Prefix, val V, ok bool) { bestLen := -1 - for _, item := range ts.prefixes { + for _, item := range st.prefixes { if item.pfx.Contains(addr) && item.pfx.Bits() > bestLen { lpm = item.pfx val = item.val @@ -1307,10 +1206,10 @@ func (ts *slowPrefixTable[V]) lpm(addr netip.Addr) (lpm netip.Prefix, val V, ok return lpm, val, bestLen != -1 } -func (ts *slowPrefixTable[V]) spm(addr netip.Addr) (spm netip.Prefix, val V, ok bool) { +func (st *slowPrefixTable[V]) spm(addr netip.Addr) (spm netip.Prefix, val V, ok bool) { bestLen := 129 - for _, item := range ts.prefixes { + for _, item := range st.prefixes { if item.pfx.Contains(addr) && item.pfx.Bits() < bestLen { spm = item.pfx val = item.val @@ -1329,6 +1228,17 @@ func (st *slowPrefixTable[T]) overlapsPrefix(pfx netip.Prefix) bool { return false } +func (st *slowPrefixTable[T]) overlaps(so *slowPrefixTable[T]) bool { + for _, tp := range st.prefixes { + for _, op := range so.prefixes { + if tp.pfx.Overlaps(op.pfx) { + return true + } + } + } + return false +} + // randomPrefixes returns n randomly generated prefixes and associated values, // distributed equally between IPv4 and IPv6. func randomPrefixes(n int) []slowPrefixEntry[int] {