]> git.saurik.com Git - apple/xnu.git/blame - bsd/net/radix.c
xnu-1228.tar.gz
[apple/xnu.git] / bsd / net / radix.c
CommitLineData
1c79356b 1/*
2d21ac55 2 * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
5d5c5d0d 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
1c79356b 5 *
2d21ac55
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
8f6c56a5 14 *
2d21ac55
A
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5
A
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55
A
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
8f6c56a5 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
1c79356b
A
27 */
28/*
29 * Copyright (c) 1988, 1989, 1993
30 * The Regents of the University of California. All rights reserved.
31 *
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
34 * are met:
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 * 3. All advertising materials mentioning features or use of this software
41 * must display the following acknowledgement:
42 * This product includes software developed by the University of
43 * California, Berkeley and its contributors.
44 * 4. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 *
60 * @(#)radix.c 8.4 (Berkeley) 11/2/94
9bccf70c 61 * $FreeBSD: src/sys/net/radix.c,v 1.20.2.2 2001/03/06 00:56:50 obrien Exp $
1c79356b
A
62 */
63
64/*
65 * Routines to build and maintain radix trees for routing lookups.
66 */
67#ifndef _RADIX_H_
68#include <sys/param.h>
69#ifdef KERNEL
70#include <sys/systm.h>
71#include <sys/malloc.h>
72#define M_DONTWAIT M_NOWAIT
73#include <sys/domain.h>
74#else
75#include <stdlib.h>
76#endif
77#include <sys/syslog.h>
78#include <net/radix.h>
91447636
A
79#include <sys/socket.h>
80#include <sys/socketvar.h>
81#include <kern/locks.h>
1c79356b
A
82#endif
83
91447636
A
84static int rn_walktree_from(struct radix_node_head *h, void *a,
85 void *m, walktree_f_t *f, void *w);
86static int rn_walktree(struct radix_node_head *, walktree_f_t *, void *);
1c79356b 87static struct radix_node
91447636
A
88 *rn_insert(void *, struct radix_node_head *, int *,
89 struct radix_node [2]),
90 *rn_newpair(void *, int, struct radix_node[2]),
91 *rn_search(void *, struct radix_node *),
92 *rn_search_m(void *, struct radix_node *, void *);
1c79356b
A
93
94static int max_keylen;
95static struct radix_mask *rn_mkfreelist;
96static struct radix_node_head *mask_rnhead;
97static char *addmask_key;
98static char normal_chars[] = {0, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe, -1};
99static char *rn_zeros, *rn_ones;
100
91447636
A
101
102extern lck_grp_t *domain_proto_mtx_grp;
103extern lck_attr_t *domain_proto_mtx_attr;
104lck_mtx_t *rn_mutex;
105
1c79356b
A
106#define rn_masktop (mask_rnhead->rnh_treetop)
107#undef Bcmp
9bccf70c
A
108#define Bcmp(a, b, l) \
109 (l == 0 ? 0 : bcmp((caddr_t)(a), (caddr_t)(b), (u_long)l))
1c79356b 110
91447636 111static int rn_lexobetter(void *m_arg, void *n_arg);
1c79356b 112static struct radix_mask *
91447636
A
113 rn_new_radix_mask(struct radix_node *tt,
114 struct radix_mask *next);
115static int rn_satsifies_leaf(char *trial, struct radix_node *leaf,
116 int skip);
1c79356b
A
117
/*
 * The data structure for the keys is a radix tree with one way
 * branching removed.  The index rn_bit at an internal node n represents a bit
 * position to be tested.  The tree is arranged so that all descendants
 * of a node n have keys whose bits all agree up to position rn_bit - 1.
 * (We say the index of n is rn_bit.)
 *
 * There is at least one descendant which has a one bit at position rn_bit,
 * and at least one with a zero there.
 *
 * A route is determined by a pair of key and mask.  We require that the
 * bit-wise logical AND of the key and mask be the key.
 * We define the index of a route associated with the mask to be
 * the first bit number in the mask where 0 occurs (with bit number 0
 * representing the highest order bit).
 *
 * We say a mask is normal if every bit is 0, past the index of the mask.
 * If a node n has a descendant (k, m) with index(m) == index(n) == rn_bit,
 * and m is a normal mask, then the route applies to every descendant of n.
 * If the index(m) < rn_bit, this implies the trailing last few bits of k
 * before bit rn_bit are all 0, (and hence consequently true of every
 * descendant of n), so the route applies to all descendants of the node
 * as well.
 *
 * Similar logic shows that a non-normal mask m such that
 * index(m) <= index(n) could potentially apply to many children of n.
 * Thus, for each non-host route, we attach its mask to a list at an internal
 * node as high in the tree as we can go.
 *
 * The present version of the code makes use of normal routes in short-
 * circuiting an explicit mask and compare operation when testing whether
 * a key satisfies a normal route, and also in remembering the unique leaf
 * that governs a subtree.
 */
151
152static struct radix_node *
2d21ac55 153rn_search(void *v_arg, struct radix_node *head)
1c79356b 154{
2d21ac55
A
155 struct radix_node *x;
156 caddr_t v;
1c79356b 157
9bccf70c
A
158 for (x = head, v = v_arg; x->rn_bit >= 0;) {
159 if (x->rn_bmask & v[x->rn_offset])
160 x = x->rn_right;
1c79356b 161 else
9bccf70c 162 x = x->rn_left;
1c79356b
A
163 }
164 return (x);
165}
166
167static struct radix_node *
2d21ac55 168rn_search_m(void *v_arg, struct radix_node *head, void *m_arg)
1c79356b 169{
2d21ac55
A
170 struct radix_node *x;
171 caddr_t v = v_arg, m = m_arg;
1c79356b 172
9bccf70c
A
173 for (x = head; x->rn_bit >= 0;) {
174 if ((x->rn_bmask & m[x->rn_offset]) &&
175 (x->rn_bmask & v[x->rn_offset]))
176 x = x->rn_right;
1c79356b 177 else
9bccf70c 178 x = x->rn_left;
1c79356b
A
179 }
180 return x;
181}
182
/*
 * Decide whether mask `m_arg` is strictly more specific than mask
 * `n_arg`.  Both arguments point at byte strings whose first byte is
 * the total length (length byte included).
 *
 * Returns 0 as soon as n has a bit set that m lacks, or when the two
 * masks turn out to be equal; returns 1 when every bit of n is also in
 * m and the masks differ (including the case where m is longer and has
 * a non-zero byte past the end of n).
 */
int
rn_refines(void *m_arg, void *n_arg)
{
	char *mp = m_arg;
	char *np = n_arg;
	char *n_end = np + *(unsigned char *)np;	/* end of n */
	char *shared_end = n_end;			/* end of overlap */
	char *m_tail_end;
	int excess = (int)(*(unsigned char *)np) - (int)(*(unsigned char *)mp);
	int identical = 1;

	/* Step past the two length bytes. */
	np++;
	mp++;

	/* When n is the longer mask, only its leading part overlaps m. */
	if (excess > 0)
		shared_end -= excess;

	for (; np < shared_end; np++, mp++) {
		if (*np & ~(*mp))
			return 0;
		if (*np != *mp)
			identical = 0;
	}

	/* Anything n has beyond m must be zero. */
	for (; np < n_end; np++) {
		if (*np)
			return 0;
	}

	/* Equal so far but m is longer: any non-zero tail byte refines. */
	if (identical && (excess < 0)) {
		for (m_tail_end = mp - excess; mp < m_tail_end; mp++) {
			if (*mp)
				return 1;
		}
	}
	return (!identical);
}
208
209struct radix_node *
2d21ac55 210rn_lookup(void *v_arg, void *m_arg, struct radix_node_head *head)
1c79356b 211{
2d21ac55
A
212 struct radix_node *x;
213 caddr_t netmask = NULL;
1c79356b
A
214
215 if (m_arg) {
9bccf70c
A
216 x = rn_addmask(m_arg, 1, head->rnh_treetop->rn_offset);
217 if (x == 0)
2d21ac55 218 return (NULL);
1c79356b
A
219 netmask = x->rn_key;
220 }
221 x = rn_match(v_arg, head);
222 if (x && netmask) {
223 while (x && x->rn_mask != netmask)
224 x = x->rn_dupedkey;
225 }
226 return x;
227}
228
229static int
2d21ac55 230rn_satsifies_leaf(char *trial, struct radix_node *leaf, int skip)
1c79356b 231{
2d21ac55 232 char *cp = trial, *cp2 = leaf->rn_key, *cp3 = leaf->rn_mask;
1c79356b
A
233 char *cplim;
234 int length = min(*(u_char *)cp, *(u_char *)cp2);
235
236 if (cp3 == 0)
237 cp3 = rn_ones;
238 else
239 length = min(length, *(u_char *)cp3);
240 cplim = cp + length; cp3 += skip; cp2 += skip;
241 for (cp += skip; cp < cplim; cp++, cp2++, cp3++)
242 if ((*cp ^ *cp2) & *cp3)
243 return 0;
244 return 1;
245}
246
247struct radix_node *
2d21ac55 248rn_match(void *v_arg, struct radix_node_head *head)
1c79356b
A
249{
250 caddr_t v = v_arg;
2d21ac55
A
251 struct radix_node *t = head->rnh_treetop, *x;
252 caddr_t cp = v, cp2;
1c79356b
A
253 caddr_t cplim;
254 struct radix_node *saved_t, *top = t;
9bccf70c 255 int off = t->rn_offset, vlen = *(u_char *)cp, matched_off;
2d21ac55 256 int test, b, rn_bit;
1c79356b
A
257
258 /*
259 * Open code rn_search(v, top) to avoid overhead of extra
260 * subroutine call.
261 */
9bccf70c
A
262 for (; t->rn_bit >= 0; ) {
263 if (t->rn_bmask & cp[t->rn_offset])
264 t = t->rn_right;
1c79356b 265 else
9bccf70c 266 t = t->rn_left;
1c79356b
A
267 }
268 /*
269 * See if we match exactly as a host destination
270 * or at least learn how many bits match, for normal mask finesse.
271 *
272 * It doesn't hurt us to limit how many bytes to check
273 * to the length of the mask, since if it matches we had a genuine
274 * match and the leaf we have is the most specific one anyway;
275 * if it didn't match with a shorter length it would fail
276 * with a long one. This wins big for class B&C netmasks which
277 * are probably the most common case...
278 */
279 if (t->rn_mask)
280 vlen = *(u_char *)t->rn_mask;
281 cp += off; cp2 = t->rn_key + off; cplim = v + vlen;
282 for (; cp < cplim; cp++, cp2++)
283 if (*cp != *cp2)
284 goto on1;
285 /*
286 * This extra grot is in case we are explicitly asked
287 * to look up the default. Ugh!
9bccf70c
A
288 *
289 * Never return the root node itself, it seems to cause a
290 * lot of confusion.
1c79356b 291 */
9bccf70c 292 if (t->rn_flags & RNF_ROOT)
1c79356b
A
293 t = t->rn_dupedkey;
294 return t;
295on1:
296 test = (*cp ^ *cp2) & 0xff; /* find first bit that differs */
297 for (b = 7; (test >>= 1) > 0;)
298 b--;
299 matched_off = cp - v;
300 b += matched_off << 3;
9bccf70c 301 rn_bit = -1 - b;
1c79356b
A
302 /*
303 * If there is a host route in a duped-key chain, it will be first.
304 */
305 if ((saved_t = t)->rn_mask == 0)
306 t = t->rn_dupedkey;
307 for (; t; t = t->rn_dupedkey)
308 /*
309 * Even if we don't match exactly as a host,
310 * we may match if the leaf we wound up at is
311 * a route to a net.
312 */
313 if (t->rn_flags & RNF_NORMAL) {
9bccf70c 314 if (rn_bit <= t->rn_bit)
1c79356b
A
315 return t;
316 } else if (rn_satsifies_leaf(v, t, matched_off))
317 return t;
318 t = saved_t;
319 /* start searching up the tree */
320 do {
2d21ac55 321 struct radix_mask *m;
9bccf70c 322 t = t->rn_parent;
1c79356b 323 m = t->rn_mklist;
9bccf70c
A
324 /*
325 * If non-contiguous masks ever become important
326 * we can restore the masking and open coding of
327 * the search and satisfaction test and put the
328 * calculation of "off" back before the "do".
329 */
330 while (m) {
331 if (m->rm_flags & RNF_NORMAL) {
332 if (rn_bit <= m->rm_bit)
333 return (m->rm_leaf);
334 } else {
335 off = min(t->rn_offset, matched_off);
336 x = rn_search_m(v, t, m->rm_mask);
337 while (x && x->rn_mask != m->rm_mask)
338 x = x->rn_dupedkey;
339 if (x && rn_satsifies_leaf(v, x, off))
340 return x;
341 }
342 m = m->rm_mklist;
1c79356b
A
343 }
344 } while (t != top);
2d21ac55 345 return NULL;
1c79356b
A
346}
347
#ifdef RN_DEBUG
/* Counter handed out as rn_info when nodes are created. */
int rn_nodenum;
/* Head of the creation list, linked through rn_ybro. */
struct radix_node *rn_clist;
/* NOTE(review): not referenced in the visible part of this file. */
int rn_saveinfo;
/* Non-zero enables the LOG_DEBUG tree dumps in rn_insert(). */
int rn_debug = 1;
#endif
354
355static struct radix_node *
2d21ac55 356rn_newpair(void *v, int b, struct radix_node nodes[2])
1c79356b 357{
2d21ac55 358 struct radix_node *tt = nodes, *t = tt + 1;
9bccf70c
A
359 t->rn_bit = b;
360 t->rn_bmask = 0x80 >> (b & 7);
361 t->rn_left = tt;
362 t->rn_offset = b >> 3;
363 tt->rn_bit = -1;
364 tt->rn_key = (caddr_t)v;
365 tt->rn_parent = t;
1c79356b 366 tt->rn_flags = t->rn_flags = RNF_ACTIVE;
2d21ac55 367 tt->rn_mklist = t->rn_mklist = NULL;
1c79356b
A
368#ifdef RN_DEBUG
369 tt->rn_info = rn_nodenum++; t->rn_info = rn_nodenum++;
9bccf70c
A
370 tt->rn_twin = t;
371 tt->rn_ybro = rn_clist;
372 rn_clist = tt;
1c79356b
A
373#endif
374 return t;
375}
376
377static struct radix_node *
2d21ac55
A
378rn_insert(void *v_arg, struct radix_node_head *head, int *dupentry,
379 struct radix_node nodes[2])
1c79356b
A
380{
381 caddr_t v = v_arg;
382 struct radix_node *top = head->rnh_treetop;
9bccf70c 383 int head_off = top->rn_offset, vlen = (int)*((u_char *)v);
2d21ac55
A
384 struct radix_node *t = rn_search(v_arg, top);
385 caddr_t cp = v + head_off;
386 int b;
1c79356b
A
387 struct radix_node *tt;
388 /*
389 * Find first bit at which v and t->rn_key differ
390 */
391 {
2d21ac55
A
392 caddr_t cp2 = t->rn_key + head_off;
393 int cmp_res;
1c79356b
A
394 caddr_t cplim = v + vlen;
395
396 while (cp < cplim)
397 if (*cp2++ != *cp++)
398 goto on1;
399 *dupentry = 1;
400 return t;
401on1:
402 *dupentry = 0;
403 cmp_res = (cp[-1] ^ cp2[-1]) & 0xff;
404 for (b = (cp - v) << 3; cmp_res; b--)
405 cmp_res >>= 1;
406 }
407 {
2d21ac55 408 struct radix_node *p, *x = top;
1c79356b
A
409 cp = v;
410 do {
411 p = x;
9bccf70c
A
412 if (cp[x->rn_offset] & x->rn_bmask)
413 x = x->rn_right;
414 else
415 x = x->rn_left;
416 } while (b > (unsigned) x->rn_bit);
417 /* x->rn_bit < b && x->rn_bit >= 0 */
1c79356b
A
418#ifdef RN_DEBUG
419 if (rn_debug)
420 log(LOG_DEBUG, "rn_insert: Going In:\n"), traverse(p);
421#endif
9bccf70c
A
422 t = rn_newpair(v_arg, b, nodes);
423 tt = t->rn_left;
424 if ((cp[p->rn_offset] & p->rn_bmask) == 0)
425 p->rn_left = t;
1c79356b 426 else
9bccf70c
A
427 p->rn_right = t;
428 x->rn_parent = t;
429 t->rn_parent = p; /* frees x, p as temp vars below */
430 if ((cp[t->rn_offset] & t->rn_bmask) == 0) {
431 t->rn_right = x;
1c79356b 432 } else {
9bccf70c
A
433 t->rn_right = tt;
434 t->rn_left = x;
1c79356b
A
435 }
436#ifdef RN_DEBUG
437 if (rn_debug)
438 log(LOG_DEBUG, "rn_insert: Coming Out:\n"), traverse(p);
439#endif
440 }
441 return (tt);
442}
443
444struct radix_node *
2d21ac55 445rn_addmask(void *n_arg, int search, int skip)
1c79356b
A
446{
447 caddr_t netmask = (caddr_t)n_arg;
2d21ac55
A
448 struct radix_node *x;
449 caddr_t cp, cplim;
450 int b = 0, mlen, j;
1c79356b
A
451 int maskduplicated, m0, isnormal;
452 struct radix_node *saved_x;
453 static int last_zeroed = 0;
454
455 if ((mlen = *(u_char *)netmask) > max_keylen)
456 mlen = max_keylen;
457 if (skip == 0)
458 skip = 1;
459 if (mlen <= skip)
460 return (mask_rnhead->rnh_nodes);
461 if (skip > 1)
462 Bcopy(rn_ones + 1, addmask_key + 1, skip - 1);
463 if ((m0 = mlen) > skip)
464 Bcopy(netmask + skip, addmask_key + skip, mlen - skip);
465 /*
466 * Trim trailing zeroes.
467 */
468 for (cp = addmask_key + mlen; (cp > addmask_key) && cp[-1] == 0;)
469 cp--;
470 mlen = cp - addmask_key;
471 if (mlen <= skip) {
472 if (m0 >= last_zeroed)
473 last_zeroed = mlen;
474 return (mask_rnhead->rnh_nodes);
475 }
476 if (m0 < last_zeroed)
477 Bzero(addmask_key + m0, last_zeroed - m0);
478 *addmask_key = last_zeroed = mlen;
479 x = rn_search(addmask_key, rn_masktop);
480 if (Bcmp(addmask_key, x->rn_key, mlen) != 0)
2d21ac55 481 x = NULL;
1c79356b
A
482 if (x || search)
483 return (x);
484 R_Malloc(x, struct radix_node *, max_keylen + 2 * sizeof (*x));
485 if ((saved_x = x) == 0)
2d21ac55 486 return (NULL);
1c79356b
A
487 Bzero(x, max_keylen + 2 * sizeof (*x));
488 netmask = cp = (caddr_t)(x + 2);
489 Bcopy(addmask_key, cp, mlen);
490 x = rn_insert(cp, mask_rnhead, &maskduplicated, x);
491 if (maskduplicated) {
492 log(LOG_ERR, "rn_addmask: mask impossibly already in tree");
91447636 493 R_Free(saved_x);
1c79356b
A
494 return (x);
495 }
6601e61a 496 mask_rnhead->rnh_cnt++;
1c79356b
A
497 /*
498 * Calculate index of mask, and check for normalcy.
499 */
500 cplim = netmask + mlen; isnormal = 1;
501 for (cp = netmask + skip; (cp < cplim) && *(u_char *)cp == 0xff;)
502 cp++;
503 if (cp != cplim) {
504 for (j = 0x80; (j & *cp) != 0; j >>= 1)
505 b++;
506 if (*cp != normal_chars[b] || cp != (cplim - 1))
507 isnormal = 0;
508 }
509 b += (cp - netmask) << 3;
9bccf70c 510 x->rn_bit = -1 - b;
1c79356b
A
511 if (isnormal)
512 x->rn_flags |= RNF_NORMAL;
513 return (x);
514}
515
/*
 * XXX: arbitrary ordering for non-contiguous masks.
 *
 * Both arguments are byte strings whose first byte is their length.
 * Returns 1 when m is longer than n (not really "better", but the longer
 * one needs to be checked first), or when the lengths are equal and some
 * byte of m is greater than the byte of n at the same position.
 * Returns 0 otherwise.  Note that this is not a strict ordering: two
 * masks can each be "better" than the other.
 */
static int
rn_lexobetter(void *m_arg, void *n_arg)
{
	const unsigned char *m = m_arg;
	const unsigned char *n = n_arg;
	unsigned int mlen = m[0];
	unsigned int nlen = n[0];
	unsigned int i;

	if (mlen > nlen)
		return 1;
	if (mlen < nlen)
		return 0;

	/* Index 0 is the (equal) length byte, so it can never decide. */
	for (i = 0; i < mlen; i++) {
		if (m[i] > n[i])
			return 1;
	}
	return 0;
}
529
530static struct radix_mask *
2d21ac55 531rn_new_radix_mask(struct radix_node *tt, struct radix_mask *next)
1c79356b 532{
2d21ac55 533 struct radix_mask *m;
1c79356b
A
534
535 MKGet(m);
536 if (m == 0) {
537 log(LOG_ERR, "Mask for route not entered\n");
2d21ac55 538 return (NULL);
1c79356b
A
539 }
540 Bzero(m, sizeof *m);
9bccf70c 541 m->rm_bit = tt->rn_bit;
1c79356b
A
542 m->rm_flags = tt->rn_flags;
543 if (tt->rn_flags & RNF_NORMAL)
544 m->rm_leaf = tt;
545 else
546 m->rm_mask = tt->rn_mask;
547 m->rm_mklist = next;
548 tt->rn_mklist = m;
549 return m;
550}
551
552struct radix_node *
2d21ac55
A
553rn_addroute(void *v_arg, void *n_arg, struct radix_node_head *head,
554 struct radix_node treenodes[2])
1c79356b
A
555{
556 caddr_t v = (caddr_t)v_arg, netmask = (caddr_t)n_arg;
2d21ac55 557 struct radix_node *t, *x = NULL, *tt;
1c79356b
A
558 struct radix_node *saved_tt, *top = head->rnh_treetop;
559 short b = 0, b_leaf = 0;
560 int keyduplicated;
561 caddr_t mmask;
562 struct radix_mask *m, **mp;
563
564 /*
565 * In dealing with non-contiguous masks, there may be
566 * many different routes which have the same mask.
567 * We will find it useful to have a unique pointer to
568 * the mask to speed avoiding duplicate references at
569 * nodes and possibly save time in calculating indices.
570 */
571 if (netmask) {
9bccf70c 572 if ((x = rn_addmask(netmask, 0, top->rn_offset)) == 0)
2d21ac55 573 return (NULL);
9bccf70c
A
574 b_leaf = x->rn_bit;
575 b = -1 - x->rn_bit;
1c79356b
A
576 netmask = x->rn_key;
577 }
578 /*
579 * Deal with duplicated keys: attach node to previous instance
580 */
581 saved_tt = tt = rn_insert(v, head, &keyduplicated, treenodes);
582 if (keyduplicated) {
583 for (t = tt; tt; t = tt, tt = tt->rn_dupedkey) {
584 if (tt->rn_mask == netmask)
2d21ac55 585 return (NULL);
1c79356b
A
586 if (netmask == 0 ||
587 (tt->rn_mask &&
9bccf70c
A
588 ((b_leaf < tt->rn_bit) /* index(netmask) > node */
589 || rn_refines(netmask, tt->rn_mask)
590 || rn_lexobetter(netmask, tt->rn_mask))))
1c79356b
A
591 break;
592 }
593 /*
594 * If the mask is not duplicated, we wouldn't
595 * find it among possible duplicate key entries
596 * anyway, so the above test doesn't hurt.
597 *
598 * We sort the masks for a duplicated key the same way as
599 * in a masklist -- most specific to least specific.
600 * This may require the unfortunate nuisance of relocating
601 * the head of the list.
602 */
603 if (tt == saved_tt) {
604 struct radix_node *xx = x;
605 /* link in at head of list */
606 (tt = treenodes)->rn_dupedkey = t;
607 tt->rn_flags = t->rn_flags;
9bccf70c
A
608 tt->rn_parent = x = t->rn_parent;
609 t->rn_parent = tt; /* parent */
610 if (x->rn_left == t)
611 x->rn_left = tt;
612 else
613 x->rn_right = tt;
1c79356b
A
614 saved_tt = tt; x = xx;
615 } else {
616 (tt = treenodes)->rn_dupedkey = t->rn_dupedkey;
617 t->rn_dupedkey = tt;
9bccf70c 618 tt->rn_parent = t; /* parent */
1c79356b 619 if (tt->rn_dupedkey) /* parent */
9bccf70c 620 tt->rn_dupedkey->rn_parent = tt; /* parent */
1c79356b
A
621 }
622#ifdef RN_DEBUG
623 t=tt+1; tt->rn_info = rn_nodenum++; t->rn_info = rn_nodenum++;
624 tt->rn_twin = t; tt->rn_ybro = rn_clist; rn_clist = tt;
625#endif
626 tt->rn_key = (caddr_t) v;
9bccf70c 627 tt->rn_bit = -1;
1c79356b
A
628 tt->rn_flags = RNF_ACTIVE;
629 }
6601e61a 630 head->rnh_cnt++;
1c79356b
A
631 /*
632 * Put mask in tree.
633 */
634 if (netmask) {
635 tt->rn_mask = netmask;
9bccf70c 636 tt->rn_bit = x->rn_bit;
1c79356b
A
637 tt->rn_flags |= x->rn_flags & RNF_NORMAL;
638 }
9bccf70c 639 t = saved_tt->rn_parent;
1c79356b
A
640 if (keyduplicated)
641 goto on2;
9bccf70c
A
642 b_leaf = -1 - t->rn_bit;
643 if (t->rn_right == saved_tt)
644 x = t->rn_left;
645 else
646 x = t->rn_right;
1c79356b 647 /* Promote general routes from below */
9bccf70c 648 if (x->rn_bit < 0) {
1c79356b 649 for (mp = &t->rn_mklist; x; x = x->rn_dupedkey)
9bccf70c 650 if (x->rn_mask && (x->rn_bit >= b_leaf) && x->rn_mklist == 0) {
2d21ac55 651 *mp = m = rn_new_radix_mask(x, NULL);
1c79356b
A
652 if (m)
653 mp = &m->rm_mklist;
654 }
655 } else if (x->rn_mklist) {
656 /*
657 * Skip over masks whose index is > that of new node
658 */
659 for (mp = &x->rn_mklist; (m = *mp); mp = &m->rm_mklist)
9bccf70c 660 if (m->rm_bit >= b_leaf)
1c79356b 661 break;
2d21ac55 662 t->rn_mklist = m; *mp = NULL;
1c79356b
A
663 }
664on2:
665 /* Add new route to highest possible ancestor's list */
9bccf70c 666 if ((netmask == 0) || (b > t->rn_bit ))
1c79356b 667 return tt; /* can't lift at all */
9bccf70c 668 b_leaf = tt->rn_bit;
1c79356b
A
669 do {
670 x = t;
9bccf70c
A
671 t = t->rn_parent;
672 } while (b <= t->rn_bit && x != top);
1c79356b
A
673 /*
674 * Search through routes associated with node to
675 * insert new route according to index.
676 * Need same criteria as when sorting dupedkeys to avoid
677 * double loop on deletion.
678 */
679 for (mp = &x->rn_mklist; (m = *mp); mp = &m->rm_mklist) {
9bccf70c 680 if (m->rm_bit < b_leaf)
1c79356b 681 continue;
9bccf70c 682 if (m->rm_bit > b_leaf)
1c79356b
A
683 break;
684 if (m->rm_flags & RNF_NORMAL) {
685 mmask = m->rm_leaf->rn_mask;
686 if (tt->rn_flags & RNF_NORMAL) {
9bccf70c
A
687 log(LOG_ERR,
688 "Non-unique normal route, mask not entered");
1c79356b
A
689 return tt;
690 }
691 } else
692 mmask = m->rm_mask;
693 if (mmask == netmask) {
694 m->rm_refs++;
695 tt->rn_mklist = m;
696 return tt;
697 }
9bccf70c
A
698 if (rn_refines(netmask, mmask)
699 || rn_lexobetter(netmask, mmask))
1c79356b
A
700 break;
701 }
702 *mp = rn_new_radix_mask(tt, *mp);
703 return tt;
704}
705
706struct radix_node *
2d21ac55 707rn_delete(void *v_arg, void *netmask_arg, struct radix_node_head *head)
1c79356b 708{
2d21ac55 709 struct radix_node *t, *p, *x, *tt;
1c79356b
A
710 struct radix_mask *m, *saved_m, **mp;
711 struct radix_node *dupedkey, *saved_tt, *top;
712 caddr_t v, netmask;
713 int b, head_off, vlen;
714
715 v = v_arg;
716 netmask = netmask_arg;
717 x = head->rnh_treetop;
718 tt = rn_search(v, x);
9bccf70c 719 head_off = x->rn_offset;
1c79356b
A
720 vlen = *(u_char *)v;
721 saved_tt = tt;
722 top = x;
723 if (tt == 0 ||
724 Bcmp(v + head_off, tt->rn_key + head_off, vlen - head_off))
2d21ac55 725 return (NULL);
1c79356b
A
726 /*
727 * Delete our route from mask lists.
728 */
729 if (netmask) {
730 if ((x = rn_addmask(netmask, 1, head_off)) == 0)
2d21ac55 731 return (NULL);
1c79356b
A
732 netmask = x->rn_key;
733 while (tt->rn_mask != netmask)
734 if ((tt = tt->rn_dupedkey) == 0)
2d21ac55 735 return (NULL);
1c79356b
A
736 }
737 if (tt->rn_mask == 0 || (saved_m = m = tt->rn_mklist) == 0)
738 goto on1;
739 if (tt->rn_flags & RNF_NORMAL) {
740 if (m->rm_leaf != tt || m->rm_refs > 0) {
741 log(LOG_ERR, "rn_delete: inconsistent annotation\n");
2d21ac55 742 return NULL; /* dangling ref could cause disaster */
1c79356b
A
743 }
744 } else {
745 if (m->rm_mask != tt->rn_mask) {
746 log(LOG_ERR, "rn_delete: inconsistent annotation\n");
747 goto on1;
748 }
749 if (--m->rm_refs >= 0)
750 goto on1;
751 }
9bccf70c
A
752 b = -1 - tt->rn_bit;
753 t = saved_tt->rn_parent;
754 if (b > t->rn_bit)
1c79356b
A
755 goto on1; /* Wasn't lifted at all */
756 do {
757 x = t;
9bccf70c
A
758 t = t->rn_parent;
759 } while (b <= t->rn_bit && x != top);
1c79356b
A
760 for (mp = &x->rn_mklist; (m = *mp); mp = &m->rm_mklist)
761 if (m == saved_m) {
762 *mp = m->rm_mklist;
763 MKFree(m);
764 break;
765 }
766 if (m == 0) {
767 log(LOG_ERR, "rn_delete: couldn't find our annotation\n");
768 if (tt->rn_flags & RNF_NORMAL)
2d21ac55 769 return (NULL); /* Dangling ref to us */
1c79356b
A
770 }
771on1:
772 /*
773 * Eliminate us from tree
774 */
775 if (tt->rn_flags & RNF_ROOT)
2d21ac55 776 return (NULL);
6601e61a 777 head->rnh_cnt--;
1c79356b
A
778#ifdef RN_DEBUG
779 /* Get us out of the creation list */
780 for (t = rn_clist; t && t->rn_ybro != tt; t = t->rn_ybro) {}
781 if (t) t->rn_ybro = tt->rn_ybro;
782#endif
9bccf70c 783 t = tt->rn_parent;
1c79356b
A
784 dupedkey = saved_tt->rn_dupedkey;
785 if (dupedkey) {
786 /*
787 * at this point, tt is the deletion target and saved_tt
788 * is the head of the dupekey chain
789 */
790 if (tt == saved_tt) {
791 /* remove from head of chain */
9bccf70c
A
792 x = dupedkey; x->rn_parent = t;
793 if (t->rn_left == tt)
794 t->rn_left = x;
795 else
796 t->rn_right = x;
1c79356b
A
797 } else {
798 /* find node in front of tt on the chain */
799 for (x = p = saved_tt; p && p->rn_dupedkey != tt;)
800 p = p->rn_dupedkey;
801 if (p) {
802 p->rn_dupedkey = tt->rn_dupedkey;
9bccf70c
A
803 if (tt->rn_dupedkey) /* parent */
804 tt->rn_dupedkey->rn_parent = p;
805 /* parent */
1c79356b
A
806 } else log(LOG_ERR, "rn_delete: couldn't find us\n");
807 }
808 t = tt + 1;
809 if (t->rn_flags & RNF_ACTIVE) {
810#ifndef RN_DEBUG
9bccf70c
A
811 *++x = *t;
812 p = t->rn_parent;
1c79356b 813#else
9bccf70c
A
814 b = t->rn_info;
815 *++x = *t;
816 t->rn_info = b;
817 p = t->rn_parent;
1c79356b 818#endif
9bccf70c
A
819 if (p->rn_left == t)
820 p->rn_left = x;
821 else
822 p->rn_right = x;
823 x->rn_left->rn_parent = x;
824 x->rn_right->rn_parent = x;
1c79356b
A
825 }
826 goto out;
827 }
9bccf70c
A
828 if (t->rn_left == tt)
829 x = t->rn_right;
830 else
831 x = t->rn_left;
832 p = t->rn_parent;
833 if (p->rn_right == t)
834 p->rn_right = x;
835 else
836 p->rn_left = x;
837 x->rn_parent = p;
1c79356b
A
838 /*
839 * Demote routes attached to us.
840 */
841 if (t->rn_mklist) {
9bccf70c 842 if (x->rn_bit >= 0) {
1c79356b
A
843 for (mp = &x->rn_mklist; (m = *mp);)
844 mp = &m->rm_mklist;
845 *mp = t->rn_mklist;
846 } else {
847 /* If there are any key,mask pairs in a sibling
848 duped-key chain, some subset will appear sorted
849 in the same order attached to our mklist */
850 for (m = t->rn_mklist; m && x; x = x->rn_dupedkey)
851 if (m == x->rn_mklist) {
852 struct radix_mask *mm = m->rm_mklist;
2d21ac55 853 x->rn_mklist = NULL;
1c79356b
A
854 if (--(m->rm_refs) < 0)
855 MKFree(m);
856 m = mm;
857 }
858 if (m)
859 log(LOG_ERR,
860 "rn_delete: Orphaned Mask %p at %p\n",
861 (void *)m, (void *)x);
862 }
863 }
864 /*
865 * We may be holding an active internal node in the tree.
866 */
867 x = tt + 1;
868 if (t != x) {
869#ifndef RN_DEBUG
870 *t = *x;
871#else
9bccf70c
A
872 b = t->rn_info;
873 *t = *x;
874 t->rn_info = b;
1c79356b 875#endif
9bccf70c
A
876 t->rn_left->rn_parent = t;
877 t->rn_right->rn_parent = t;
878 p = x->rn_parent;
879 if (p->rn_left == x)
880 p->rn_left = t;
881 else
882 p->rn_right = t;
1c79356b
A
883 }
884out:
885 tt->rn_flags &= ~RNF_ACTIVE;
886 tt[1].rn_flags &= ~RNF_ACTIVE;
887 return (tt);
888}
889
890/*
891 * This is the same as rn_walktree() except for the parameters and the
892 * exit.
893 */
894static int
2d21ac55
A
895rn_walktree_from(struct radix_node_head *h, void *a, void *m, walktree_f_t *f,
896 void *w)
1c79356b
A
897{
898 int error;
899 struct radix_node *base, *next;
900 u_char *xa = (u_char *)a;
901 u_char *xm = (u_char *)m;
6601e61a
A
902 struct radix_node *rn, *last;
903 int stopping;
1c79356b 904 int lastb;
6601e61a
A
905 int rnh_cnt;
906
907 /*
908 * This gets complicated because we may delete the node while
909 * applying the function f to it; we cannot simply use the next
910 * leaf as the successor node in advance, because that leaf may
911 * be removed as well during deletion when it is a clone of the
912 * current node. When that happens, we would end up referring
913 * to an already-freed radix node as the successor node. To get
914 * around this issue, if we detect that the radix tree has changed
915 * in dimension (smaller than before), we simply restart the walk
916 * from the top of tree.
917 */
918restart:
919 last = NULL;
920 stopping = 0;
921 rnh_cnt = h->rnh_cnt;
1c79356b
A
922
923 /*
924 * rn_search_m is sort-of-open-coded here.
925 */
9bccf70c 926 for (rn = h->rnh_treetop; rn->rn_bit >= 0; ) {
1c79356b 927 last = rn;
6601e61a 928 if (!(rn->rn_bmask & xm[rn->rn_offset]))
1c79356b 929 break;
6601e61a
A
930
931 if (rn->rn_bmask & xa[rn->rn_offset])
9bccf70c 932 rn = rn->rn_right;
6601e61a 933 else
9bccf70c 934 rn = rn->rn_left;
1c79356b 935 }
1c79356b
A
936
937 /*
938 * Two cases: either we stepped off the end of our mask,
939 * in which case last == rn, or we reached a leaf, in which
940 * case we want to start from the last node we looked at.
941 * Either way, last is the node we want to start from.
942 */
943 rn = last;
9bccf70c 944 lastb = rn->rn_bit;
1c79356b 945
6601e61a 946 /* First time through node, go left */
9bccf70c
A
947 while (rn->rn_bit >= 0)
948 rn = rn->rn_left;
1c79356b
A
949
950 while (!stopping) {
1c79356b
A
951 base = rn;
952 /* If at right child go back up, otherwise, go right */
9bccf70c
A
953 while (rn->rn_parent->rn_right == rn
954 && !(rn->rn_flags & RNF_ROOT)) {
955 rn = rn->rn_parent;
1c79356b
A
956
957 /* if went up beyond last, stop */
6601e61a 958 if (rn->rn_bit <= lastb) {
1c79356b 959 stopping = 1;
6601e61a
A
960 /*
961 * XXX we should jump to the 'Process leaves'
962 * part, because the values of 'rn' and 'next'
963 * we compute will not be used. Not a big deal
964 * because this loop will terminate, but it is
965 * inefficient and hard to understand!
966 */
1c79356b
A
967 }
968 }
969
2d21ac55
A
970 /*
971 * The following code (bug fix) inherited from FreeBSD is
972 * currently disabled, because our implementation uses the
973 * RTF_PRCLONING scheme that has been abandoned in current
974 * FreeBSD release. The scheme involves setting such a flag
975 * for the default route entry, and therefore all off-link
976 * destinations would become clones of that entry. Enabling
977 * the following code would be problematic at this point,
978 * because the removal of default route would cause only
979 * the left-half of the tree to be traversed, leaving the
980 * right-half untouched. If there are clones of the entry
981 * that reside in that right-half, they would not be deleted
982 * and would linger around until they expire or explicitly
983 * deleted, which is a very bad thing.
984 *
985 * This code should be uncommented only after we get rid
986 * of the RTF_PRCLONING scheme.
987 */
988#if 0
989 /*
990 * At the top of the tree, no need to traverse the right
991 * half, prevent the traversal of the entire tree in the
992 * case of default route.
993 */
994 if (rn->rn_parent->rn_flags & RNF_ROOT)
995 stopping = 1;
996#endif
997
6601e61a 998 /* Find the next *leaf* to start from */
9bccf70c
A
999 for (rn = rn->rn_parent->rn_right; rn->rn_bit >= 0;)
1000 rn = rn->rn_left;
1c79356b
A
1001 next = rn;
1002 /* Process leaves */
1003 while ((rn = base) != 0) {
1004 base = rn->rn_dupedkey;
1c79356b
A
1005 if (!(rn->rn_flags & RNF_ROOT)
1006 && (error = (*f)(rn, w)))
1007 return (error);
1008 }
6601e61a
A
1009 /* If one or more nodes got deleted, restart from top */
1010 if (h->rnh_cnt < rnh_cnt)
1011 goto restart;
1c79356b 1012 rn = next;
6601e61a 1013 if (rn->rn_flags & RNF_ROOT)
1c79356b 1014 stopping = 1;
1c79356b
A
1015 }
1016 return 0;
1017}
1018
1019static int
2d21ac55 1020rn_walktree(struct radix_node_head *h, walktree_f_t *f, void *w)
1c79356b
A
1021{
1022 int error;
1023 struct radix_node *base, *next;
6601e61a
A
1024 struct radix_node *rn;
1025 int rnh_cnt;
1026
1c79356b 1027 /*
6601e61a
A
1028 * This gets complicated because we may delete the node while
1029 * applying the function f to it; we cannot simply use the next
1030 * leaf as the successor node in advance, because that leaf may
1031 * be removed as well during deletion when it is a clone of the
1032 * current node. When that happens, we would end up referring
1033 * to an already-freed radix node as the successor node. To get
1034 * around this issue, if we detect that the radix tree has changed
1035 * in dimension (smaller than before), we simply restart the walk
1036 * from the top of tree.
1c79356b 1037 */
6601e61a
A
1038restart:
1039 rn = h->rnh_treetop;
1040 rnh_cnt = h->rnh_cnt;
1041
1c79356b 1042 /* First time through node, go left */
6601e61a
A
1043 while (rn->rn_bit >= 0)
1044 rn = rn->rn_left;
1c79356b
A
1045 for (;;) {
1046 base = rn;
1047 /* If at right child go back up, otherwise, go right */
6601e61a
A
1048 while (rn->rn_parent->rn_right == rn &&
1049 (rn->rn_flags & RNF_ROOT) == 0)
9bccf70c 1050 rn = rn->rn_parent;
6601e61a
A
1051 /* Find the next *leaf* to start from */
1052 for (rn = rn->rn_parent->rn_right; rn->rn_bit >= 0;)
9bccf70c 1053 rn = rn->rn_left;
1c79356b
A
1054 next = rn;
1055 /* Process leaves */
6601e61a 1056 while ((rn = base) != NULL) {
1c79356b 1057 base = rn->rn_dupedkey;
9bccf70c
A
1058 if (!(rn->rn_flags & RNF_ROOT)
1059 && (error = (*f)(rn, w)))
1c79356b
A
1060 return (error);
1061 }
6601e61a
A
1062 /* If one or more nodes got deleted, restart from top */
1063 if (h->rnh_cnt < rnh_cnt)
1064 goto restart;
1c79356b
A
1065 rn = next;
1066 if (rn->rn_flags & RNF_ROOT)
1067 return (0);
1068 }
1069 /* NOTREACHED */
1070}
1071
1072int
2d21ac55 1073rn_inithead(void **head, int off)
1c79356b 1074{
2d21ac55
A
1075 struct radix_node_head *rnh;
1076 struct radix_node *t, *tt, *ttt;
1c79356b
A
1077 if (*head)
1078 return (1);
1079 R_Malloc(rnh, struct radix_node_head *, sizeof (*rnh));
1080 if (rnh == 0)
1081 return (0);
1082 Bzero(rnh, sizeof (*rnh));
1083 *head = rnh;
1084 t = rn_newpair(rn_zeros, off, rnh->rnh_nodes);
1085 ttt = rnh->rnh_nodes + 2;
9bccf70c
A
1086 t->rn_right = ttt;
1087 t->rn_parent = t;
1088 tt = t->rn_left;
1c79356b 1089 tt->rn_flags = t->rn_flags = RNF_ROOT | RNF_ACTIVE;
9bccf70c 1090 tt->rn_bit = -1 - off;
1c79356b
A
1091 *ttt = *tt;
1092 ttt->rn_key = rn_ones;
1093 rnh->rnh_addaddr = rn_addroute;
1094 rnh->rnh_deladdr = rn_delete;
1095 rnh->rnh_matchaddr = rn_match;
1096 rnh->rnh_lookup = rn_lookup;
1097 rnh->rnh_walktree = rn_walktree;
1098 rnh->rnh_walktree_from = rn_walktree_from;
1099 rnh->rnh_treetop = t;
6601e61a 1100 rnh->rnh_cnt = 3;
1c79356b
A
1101 return (1);
1102}
1103
1104void
2d21ac55 1105rn_init(void)
1c79356b
A
1106{
1107 char *cp, *cplim;
1108#ifdef KERNEL
1109 struct domain *dom;
1110
91447636 1111 /* lock already held when rn_init is called */
1c79356b
A
1112 for (dom = domains; dom; dom = dom->dom_next)
1113 if (dom->dom_maxrtkey > max_keylen)
1114 max_keylen = dom->dom_maxrtkey;
1115#endif
1116 if (max_keylen == 0) {
1117 log(LOG_ERR,
1118 "rn_init: radix functions require max_keylen be set\n");
1119 return;
1120 }
1121 R_Malloc(rn_zeros, char *, 3 * max_keylen);
1122 if (rn_zeros == NULL)
1123 panic("rn_init");
1124 Bzero(rn_zeros, 3 * max_keylen);
1125 rn_ones = cp = rn_zeros + max_keylen;
1126 addmask_key = cplim = rn_ones + max_keylen;
1127 while (cp < cplim)
1128 *cp++ = -1;
1129 if (rn_inithead((void **)&mask_rnhead, 0) == 0)
1130 panic("rn_init 2");
91447636
A
1131
1132 rn_mutex = lck_mtx_alloc_init(domain_proto_mtx_grp, domain_proto_mtx_attr);
1133}