/*
 * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Copyright (c) 1988, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)radix.c	8.4 (Berkeley) 11/2/94
 * $FreeBSD: src/sys/net/radix.c,v 1.20.2.2 2001/03/06 00:56:50 obrien Exp $
 */
63
/*
 * Routines to build and maintain radix trees for routing lookups.
 */
#ifndef _RADIX_H_
#include <sys/param.h>
#ifdef KERNEL
#include <sys/systm.h>
#include <sys/malloc.h>
#define	M_DONTWAIT M_NOWAIT
#include <sys/domain.h>
#else
#include <stdlib.h>
#endif
#include <sys/syslog.h>
#include <net/radix.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <kern/locks.h>
#endif

static int rn_walktree_from(struct radix_node_head *h, void *a,
	    void *m, walktree_f_t *f, void *w);
static int rn_walktree(struct radix_node_head *, walktree_f_t *, void *);
static struct radix_node
	 *rn_insert(void *, struct radix_node_head *, int *,
	    struct radix_node[2]),
	 *rn_newpair(void *, int, struct radix_node[2]),
	 *rn_search(void *, struct radix_node *),
	 *rn_search_m(void *, struct radix_node *, void *);

static int max_keylen;
static struct radix_mask *rn_mkfreelist;
static struct radix_node_head *mask_rnhead;
static char *addmask_key;
static char normal_chars[] = {0, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe, -1};
static char *rn_zeros, *rn_ones;

extern lck_grp_t *domain_proto_mtx_grp;
extern lck_attr_t *domain_proto_mtx_attr;
lck_mtx_t *rn_mutex;

#define	rn_masktop (mask_rnhead->rnh_treetop)
#undef Bcmp
#define	Bcmp(a, b, l) \
	(l == 0 ? 0 : bcmp((caddr_t)(a), (caddr_t)(b), (u_long)l))

static int rn_lexobetter(void *m_arg, void *n_arg);
static struct radix_mask *
	rn_new_radix_mask(struct radix_node *tt,
	    struct radix_mask *next);
static int rn_satsifies_leaf(char *trial, struct radix_node *leaf,
	    int skip);

/*
 * The data structure for the keys is a radix tree with one-way
 * branching removed.  The index rn_bit at an internal node n represents a bit
 * position to be tested.  The tree is arranged so that all descendants
 * of a node n have keys whose bits all agree up to position rn_bit - 1.
 * (We say the index of n is rn_bit.)
 *
 * There is at least one descendant which has a one bit at position rn_bit,
 * and at least one with a zero there.
 *
 * A route is determined by a pair of key and mask.  We require that the
 * bit-wise logical AND of the key and mask be the key.
 * We define the index of the route associated with the mask to be
 * the first bit number in the mask where 0 occurs (with bit number 0
 * representing the highest order bit).
 *
 * We say a mask is normal if every bit is 0 past the index of the mask.
 * If a node n has a descendant (k, m) with index(m) == index(n) == rn_bit,
 * and m is a normal mask, then the route applies to every descendant of n.
 * If index(m) < rn_bit, this implies that the last few bits of k
 * before bit rn_bit are all 0 (and hence the same is true of every
 * descendant of n), so the route applies to all descendants of the node
 * as well.
 *
 * Similar logic shows that a non-normal mask m such that
 * index(m) <= index(n) could potentially apply to many children of n.
 * Thus, for each non-host route, we attach its mask to a list at an internal
 * node as high in the tree as we can go.
 *
 * The present version of the code makes use of normal routes in short-
 * circuiting an explicit mask and compare operation when testing whether
 * a key satisfies a normal route, and also in remembering the unique leaf
 * that governs a subtree.
 */

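/*
 * Illustrative example: for IPv4 routes, a mask of 255.255.255.0 has its
 * first zero bit 24 bits into the address, so the index of the route is
 * 24 plus the bit offset of the address within the key.  Every bit past
 * that index is also zero, so the mask is "normal"; a discontiguous mask
 * such as 255.0.255.0 is non-normal.
 */

/*
 * Walk the tree from the given head, deciding left/right at each internal
 * node by testing the key bit selected by rn_bmask/rn_offset, and return
 * the leaf reached.  The caller is responsible for verifying the match.
 */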
static struct radix_node *
rn_search(void *v_arg, struct radix_node *head)
{
	struct radix_node *x;
	caddr_t v;

	for (x = head, v = v_arg; x->rn_bit >= 0;) {
		if (x->rn_bmask & v[x->rn_offset])
			x = x->rn_right;
		else
			x = x->rn_left;
	}
	return (x);
}

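/*
 * Like rn_search(), but only branch right when the tested bit is set
 * in both the key and the supplied mask.
 */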
static struct radix_node *
rn_search_m(void *v_arg, struct radix_node *head, void *m_arg)
{
	struct radix_node *x;
	caddr_t v = v_arg, m = m_arg;

	for (x = head; x->rn_bit >= 0;) {
		if ((x->rn_bmask & m[x->rn_offset]) &&
		    (x->rn_bmask & v[x->rn_offset]))
			x = x->rn_right;
		else
			x = x->rn_left;
	}
	return x;
}

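/*
 * Return true if mask m_arg is strictly more specific than n_arg, i.e.
 * every bit set in n is also set in m and the two masks differ.  For
 * example, a /24 mask refines a /16, but equal masks do not refine each
 * other.  Both arguments are length-prefixed byte strings, as elsewhere
 * in this file.
 */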
int
rn_refines(void *m_arg, void *n_arg)
{
	caddr_t m = m_arg, n = n_arg;
	caddr_t lim, lim2 = lim = n + *(u_char *)n;
	int longer = (*(u_char *)n++) - (int)(*(u_char *)m++);
	int masks_are_equal = 1;

	if (longer > 0)
		lim -= longer;
	while (n < lim) {
		if (*n & ~(*m))
			return 0;
		if (*n++ != *m++)
			masks_are_equal = 0;
	}
	while (n < lim2)
		if (*n++)
			return 0;
	if (masks_are_equal && (longer < 0))
		for (lim2 = m - longer; m < lim2; )
			if (*m++)
				return 1;
	return (!masks_are_equal);
}

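/*
 * Look up an exact (key, netmask) pair: find the best match for v_arg,
 * then scan its duped-key chain for the entry whose mask matches m_arg.
 */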
struct radix_node *
rn_lookup(void *v_arg, void *m_arg, struct radix_node_head *head)
{
	struct radix_node *x;
	caddr_t netmask = NULL;

	if (m_arg) {
		x = rn_addmask(m_arg, 1, head->rnh_treetop->rn_offset);
		if (x == 0)
			return (NULL);
		netmask = x->rn_key;
	}
	x = rn_match(v_arg, head);
	if (x && netmask) {
		while (x && x->rn_mask != netmask)
			x = x->rn_dupedkey;
	}
	return x;
}

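/*
 * Return true if the trial key matches the leaf's key under the leaf's
 * mask (or under rn_ones when the leaf has no mask).  The misspelling of
 * "satisfies" in the name is historical.
 */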
static int
rn_satsifies_leaf(char *trial, struct radix_node *leaf, int skip)
{
	char *cp = trial, *cp2 = leaf->rn_key, *cp3 = leaf->rn_mask;
	char *cplim;
	int length = min(*(u_char *)cp, *(u_char *)cp2);

	if (cp3 == 0)
		cp3 = rn_ones;
	else
		length = min(length, *(u_char *)cp3);
	cplim = cp + length; cp3 += skip; cp2 += skip;
	for (cp += skip; cp < cplim; cp++, cp2++, cp3++)
		if ((*cp ^ *cp2) & *cp3)
			return 0;
	return 1;
}

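/*
 * Find the most specific route matching v_arg: descend to a leaf, check
 * for an exact or network match there and along its duped-key chain,
 * then climb back toward the root trying the masks hung off each
 * ancestor node.
 */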
struct radix_node *
rn_match(void *v_arg, struct radix_node_head *head)
{
	caddr_t v = v_arg;
	struct radix_node *t = head->rnh_treetop, *x;
	caddr_t cp = v, cp2;
	caddr_t cplim;
	struct radix_node *saved_t, *top = t;
	int off = t->rn_offset, vlen = *(u_char *)cp, matched_off;
	int test, b, rn_bit;

	/*
	 * Open code rn_search(v, top) to avoid overhead of extra
	 * subroutine call.
	 */
	for (; t->rn_bit >= 0; ) {
		if (t->rn_bmask & cp[t->rn_offset])
			t = t->rn_right;
		else
			t = t->rn_left;
	}
	/*
	 * See if we match exactly as a host destination
	 * or at least learn how many bits match, for normal mask finesse.
	 *
	 * It doesn't hurt us to limit how many bytes to check
	 * to the length of the mask, since if it matches we had a genuine
	 * match and the leaf we have is the most specific one anyway;
	 * if it didn't match with a shorter length it would fail
	 * with a long one.  This wins big for class B&C netmasks which
	 * are probably the most common case...
	 */
	if (t->rn_mask)
		vlen = *(u_char *)t->rn_mask;
	cp += off; cp2 = t->rn_key + off; cplim = v + vlen;
	for (; cp < cplim; cp++, cp2++)
		if (*cp != *cp2)
			goto on1;
	/*
	 * This extra grot is in case we are explicitly asked
	 * to look up the default.  Ugh!
	 *
	 * Never return the root node itself, it seems to cause a
	 * lot of confusion.
	 */
	if (t->rn_flags & RNF_ROOT)
		t = t->rn_dupedkey;
	return t;
on1:
	test = (*cp ^ *cp2) & 0xff; /* find first bit that differs */
	for (b = 7; (test >>= 1) > 0;)
		b--;
	matched_off = cp - v;
	b += matched_off << 3;
	rn_bit = -1 - b;
	/*
	 * If there is a host route in a duped-key chain, it will be first.
	 */
	if ((saved_t = t)->rn_mask == 0)
		t = t->rn_dupedkey;
	for (; t; t = t->rn_dupedkey)
		/*
		 * Even if we don't match exactly as a host,
		 * we may match if the leaf we wound up at is
		 * a route to a net.
		 */
		if (t->rn_flags & RNF_NORMAL) {
			if (rn_bit <= t->rn_bit)
				return t;
		} else if (rn_satsifies_leaf(v, t, matched_off))
			return t;
	t = saved_t;
	/* start searching up the tree */
	do {
		struct radix_mask *m;
		t = t->rn_parent;
		m = t->rn_mklist;
		/*
		 * If non-contiguous masks ever become important
		 * we can restore the masking and open coding of
		 * the search and satisfaction test and put the
		 * calculation of "off" back before the "do".
		 */
		while (m) {
			if (m->rm_flags & RNF_NORMAL) {
				if (rn_bit <= m->rm_bit)
					return (m->rm_leaf);
			} else {
				off = min(t->rn_offset, matched_off);
				x = rn_search_m(v, t, m->rm_mask);
				while (x && x->rn_mask != m->rm_mask)
					x = x->rn_dupedkey;
				if (x && rn_satsifies_leaf(v, x, off))
					return x;
			}
			m = m->rm_mklist;
		}
	} while (t != top);
	return NULL;
}

#ifdef RN_DEBUG
int rn_nodenum;
struct radix_node *rn_clist;
int rn_saveinfo;
int rn_debug = 1;
#endif

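/*
 * Initialize a (leaf, interior) pair of nodes from the caller-supplied
 * array: nodes[0] becomes the leaf holding key v, nodes[1] the interior
 * node testing bit b.  Returns the interior node.
 */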
static struct radix_node *
rn_newpair(void *v, int b, struct radix_node nodes[2])
{
	struct radix_node *tt = nodes, *t = tt + 1;
	t->rn_bit = b;
	t->rn_bmask = 0x80 >> (b & 7);
	t->rn_left = tt;
	t->rn_offset = b >> 3;
	tt->rn_bit = -1;
	tt->rn_key = (caddr_t)v;
	tt->rn_parent = t;
	tt->rn_flags = t->rn_flags = RNF_ACTIVE;
	tt->rn_mklist = t->rn_mklist = NULL;
#ifdef RN_DEBUG
	tt->rn_info = rn_nodenum++; t->rn_info = rn_nodenum++;
	tt->rn_twin = t;
	tt->rn_ybro = rn_clist;
	rn_clist = tt;
#endif
	return t;
}

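/*
 * Insert the key v_arg into the tree using the two caller-supplied nodes.
 * If the key is already present, *dupentry is set and the existing leaf
 * is returned; otherwise a new pair is spliced in at the point where the
 * key first differs and the new leaf is returned.
 */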
static struct radix_node *
rn_insert(void *v_arg, struct radix_node_head *head, int *dupentry,
    struct radix_node nodes[2])
{
	caddr_t v = v_arg;
	struct radix_node *top = head->rnh_treetop;
	int head_off = top->rn_offset, vlen = (int)*((u_char *)v);
	struct radix_node *t = rn_search(v_arg, top);
	caddr_t cp = v + head_off;
	int b;
	struct radix_node *tt;
	/*
	 * Find first bit at which v and t->rn_key differ
	 */
    {
	caddr_t cp2 = t->rn_key + head_off;
	int cmp_res;
	caddr_t cplim = v + vlen;

	while (cp < cplim)
		if (*cp2++ != *cp++)
			goto on1;
	*dupentry = 1;
	return t;
on1:
	*dupentry = 0;
	cmp_res = (cp[-1] ^ cp2[-1]) & 0xff;
	for (b = (cp - v) << 3; cmp_res; b--)
		cmp_res >>= 1;
    }
    {
	struct radix_node *p, *x = top;
	cp = v;
	do {
		p = x;
		if (cp[x->rn_offset] & x->rn_bmask)
			x = x->rn_right;
		else
			x = x->rn_left;
	} while (b > (unsigned) x->rn_bit);
				/* x->rn_bit < b && x->rn_bit >= 0 */
#ifdef RN_DEBUG
	if (rn_debug)
		log(LOG_DEBUG, "rn_insert: Going In:\n"), traverse(p);
#endif
	t = rn_newpair(v_arg, b, nodes);
	tt = t->rn_left;
	if ((cp[p->rn_offset] & p->rn_bmask) == 0)
		p->rn_left = t;
	else
		p->rn_right = t;
	x->rn_parent = t;
	t->rn_parent = p; /* frees x, p as temp vars below */
	if ((cp[t->rn_offset] & t->rn_bmask) == 0) {
		t->rn_right = x;
	} else {
		t->rn_right = tt;
		t->rn_left = x;
	}
#ifdef RN_DEBUG
	if (rn_debug)
		log(LOG_DEBUG, "rn_insert: Coming Out:\n"), traverse(p);
#endif
    }
	return (tt);
}

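/*
 * Enter a netmask into the separate mask tree (or, if "search" is set,
 * merely look it up).  Trailing zero bytes are trimmed so that equal
 * masks of different lengths share one entry; the node's rn_bit encodes
 * the mask's index, and RNF_NORMAL is set for contiguous masks.
 */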
struct radix_node *
rn_addmask(void *n_arg, int search, int skip)
{
	caddr_t netmask = (caddr_t)n_arg;
	struct radix_node *x;
	caddr_t cp, cplim;
	int b = 0, mlen, j;
	int maskduplicated, m0, isnormal;
	struct radix_node *saved_x;
	static int last_zeroed = 0;

	if ((mlen = *(u_char *)netmask) > max_keylen)
		mlen = max_keylen;
	if (skip == 0)
		skip = 1;
	if (mlen <= skip)
		return (mask_rnhead->rnh_nodes);
	if (skip > 1)
		Bcopy(rn_ones + 1, addmask_key + 1, skip - 1);
	if ((m0 = mlen) > skip)
		Bcopy(netmask + skip, addmask_key + skip, mlen - skip);
	/*
	 * Trim trailing zeroes.
	 */
	for (cp = addmask_key + mlen; (cp > addmask_key) && cp[-1] == 0;)
		cp--;
	mlen = cp - addmask_key;
	if (mlen <= skip) {
		if (m0 >= last_zeroed)
			last_zeroed = mlen;
		return (mask_rnhead->rnh_nodes);
	}
	if (m0 < last_zeroed)
		Bzero(addmask_key + m0, last_zeroed - m0);
	*addmask_key = last_zeroed = mlen;
	x = rn_search(addmask_key, rn_masktop);
	if (Bcmp(addmask_key, x->rn_key, mlen) != 0)
		x = NULL;
	if (x || search)
		return (x);
	R_Malloc(x, struct radix_node *, max_keylen + 2 * sizeof (*x));
	if ((saved_x = x) == 0)
		return (NULL);
	Bzero(x, max_keylen + 2 * sizeof (*x));
	netmask = cp = (caddr_t)(x + 2);
	Bcopy(addmask_key, cp, mlen);
	x = rn_insert(cp, mask_rnhead, &maskduplicated, x);
	if (maskduplicated) {
		log(LOG_ERR, "rn_addmask: mask impossibly already in tree");
		R_Free(saved_x);
		return (x);
	}
	mask_rnhead->rnh_cnt++;
	/*
	 * Calculate index of mask, and check for normalcy.
	 */
	cplim = netmask + mlen; isnormal = 1;
	for (cp = netmask + skip; (cp < cplim) && *(u_char *)cp == 0xff;)
		cp++;
	if (cp != cplim) {
		for (j = 0x80; (j & *cp) != 0; j >>= 1)
			b++;
		if (*cp != normal_chars[b] || cp != (cplim - 1))
			isnormal = 0;
	}
	b += (cp - netmask) << 3;
	x->rn_bit = -1 - b;
	if (isnormal)
		x->rn_flags |= RNF_NORMAL;
	return (x);
}

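/*
 * Impose a total order on masks viewed as length-prefixed byte strings:
 * a mask with a greater length byte, or a greater byte at the first
 * difference, sorts first.  Used to order otherwise-incomparable
 * non-contiguous masks consistently.
 */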
static int	/* XXX: arbitrary ordering for non-contiguous masks */
rn_lexobetter(void *m_arg, void *n_arg)
{
	u_char *mp = m_arg, *np = n_arg, *lim;

	if (*mp > *np)
		return 1;  /* not really, but need to check longer one first */
	if (*mp == *np)
		for (lim = mp + *mp; mp < lim;)
			if (*mp++ > *np++)
				return 1;
	return 0;
}

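/*
 * Allocate a radix_mask annotation for leaf tt and link it in front of
 * "next"; normal routes are remembered by leaf, others by mask pointer.
 */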
static struct radix_mask *
rn_new_radix_mask(struct radix_node *tt, struct radix_mask *next)
{
	struct radix_mask *m;

	MKGet(m);
	if (m == 0) {
		log(LOG_ERR, "Mask for route not entered\n");
		return (NULL);
	}
	Bzero(m, sizeof *m);
	m->rm_bit = tt->rn_bit;
	m->rm_flags = tt->rn_flags;
	if (tt->rn_flags & RNF_NORMAL)
		m->rm_leaf = tt;
	else
		m->rm_mask = tt->rn_mask;
	m->rm_mklist = next;
	tt->rn_mklist = m;
	return m;
}

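/*
 * Add a route with the given key and netmask to the tree.  This enters
 * the mask in the mask tree, handles duplicated keys by sorting them
 * from most to least specific mask, and hangs the mask annotation on the
 * highest ancestor to which the route can apply.
 */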
struct radix_node *
rn_addroute(void *v_arg, void *n_arg, struct radix_node_head *head,
    struct radix_node treenodes[2])
{
	caddr_t v = (caddr_t)v_arg, netmask = (caddr_t)n_arg;
	struct radix_node *t, *x = NULL, *tt;
	struct radix_node *saved_tt, *top = head->rnh_treetop;
	short b = 0, b_leaf = 0;
	int keyduplicated;
	caddr_t mmask;
	struct radix_mask *m, **mp;

	/*
	 * In dealing with non-contiguous masks, there may be
	 * many different routes which have the same mask.
	 * We will find it useful to have a unique pointer to
	 * the mask to speed avoiding duplicate references at
	 * nodes and possibly save time in calculating indices.
	 */
	if (netmask) {
		if ((x = rn_addmask(netmask, 0, top->rn_offset)) == 0)
			return (NULL);
		b_leaf = x->rn_bit;
		b = -1 - x->rn_bit;
		netmask = x->rn_key;
	}
	/*
	 * Deal with duplicated keys: attach node to previous instance
	 */
	saved_tt = tt = rn_insert(v, head, &keyduplicated, treenodes);
	if (keyduplicated) {
		for (t = tt; tt; t = tt, tt = tt->rn_dupedkey) {
			if (tt->rn_mask == netmask)
				return (NULL);
			if (netmask == 0 ||
			    (tt->rn_mask &&
			     ((b_leaf < tt->rn_bit) /* index(netmask) > node */
			      || rn_refines(netmask, tt->rn_mask)
			      || rn_lexobetter(netmask, tt->rn_mask))))
				break;
		}
		/*
		 * If the mask is not duplicated, we wouldn't
		 * find it among possible duplicate key entries
		 * anyway, so the above test doesn't hurt.
		 *
		 * We sort the masks for a duplicated key the same way as
		 * in a masklist -- most specific to least specific.
		 * This may require the unfortunate nuisance of relocating
		 * the head of the list.
		 */
		if (tt == saved_tt) {
			struct radix_node *xx = x;
			/* link in at head of list */
			(tt = treenodes)->rn_dupedkey = t;
			tt->rn_flags = t->rn_flags;
			tt->rn_parent = x = t->rn_parent;
			t->rn_parent = tt;			/* parent */
			if (x->rn_left == t)
				x->rn_left = tt;
			else
				x->rn_right = tt;
			saved_tt = tt; x = xx;
		} else {
			(tt = treenodes)->rn_dupedkey = t->rn_dupedkey;
			t->rn_dupedkey = tt;
			tt->rn_parent = t;			/* parent */
			if (tt->rn_dupedkey)			/* parent */
				tt->rn_dupedkey->rn_parent = tt; /* parent */
		}
#ifdef RN_DEBUG
		t=tt+1; tt->rn_info = rn_nodenum++; t->rn_info = rn_nodenum++;
		tt->rn_twin = t; tt->rn_ybro = rn_clist; rn_clist = tt;
#endif
		tt->rn_key = (caddr_t) v;
		tt->rn_bit = -1;
		tt->rn_flags = RNF_ACTIVE;
	}
	head->rnh_cnt++;
	/*
	 * Put mask in tree.
	 */
	if (netmask) {
		tt->rn_mask = netmask;
		tt->rn_bit = x->rn_bit;
		tt->rn_flags |= x->rn_flags & RNF_NORMAL;
	}
	t = saved_tt->rn_parent;
	if (keyduplicated)
		goto on2;
	b_leaf = -1 - t->rn_bit;
	if (t->rn_right == saved_tt)
		x = t->rn_left;
	else
		x = t->rn_right;
	/* Promote general routes from below */
	if (x->rn_bit < 0) {
	    for (mp = &t->rn_mklist; x; x = x->rn_dupedkey)
		if (x->rn_mask && (x->rn_bit >= b_leaf) && x->rn_mklist == 0) {
			*mp = m = rn_new_radix_mask(x, NULL);
			if (m)
				mp = &m->rm_mklist;
		}
	} else if (x->rn_mklist) {
		/*
		 * Skip over masks whose index is > that of new node
		 */
		for (mp = &x->rn_mklist; (m = *mp); mp = &m->rm_mklist)
			if (m->rm_bit >= b_leaf)
				break;
		t->rn_mklist = m; *mp = NULL;
	}
on2:
	/* Add new route to highest possible ancestor's list */
	if ((netmask == 0) || (b > t->rn_bit))
		return tt; /* can't lift at all */
	b_leaf = tt->rn_bit;
	do {
		x = t;
		t = t->rn_parent;
	} while (b <= t->rn_bit && x != top);
	/*
	 * Search through routes associated with node to
	 * insert new route according to index.
	 * Need same criteria as when sorting dupedkeys to avoid
	 * double loop on deletion.
	 */
	for (mp = &x->rn_mklist; (m = *mp); mp = &m->rm_mklist) {
		if (m->rm_bit < b_leaf)
			continue;
		if (m->rm_bit > b_leaf)
			break;
		if (m->rm_flags & RNF_NORMAL) {
			mmask = m->rm_leaf->rn_mask;
			if (tt->rn_flags & RNF_NORMAL) {
				log(LOG_ERR,
				    "Non-unique normal route, mask not entered");
				return tt;
			}
		} else
			mmask = m->rm_mask;
		if (mmask == netmask) {
			m->rm_refs++;
			tt->rn_mklist = m;
			return tt;
		}
		if (rn_refines(netmask, mmask)
		    || rn_lexobetter(netmask, mmask))
			break;
	}
	*mp = rn_new_radix_mask(tt, *mp);
	return tt;
}

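/*
 * Remove the route with the given key and netmask.  This unlinks the
 * mask annotation from the ancestor lists, detaches the leaf from any
 * duped-key chain, and collapses the now-unneeded interior node,
 * relocating an active twin node if necessary.
 */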
struct radix_node *
rn_delete(void *v_arg, void *netmask_arg, struct radix_node_head *head)
{
	struct radix_node *t, *p, *x, *tt;
	struct radix_mask *m, *saved_m, **mp;
	struct radix_node *dupedkey, *saved_tt, *top;
	caddr_t v, netmask;
	int b, head_off, vlen;

	v = v_arg;
	netmask = netmask_arg;
	x = head->rnh_treetop;
	tt = rn_search(v, x);
	head_off = x->rn_offset;
	vlen = *(u_char *)v;
	saved_tt = tt;
	top = x;
	if (tt == 0 ||
	    Bcmp(v + head_off, tt->rn_key + head_off, vlen - head_off))
		return (NULL);
	/*
	 * Delete our route from mask lists.
	 */
	if (netmask) {
		if ((x = rn_addmask(netmask, 1, head_off)) == 0)
			return (NULL);
		netmask = x->rn_key;
		while (tt->rn_mask != netmask)
			if ((tt = tt->rn_dupedkey) == 0)
				return (NULL);
	}
	if (tt->rn_mask == 0 || (saved_m = m = tt->rn_mklist) == 0)
		goto on1;
	if (tt->rn_flags & RNF_NORMAL) {
		if (m->rm_leaf != tt || m->rm_refs > 0) {
			log(LOG_ERR, "rn_delete: inconsistent annotation\n");
			return NULL; /* dangling ref could cause disaster */
		}
	} else {
		if (m->rm_mask != tt->rn_mask) {
			log(LOG_ERR, "rn_delete: inconsistent annotation\n");
			goto on1;
		}
		if (--m->rm_refs >= 0)
			goto on1;
	}
	b = -1 - tt->rn_bit;
	t = saved_tt->rn_parent;
	if (b > t->rn_bit)
		goto on1; /* Wasn't lifted at all */
	do {
		x = t;
		t = t->rn_parent;
	} while (b <= t->rn_bit && x != top);
	for (mp = &x->rn_mklist; (m = *mp); mp = &m->rm_mklist)
		if (m == saved_m) {
			*mp = m->rm_mklist;
			MKFree(m);
			break;
		}
	if (m == 0) {
		log(LOG_ERR, "rn_delete: couldn't find our annotation\n");
		if (tt->rn_flags & RNF_NORMAL)
			return (NULL); /* Dangling ref to us */
	}
on1:
	/*
	 * Eliminate us from tree
	 */
	if (tt->rn_flags & RNF_ROOT)
		return (NULL);
	head->rnh_cnt--;
#ifdef RN_DEBUG
	/* Get us out of the creation list */
	for (t = rn_clist; t && t->rn_ybro != tt; t = t->rn_ybro) {}
	if (t) t->rn_ybro = tt->rn_ybro;
#endif
	t = tt->rn_parent;
	dupedkey = saved_tt->rn_dupedkey;
	if (dupedkey) {
		/*
		 * at this point, tt is the deletion target and saved_tt
		 * is the head of the dupekey chain
		 */
		if (tt == saved_tt) {
			/* remove from head of chain */
			x = dupedkey; x->rn_parent = t;
			if (t->rn_left == tt)
				t->rn_left = x;
			else
				t->rn_right = x;
		} else {
			/* find node in front of tt on the chain */
			for (x = p = saved_tt; p && p->rn_dupedkey != tt;)
				p = p->rn_dupedkey;
			if (p) {
				p->rn_dupedkey = tt->rn_dupedkey;
				if (tt->rn_dupedkey)		/* parent */
					tt->rn_dupedkey->rn_parent = p;
								/* parent */
			} else log(LOG_ERR, "rn_delete: couldn't find us\n");
		}
		t = tt + 1;
		if (t->rn_flags & RNF_ACTIVE) {
#ifndef RN_DEBUG
			*++x = *t;
			p = t->rn_parent;
#else
			b = t->rn_info;
			*++x = *t;
			t->rn_info = b;
			p = t->rn_parent;
#endif
			if (p->rn_left == t)
				p->rn_left = x;
			else
				p->rn_right = x;
			x->rn_left->rn_parent = x;
			x->rn_right->rn_parent = x;
		}
		goto out;
	}
	if (t->rn_left == tt)
		x = t->rn_right;
	else
		x = t->rn_left;
	p = t->rn_parent;
	if (p->rn_right == t)
		p->rn_right = x;
	else
		p->rn_left = x;
	x->rn_parent = p;
	/*
	 * Demote routes attached to us.
	 */
	if (t->rn_mklist) {
		if (x->rn_bit >= 0) {
			for (mp = &x->rn_mklist; (m = *mp);)
				mp = &m->rm_mklist;
			*mp = t->rn_mklist;
		} else {
			/* If there are any key,mask pairs in a sibling
			   duped-key chain, some subset will appear sorted
			   in the same order attached to our mklist */
			for (m = t->rn_mklist; m && x; x = x->rn_dupedkey)
				if (m == x->rn_mklist) {
					struct radix_mask *mm = m->rm_mklist;
					x->rn_mklist = NULL;
					if (--(m->rm_refs) < 0)
						MKFree(m);
					m = mm;
				}
			if (m)
				log(LOG_ERR,
				    "rn_delete: Orphaned Mask %p at %p\n",
				    (void *)m, (void *)x);
		}
	}
	/*
	 * We may be holding an active internal node in the tree.
	 */
	x = tt + 1;
	if (t != x) {
#ifndef RN_DEBUG
		*t = *x;
#else
		b = t->rn_info;
		*t = *x;
		t->rn_info = b;
#endif
		t->rn_left->rn_parent = t;
		t->rn_right->rn_parent = t;
		p = x->rn_parent;
		if (p->rn_left == x)
			p->rn_left = t;
		else
			p->rn_right = t;
	}
out:
	tt->rn_flags &= ~RNF_ACTIVE;
	tt[1].rn_flags &= ~RNF_ACTIVE;
	return (tt);
}

/*
 * This is the same as rn_walktree() except for the parameters and the
 * exit.
 */
static int
rn_walktree_from(struct radix_node_head *h, void *a, void *m, walktree_f_t *f,
    void *w)
{
	int error;
	struct radix_node *base, *next;
	u_char *xa = (u_char *)a;
	u_char *xm = (u_char *)m;
	struct radix_node *rn, *last;
	int stopping;
	int lastb;
	int rnh_cnt;

	/*
	 * This gets complicated because we may delete the node while
	 * applying the function f to it; we cannot simply use the next
	 * leaf as the successor node in advance, because that leaf may
	 * be removed as well during deletion when it is a clone of the
	 * current node.  When that happens, we would end up referring
	 * to an already-freed radix node as the successor node.  To get
	 * around this issue, if we detect that the radix tree has changed
	 * in dimension (smaller than before), we simply restart the walk
	 * from the top of tree.
	 */
restart:
	last = NULL;
	stopping = 0;
	rnh_cnt = h->rnh_cnt;

	/*
	 * rn_search_m is sort-of-open-coded here.
	 */
	for (rn = h->rnh_treetop; rn->rn_bit >= 0; ) {
		last = rn;
		if (!(rn->rn_bmask & xm[rn->rn_offset]))
			break;

		if (rn->rn_bmask & xa[rn->rn_offset])
			rn = rn->rn_right;
		else
			rn = rn->rn_left;
	}

	/*
	 * Two cases: either we stepped off the end of our mask,
	 * in which case last == rn, or we reached a leaf, in which
	 * case we want to start from the last node we looked at.
	 * Either way, last is the node we want to start from.
	 */
	rn = last;
	lastb = rn->rn_bit;

	/* First time through node, go left */
	while (rn->rn_bit >= 0)
		rn = rn->rn_left;

	while (!stopping) {
		base = rn;
		/* If at right child go back up, otherwise, go right */
		while (rn->rn_parent->rn_right == rn
		    && !(rn->rn_flags & RNF_ROOT)) {
			rn = rn->rn_parent;

			/* if went up beyond last, stop */
			if (rn->rn_bit <= lastb) {
				stopping = 1;
				/*
				 * XXX we should jump to the 'Process leaves'
				 * part, because the values of 'rn' and 'next'
				 * we compute will not be used.  Not a big deal
				 * because this loop will terminate, but it is
				 * inefficient and hard to understand!
				 */
			}
		}

		/*
		 * The following code (bug fix) inherited from FreeBSD is
		 * currently disabled, because our implementation uses the
		 * RTF_PRCLONING scheme that has been abandoned in the
		 * current FreeBSD release.  The scheme involves setting
		 * such a flag for the default route entry, and therefore
		 * all off-link destinations would become clones of that
		 * entry.  Enabling the following code would be problematic
		 * at this point, because the removal of the default route
		 * would cause only the left half of the tree to be
		 * traversed, leaving the right half untouched.  If there
		 * are clones of the entry that reside in that right half,
		 * they would not be deleted and would linger around until
		 * they expire or are explicitly deleted, which is a very
		 * bad thing.
		 *
		 * This code should be uncommented only after we get rid
		 * of the RTF_PRCLONING scheme.
		 */
#if 0
		/*
		 * At the top of the tree, there is no need to traverse the
		 * right half; this prevents traversal of the entire tree in
		 * the case of a default route.
		 */
		if (rn->rn_parent->rn_flags & RNF_ROOT)
			stopping = 1;
#endif

		/* Find the next *leaf* to start from */
		for (rn = rn->rn_parent->rn_right; rn->rn_bit >= 0;)
			rn = rn->rn_left;
		next = rn;
		/* Process leaves */
		while ((rn = base) != 0) {
			base = rn->rn_dupedkey;
			if (!(rn->rn_flags & RNF_ROOT)
			    && (error = (*f)(rn, w)))
				return (error);
		}
		/* If one or more nodes got deleted, restart from top */
		if (h->rnh_cnt < rnh_cnt)
			goto restart;
		rn = next;
		if (rn->rn_flags & RNF_ROOT)
			stopping = 1;
	}
	return 0;
}

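/*
 * Apply f to every leaf in the tree.  f may delete the node it is given;
 * if the tree shrinks underneath us, restart from the top.
 */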
static int
rn_walktree(struct radix_node_head *h, walktree_f_t *f, void *w)
{
	int error;
	struct radix_node *base, *next;
	struct radix_node *rn;
	int rnh_cnt;

	/*
	 * This gets complicated because we may delete the node while
	 * applying the function f to it; we cannot simply use the next
	 * leaf as the successor node in advance, because that leaf may
	 * be removed as well during deletion when it is a clone of the
	 * current node.  When that happens, we would end up referring
	 * to an already-freed radix node as the successor node.  To get
	 * around this issue, if we detect that the radix tree has changed
	 * in dimension (smaller than before), we simply restart the walk
	 * from the top of tree.
	 */
restart:
	rn = h->rnh_treetop;
	rnh_cnt = h->rnh_cnt;

	/* First time through node, go left */
	while (rn->rn_bit >= 0)
		rn = rn->rn_left;
	for (;;) {
		base = rn;
		/* If at right child go back up, otherwise, go right */
		while (rn->rn_parent->rn_right == rn &&
		    (rn->rn_flags & RNF_ROOT) == 0)
			rn = rn->rn_parent;
		/* Find the next *leaf* to start from */
		for (rn = rn->rn_parent->rn_right; rn->rn_bit >= 0;)
			rn = rn->rn_left;
		next = rn;
		/* Process leaves */
		while ((rn = base) != NULL) {
			base = rn->rn_dupedkey;
			if (!(rn->rn_flags & RNF_ROOT)
			    && (error = (*f)(rn, w)))
				return (error);
		}
		/* If one or more nodes got deleted, restart from top */
		if (h->rnh_cnt < rnh_cnt)
			goto restart;
		rn = next;
		if (rn->rn_flags & RNF_ROOT)
			return (0);
	}
	/* NOTREACHED */
}

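/*
 * Allocate and initialize a radix tree head: an interior root node with
 * the all-zeros key as its leftmost leaf and the all-ones key as its
 * rightmost leaf, plus the table of handler functions.
 */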
int
rn_inithead(void **head, int off)
{
	struct radix_node_head *rnh;
	struct radix_node *t, *tt, *ttt;

	if (*head)
		return (1);
	R_Malloc(rnh, struct radix_node_head *, sizeof (*rnh));
	if (rnh == 0)
		return (0);
	Bzero(rnh, sizeof (*rnh));
	*head = rnh;
	t = rn_newpair(rn_zeros, off, rnh->rnh_nodes);
	ttt = rnh->rnh_nodes + 2;
	t->rn_right = ttt;
	t->rn_parent = t;
	tt = t->rn_left;
	tt->rn_flags = t->rn_flags = RNF_ROOT | RNF_ACTIVE;
	tt->rn_bit = -1 - off;
	*ttt = *tt;
	ttt->rn_key = rn_ones;
	rnh->rnh_addaddr = rn_addroute;
	rnh->rnh_deladdr = rn_delete;
	rnh->rnh_matchaddr = rn_match;
	rnh->rnh_lookup = rn_lookup;
	rnh->rnh_walktree = rn_walktree;
	rnh->rnh_walktree_from = rn_walktree_from;
	rnh->rnh_treetop = t;
	rnh->rnh_cnt = 3;
	return (1);
}

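/*
 * One-time global initialization: size rn_zeros/rn_ones/addmask_key from
 * the largest routing key of any registered domain, build the shared
 * mask tree, and create the radix mutex.
 */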
void
rn_init(void)
{
	char *cp, *cplim;
#ifdef KERNEL
	struct domain *dom;

	/* lock already held when rn_init is called */
	for (dom = domains; dom; dom = dom->dom_next)
		if (dom->dom_maxrtkey > max_keylen)
			max_keylen = dom->dom_maxrtkey;
#endif
	if (max_keylen == 0) {
		log(LOG_ERR,
		    "rn_init: radix functions require max_keylen be set\n");
		return;
	}
	R_Malloc(rn_zeros, char *, 3 * max_keylen);
	if (rn_zeros == NULL)
		panic("rn_init");
	Bzero(rn_zeros, 3 * max_keylen);
	rn_ones = cp = rn_zeros + max_keylen;
	addmask_key = cplim = rn_ones + max_keylen;
	while (cp < cplim)
		*cp++ = -1;
	if (rn_inithead((void **)&mask_rnhead, 0) == 0)
		panic("rn_init 2");

	rn_mutex = lck_mtx_alloc_init(domain_proto_mtx_grp,
	    domain_proto_mtx_attr);
}
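
/*
 * Usage sketch (illustrative only; "my_head", "key", "mask" and "nodes"
 * below are hypothetical).  A client calls rn_init() once, after domains
 * have registered their key lengths, then creates a tree and adds and
 * looks up length-prefixed keys through the handler table:
 *
 *	struct radix_node_head *my_head = NULL;
 *	struct radix_node nodes[2];
 *	struct radix_node *rn;
 *
 *	rn_init();				// sizes globals from domains
 *	rn_inithead((void **)&my_head, 0);	// empty tree, offset 0
 *	my_head->rnh_addaddr(key, mask, my_head, nodes);
 *	rn = my_head->rnh_matchaddr(key, my_head);
 *
 * The first byte of key and mask is their length, as assumed throughout
 * this file.
 */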
1133 }