/*
 * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Copyright (c) 1988, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)radix.c	8.4 (Berkeley) 11/2/94
 * $FreeBSD: src/sys/net/radix.c,v 1.20.2.2 2001/03/06 00:56:50 obrien Exp $
 */

/*
 * Routines to build and maintain radix trees for routing lookups.
 */
#ifndef _RADIX_H_
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#define M_DONTWAIT M_NOWAIT
#include <sys/domain.h>
#include <sys/syslog.h>
#include <net/radix.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <kern/locks.h>
#endif

static int rn_walktree_from(struct radix_node_head *h, void *a,
    void *m, walktree_f_t *f, void *w);
static int rn_walktree(struct radix_node_head *, walktree_f_t *, void *);
static struct radix_node
    *rn_insert(void *, struct radix_node_head *, int *,
    struct radix_node[2]),
    *rn_newpair(void *, int, struct radix_node[2]),
    *rn_search(void *, struct radix_node *),
    *rn_search_m(void *, struct radix_node *, void *);

static int max_keylen;
static struct radix_mask *rn_mkfreelist;
static struct radix_node_head *mask_rnhead;
static char *addmask_key;
static char normal_chars[] = {0, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe, -1};
static char *rn_zeros, *rn_ones;


#define rn_masktop (mask_rnhead->rnh_treetop)
#undef Bcmp
#define Bcmp(a, b, l) \
	(l == 0 ? 0 : bcmp((caddr_t)(a), (caddr_t)(b), (uint32_t)l))

static int rn_lexobetter(void *m_arg, void *n_arg);
static struct radix_mask *
    rn_new_radix_mask(struct radix_node *tt,
    struct radix_mask *next);
static int rn_satisfies_leaf(char *trial, struct radix_node *leaf, int skip,
    rn_matchf_t *f, void *w);

#define RN_MATCHF(rn, f, arg)   (f == NULL || (*f)((rn), arg))

/*
 * The data structure for the keys is a radix tree with one-way
 * branching removed. The index rn_bit at an internal node n represents a bit
 * position to be tested. The tree is arranged so that all descendants
 * of a node n have keys whose bits all agree up to position rn_bit - 1.
 * (We say the index of n is rn_bit.)
 *
 * There is at least one descendant which has a one bit at position rn_bit,
 * and at least one with a zero there.
 *
 * A route is determined by a pair of key and mask. We require that the
 * bit-wise logical AND of the key and mask be the key.
 * We define the index of a route associated with the mask to be
 * the first bit number in the mask where 0 occurs (with bit number 0
 * representing the highest order bit).
 *
 * We say a mask is normal if every bit past the index of the mask is 0.
 * If a node n has a descendant (k, m) with index(m) == index(n) == rn_bit,
 * and m is a normal mask, then the route applies to every descendant of n.
 * If index(m) < rn_bit, this implies that the last few bits of k
 * before bit rn_bit are all 0 (and hence the same is true of every
 * descendant of n), so the route applies to all descendants of the node
 * as well.
 *
 * Similar logic shows that a non-normal mask m such that
 * index(m) <= index(n) could potentially apply to many children of n.
 * Thus, for each non-host route, we attach its mask to a list at an internal
 * node as high in the tree as we can go.
 *
 * The present version of the code makes use of normal routes in short-
 * circuiting an explicit mask-and-compare operation when testing whether
 * a key satisfies a normal route, and also in remembering the unique leaf
 * that governs a subtree.
 */
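
/*
 * Worked example (illustrative, treating the key as a plain byte string
 * rather than any particular sockaddr layout): for the route (k, m) with
 *
 *	k = 10.1.2.0      = 00001010 00000001 00000010 00000000
 *	m = 255.255.255.0 = 11111111 11111111 11111111 00000000
 *
 * the bit-wise AND of k and m equals k, as required, and index(m) == 24,
 * the first bit position (counting the most significant bit as bit 0)
 * where the mask has a 0. m is normal: every bit past index 24 is also 0.
 * A mask such as 255.255.0.255 would be non-normal, since 1 bits appear
 * after its index (16).
 */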

static struct radix_node *
rn_search(void *v_arg, struct radix_node *head)
{
	struct radix_node *x;
	caddr_t v;

	for (x = head, v = v_arg; x->rn_bit >= 0;) {
		if (x->rn_bmask & v[x->rn_offset]) {
			x = x->rn_right;
		} else {
			x = x->rn_left;
		}
	}
	return x;
}

static struct radix_node *
rn_search_m(void *v_arg, struct radix_node *head, void *m_arg)
{
	struct radix_node *x;
	caddr_t v = v_arg, m = m_arg;

	for (x = head; x->rn_bit >= 0;) {
		if ((x->rn_bmask & m[x->rn_offset]) &&
		    (x->rn_bmask & v[x->rn_offset])) {
			x = x->rn_right;
		} else {
			x = x->rn_left;
		}
	}
	return x;
}

int
rn_refines(void *m_arg, void *n_arg)
{
	caddr_t m = m_arg, n = n_arg;
	caddr_t lim, lim2 = lim = n + *(u_char *)n;
	int longer = (*(u_char *)n++) - (int)(*(u_char *)m++);
	int masks_are_equal = 1;

	if (longer > 0) {
		lim -= longer;
	}
	while (n < lim) {
		if (*n & ~(*m)) {
			return 0;
		}
		if (*n++ != *m++) {
			masks_are_equal = 0;
		}
	}
	while (n < lim2) {
		if (*n++) {
			return 0;
		}
	}
	if (masks_are_equal && (longer < 0)) {
		for (lim2 = m - longer; m < lim2;) {
			if (*m++) {
				return 1;
			}
		}
	}
	return !masks_are_equal;
}
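
/*
 * Informally: rn_refines(m, n) answers "does mask m match a strict
 * subset of what mask n matches?". Masks here are length-prefixed byte
 * strings (the first byte is the length), so for contiguous IPv4 masks
 * this reduces to a prefix-length comparison: rn_refines(/24, /16) is
 * true, rn_refines(/16, /24) is false, and equal masks do not refine
 * each other.
 */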

struct radix_node *
rn_lookup(void *v_arg, void *m_arg, struct radix_node_head *head)
{
	return rn_lookup_args(v_arg, m_arg, head, NULL, NULL);
}

struct radix_node *
rn_lookup_args(void *v_arg, void *m_arg, struct radix_node_head *head,
    rn_matchf_t *f, void *w)
{
	struct radix_node *x;
	caddr_t netmask = NULL;

	if (m_arg) {
		x = rn_addmask(m_arg, 1, head->rnh_treetop->rn_offset);
		if (x == 0) {
			return NULL;
		}
		netmask = x->rn_key;
	}
	x = rn_match_args(v_arg, head, f, w);
	if (x && netmask) {
		while (x && x->rn_mask != netmask) {
			x = x->rn_dupedkey;
		}
	}
	return x;
}

/*
 * Returns true if address 'trial' has no bits differing from the
 * leaf's key when compared under the leaf's mask. In other words,
 * returns true when 'trial' matches leaf. If a leaf-matching
 * routine is passed in, it is also used to find a match on the
 * conditions defined by the caller of rn_match.
 */
static int
rn_satisfies_leaf(char *trial, struct radix_node *leaf, int skip,
    rn_matchf_t *f, void *w)
{
	char *cp = trial, *cp2 = leaf->rn_key, *cp3 = leaf->rn_mask;
	char *cplim;
	int length = min(*(u_char *)cp, *(u_char *)cp2);

	if (cp3 == 0) {
		cp3 = rn_ones;
	} else {
		length = min(length, *(u_char *)cp3);
	}
	cplim = cp + length; cp3 += skip; cp2 += skip;
	for (cp += skip; cp < cplim; cp++, cp2++, cp3++) {
		if ((*cp ^ *cp2) & *cp3) {
			return 0;
		}
	}

	return RN_MATCHF(leaf, f, w);
}
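
/*
 * Example of the masked-compare step above: with trial byte 0x0a,
 * key byte 0x0b and mask byte 0xfe, (0x0a ^ 0x0b) & 0xfe ==
 * 0x01 & 0xfe == 0, so the bytes agree everywhere the mask cares;
 * with mask byte 0xff the same pair would differ and the leaf would
 * be rejected.
 */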

struct radix_node *
rn_match(void *v_arg, struct radix_node_head *head)
{
	return rn_match_args(v_arg, head, NULL, NULL);
}

struct radix_node *
rn_match_args(void *v_arg, struct radix_node_head *head,
    rn_matchf_t *f, void *w)
{
	caddr_t v = v_arg;
	struct radix_node *t = head->rnh_treetop, *x;
	caddr_t cp = v, cp2;
	caddr_t cplim;
	struct radix_node *saved_t, *top = t;
	int off = t->rn_offset, vlen = *(u_char *)cp, matched_off;
	int test, b, rn_bit;

	/*
	 * Open code rn_search(v, top) to avoid overhead of extra
	 * subroutine call.
	 */
	for (; t->rn_bit >= 0;) {
		if (t->rn_bmask & cp[t->rn_offset]) {
			t = t->rn_right;
		} else {
			t = t->rn_left;
		}
	}
	/*
	 * See if we match exactly as a host destination
	 * or at least learn how many bits match, for normal mask finesse.
	 *
	 * It doesn't hurt us to limit how many bytes to check
	 * to the length of the mask, since if it matches we had a genuine
	 * match and the leaf we have is the most specific one anyway;
	 * if it didn't match with a shorter length it would fail
	 * with a long one. This wins big for class B&C netmasks which
	 * are probably the most common case...
	 */
	if (t->rn_mask) {
		vlen = *(u_char *)t->rn_mask;
	}
	cp += off; cp2 = t->rn_key + off; cplim = v + vlen;
	for (; cp < cplim; cp++, cp2++) {
		if (*cp != *cp2) {
			goto on1;
		}
	}
	/*
	 * This extra grot is in case we are explicitly asked
	 * to look up the default. Ugh!
	 *
	 * Never return the root node itself, it seems to cause a
	 * lot of confusion.
	 */
	if (t->rn_flags & RNF_ROOT) {
		t = t->rn_dupedkey;
	}
	if (t == NULL || RN_MATCHF(t, f, w)) {
		return t;
	} else {
		/*
		 * Although we found an exact match on the key,
		 * f() is looking for some other criteria as well.
		 * Continue looking as if the exact match failed.
		 */
		if (t->rn_parent->rn_flags & RNF_ROOT) {
			/* Hit the top; have to give up */
			return NULL;
		}
		b = 0;
		goto keeplooking;
	}
on1:
	test = (*cp ^ *cp2) & 0xff; /* find first bit that differs */
	for (b = 7; (test >>= 1) > 0;) {
		b--;
	}
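	/*
	 * E.g. if the bytes differ first in bit 1 (mask 0x40), test starts
	 * at 0x40; six of the shifts leave a nonzero value, so b is
	 * decremented from 7 down to 1, the bit number of the first
	 * difference (bit 0 being the MSB of the byte).
	 */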
keeplooking:
	matched_off = cp - v;
	b += matched_off << 3;
	rn_bit = -1 - b;
	/*
	 * If there is a host route in a duped-key chain, it will be first.
	 */
	if ((saved_t = t)->rn_mask == 0) {
		t = t->rn_dupedkey;
	}
	for (; t; t = t->rn_dupedkey) {
		/*
		 * Even if we don't match exactly as a host,
		 * we may match if the leaf we wound up at is
		 * a route to a net.
		 */
		if (t->rn_flags & RNF_NORMAL) {
			if ((rn_bit <= t->rn_bit) && RN_MATCHF(t, f, w)) {
				return t;
			}
		} else if (rn_satisfies_leaf(v, t, matched_off, f, w)) {
			return t;
		}
	}
	t = saved_t;
	/* start searching up the tree */
	do {
		struct radix_mask *m;
		t = t->rn_parent;
		m = t->rn_mklist;
		/*
		 * If non-contiguous masks ever become important
		 * we can restore the masking and open coding of
		 * the search and satisfaction test and put the
		 * calculation of "off" back before the "do".
		 */
		while (m) {
			if (m->rm_flags & RNF_NORMAL) {
				if ((rn_bit <= m->rm_bit) &&
				    RN_MATCHF(m->rm_leaf, f, w)) {
					return m->rm_leaf;
				}
			} else {
				off = min(t->rn_offset, matched_off);
				x = rn_search_m(v, t, m->rm_mask);
				while (x && x->rn_mask != m->rm_mask) {
					x = x->rn_dupedkey;
				}
				if (x && rn_satisfies_leaf(v, x, off, f, w)) {
					return x;
				}
			}
			m = m->rm_mklist;
		}
	} while (t != top);
	return NULL;
}

#ifdef RN_DEBUG
int rn_nodenum;
struct radix_node *rn_clist;
int rn_saveinfo;
int rn_debug = 1;
#endif

static struct radix_node *
rn_newpair(void *v, int b, struct radix_node nodes[2])
{
	struct radix_node *tt = nodes, *t = tt + 1;
	t->rn_bit = b;
	t->rn_bmask = 0x80 >> (b & 7);
	t->rn_left = tt;
	t->rn_offset = b >> 3;
	tt->rn_bit = -1;
	tt->rn_key = (caddr_t)v;
	tt->rn_parent = t;
	tt->rn_flags = t->rn_flags = RNF_ACTIVE;
	tt->rn_mklist = t->rn_mklist = NULL;
#ifdef RN_DEBUG
	tt->rn_info = rn_nodenum++; t->rn_info = rn_nodenum++;
	tt->rn_twin = t;
	tt->rn_ybro = rn_clist;
	rn_clist = tt;
#endif
	return t;
}
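
/*
 * The bit decomposition above is worth a concrete example: testing
 * bit b == 26 of a key means inspecting byte b >> 3 == 3 (the fourth
 * byte) under mask 0x80 >> (b & 7) == 0x80 >> 2 == 0x20, i.e. the
 * third-highest bit of that byte. rn_search() relies on exactly this
 * rn_offset/rn_bmask pair at every internal node.
 */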

static struct radix_node *
rn_insert(void *v_arg, struct radix_node_head *head, int *dupentry,
    struct radix_node nodes[2])
{
	caddr_t v = v_arg;
	struct radix_node *top = head->rnh_treetop;
	int head_off = top->rn_offset, vlen = (int)*((u_char *)v);
	struct radix_node *t = rn_search(v_arg, top);
	caddr_t cp = v + head_off;
	int b;
	struct radix_node *tt;
	/*
	 * Find first bit at which v and t->rn_key differ
	 */
	{
		caddr_t cp2 = t->rn_key + head_off;
		int cmp_res;
		caddr_t cplim = v + vlen;

		while (cp < cplim) {
			if (*cp2++ != *cp++) {
				goto on1;
			}
		}
		*dupentry = 1;
		return t;
on1:
		*dupentry = 0;
		cmp_res = (cp[-1] ^ cp2[-1]) & 0xff;
		for (b = (cp - v) << 3; cmp_res; b--) {
			cmp_res >>= 1;
		}
	}
	{
		struct radix_node *p, *x = top;
		cp = v;
		do {
			p = x;
			if (cp[x->rn_offset] & x->rn_bmask) {
				x = x->rn_right;
			} else {
				x = x->rn_left;
			}
		} while (b > (unsigned) x->rn_bit);
		/* x->rn_bit < b && x->rn_bit >= 0 */
#ifdef RN_DEBUG
		if (rn_debug) {
			log(LOG_DEBUG, "rn_insert: Going In:\n"), traverse(p);
		}
#endif
		t = rn_newpair(v_arg, b, nodes);
		tt = t->rn_left;
		if ((cp[p->rn_offset] & p->rn_bmask) == 0) {
			p->rn_left = t;
		} else {
			p->rn_right = t;
		}
		x->rn_parent = t;
		t->rn_parent = p; /* frees x, p as temp vars below */
		if ((cp[t->rn_offset] & t->rn_bmask) == 0) {
			t->rn_right = x;
		} else {
			t->rn_right = tt;
			t->rn_left = x;
		}
#ifdef RN_DEBUG
		if (rn_debug) {
			log(LOG_DEBUG, "rn_insert: Coming Out:\n"), traverse(p);
		}
#endif
	}
	return tt;
}

struct radix_node *
rn_addmask(void *n_arg, int search, int skip)
{
	caddr_t netmask = (caddr_t)n_arg;
	struct radix_node *x;
	caddr_t cp, cplim;
	int b = 0, mlen, j;
	int maskduplicated, m0, isnormal;
	struct radix_node *saved_x;
	static int last_zeroed = 0;

	if ((mlen = *(u_char *)netmask) > max_keylen) {
		mlen = max_keylen;
	}
	if (skip == 0) {
		skip = 1;
	}
	if (mlen <= skip) {
		return mask_rnhead->rnh_nodes;
	}
	if (skip > 1) {
		Bcopy(rn_ones + 1, addmask_key + 1, skip - 1);
	}
	if ((m0 = mlen) > skip) {
		Bcopy(netmask + skip, addmask_key + skip, mlen - skip);
	}
	/*
	 * Trim trailing zeroes.
	 */
	for (cp = addmask_key + mlen; (cp > addmask_key) && cp[-1] == 0;) {
		cp--;
	}
	mlen = cp - addmask_key;
	if (mlen <= skip) {
		if (m0 >= last_zeroed) {
			last_zeroed = mlen;
		}
		return mask_rnhead->rnh_nodes;
	}
	if (m0 < last_zeroed) {
		Bzero(addmask_key + m0, last_zeroed - m0);
	}
	*addmask_key = last_zeroed = mlen;
	x = rn_search(addmask_key, rn_masktop);
	if (Bcmp(addmask_key, x->rn_key, mlen) != 0) {
		x = NULL;
	}
	if (x || search) {
		return x;
	}
	R_Malloc(x, struct radix_node *, max_keylen + 2 * sizeof(*x));
	if ((saved_x = x) == 0) {
		return NULL;
	}
	Bzero(x, max_keylen + 2 * sizeof(*x));
	netmask = cp = (caddr_t)(x + 2);
	Bcopy(addmask_key, cp, mlen);
	x = rn_insert(cp, mask_rnhead, &maskduplicated, x);
	if (maskduplicated) {
		log(LOG_ERR, "rn_addmask: mask impossibly already in tree");
		R_Free(saved_x);
		return x;
	}
	mask_rnhead->rnh_cnt++;
	/*
	 * Calculate index of mask, and check for normalcy.
	 */
	cplim = netmask + mlen; isnormal = 1;
	for (cp = netmask + skip; (cp < cplim) && *(u_char *)cp == 0xff;) {
		cp++;
	}
	if (cp != cplim) {
		for (j = 0x80; (j & *cp) != 0; j >>= 1) {
			b++;
		}
		if (*cp != normal_chars[b] || cp != (cplim - 1)) {
			isnormal = 0;
		}
	}
	b += (cp - netmask) << 3;
	x->rn_bit = -1 - b;
	if (isnormal) {
		x->rn_flags |= RNF_NORMAL;
	}
	return x;
}
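
/*
 * For instance, with skip == 1 (just the length byte) and mask bytes
 * ff ff fe following it, the scan stops at the 0xfe byte: b counts 7
 * leading one-bits there (0xfe == normal_chars[7]), and since 0xfe is
 * also the last nonzero byte the mask is normal. The final index is
 * b == 7 + 3*8 == 31, counting bit 0 as the high bit of the key's
 * first byte, and rn_bit is stored as -1 - 31 == -32.
 */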

/* XXX: arbitrary ordering for non-contiguous masks */
static int
rn_lexobetter(void *m_arg, void *n_arg)
{
	u_char *mp = m_arg, *np = n_arg, *lim;

	if (*mp > *np) {
		return 1; /* not really, but need to check longer one first */
	}
	if (*mp == *np) {
		for (lim = mp + *mp; mp < lim;) {
			if (*mp++ > *np++) {
				return 1;
			}
		}
	}
	return 0;
}
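
/*
 * Ordering example: for equal-length masks this is a bytewise
 * lexicographic test, so a mask beginning 0xf0 0x00 sorts ahead of one
 * beginning 0x0f 0x0f; a longer mask always "wins" so that callers
 * sorting a duped-key chain or mask list consider longer masks first.
 */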

static struct radix_mask *
rn_new_radix_mask(struct radix_node *tt, struct radix_mask *next)
{
	struct radix_mask *m;

	MKGet(m);
	if (m == 0) {
		log(LOG_ERR, "Mask for route not entered\n");
		return NULL;
	}
	Bzero(m, sizeof *m);
	m->rm_bit = tt->rn_bit;
	m->rm_flags = tt->rn_flags;
	if (tt->rn_flags & RNF_NORMAL) {
		m->rm_leaf = tt;
	} else {
		m->rm_mask = tt->rn_mask;
	}
	m->rm_mklist = next;
	tt->rn_mklist = m;
	return m;
}

struct radix_node *
rn_addroute(void *v_arg, void *n_arg, struct radix_node_head *head,
    struct radix_node treenodes[2])
{
	caddr_t v = (caddr_t)v_arg, netmask = (caddr_t)n_arg;
	struct radix_node *t, *x = NULL, *tt;
	struct radix_node *saved_tt, *top = head->rnh_treetop;
	short b = 0, b_leaf = 0;
	int keyduplicated;
	caddr_t mmask;
	struct radix_mask *m, **mp;

	/*
	 * In dealing with non-contiguous masks, there may be
	 * many different routes which have the same mask.
	 * We will find it useful to have a unique pointer to
	 * the mask to speed avoiding duplicate references at
	 * nodes and possibly save time in calculating indices.
	 */
	if (netmask) {
		if ((x = rn_addmask(netmask, 0, top->rn_offset)) == 0) {
			return NULL;
		}
		b_leaf = x->rn_bit;
		b = -1 - x->rn_bit;
		netmask = x->rn_key;
	}
	/*
	 * Deal with duplicated keys: attach node to previous instance
	 */
	saved_tt = tt = rn_insert(v, head, &keyduplicated, treenodes);
	if (keyduplicated) {
		for (t = tt; tt; t = tt, tt = tt->rn_dupedkey) {
			if (tt->rn_mask == netmask) {
				return NULL;
			}
			if (netmask == 0 ||
			    (tt->rn_mask &&
			    ((b_leaf < tt->rn_bit) /* index(netmask) > node */
			    || rn_refines(netmask, tt->rn_mask)
			    || rn_lexobetter(netmask, tt->rn_mask)))) {
				break;
			}
		}
		/*
		 * If the mask is not duplicated, we wouldn't
		 * find it among possible duplicate key entries
		 * anyway, so the above test doesn't hurt.
		 *
		 * We sort the masks for a duplicated key the same way as
		 * in a masklist -- most specific to least specific.
		 * This may require the unfortunate nuisance of relocating
		 * the head of the list.
		 */
		if (tt == saved_tt) {
			struct radix_node *xx = x;
			/* link in at head of list */
			(tt = treenodes)->rn_dupedkey = t;
			tt->rn_flags = t->rn_flags;
			tt->rn_parent = x = t->rn_parent;
			t->rn_parent = tt; /* parent */
			if (x->rn_left == t) {
				x->rn_left = tt;
			} else {
				x->rn_right = tt;
			}
			saved_tt = tt; x = xx;
		} else {
			(tt = treenodes)->rn_dupedkey = t->rn_dupedkey;
			t->rn_dupedkey = tt;
			tt->rn_parent = t; /* parent */
			if (tt->rn_dupedkey) { /* parent */
				tt->rn_dupedkey->rn_parent = tt; /* parent */
			}
		}
#ifdef RN_DEBUG
		t = tt + 1; tt->rn_info = rn_nodenum++; t->rn_info = rn_nodenum++;
		tt->rn_twin = t; tt->rn_ybro = rn_clist; rn_clist = tt;
#endif
		tt->rn_key = (caddr_t) v;
		tt->rn_bit = -1;
		tt->rn_flags = RNF_ACTIVE;
	}
	head->rnh_cnt++;
	/*
	 * Put mask in tree.
	 */
	if (netmask) {
		tt->rn_mask = netmask;
		tt->rn_bit = x->rn_bit;
		tt->rn_flags |= x->rn_flags & RNF_NORMAL;
	}
	t = saved_tt->rn_parent;
	if (keyduplicated) {
		goto on2;
	}
	b_leaf = -1 - t->rn_bit;
	if (t->rn_right == saved_tt) {
		x = t->rn_left;
	} else {
		x = t->rn_right;
	}
	/* Promote general routes from below */
	if (x->rn_bit < 0) {
		for (mp = &t->rn_mklist; x; x = x->rn_dupedkey) {
			if (x->rn_mask && (x->rn_bit >= b_leaf) && x->rn_mklist == 0) {
				*mp = m = rn_new_radix_mask(x, NULL);
				if (m) {
					mp = &m->rm_mklist;
				}
			}
		}
	} else if (x->rn_mklist) {
		/*
		 * Skip over masks whose index is > that of new node
		 */
		for (mp = &x->rn_mklist; (m = *mp); mp = &m->rm_mklist) {
			if (m->rm_bit >= b_leaf) {
				break;
			}
		}
		t->rn_mklist = m; *mp = NULL;
	}
on2:
	/* Add new route to highest possible ancestor's list */
	if ((netmask == 0) || (b > t->rn_bit)) {
		return tt; /* can't lift at all */
	}
	b_leaf = tt->rn_bit;
	do {
		x = t;
		t = t->rn_parent;
	} while (b <= t->rn_bit && x != top);
	/*
	 * Search through routes associated with node to
	 * insert new route according to index.
	 * Need same criteria as when sorting dupedkeys to avoid
	 * double loop on deletion.
	 */
	for (mp = &x->rn_mklist; (m = *mp); mp = &m->rm_mklist) {
		if (m->rm_bit < b_leaf) {
			continue;
		}
		if (m->rm_bit > b_leaf) {
			break;
		}
		if (m->rm_flags & RNF_NORMAL) {
			mmask = m->rm_leaf->rn_mask;
			if (tt->rn_flags & RNF_NORMAL) {
				log(LOG_ERR,
				    "Non-unique normal route, mask not entered");
				return tt;
			}
		} else {
			mmask = m->rm_mask;
		}
		if (mmask == netmask) {
			m->rm_refs++;
			tt->rn_mklist = m;
			return tt;
		}
		if (rn_refines(netmask, mmask)
		    || rn_lexobetter(netmask, mmask)) {
			break;
		}
	}
	*mp = rn_new_radix_mask(tt, *mp);
	return tt;
}

struct radix_node *
rn_delete(void *v_arg, void *netmask_arg, struct radix_node_head *head)
{
	struct radix_node *t, *p, *x, *tt;
	struct radix_mask *m, *saved_m, **mp;
	struct radix_node *dupedkey, *saved_tt, *top;
	caddr_t v, netmask;
	int b, head_off, vlen;

	v = v_arg;
	netmask = netmask_arg;
	x = head->rnh_treetop;
	tt = rn_search(v, x);
	head_off = x->rn_offset;
	vlen = *(u_char *)v;
	saved_tt = tt;
	top = x;
	if (tt == 0 ||
	    Bcmp(v + head_off, tt->rn_key + head_off, vlen - head_off)) {
		return NULL;
	}
	/*
	 * Delete our route from mask lists.
	 */
	if (netmask) {
		if ((x = rn_addmask(netmask, 1, head_off)) == 0) {
			return NULL;
		}
		netmask = x->rn_key;
		while (tt->rn_mask != netmask) {
			if ((tt = tt->rn_dupedkey) == 0) {
				return NULL;
			}
		}
	}
	if (tt->rn_mask == 0 || (saved_m = m = tt->rn_mklist) == 0) {
		goto on1;
	}
	if (tt->rn_flags & RNF_NORMAL) {
		if (m->rm_leaf != tt || m->rm_refs > 0) {
			log(LOG_ERR, "rn_delete: inconsistent annotation\n");
			return NULL; /* dangling ref could cause disaster */
		}
	} else {
		if (m->rm_mask != tt->rn_mask) {
			log(LOG_ERR, "rn_delete: inconsistent annotation\n");
			goto on1;
		}
		if (--m->rm_refs >= 0) {
			goto on1;
		}
	}
	b = -1 - tt->rn_bit;
	t = saved_tt->rn_parent;
	if (b > t->rn_bit) {
		goto on1; /* Wasn't lifted at all */
	}
	do {
		x = t;
		t = t->rn_parent;
	} while (b <= t->rn_bit && x != top);
	for (mp = &x->rn_mklist; (m = *mp); mp = &m->rm_mklist) {
		if (m == saved_m) {
			*mp = m->rm_mklist;
			MKFree(m);
			break;
		}
	}
	if (m == 0) {
		log(LOG_ERR, "rn_delete: couldn't find our annotation\n");
		if (tt->rn_flags & RNF_NORMAL) {
			return NULL; /* Dangling ref to us */
		}
	}
on1:
	/*
	 * Eliminate us from tree
	 */
	if (tt->rn_flags & RNF_ROOT) {
		return NULL;
	}
	head->rnh_cnt--;
#ifdef RN_DEBUG
	/* Get us out of the creation list */
	for (t = rn_clist; t && t->rn_ybro != tt; t = t->rn_ybro) {
	}
	if (t) {
		t->rn_ybro = tt->rn_ybro;
	}
#endif
	t = tt->rn_parent;
	dupedkey = saved_tt->rn_dupedkey;
	if (dupedkey) {
		/*
		 * at this point, tt is the deletion target and saved_tt
		 * is the head of the dupekey chain
		 */
		if (tt == saved_tt) {
			/* remove from head of chain */
			x = dupedkey; x->rn_parent = t;
			if (t->rn_left == tt) {
				t->rn_left = x;
			} else {
				t->rn_right = x;
			}
		} else {
			/* find node in front of tt on the chain */
			for (x = p = saved_tt; p && p->rn_dupedkey != tt;) {
				p = p->rn_dupedkey;
			}
			if (p) {
				p->rn_dupedkey = tt->rn_dupedkey;
				if (tt->rn_dupedkey) { /* parent */
					tt->rn_dupedkey->rn_parent = p;
				}
				/* parent */
			} else {
				log(LOG_ERR, "rn_delete: couldn't find us\n");
			}
		}
		t = tt + 1;
		if (t->rn_flags & RNF_ACTIVE) {
#ifndef RN_DEBUG
			*++x = *t;
			p = t->rn_parent;
#else
			b = t->rn_info;
			*++x = *t;
			t->rn_info = b;
			p = t->rn_parent;
#endif
			if (p->rn_left == t) {
				p->rn_left = x;
			} else {
				p->rn_right = x;
			}
			x->rn_left->rn_parent = x;
			x->rn_right->rn_parent = x;
		}
		goto out;
	}
	if (t->rn_left == tt) {
		x = t->rn_right;
	} else {
		x = t->rn_left;
	}
	p = t->rn_parent;
	if (p->rn_right == t) {
		p->rn_right = x;
	} else {
		p->rn_left = x;
	}
	x->rn_parent = p;
	/*
	 * Demote routes attached to us.
	 */
	if (t->rn_mklist) {
		if (x->rn_bit >= 0) {
			for (mp = &x->rn_mklist; (m = *mp);) {
				mp = &m->rm_mklist;
			}
			*mp = t->rn_mklist;
		} else {
			/*
			 * If there are any key,mask pairs in a sibling
			 * duped-key chain, some subset will appear sorted
			 * in the same order attached to our mklist.
			 */
			for (m = t->rn_mklist; m && x; x = x->rn_dupedkey) {
				if (m == x->rn_mklist) {
					struct radix_mask *mm = m->rm_mklist;
					x->rn_mklist = NULL;
					if (--(m->rm_refs) < 0) {
						MKFree(m);
					}
					m = mm;
				}
			}
			if (m) {
				log(LOG_ERR, "rn_delete: Orphaned Mask "
				    "0x%llx at 0x%llx\n",
				    (uint64_t)VM_KERNEL_ADDRPERM(m),
				    (uint64_t)VM_KERNEL_ADDRPERM(x));
			}
		}
	}
	/*
	 * We may be holding an active internal node in the tree.
	 */
	x = tt + 1;
	if (t != x) {
#ifndef RN_DEBUG
		*t = *x;
#else
		b = t->rn_info;
		*t = *x;
		t->rn_info = b;
#endif
		t->rn_left->rn_parent = t;
		t->rn_right->rn_parent = t;
		p = x->rn_parent;
		if (p->rn_left == x) {
			p->rn_left = t;
		} else {
			p->rn_right = t;
		}
	}
out:
	tt->rn_flags &= ~RNF_ACTIVE;
	tt[1].rn_flags &= ~RNF_ACTIVE;
	return tt;
}

/*
 * This is the same as rn_walktree() except for the parameters and the
 * exit.
 */
static int
rn_walktree_from(struct radix_node_head *h, void *a, void *m, walktree_f_t *f,
    void *w)
{
	int error;
	struct radix_node *base, *next;
	u_char *xa = (u_char *)a;
	u_char *xm = (u_char *)m;
	struct radix_node *rn, *last;
	int stopping;
	int lastb;
	int rnh_cnt;

	/*
	 * This gets complicated because we may delete the node while
	 * applying the function f to it; we cannot simply use the next
	 * leaf as the successor node in advance, because that leaf may
	 * be removed as well during deletion when it is a clone of the
	 * current node. When that happens, we would end up referring
	 * to an already-freed radix node as the successor node. To get
	 * around this issue, if we detect that the radix tree has changed
	 * in dimension (smaller than before), we simply restart the walk
	 * from the top of tree.
	 */
restart:
	last = NULL;
	stopping = 0;
	rnh_cnt = h->rnh_cnt;

	/*
	 * rn_search_m is sort-of-open-coded here.
	 */
	for (rn = h->rnh_treetop; rn->rn_bit >= 0;) {
		last = rn;
		if (!(rn->rn_bmask & xm[rn->rn_offset])) {
			break;
		}

		if (rn->rn_bmask & xa[rn->rn_offset]) {
			rn = rn->rn_right;
		} else {
			rn = rn->rn_left;
		}
	}

	/*
	 * Two cases: either we stepped off the end of our mask,
	 * in which case last == rn, or we reached a leaf, in which
	 * case we want to start from the last node we looked at.
	 * Either way, last is the node we want to start from.
	 */
	rn = last;
	lastb = rn->rn_bit;

	/* First time through node, go left */
	while (rn->rn_bit >= 0) {
		rn = rn->rn_left;
	}

	while (!stopping) {
		base = rn;
		/* If at right child go back up, otherwise, go right */
		while (rn->rn_parent->rn_right == rn
		    && !(rn->rn_flags & RNF_ROOT)) {
			rn = rn->rn_parent;

			/* if went up beyond last, stop */
			if (rn->rn_bit <= lastb) {
				stopping = 1;
				/*
				 * XXX we should jump to the 'Process leaves'
				 * part, because the values of 'rn' and 'next'
				 * we compute will not be used. Not a big deal
				 * because this loop will terminate, but it is
				 * inefficient and hard to understand!
				 */
			}
		}

		/*
		 * The following code (bug fix) inherited from FreeBSD is
		 * currently disabled, because our implementation uses the
		 * RTF_PRCLONING scheme that has been abandoned in the
		 * current FreeBSD release. The scheme involves setting such
		 * a flag for the default route entry, and therefore all
		 * off-link destinations would become clones of that entry.
		 * Enabling the following code would be problematic at this
		 * point, because the removal of the default route would
		 * cause only the left half of the tree to be traversed,
		 * leaving the right half untouched. If there are clones of
		 * the entry that reside in that right half, they would not
		 * be deleted and would linger around until they expire or
		 * are explicitly deleted, which is a very bad thing.
		 *
		 * This code should be uncommented only after we get rid
		 * of the RTF_PRCLONING scheme.
		 */
#if 0
		/*
		 * At the top of the tree, no need to traverse the right
		 * half; this prevents the traversal of the entire tree in
		 * the case of a default route.
		 */
		if (rn->rn_parent->rn_flags & RNF_ROOT) {
			stopping = 1;
		}
#endif

		/* Find the next *leaf* to start from */
		for (rn = rn->rn_parent->rn_right; rn->rn_bit >= 0;) {
			rn = rn->rn_left;
		}
		next = rn;
		/* Process leaves */
		while ((rn = base) != 0) {
			base = rn->rn_dupedkey;
			if (!(rn->rn_flags & RNF_ROOT)
			    && (error = (*f)(rn, w))) {
				return error;
			}
		}
		/* If one or more nodes got deleted, restart from top */
		if (h->rnh_cnt < rnh_cnt) {
			goto restart;
		}
		rn = next;
		if (rn->rn_flags & RNF_ROOT) {
			stopping = 1;
		}
	}
	return 0;
}

static int
rn_walktree(struct radix_node_head *h, walktree_f_t *f, void *w)
{
	int error;
	struct radix_node *base, *next;
	struct radix_node *rn;
	int rnh_cnt;

	/*
	 * This gets complicated because we may delete the node while
	 * applying the function f to it; we cannot simply use the next
	 * leaf as the successor node in advance, because that leaf may
	 * be removed as well during deletion when it is a clone of the
	 * current node. When that happens, we would end up referring
	 * to an already-freed radix node as the successor node. To get
	 * around this issue, if we detect that the radix tree has changed
	 * in dimension (smaller than before), we simply restart the walk
	 * from the top of tree.
	 */
restart:
	rn = h->rnh_treetop;
	rnh_cnt = h->rnh_cnt;

	/* First time through node, go left */
	while (rn->rn_bit >= 0) {
		rn = rn->rn_left;
	}
	for (;;) {
		base = rn;
		/* If at right child go back up, otherwise, go right */
		while (rn->rn_parent->rn_right == rn &&
		    (rn->rn_flags & RNF_ROOT) == 0) {
			rn = rn->rn_parent;
		}
		/* Find the next *leaf* to start from */
		for (rn = rn->rn_parent->rn_right; rn->rn_bit >= 0;) {
			rn = rn->rn_left;
		}
		next = rn;
		/* Process leaves */
		while ((rn = base) != NULL) {
			base = rn->rn_dupedkey;
			if (!(rn->rn_flags & RNF_ROOT)
			    && (error = (*f)(rn, w))) {
				return error;
			}
		}
		/* If one or more nodes got deleted, restart from top */
		if (h->rnh_cnt < rnh_cnt) {
			goto restart;
		}
		rn = next;
		if (rn->rn_flags & RNF_ROOT) {
			return 0;
		}
	}
	/* NOTREACHED */
}
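
/*
 * A minimal sketch of how a walker callback plugs in, assuming a caller
 * that just counts leaves; count_leaves and leaf_count are illustrative
 * names, not part of this file's API:
 *
 *	static int
 *	count_leaves(struct radix_node *rn, void *w)
 *	{
 *		int *leaf_count = w;     // called once per leaf,
 *		(*leaf_count)++;         // duped keys included
 *		return 0;                // nonzero would abort the walk
 *	}
 *
 *	int n = 0;
 *	(void) rnh->rnh_walktree(rnh, count_leaves, &n);
 */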

int
rn_inithead(void **head, int off)
{
	struct radix_node_head *rnh;
	struct radix_node *t, *tt, *ttt;
	if (*head) {
		return 1;
	}
	R_Malloc(rnh, struct radix_node_head *, sizeof(*rnh));
	if (rnh == 0) {
		return 0;
	}
	Bzero(rnh, sizeof(*rnh));
	*head = rnh;
	t = rn_newpair(rn_zeros, off, rnh->rnh_nodes);
	ttt = rnh->rnh_nodes + 2;
	t->rn_right = ttt;
	t->rn_parent = t;
	tt = t->rn_left;
	tt->rn_flags = t->rn_flags = RNF_ROOT | RNF_ACTIVE;
	tt->rn_bit = -1 - off;
	*ttt = *tt;
	ttt->rn_key = rn_ones;
	rnh->rnh_addaddr = rn_addroute;
	rnh->rnh_deladdr = rn_delete;
	rnh->rnh_matchaddr = rn_match;
	rnh->rnh_matchaddr_args = rn_match_args;
	rnh->rnh_lookup = rn_lookup;
	rnh->rnh_lookup_args = rn_lookup_args;
	rnh->rnh_walktree = rn_walktree;
	rnh->rnh_walktree_from = rn_walktree_from;
	rnh->rnh_treetop = t;
	rnh->rnh_cnt = 3;
	return 1;
}
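
/*
 * Typical use, as in the routing code: keep one void * head per address
 * family and initialize it once, passing the bit offset at which keys
 * start to differ. A hypothetical AF_INET caller (the variable names
 * here are illustrative):
 *
 *	void *my_rnh = NULL;
 *	if (rn_inithead(&my_rnh,
 *	    8 * offsetof(struct sockaddr_in, sin_addr)) == 0)
 *		panic("radix head allocation failed");
 *
 * The three preallocated rnh_nodes form the left end marker (key
 * rn_zeros), the top internal node, and the right end marker (key
 * rn_ones), hence rnh_cnt starting at 3.
 */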

void
rn_init(void)
{
	char *cp, *cplim;
	struct domain *dom;

	/* lock already held when rn_init is called */
	TAILQ_FOREACH(dom, &domains, dom_entry) {
		if (dom->dom_maxrtkey > max_keylen) {
			max_keylen = dom->dom_maxrtkey;
		}
	}
	if (max_keylen == 0) {
		log(LOG_ERR,
		    "rn_init: radix functions require max_keylen be set\n");
		return;
	}
	R_Malloc(rn_zeros, char *, 3 * max_keylen);
	if (rn_zeros == NULL) {
		panic("rn_init");
	}
	Bzero(rn_zeros, 3 * max_keylen);
	rn_ones = cp = rn_zeros + max_keylen;
	addmask_key = cplim = rn_ones + max_keylen;
	while (cp < cplim) {
		*cp++ = -1;
	}
	if (rn_inithead((void **)&mask_rnhead, 0) == 0) {
		panic("rn_init 2");
	}
}