bsd/netinet/ip_encap.c

   1 /*
   2  * Copyright (c) 2000-2016 Apple Inc. All rights reserved.
   3  *
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. The rights granted to you under the License
  10  * may not be used to create, or enable the creation or redistribution of,
  11  * unlawful or unlicensed copies of an Apple operating system, or to
  12  * circumvent, violate, or enable the circumvention or violation of, any
  13  * terms of an Apple operating system software license agreement.
  14  *
  15  * Please obtain a copy of the License at
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17  *
  18  * The Original Code and all software distributed under the License are
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23  * Please see the License for the specific language governing rights and
  24  * limitations under the License.
  25  *
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27  */
  28 /*      $FreeBSD: src/sys/netinet/ip_encap.c,v 1.1.2.2 2001/07/03 11:01:46 ume Exp $    */
  29 /*      $KAME: ip_encap.c,v 1.41 2001/03/15 08:35:08 itojun Exp $       */
  30
  31 /*
  32  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
  33  * All rights reserved.
  34  *
  35  * Redistribution and use in source and binary forms, with or without
  36  * modification, are permitted provided that the following conditions
  37  * are met:
  38  * 1. Redistributions of source code must retain the above copyright
  39  *    notice, this list of conditions and the following disclaimer.
  40  * 2. Redistributions in binary form must reproduce the above copyright
  41  *    notice, this list of conditions and the following disclaimer in the
  42  *    documentation and/or other materials provided with the distribution.
  43  * 3. Neither the name of the project nor the names of its contributors
  44  *    may be used to endorse or promote products derived from this software
  45  *    without specific prior written permission.
  46  *
  47  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
  48  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  49  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  50  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
  51  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  52  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  53  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  54  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  55  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  56  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  57  * SUCH DAMAGE.
  58  */
  59 /*
  60  * My grandfather said that there's a devil inside tunnelling technology...
  61  *
  62  * We have surprisingly many protocols that want packets with IP protocol
  63  * #4 or #41.  Here's a list of protocols that want protocol #41:
  64  *      RFC1933 configured tunnel
  65  *      RFC1933 automatic tunnel
  66  *      RFC2401 IPsec tunnel
  67  *      RFC2473 IPv6 generic packet tunnelling
  68  *      RFC2529 6over4 tunnel
  69  *      mobile-ip6 (uses RFC2473)
  70  *      6to4 tunnel
  71  * Here's a list of protocols that want protocol #4:
  72  *      RFC1853 IPv4-in-IPv4 tunnelling
  73  *      RFC2003 IPv4 encapsulation within IPv4
  74  *      RFC2344 reverse tunnelling for mobile-ip4
  75  *      RFC2401 IPsec tunnel
  76  * Well, what can I say.  Each of them imposes a different en/decapsulation
  77  * mechanism, so each needs a separate protocol handler.  The only one we
  78  * can easily identify by protocol # is IPsec, which always has an AH/ESP
  79  * header right after the outer IP header.
  80  *
  81  * So, clearly good old protosw does not work for protocol #4 and #41.
  82  * The code will let you match protocol via src/dst address pair.
  83  */
  84 /* XXX is M_NETADDR correct? */
  85
  86 #include <sys/param.h>
  87 #include <sys/systm.h>
  88 #include <sys/socket.h>
  89 #include <sys/sockio.h>
  90 #include <sys/mbuf.h>
  91 #include <sys/mcache.h>
  92 #include <sys/errno.h>
  93 #include <sys/domain.h>
  94 #include <sys/protosw.h>
  95 #include <sys/queue.h>
  96
  97 #include <net/if.h>
  98 #include <net/route.h>
  99
 100 #include <netinet/in.h>
 101 #include <netinet/in_systm.h>
 102 #include <netinet/ip.h>
 103 #include <netinet/ip_var.h>
 104 #include <netinet/ip_encap.h>
 105
 106 #if INET6
 107 #include <netinet/ip6.h>
 108 #include <netinet6/ip6_var.h>
 109 #include <netinet6/ip6protosw.h>
 110 #endif
 111
 112
 113 #include <net/net_osdep.h>
 114
 115 #ifndef __APPLE__
 116 #include <sys/kernel.h>
 117 #include <sys/malloc.h>
 118 MALLOC_DEFINE(M_NETADDR, "Export Host", "Export host address structure");
 119 #endif
 120
 121 static void encap_init(struct protosw *, struct domain *);
 122 static void encap_add_locked(struct encaptab *);
 123 static int mask_match(const struct encaptab *, const struct sockaddr *,
 124     const struct sockaddr *);
 125 static void encap_fillarg(struct mbuf *, void *arg);
 126
 127 #ifndef LIST_HEAD_INITIALIZER
 128 /* rely upon BSS initialization */
 129 LIST_HEAD(, encaptab) encaptab;
 130 #else
 131 LIST_HEAD(, encaptab) encaptab = LIST_HEAD_INITIALIZER(&encaptab);
 132 #endif
 133
 134 decl_lck_rw_data(static, encaptab_lock);
 135
 136 static void
 137 encap_init(struct protosw *pp, struct domain *dp)
 138 {
 139 #pragma unused(dp)
 140         static int encap_initialized = 0;
 141         lck_grp_attr_t *encaptab_grp_attrib = NULL;
 142         lck_attr_t *encaptab_lck_attrib = NULL;
 143         lck_grp_t *encaptab_lck_group = NULL;
 144
 145         VERIFY((pp->pr_flags & (PR_INITIALIZED | PR_ATTACHED)) == PR_ATTACHED);
 146
 147         /* This gets called by more than one protocols, so initialize once */
 148         if (encap_initialized) {
 149                 return;
 150         }
 151
 152         encaptab_grp_attrib = lck_grp_attr_alloc_init();
 153         encaptab_lck_group = lck_grp_alloc_init("encaptab lock", encaptab_grp_attrib);
 154         lck_grp_attr_free(encaptab_grp_attrib);
 155
 156         encaptab_lck_attrib = lck_attr_alloc_init();
 157         lck_rw_init(&encaptab_lock, encaptab_lck_group, encaptab_lck_attrib);
 158
 159         lck_grp_free(encaptab_lck_group);
 160         lck_attr_free(encaptab_lck_attrib);
 161
 162         encap_initialized = 1;
 163 #if 0
 164         /*
 165          * we cannot use LIST_INIT() here, since drivers may want to call
 166          * encap_attach(), on driver attach.  encap_init() will be called
 167          * on AF_INET{,6} initialization, which happens after driver
 168          * initialization - using LIST_INIT() here can nuke encap_attach()
 169          * from drivers.
 170          */
 171         LIST_INIT(&encaptab);
 172 #endif
 173 }
 174
 175 void
 176 encap4_init(struct protosw *pp, struct domain *dp)
 177 {
 178         encap_init(pp, dp);
 179 }
 180
 181 void
 182 encap6_init(struct ip6protosw *pp, struct domain *dp)
 183 {
 184         encap_init((struct protosw *)pp, dp);
 185 }
 186
 187 #if INET
 188 void
 189 encap4_input(struct mbuf *m, int off)
 190 {
 191         int proto;
 192         struct ip *ip;
 193         struct sockaddr_in s, d;
 194         const struct protosw *psw;
 195         struct encaptab *ep, *match;
 196         int prio, matchprio;
 197         void *match_arg = NULL;
 198
 199 #ifndef __APPLE__
 200         va_start(ap, m);
 201         off = va_arg(ap, int);
 202         proto = va_arg(ap, int);
 203         va_end(ap);
 204 #endif
 205
 206         /* Expect 32-bit aligned data pointer on strict-align platforms */
 207         MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m);
 208
 209         ip = mtod(m, struct ip *);
 210 #ifdef __APPLE__
 211         proto = ip->ip_p;
 212 #endif
 213
 214         bzero(&s, sizeof(s));
 215         s.sin_family = AF_INET;
 216         s.sin_len = sizeof(struct sockaddr_in);
 217         s.sin_addr = ip->ip_src;
 218         bzero(&d, sizeof(d));
 219         d.sin_family = AF_INET;
 220         d.sin_len = sizeof(struct sockaddr_in);
 221         d.sin_addr = ip->ip_dst;
 222
 223         match = NULL;
 224         matchprio = 0;
 225
 226         lck_rw_lock_shared(&encaptab_lock);
 227         for (ep = LIST_FIRST(&encaptab); ep; ep = LIST_NEXT(ep, chain)) {
 228                 if (ep->af != AF_INET) {
 229                         continue;
 230                 }
 231                 if (ep->proto >= 0 && ep->proto != proto) {
 232                         continue;
 233                 }
 234                 if (ep->func) {
 235                         prio = (*ep->func)(m, off, proto, ep->arg);
 236                 } else {
 237                         /*
 238                          * it's inbound traffic, we need to match in reverse
 239                          * order
 240                          */
 241                         prio = mask_match(ep, (struct sockaddr *)&d,
 242                             (struct sockaddr *)&s);
 243                 }
 244
 245                 /*
 246                  * We prioritize the matches by using bit length of the
 247                  * matches.  mask_match() and user-supplied matching function
 248                  * should return the bit length of the matches (for example,
 249                  * if both src/dst are matched for IPv4, 64 should be returned).
 250                  * 0 or negative return value means "it did not match".
 251                  *
 252                  * The question is, since we have two "mask" portion, we
 253                  * cannot really define total order between entries.
 254                  * For example, which of these should be preferred?
 255                  * mask_match() returns 48 (32 + 16) for both of them.
 256                  *      src=3ffe::/16, dst=3ffe:501::/32
 257                  *      src=3ffe:501::/32, dst=3ffe::/16
 258                  *
 259                  * We need to loop through all the possible candidates
 260                  * to get the best match - the search takes O(n) for
 261                  * n attachments (i.e. interfaces).
 262                  */
 263                 if (prio <= 0) {
 264                         continue;
 265                 }
 266                 if (prio > matchprio) {
 267                         matchprio = prio;
 268                         match = ep;
 269                         psw = (const struct protosw *)match->psw;
 270                         match_arg = ep->arg;
 271                 }
 272         }
 273         lck_rw_unlock_shared(&encaptab_lock);
 274
 275         if (match) {
 276                 /* found a match, "match" has the best one */
 277                 if (psw && psw->pr_input) {
 278                         encap_fillarg(m, match_arg);
 279                         (*psw->pr_input)(m, off);
 280                 } else {
 281                         m_freem(m);
 282                 }
 283                 return;
 284         }
 285
 286         /* last resort: inject to raw socket */
 287         rip_input(m, off);
 288 }
 289 #endif
 290
 291 #if INET6
 292 int
 293 encap6_input(struct mbuf **mp, int *offp, int proto)
 294 {
 295         struct mbuf *m = *mp;
 296         struct ip6_hdr *ip6;
 297         struct sockaddr_in6 s, d;
 298         const struct ip6protosw *psw;
 299         struct encaptab *ep, *match;
 300         int prio, matchprio;
 301         void *match_arg = NULL;
 302
 303         /* Expect 32-bit aligned data pointer on strict-align platforms */
 304         MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m);
 305
 306         ip6 = mtod(m, struct ip6_hdr *);
 307         bzero(&s, sizeof(s));
 308         s.sin6_family = AF_INET6;
 309         s.sin6_len = sizeof(struct sockaddr_in6);
 310         s.sin6_addr = ip6->ip6_src;
 311         bzero(&d, sizeof(d));
 312         d.sin6_family = AF_INET6;
 313         d.sin6_len = sizeof(struct sockaddr_in6);
 314         d.sin6_addr = ip6->ip6_dst;
 315
 316         match = NULL;
 317         matchprio = 0;
 318
 319         lck_rw_lock_shared(&encaptab_lock);
 320         for (ep = LIST_FIRST(&encaptab); ep; ep = LIST_NEXT(ep, chain)) {
 321                 if (ep->af != AF_INET6) {
 322                         continue;
 323                 }
 324                 if (ep->proto >= 0 && ep->proto != proto) {
 325                         continue;
 326                 }
 327                 if (ep->func) {
 328                         prio = (*ep->func)(m, *offp, proto, ep->arg);
 329                 } else {
 330                         /*
 331                          * it's inbound traffic, we need to match in reverse
 332                          * order
 333                          */
 334                         prio = mask_match(ep, (struct sockaddr *)&d,
 335                             (struct sockaddr *)&s);
 336                 }
 337
 338                 /* see encap4_input() for issues here */
 339                 if (prio <= 0) {
 340                         continue;
 341                 }
 342                 if (prio > matchprio) {
 343                         matchprio = prio;
 344                         match = ep;
 345                         psw = (const struct ip6protosw *)match->psw;
 346                         match_arg = ep->arg;
 347                 }
 348         }
 349         lck_rw_unlock_shared(&encaptab_lock);
 350
 351         if (match) {
 352                 /* found a match */
 353                 if (psw && psw->pr_input) {
 354                         encap_fillarg(m, match_arg);
 355                         return (*psw->pr_input)(mp, offp, proto);
 356                 } else {
 357                         m_freem(m);
 358                         return IPPROTO_DONE;
 359                 }
 360         }
 361
 362         /* last resort: inject to raw socket */
 363         return rip6_input(mp, offp, proto);
 364 }
 365 #endif
 366
 367 static void
 368 encap_add_locked(struct encaptab *ep)
 369 {
 370         LCK_RW_ASSERT(&encaptab_lock, LCK_RW_ASSERT_EXCLUSIVE);
 371         LIST_INSERT_HEAD(&encaptab, ep, chain);
 372 }
 373
 374 /*
 375  * sp (src ptr) is always my side, and dp (dst ptr) is always remote side.
 376  * length of mask (sm and dm) is assumed to be same as sp/dp.
 377  * Return value will be necessary as input (cookie) for encap_detach().
 378  */
 379 const struct encaptab *
 380 encap_attach(int af, int proto, const struct sockaddr *sp,
 381     const struct sockaddr *sm, const struct sockaddr *dp,
 382     const struct sockaddr *dm, const struct protosw *psw, void *arg)
 383 {
 384         struct encaptab *ep = NULL;
 385         struct encaptab *new_ep = NULL;
 386         int error;
 387
 388         /* sanity check on args */
 389         if (sp->sa_len > sizeof(new_ep->src) || dp->sa_len > sizeof(new_ep->dst)) {
 390                 error = EINVAL;
 391                 goto fail;
 392         }
 393         if (sp->sa_len != dp->sa_len) {
 394                 error = EINVAL;
 395                 goto fail;
 396         }
 397         if (af != sp->sa_family || af != dp->sa_family) {
 398                 error = EINVAL;
 399                 goto fail;
 400         }
 401
 402         new_ep = _MALLOC(sizeof(*new_ep), M_NETADDR, M_WAITOK | M_ZERO);
 403         if (new_ep == NULL) {
 404                 error = ENOBUFS;
 405                 goto fail;
 406         }
 407
 408         /* check if anyone have already attached with exactly same config */
 409         lck_rw_lock_exclusive(&encaptab_lock);
 410         for (ep = LIST_FIRST(&encaptab); ep; ep = LIST_NEXT(ep, chain)) {
 411                 if (ep->af != af) {
 412                         continue;
 413                 }
 414                 if (ep->proto != proto) {
 415                         continue;
 416                 }
 417                 if (ep->src.ss_len != sp->sa_len ||
 418                     bcmp(&ep->src, sp, sp->sa_len) != 0 ||
 419                     bcmp(&ep->srcmask, sm, sp->sa_len) != 0) {
 420                         continue;
 421                 }
 422                 if (ep->dst.ss_len != dp->sa_len ||
 423                     bcmp(&ep->dst, dp, dp->sa_len) != 0 ||
 424                     bcmp(&ep->dstmask, dm, dp->sa_len) != 0) {
 425                         continue;
 426                 }
 427
 428                 error = EEXIST;
 429                 goto fail_locked;
 430         }
 431
 432         new_ep->af = af;
 433         new_ep->proto = proto;
 434         bcopy(sp, &new_ep->src, sp->sa_len);
 435         bcopy(sm, &new_ep->srcmask, sp->sa_len);
 436         bcopy(dp, &new_ep->dst, dp->sa_len);
 437         bcopy(dm, &new_ep->dstmask, dp->sa_len);
 438         new_ep->psw = psw;
 439         new_ep->arg = arg;
 440
 441         encap_add_locked(new_ep);
 442         lck_rw_unlock_exclusive(&encaptab_lock);
 443
 444         error = 0;
 445         return new_ep;
 446
 447 fail_locked:
 448         lck_rw_unlock_exclusive(&encaptab_lock);
 449         if (new_ep != NULL) {
 450                 _FREE(new_ep, M_NETADDR);
 451         }
 452 fail:
 453         return NULL;
 454 }
 455
 456 const struct encaptab *
 457 encap_attach_func( int af, int proto,
 458     int (*func)(const struct mbuf *, int, int, void *),
 459     const struct protosw *psw, void *arg)
 460 {
 461         struct encaptab *ep;
 462         int error;
 463
 464         /* sanity check on args */
 465         if (!func) {
 466                 error = EINVAL;
 467                 goto fail;
 468         }
 469
 470         ep = _MALLOC(sizeof(*ep), M_NETADDR, M_WAITOK | M_ZERO); /* XXX */
 471         if (ep == NULL) {
 472                 error = ENOBUFS;
 473                 goto fail;
 474         }
 475
 476         ep->af = af;
 477         ep->proto = proto;
 478         ep->func = func;
 479         ep->psw = psw;
 480         ep->arg = arg;
 481
 482         lck_rw_lock_exclusive(&encaptab_lock);
 483         encap_add_locked(ep);
 484         lck_rw_unlock_exclusive(&encaptab_lock);
 485
 486         error = 0;
 487         return ep;
 488
 489 fail:
 490         return NULL;
 491 }
 492
 493 int
 494 encap_detach(const struct encaptab *cookie)
 495 {
 496         const struct encaptab *ep = cookie;
 497         struct encaptab *p;
 498
 499         lck_rw_lock_exclusive(&encaptab_lock);
 500         for (p = LIST_FIRST(&encaptab); p; p = LIST_NEXT(p, chain)) {
 501                 if (p == ep) {
 502                         LIST_REMOVE(p, chain);
 503                         lck_rw_unlock_exclusive(&encaptab_lock);
 504                         _FREE(p, M_NETADDR);    /*XXX*/
 505                         return 0;
 506                 }
 507         }
 508         lck_rw_unlock_exclusive(&encaptab_lock);
 509
 510         return EINVAL;
 511 }
 512
 513 static int
 514 mask_match(const struct encaptab *ep, const struct sockaddr *sp,
 515     const struct sockaddr *dp)
 516 {
 517         struct sockaddr_storage s;
 518         struct sockaddr_storage d;
 519         int i;
 520         const u_int8_t *p, *q;
 521         u_int8_t *r;
 522         int matchlen;
 523
 524         if (sp->sa_len > sizeof(s) || dp->sa_len > sizeof(d)) {
 525                 return 0;
 526         }
 527         if (sp->sa_family != ep->af || dp->sa_family != ep->af) {
 528                 return 0;
 529         }
 530         if (sp->sa_len != ep->src.ss_len || dp->sa_len != ep->dst.ss_len) {
 531                 return 0;
 532         }
 533
 534         matchlen = 0;
 535
 536         p = (const u_int8_t *)sp;
 537         q = (const u_int8_t *)&ep->srcmask;
 538         r = (u_int8_t *)&s;
 539         for (i = 0; i < sp->sa_len; i++) {
 540                 r[i] = p[i] & q[i];
 541                 /* XXX estimate */
 542                 matchlen += (q[i] ? 8 : 0);
 543         }
 544
 545         p = (const u_int8_t *)dp;
 546         q = (const u_int8_t *)&ep->dstmask;
 547         r = (u_int8_t *)&d;
 548         for (i = 0; i < dp->sa_len; i++) {
 549                 r[i] = p[i] & q[i];
 550                 /* XXX rough estimate */
 551                 matchlen += (q[i] ? 8 : 0);
 552         }
 553
 554         /* need to overwrite len/family portion as we don't compare them */
 555         s.ss_len = sp->sa_len;
 556         s.ss_family = sp->sa_family;
 557         d.ss_len = dp->sa_len;
 558         d.ss_family = dp->sa_family;
 559
 560         if (bcmp(&s, &ep->src, ep->src.ss_len) == 0 &&
 561             bcmp(&d, &ep->dst, ep->dst.ss_len) == 0) {
 562                 return matchlen;
 563         } else {
 564                 return 0;
 565         }
 566 }
 567
 568 struct encaptabtag {
 569         void*                   *arg;
 570 };
 571
 572 static void
 573 encap_fillarg(
 574         struct mbuf *m,
 575         void *arg)
 576 {
 577         struct m_tag    *tag;
 578         struct encaptabtag *et;
 579
 580         tag = m_tag_create(KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_ENCAP,
 581             sizeof(struct encaptabtag), M_WAITOK, m);
 582
 583         if (tag != NULL) {
 584                 et = (struct encaptabtag*)(tag + 1);
 585                 et->arg = arg;
 586                 m_tag_prepend(m, tag);
 587         }
 588 }
 589
 590 void *
 591 encap_getarg(struct mbuf *m)
 592 {
 593         struct m_tag    *tag;
 594         struct encaptabtag *et;
 595         void *p = NULL;
 596
 597         tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_ENCAP, NULL);
 598         if (tag) {
 599                 et = (struct encaptabtag*)(tag + 1);
 600                 p = et->arg;
 601                 m_tag_delete(m, tag);
 602         }
 603
 604         return p;
 605 }