bsd/net/bpf.c

   1 /*
   2  * Copyright (c) 2000-2017 Apple Inc. All rights reserved.
   3  *
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. The rights granted to you under the License
  10  * may not be used to create, or enable the creation or redistribution of,
  11  * unlawful or unlicensed copies of an Apple operating system, or to
  12  * circumvent, violate, or enable the circumvention or violation of, any
  13  * terms of an Apple operating system software license agreement.
  14  *
  15  * Please obtain a copy of the License at
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17  *
  18  * The Original Code and all software distributed under the License are
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23  * Please see the License for the specific language governing rights and
  24  * limitations under the License.
  25  *
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27  */
  28 /*
  29  * Copyright (c) 1990, 1991, 1993
  30  *      The Regents of the University of California.  All rights reserved.
  31  *
  32  * This code is derived from the Stanford/CMU enet packet filter,
  33  * (net/enet.c) distributed as part of 4.3BSD, and code contributed
  34  * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
  35  * Berkeley Laboratory.
  36  *
  37  * Redistribution and use in source and binary forms, with or without
  38  * modification, are permitted provided that the following conditions
  39  * are met:
  40  * 1. Redistributions of source code must retain the above copyright
  41  *    notice, this list of conditions and the following disclaimer.
  42  * 2. Redistributions in binary form must reproduce the above copyright
  43  *    notice, this list of conditions and the following disclaimer in the
  44  *    documentation and/or other materials provided with the distribution.
  45  * 3. All advertising materials mentioning features or use of this software
  46  *    must display the following acknowledgement:
  47  *      This product includes software developed by the University of
  48  *      California, Berkeley and its contributors.
  49  * 4. Neither the name of the University nor the names of its contributors
  50  *    may be used to endorse or promote products derived from this software
  51  *    without specific prior written permission.
  52  *
  53  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  54  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  55  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  56  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  57  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  58  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  59  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  60  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  61  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  62  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  63  * SUCH DAMAGE.
  64  *
  65  *      @(#)bpf.c       8.2 (Berkeley) 3/28/94
  66  *
  67  * $FreeBSD: src/sys/net/bpf.c,v 1.59.2.5 2001/01/05 04:49:09 jdp Exp $
  68  */
  69 /*
  70  * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
  71  * support for mandatory and extensible security protections.  This notice
  72  * is included in support of clause 2.2 (b) of the Apple Public License,
  73  * Version 2.0.
  74  */
  75
  76 #include "bpf.h"
  77
  78 #ifndef __GNUC__
  79 #define inline
  80 #else
  81 #define inline __inline
  82 #endif
  83
  84 #include <sys/param.h>
  85 #include <sys/systm.h>
  86 #include <sys/conf.h>
  87 #include <sys/malloc.h>
  88 #include <sys/mbuf.h>
  89 #include <sys/time.h>
  90 #include <sys/proc.h>
  91 #include <sys/signalvar.h>
  92 #include <sys/filio.h>
  93 #include <sys/sockio.h>
  94 #include <sys/ttycom.h>
  95 #include <sys/filedesc.h>
  96 #include <sys/uio_internal.h>
  97 #include <sys/file_internal.h>
  98 #include <sys/event.h>
  99
 100 #include <sys/poll.h>
 101
 102 #include <sys/socket.h>
 103 #include <sys/socketvar.h>
 104 #include <sys/vnode.h>
 105
 106 #include <net/if.h>
 107 #include <net/bpf.h>
 108 #include <net/bpfdesc.h>
 109
 110 #include <netinet/in.h>
 111 #include <netinet/in_pcb.h>
 112 #include <netinet/in_var.h>
 113 #include <netinet/ip_var.h>
 114 #include <netinet/tcp.h>
 115 #include <netinet/tcp_var.h>
 116 #include <netinet/udp.h>
 117 #include <netinet/udp_var.h>
 118 #include <netinet/if_ether.h>
 119 #include <sys/kernel.h>
 120 #include <sys/sysctl.h>
 121 #include <net/firewire.h>
 122
 123 #include <miscfs/devfs/devfs.h>
 124 #include <net/dlil.h>
 125 #include <net/pktap.h>
 126
 127 #include <kern/locks.h>
 128 #include <kern/thread_call.h>
 129
 130 #if CONFIG_MACF_NET
 131 #include <security/mac_framework.h>
 132 #endif /* MAC_NET */
 133
 134 extern int tvtohz(struct timeval *);
 135
 136 #define BPF_BUFSIZE 4096
 137 #define UIOMOVE(cp, len, code, uio) uiomove(cp, len, uio)
 138
 139
 140 #define PRINET  26                      /* interruptible */
 141
 142 /*
 143  * The default read buffer size is patchable.
 144  */
 145 static unsigned int bpf_bufsize = BPF_BUFSIZE;
 146 SYSCTL_INT(_debug, OID_AUTO, bpf_bufsize, CTLFLAG_RW | CTLFLAG_LOCKED,
 147         &bpf_bufsize, 0, "");
 148 __private_extern__ unsigned int bpf_maxbufsize = BPF_MAXBUFSIZE;
 149 SYSCTL_INT(_debug, OID_AUTO, bpf_maxbufsize, CTLFLAG_RW | CTLFLAG_LOCKED,
 150         &bpf_maxbufsize, 0, "");
 151 static unsigned int bpf_maxdevices = 256;
 152 SYSCTL_UINT(_debug, OID_AUTO, bpf_maxdevices, CTLFLAG_RW | CTLFLAG_LOCKED,
 153         &bpf_maxdevices, 0, "");
 154 /*
 155  * bpf_wantpktap controls the defaul visibility of DLT_PKTAP
 156  * For OS X is off by default so process need to use the ioctl BPF_WANT_PKTAP
 157  * explicitly to be able to use DLT_PKTAP.
 158  */
 159 static unsigned int bpf_wantpktap = 0;
 160 SYSCTL_UINT(_debug, OID_AUTO, bpf_wantpktap, CTLFLAG_RW | CTLFLAG_LOCKED,
 161         &bpf_wantpktap, 0, "");
 162
 163 static int bpf_debug = 0;
 164 SYSCTL_INT(_debug, OID_AUTO, bpf_debug, CTLFLAG_RW | CTLFLAG_LOCKED,
 165         &bpf_debug, 0, "");
 166
 167 /*
 168  *  bpf_iflist is the list of interfaces; each corresponds to an ifnet
 169  *  bpf_dtab holds pointer to the descriptors, indexed by minor device #
 170  */
 171 static struct bpf_if    *bpf_iflist;
 172 #ifdef __APPLE__
 173 /*
 174  * BSD now stores the bpf_d in the dev_t which is a struct
 175  * on their system. Our dev_t is an int, so we still store
 176  * the bpf_d in a separate table indexed by minor device #.
 177  *
 178  * The value stored in bpf_dtab[n] represent three states:
 179  *  0: device not opened
 180  *  1: device opening or closing
 181  *  other: device <n> opened with pointer to storage
 182  */
 183 static struct bpf_d     **bpf_dtab = NULL;
 184 static unsigned int bpf_dtab_size = 0;
 185 static unsigned int     nbpfilter = 0;
 186
 187 decl_lck_mtx_data(static, bpf_mlock_data);
 188 static lck_mtx_t                *bpf_mlock = &bpf_mlock_data;
 189 static lck_grp_t                *bpf_mlock_grp;
 190 static lck_grp_attr_t   *bpf_mlock_grp_attr;
 191 static lck_attr_t               *bpf_mlock_attr;
 192
 193 static mbuf_tag_id_t bpf_mtag_id;
 194 #endif /* __APPLE__ */
 195
 196 static int      bpf_allocbufs(struct bpf_d *);
 197 static errno_t  bpf_attachd(struct bpf_d *d, struct bpf_if *bp);
 198 static int      bpf_detachd(struct bpf_d *d, int);
 199 static void     bpf_freed(struct bpf_d *);
 200 static void     bpf_mcopy(const void *, void *, size_t);
 201 static int      bpf_movein(struct uio *, int,
 202                     struct mbuf **, struct sockaddr *, int *);
 203 static int      bpf_setif(struct bpf_d *, ifnet_t ifp, u_int32_t dlt);
 204 static void     bpf_timed_out(void *, void *);
 205 static void     bpf_wakeup(struct bpf_d *);
 206 static void     catchpacket(struct bpf_d *, u_char *, struct mbuf *, u_int,
 207                     u_int, int, void (*)(const void *, void *, size_t));
 208 static void     reset_d(struct bpf_d *);
 209 static int      bpf_setf(struct bpf_d *, u_int, user_addr_t, u_long);
 210 static int      bpf_getdltlist(struct bpf_d *, caddr_t, struct proc *);
 211 static int      bpf_setdlt(struct bpf_d *, u_int);
 212 static int      bpf_set_traffic_class(struct bpf_d *, int);
 213 static void     bpf_set_packet_service_class(struct mbuf *, int);
 214
 215 static void     bpf_acquire_d(struct bpf_d *);
 216 static void     bpf_release_d(struct bpf_d *);
 217
 218 static  int bpf_devsw_installed;
 219
 220 void bpf_init(void *unused);
 221 static int bpf_tap_callback(struct ifnet *ifp, struct mbuf *m);
 222
 223 /*
 224  * Darwin differs from BSD here, the following are static
 225  * on BSD and not static on Darwin.
 226  */
 227         d_open_t            bpfopen;
 228         d_close_t           bpfclose;
 229         d_read_t            bpfread;
 230         d_write_t           bpfwrite;
 231         ioctl_fcn_t         bpfioctl;
 232         select_fcn_t        bpfselect;
 233
 234
 235 /* Darwin's cdevsw struct differs slightly from BSDs */
 236 #define CDEV_MAJOR 23
 237 static struct cdevsw bpf_cdevsw = {
 238         /* open */          bpfopen,
 239         /* close */         bpfclose,
 240         /* read */          bpfread,
 241         /* write */         bpfwrite,
 242         /* ioctl */         bpfioctl,
 243         /* stop */          eno_stop,
 244         /* reset */         eno_reset,
 245         /* tty */           NULL,
 246         /* select */        bpfselect,
 247         /* mmap */          eno_mmap,
 248         /* strategy*/       eno_strat,
 249         /* getc */          eno_getc,
 250         /* putc */          eno_putc,
 251         /* type */          0
 252 };
 253
 254 #define SOCKADDR_HDR_LEN           offsetof(struct sockaddr, sa_data)
 255
 256 static int
 257 bpf_movein(struct uio *uio, int linktype, struct mbuf **mp, struct sockaddr *sockp, int *datlen)
 258 {
 259         struct mbuf *m;
 260         int error;
 261         int len;
 262         uint8_t sa_family;
 263         int hlen;
 264
 265         switch (linktype) {
 266
 267 #if SLIP
 268         case DLT_SLIP:
 269                 sa_family = AF_INET;
 270                 hlen = 0;
 271                 break;
 272 #endif /* SLIP */
 273
 274         case DLT_EN10MB:
 275                 sa_family = AF_UNSPEC;
 276                 /* XXX Would MAXLINKHDR be better? */
 277                 hlen = sizeof(struct ether_header);
 278                 break;
 279
 280 #if FDDI
 281         case DLT_FDDI:
 282         #if defined(__FreeBSD__) || defined(__bsdi__)
 283                 sa_family = AF_IMPLINK;
 284                 hlen = 0;
 285         #else
 286                 sa_family = AF_UNSPEC;
 287                 /* XXX 4(FORMAC)+6(dst)+6(src)+3(LLC)+5(SNAP) */
 288                 hlen = 24;
 289         #endif
 290                 break;
 291 #endif /* FDDI */
 292
 293         case DLT_RAW:
 294         case DLT_NULL:
 295                 sa_family = AF_UNSPEC;
 296                 hlen = 0;
 297                 break;
 298
 299         #ifdef __FreeBSD__
 300         case DLT_ATM_RFC1483:
 301                 /*
 302                  * en atm driver requires 4-byte atm pseudo header.
 303                  * though it isn't standard, vpi:vci needs to be
 304                  * specified anyway.
 305                  */
 306                 sa_family = AF_UNSPEC;
 307                 hlen = 12;      /* XXX 4(ATM_PH) + 3(LLC) + 5(SNAP) */
 308                 break;
 309         #endif
 310
 311         case DLT_PPP:
 312                 sa_family = AF_UNSPEC;
 313                 hlen = 4;       /* This should match PPP_HDRLEN */
 314                 break;
 315
 316         case DLT_APPLE_IP_OVER_IEEE1394:
 317                 sa_family = AF_UNSPEC;
 318                 hlen = sizeof(struct firewire_header);
 319                 break;
 320
 321         case DLT_IEEE802_11:            /* IEEE 802.11 wireless */
 322                 sa_family = AF_IEEE80211;
 323                 hlen = 0;
 324                 break;
 325
 326         case DLT_IEEE802_11_RADIO:
 327                 sa_family = AF_IEEE80211;
 328                 hlen = 0;
 329                 break;
 330
 331         default:
 332                 return (EIO);
 333         }
 334
 335         // LP64todo - fix this!
 336         len = uio_resid(uio);
 337         *datlen = len - hlen;
 338         if ((unsigned)len > MCLBYTES)
 339                 return (EIO);
 340
 341         if (sockp) {
 342                 /*
 343                  * Build a sockaddr based on the data link layer type.
 344                  * We do this at this level because the ethernet header
 345                  * is copied directly into the data field of the sockaddr.
 346                  * In the case of SLIP, there is no header and the packet
 347                  * is forwarded as is.
 348                  * Also, we are careful to leave room at the front of the mbuf
 349                  * for the link level header.
 350                  */
 351                 if ((hlen + SOCKADDR_HDR_LEN) > sockp->sa_len) {
 352                         return (EIO);
 353                 }
 354                 sockp->sa_family = sa_family;
 355         } else {
 356                 /*
 357                  * We're directly sending the packet data supplied by
 358                  * the user; we don't need to make room for the link
 359                  * header, and don't need the header length value any
 360                  * more, so set it to 0.
 361                  */
 362                 hlen = 0;
 363         }
 364
 365         MGETHDR(m, M_WAIT, MT_DATA);
 366         if (m == 0)
 367                 return (ENOBUFS);
 368         if ((unsigned)len > MHLEN) {
 369                 MCLGET(m, M_WAIT);
 370                 if ((m->m_flags & M_EXT) == 0) {
 371                         error = ENOBUFS;
 372                         goto bad;
 373                 }
 374         }
 375         m->m_pkthdr.len = m->m_len = len;
 376         m->m_pkthdr.rcvif = NULL;
 377         *mp = m;
 378
 379         /*
 380          * Make room for link header.
 381          */
 382         if (hlen != 0) {
 383                 m->m_pkthdr.len -= hlen;
 384                 m->m_len -= hlen;
 385                 m->m_data += hlen; /* XXX */
 386                 error = UIOMOVE((caddr_t)sockp->sa_data, hlen, UIO_WRITE, uio);
 387                 if (error)
 388                         goto bad;
 389         }
 390         error = UIOMOVE(mtod(m, caddr_t), len - hlen, UIO_WRITE, uio);
 391         if (error)
 392                 goto bad;
 393
 394         /* Check for multicast destination */
 395         switch (linktype) {
 396                 case DLT_EN10MB: {
 397                         struct ether_header *eh = mtod(m, struct ether_header *);
 398
 399                         if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
 400                                 if (_ether_cmp(etherbroadcastaddr, eh->ether_dhost) == 0)
 401                                         m->m_flags |= M_BCAST;
 402                                 else
 403                                         m->m_flags |= M_MCAST;
 404                         }
 405                         break;
 406                 }
 407         }
 408
 409         return 0;
 410  bad:
 411         m_freem(m);
 412         return (error);
 413 }
 414
 415 #ifdef __APPLE__
 416
 417 /*
 418  * The dynamic addition of a new device node must block all processes that
 419  * are opening the last device so that no process will get an unexpected
 420  * ENOENT
 421  */
 422 static void
 423 bpf_make_dev_t(int maj)
 424 {
 425         static int              bpf_growing = 0;
 426         unsigned int    cur_size = nbpfilter, i;
 427
 428         if (nbpfilter >= bpf_maxdevices)
 429                 return;
 430
 431         while (bpf_growing) {
 432                 /* Wait until new device has been created */
 433                 (void)tsleep((caddr_t)&bpf_growing, PZERO, "bpf_growing", 0);
 434         }
 435         if (nbpfilter > cur_size) {
 436                 /* other thread grew it already */
 437                 return;
 438         }
 439         bpf_growing = 1;
 440
 441         /* need to grow bpf_dtab first */
 442         if (nbpfilter == bpf_dtab_size) {
 443                 int new_dtab_size;
 444                 struct bpf_d **new_dtab = NULL;
 445                 struct bpf_d **old_dtab = NULL;
 446
 447                 new_dtab_size = bpf_dtab_size + NBPFILTER;
 448                 new_dtab = (struct bpf_d **)_MALLOC(sizeof(struct bpf_d *) * new_dtab_size, M_DEVBUF, M_WAIT);
 449                 if (new_dtab == 0) {
 450                         printf("bpf_make_dev_t: malloc bpf_dtab failed\n");
 451                         goto done;
 452                 }
 453                 if (bpf_dtab) {
 454                         bcopy(bpf_dtab, new_dtab,
 455                                   sizeof(struct bpf_d *) * bpf_dtab_size);
 456                 }
 457                 bzero(new_dtab + bpf_dtab_size,
 458                           sizeof(struct bpf_d *) * NBPFILTER);
 459                 old_dtab = bpf_dtab;
 460                 bpf_dtab = new_dtab;
 461                 bpf_dtab_size = new_dtab_size;
 462                 if (old_dtab != NULL)
 463                         _FREE(old_dtab, M_DEVBUF);
 464         }
 465         i = nbpfilter++;
 466         (void) devfs_make_node(makedev(maj, i),
 467                                 DEVFS_CHAR, UID_ROOT, GID_WHEEL, 0600,
 468                                 "bpf%d", i);
 469 done:
 470         bpf_growing = 0;
 471         wakeup((caddr_t)&bpf_growing);
 472 }
 473
 474 #endif
 475
 476 /*
 477  * Attach file to the bpf interface, i.e. make d listen on bp.
 478  */
 479 static errno_t
 480 bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
 481 {
 482         int first = bp->bif_dlist == NULL;
 483         int     error = 0;
 484
 485         /*
 486          * Point d at bp, and add d to the interface's list of listeners.
 487          * Finally, point the driver's bpf cookie at the interface so
 488          * it will divert packets to bpf.
 489          */
 490         d->bd_bif = bp;
 491         d->bd_next = bp->bif_dlist;
 492         bp->bif_dlist = d;
 493
 494         /*
 495          * Take a reference on the device even if an error is returned
 496          * because we keep the device in the interface's list of listeners
 497          */
 498         bpf_acquire_d(d);
 499
 500         if (first) {
 501                 /* Find the default bpf entry for this ifp */
 502                 if (bp->bif_ifp->if_bpf == NULL) {
 503                         struct bpf_if   *tmp, *primary = NULL;
 504
 505                         for (tmp = bpf_iflist; tmp; tmp = tmp->bif_next) {
 506                                 if (tmp->bif_ifp != bp->bif_ifp)
 507                                         continue;
 508                                 primary = tmp;
 509                                 /*
 510                                  * Make DLT_PKTAP only if process knows how
 511                                  * to deal with it, otherwise find another one
 512                                  */
 513                                 if (tmp->bif_dlt == DLT_PKTAP &&
 514                                         !(d->bd_flags & BPF_WANT_PKTAP))
 515                                         continue;
 516                                 break;
 517                         }
 518                         bp->bif_ifp->if_bpf = primary;
 519                 }
 520
 521                 /* Only call dlil_set_bpf_tap for primary dlt */
 522                 if (bp->bif_ifp->if_bpf == bp)
 523                         dlil_set_bpf_tap(bp->bif_ifp, BPF_TAP_INPUT_OUTPUT, bpf_tap_callback);
 524
 525                 if (bp->bif_tap)
 526                         error = bp->bif_tap(bp->bif_ifp, bp->bif_dlt, BPF_TAP_INPUT_OUTPUT);
 527         }
 528
 529         /*
 530          * Reset the detach flags in case we previously detached an interface
 531          */
 532         d->bd_flags &= ~(BPF_DETACHING | BPF_DETACHED);
 533
 534         if (bp->bif_ifp->if_bpf != NULL &&
 535                 bp->bif_ifp->if_bpf->bif_dlt == DLT_PKTAP)
 536                 d->bd_flags |= BPF_FINALIZE_PKTAP;
 537         else
 538                 d->bd_flags &= ~BPF_FINALIZE_PKTAP;
 539
 540         return error;
 541 }
 542
 543 /*
 544  * Detach a file from its interface.
 545  *
 546  * Return 1 if was closed by some thread, 0 otherwise
 547  */
 548 static int
 549 bpf_detachd(struct bpf_d *d, int closing)
 550 {
 551         struct bpf_d **p;
 552         struct bpf_if *bp;
 553         struct ifnet  *ifp;
 554
 555         /*
 556          * Some other thread already detached
 557          */
 558         if ((d->bd_flags & (BPF_DETACHED | BPF_DETACHING)) != 0)
 559                 goto done;
 560         /*
 561          * This thread is doing the detach
 562          */
 563         d->bd_flags |= BPF_DETACHING;
 564
 565         ifp = d->bd_bif->bif_ifp;
 566         bp = d->bd_bif;
 567
 568         if (bpf_debug != 0)
 569                 printf("%s: %llx %s%s\n",
 570                     __func__, (uint64_t)VM_KERNEL_ADDRPERM(d),
 571                     if_name(ifp), closing ? " closing" : "");
 572
 573         /* Remove d from the interface's descriptor list. */
 574         p = &bp->bif_dlist;
 575         while (*p != d) {
 576                 p = &(*p)->bd_next;
 577                 if (*p == 0)
 578                         panic("bpf_detachd: descriptor not in list");
 579         }
 580         *p = (*p)->bd_next;
 581         if (bp->bif_dlist == 0) {
 582                 /*
 583                  * Let the driver know that there are no more listeners.
 584                  */
 585                 /* Only call dlil_set_bpf_tap for primary dlt */
 586                 if (bp->bif_ifp->if_bpf == bp)
 587                         dlil_set_bpf_tap(ifp, BPF_TAP_DISABLE, NULL);
 588                 if (bp->bif_tap)
 589                         bp->bif_tap(ifp, bp->bif_dlt, BPF_TAP_DISABLE);
 590
 591                 for (bp = bpf_iflist; bp; bp = bp->bif_next)
 592                         if (bp->bif_ifp == ifp && bp->bif_dlist != 0)
 593                                 break;
 594                 if (bp == NULL)
 595                         ifp->if_bpf = NULL;
 596         }
 597         d->bd_bif = NULL;
 598         /*
 599          * Check if this descriptor had requested promiscuous mode.
 600          * If so, turn it off.
 601          */
 602         if (d->bd_promisc) {
 603                 d->bd_promisc = 0;
 604                 lck_mtx_unlock(bpf_mlock);
 605                 if (ifnet_set_promiscuous(ifp, 0)) {
 606                         /*
 607                          * Something is really wrong if we were able to put
 608                          * the driver into promiscuous mode, but can't
 609                          * take it out.
 610                          * Most likely the network interface is gone.
 611                          */
 612                         printf("%s: ifnet_set_promiscuous failed\n", __func__);
 613                 }
 614                 lck_mtx_lock(bpf_mlock);
 615         }
 616
 617         /*
 618          * Wake up other thread that are waiting for this thread to finish
 619          * detaching
 620          */
 621         d->bd_flags &= ~BPF_DETACHING;
 622         d->bd_flags |= BPF_DETACHED;
 623         /*
 624          * Note that We've kept the reference because we may have dropped
 625          * the lock when turning off promiscuous mode
 626          */
 627         bpf_release_d(d);
 628
 629 done:
 630         /*
 631          * When closing makes sure no other thread refer to the bpf_d
 632          */
 633         if (bpf_debug != 0)
 634                 printf("%s: %llx done\n",
 635                     __func__, (uint64_t)VM_KERNEL_ADDRPERM(d));
 636         /*
 637          * Let the caller know the bpf_d is closed
 638          */
 639         if ((d->bd_flags & BPF_CLOSING))
 640                 return (1);
 641         else
 642                 return (0);
 643 }
 644
 645
 646 /*
 647  * Start asynchronous timer, if necessary.
 648  * Must be called with bpf_mlock held.
 649  */
 650 static void
 651 bpf_start_timer(struct bpf_d *d)
 652 {
 653         uint64_t deadline;
 654         struct timeval tv;
 655
 656         if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
 657                 tv.tv_sec = d->bd_rtout / hz;
 658                 tv.tv_usec = (d->bd_rtout % hz) * tick;
 659
 660                 clock_interval_to_deadline(
 661                     (uint64_t)tv.tv_sec * USEC_PER_SEC + tv.tv_usec,
 662                     NSEC_PER_USEC, &deadline);
 663                 /*
 664                  * The state is BPF_IDLE, so the timer hasn't
 665                  * been started yet, and hasn't gone off yet;
 666                  * there is no thread call scheduled, so this
 667                  * won't change the schedule.
 668                  *
 669                  * XXX - what if, by the time it gets entered,
 670                  * the deadline has already passed?
 671                  */
 672                 thread_call_enter_delayed(d->bd_thread_call, deadline);
 673                 d->bd_state = BPF_WAITING;
 674         }
 675 }
 676
 677 /*
 678  * Cancel asynchronous timer.
 679  * Must be called with bpf_mlock held.
 680  */
 681 static boolean_t
 682 bpf_stop_timer(struct bpf_d *d)
 683 {
 684         /*
 685          * If the timer has already gone off, this does nothing.
 686          * Our caller is expected to set d->bd_state to BPF_IDLE,
 687          * with the bpf_mlock, after we are called. bpf_timed_out()
 688          * also grabs bpf_mlock, so, if the timer has gone off and
 689          * bpf_timed_out() hasn't finished, it's waiting for the
 690          * lock; when this thread releases the lock, it will
 691          * find the state is BPF_IDLE, and just release the
 692          * lock and return.
 693          */
 694         return (thread_call_cancel(d->bd_thread_call));
 695 }
 696
 697 void
 698 bpf_acquire_d(struct bpf_d *d)
 699 {
 700         void *lr_saved =  __builtin_return_address(0);
 701
 702         lck_mtx_assert(bpf_mlock, LCK_MTX_ASSERT_OWNED);
 703
 704         d->bd_refcnt += 1;
 705
 706         d->bd_ref_lr[d->bd_next_ref_lr] = lr_saved;
 707         d->bd_next_ref_lr = (d->bd_next_ref_lr + 1) % BPF_REF_HIST;
 708 }
 709
 710 void
 711 bpf_release_d(struct bpf_d *d)
 712 {
 713         void *lr_saved =  __builtin_return_address(0);
 714
 715         lck_mtx_assert(bpf_mlock, LCK_MTX_ASSERT_OWNED);
 716
 717         if (d->bd_refcnt <= 0)
 718                 panic("%s: %p refcnt <= 0", __func__, d);
 719
 720         d->bd_refcnt -= 1;
 721
 722         d->bd_unref_lr[d->bd_next_unref_lr] = lr_saved;
 723         d->bd_next_unref_lr = (d->bd_next_unref_lr + 1) % BPF_REF_HIST;
 724
 725         if (d->bd_refcnt == 0) {
 726                 /* Assert the device is detached */
 727                 if ((d->bd_flags & BPF_DETACHED) == 0)
 728                         panic("%s: %p BPF_DETACHED not set", __func__, d);
 729
 730                 _FREE(d, M_DEVBUF);
 731         }
 732 }
 733
 734 /*
 735  * Open ethernet device.  Returns ENXIO for illegal minor device number,
 736  * EBUSY if file is open by another process.
 737  */
 738 /* ARGSUSED */
 739 int
 740 bpfopen(dev_t dev, int flags, __unused int fmt,
 741         __unused struct proc *p)
 742 {
 743         struct bpf_d *d;
 744
 745         lck_mtx_lock(bpf_mlock);
 746         if ((unsigned int) minor(dev) >= nbpfilter) {
 747                 lck_mtx_unlock(bpf_mlock);
 748                 return (ENXIO);
 749         }
 750         /*
 751          * New device nodes are created on demand when opening the last one.
 752          * The programming model is for processes to loop on the minor starting at 0
 753          * as long as EBUSY is returned. The loop stops when either the open succeeds or
 754          * an error other that EBUSY is returned. That means that bpf_make_dev_t() must
 755          * block all processes that are opening the last  node. If not all
 756          * processes are blocked, they could unexpectedly get ENOENT and abort their
 757          * opening loop.
 758          */
 759         if ((unsigned int) minor(dev) == (nbpfilter - 1))
 760                 bpf_make_dev_t(major(dev));
 761
 762         /*
 763          * Each minor can be opened by only one process.  If the requested
 764          * minor is in use, return EBUSY.
 765          *
 766          * Important: bpfopen() and bpfclose() have to check and set the status of a device
 767          * in the same lockin context otherwise the device may be leaked because the vnode use count
 768          * will be unpextectly greater than 1 when close() is called.
 769          */
 770         if (bpf_dtab[minor(dev)] == 0) {
 771                 bpf_dtab[minor(dev)] = (void *)1;       /* Mark opening */
 772         } else {
 773                 lck_mtx_unlock(bpf_mlock);
 774                 return (EBUSY);
 775         }
 776         d = (struct bpf_d *)_MALLOC(sizeof(struct bpf_d), M_DEVBUF,
 777             M_WAIT | M_ZERO);
 778         if (d == NULL) {
 779                 /* this really is a catastrophic failure */
 780                 printf("bpfopen: malloc bpf_d failed\n");
 781                 bpf_dtab[minor(dev)] = NULL;
 782                 lck_mtx_unlock(bpf_mlock);
 783                 return ENOMEM;
 784         }
 785
 786         /* Mark "in use" and do most initialization. */
 787         bpf_acquire_d(d);
 788         d->bd_bufsize = bpf_bufsize;
 789         d->bd_sig = SIGIO;
 790         d->bd_seesent = 1;
 791         d->bd_oflags = flags;
 792         d->bd_state = BPF_IDLE;
 793         d->bd_traffic_class = SO_TC_BE;
 794         d->bd_flags |= BPF_DETACHED;
 795         if (bpf_wantpktap)
 796                 d->bd_flags |= BPF_WANT_PKTAP;
 797         else
 798                 d->bd_flags &= ~BPF_WANT_PKTAP;
 799         d->bd_thread_call = thread_call_allocate(bpf_timed_out, d);
 800         if (d->bd_thread_call == NULL) {
 801                 printf("bpfopen: malloc thread call failed\n");
 802                 bpf_dtab[minor(dev)] = NULL;
 803                 bpf_release_d(d);
 804                 lck_mtx_unlock(bpf_mlock);
 805
 806                 return (ENOMEM);
 807         }
 808 #if CONFIG_MACF_NET
 809         mac_bpfdesc_label_init(d);
 810         mac_bpfdesc_label_associate(kauth_cred_get(), d);
 811 #endif
 812         bpf_dtab[minor(dev)] = d;                               /* Mark opened */
 813         lck_mtx_unlock(bpf_mlock);
 814
 815         return (0);
 816 }
 817
 818 /*
 819  * Close the descriptor by detaching it from its interface,
 820  * deallocating its buffers, and marking it free.
 821  */
 822 /* ARGSUSED */
 823 int
 824 bpfclose(dev_t dev, __unused int flags, __unused int fmt,
 825          __unused struct proc *p)
 826 {
 827         struct bpf_d *d;
 828
 829         /* Take BPF lock to ensure no other thread is using the device */
 830         lck_mtx_lock(bpf_mlock);
 831
 832         d = bpf_dtab[minor(dev)];
 833         if (d == 0 || d == (void *)1) {
 834                 lck_mtx_unlock(bpf_mlock);
 835                 return (ENXIO);
 836         }
 837
 838         /*
 839          * Other threads may call bpd_detachd() if we drop the bpf_mlock
 840          */
 841         d->bd_flags |= BPF_CLOSING;
 842
 843         if (bpf_debug != 0)
 844                 printf("%s: %llx\n",
 845                     __func__, (uint64_t)VM_KERNEL_ADDRPERM(d));
 846
 847         bpf_dtab[minor(dev)] = (void *)1;               /* Mark closing */
 848
 849         /*
 850          * Deal with any in-progress timeouts.
 851          */
 852         switch (d->bd_state) {
 853                 case BPF_IDLE:
 854                         /*
 855                          * Not waiting for a timeout, and no timeout happened.
 856                          */
 857                         break;
 858
 859                 case BPF_WAITING:
 860                         /*
 861                          * Waiting for a timeout.
 862                          * Cancel any timer that has yet to go off,
 863                          * and mark the state as "closing".
 864                          * Then drop the lock to allow any timers that
 865                          * *have* gone off to run to completion, and wait
 866                          * for them to finish.
 867                          */
 868                         if (!bpf_stop_timer(d)) {
 869                                 /*
 870                                  * There was no pending call, so the call must
 871                                  * have been in progress. Wait for the call to
 872                                  * complete; we have to drop the lock while
 873                                  * waiting. to let the in-progrss call complete
 874                                  */
 875                                 d->bd_state = BPF_DRAINING;
 876                                 while (d->bd_state == BPF_DRAINING)
 877                                         msleep((caddr_t)d, bpf_mlock, PRINET,
 878                                                         "bpfdraining", NULL);
 879                         }
 880                         d->bd_state = BPF_IDLE;
 881                         break;
 882
 883                 case BPF_TIMED_OUT:
 884                         /*
 885                          * Timer went off, and the timeout routine finished.
 886                          */
 887                         d->bd_state = BPF_IDLE;
 888                         break;
 889
 890                 case BPF_DRAINING:
 891                         /*
 892                          * Another thread is blocked on a close waiting for
 893                          * a timeout to finish.
 894                          * This "shouldn't happen", as the first thread to enter
 895                          * bpfclose() will set bpf_dtab[minor(dev)] to 1, and
 896                          * all subsequent threads should see that and fail with
 897                          * ENXIO.
 898                          */
 899                         panic("Two threads blocked in a BPF close");
 900                         break;
 901         }
 902
 903         if (d->bd_bif)
 904                 bpf_detachd(d, 1);
 905         selthreadclear(&d->bd_sel);
 906 #if CONFIG_MACF_NET
 907         mac_bpfdesc_label_destroy(d);
 908 #endif
 909         thread_call_free(d->bd_thread_call);
 910
 911         while (d->bd_hbuf_read)
 912                 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
 913
 914         bpf_freed(d);
 915
 916         /* Mark free in same context as bpfopen comes to check */
 917         bpf_dtab[minor(dev)] = NULL;                    /* Mark closed */
 918
 919         bpf_release_d(d);
 920
 921         lck_mtx_unlock(bpf_mlock);
 922
 923         return (0);
 924 }
 925
 926
 927 #define BPF_SLEEP bpf_sleep
 928
 929 static int
 930 bpf_sleep(struct bpf_d *d, int pri, const char *wmesg, int timo)
 931 {
 932         u_int64_t abstime = 0;
 933
 934         if(timo)
 935                 clock_interval_to_deadline(timo, NSEC_PER_SEC / hz, &abstime);
 936
 937         return msleep1((caddr_t)d, bpf_mlock, pri, wmesg, abstime);
 938 }
 939
 940 /*
 941  * Rotate the packet buffers in descriptor d.  Move the store buffer
 942  * into the hold slot, and the free buffer into the store slot.
 943  * Zero the length of the new store buffer.
 944  */
 945 #define ROTATE_BUFFERS(d) \
 946         if (d->bd_hbuf_read) \
 947                 panic("rotating bpf buffers during read"); \
 948         (d)->bd_hbuf = (d)->bd_sbuf; \
 949         (d)->bd_hlen = (d)->bd_slen; \
 950         (d)->bd_hcnt = (d)->bd_scnt; \
 951         (d)->bd_sbuf = (d)->bd_fbuf; \
 952         (d)->bd_slen = 0; \
 953         (d)->bd_scnt = 0; \
 954         (d)->bd_fbuf = NULL;
 955 /*
 956  *  bpfread - read next chunk of packets from buffers
 957  */
 958 int
 959 bpfread(dev_t dev, struct uio *uio, int ioflag)
 960 {
 961         struct bpf_d *d;
 962         caddr_t hbuf;
 963         int timed_out, hbuf_len;
 964         int error;
 965         int flags;
 966
 967         lck_mtx_lock(bpf_mlock);
 968
 969         d = bpf_dtab[minor(dev)];
 970         if (d == 0 || d == (void *)1 || (d->bd_flags & BPF_CLOSING) != 0) {
 971                 lck_mtx_unlock(bpf_mlock);
 972                 return (ENXIO);
 973         }
 974
 975         bpf_acquire_d(d);
 976
 977         /*
 978          * Restrict application to use a buffer the same size as
 979          * as kernel buffers.
 980          */
 981         if (uio_resid(uio) != d->bd_bufsize) {
 982                 bpf_release_d(d);
 983                 lck_mtx_unlock(bpf_mlock);
 984                 return (EINVAL);
 985         }
 986
 987         if (d->bd_state == BPF_WAITING)
 988                 bpf_stop_timer(d);
 989
 990         timed_out = (d->bd_state == BPF_TIMED_OUT);
 991         d->bd_state = BPF_IDLE;
 992
 993         while (d->bd_hbuf_read)
 994                 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
 995
 996         if ((d->bd_flags & BPF_CLOSING) != 0) {
 997                 bpf_release_d(d);
 998                 lck_mtx_unlock(bpf_mlock);
 999                 return (ENXIO);
1000         }
1001         /*
1002          * If the hold buffer is empty, then do a timed sleep, which
1003          * ends when the timeout expires or when enough packets
1004          * have arrived to fill the store buffer.
1005          */
1006         while (d->bd_hbuf == 0) {
1007                 if ((d->bd_immediate || timed_out || (ioflag & IO_NDELAY))
1008                         && d->bd_slen != 0) {
1009                         /*
1010                          * We're in immediate mode, or are reading
1011                          * in non-blocking mode, or a timer was
1012                          * started before the read (e.g., by select()
1013                          * or poll()) and has expired and a packet(s)
1014                          * either arrived since the previous
1015                          * read or arrived while we were asleep.
1016                          * Rotate the buffers and return what's here.
1017                          */
1018                         ROTATE_BUFFERS(d);
1019                         break;
1020                 }
1021
1022                 /*
1023                  * No data is available, check to see if the bpf device
1024                  * is still pointed at a real interface.  If not, return
1025                  * ENXIO so that the userland process knows to rebind
1026                  * it before using it again.
1027                  */
1028                 if (d->bd_bif == NULL) {
1029                         bpf_release_d(d);
1030                         lck_mtx_unlock(bpf_mlock);
1031                         return (ENXIO);
1032                 }
1033                 if (ioflag & IO_NDELAY) {
1034                         bpf_release_d(d);
1035                         lck_mtx_unlock(bpf_mlock);
1036                         return (EWOULDBLOCK);
1037                 }
1038                 error = BPF_SLEEP(d, PRINET|PCATCH, "bpf",
1039                                   d->bd_rtout);
1040                 /*
1041                  * Make sure device is still opened
1042                  */
1043                 if ((d->bd_flags & BPF_CLOSING) != 0) {
1044                         bpf_release_d(d);
1045                         lck_mtx_unlock(bpf_mlock);
1046                         return (ENXIO);
1047                 }
1048
1049                 while (d->bd_hbuf_read)
1050                         msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
1051
1052                 if ((d->bd_flags & BPF_CLOSING) != 0) {
1053                         bpf_release_d(d);
1054                         lck_mtx_unlock(bpf_mlock);
1055                         return (ENXIO);
1056                 }
1057
1058                 if (error == EINTR || error == ERESTART) {
1059                         if (d->bd_hbuf) {
1060                                 /*
1061                                  * Because we msleep, the hold buffer might
1062                                  * be filled when we wake up.  Avoid rotating
1063                                  * in this case.
1064                                  */
1065                                 break;
1066                         }
1067                         if (d->bd_slen) {
1068                                 /*
1069                                  * Sometimes we may be interrupted often and
1070                                  * the sleep above will not timeout.
1071                                  * Regardless, we should rotate the buffers
1072                                  * if there's any new data pending and
1073                                  * return it.
1074                                  */
1075                                 ROTATE_BUFFERS(d);
1076                                 break;
1077                         }
1078                         bpf_release_d(d);
1079                         lck_mtx_unlock(bpf_mlock);
1080                         return (error);
1081                 }
1082                 if (error == EWOULDBLOCK) {
1083                         /*
1084                          * On a timeout, return what's in the buffer,
1085                          * which may be nothing.  If there is something
1086                          * in the store buffer, we can rotate the buffers.
1087                          */
1088                         if (d->bd_hbuf)
1089                                 /*
1090                                  * We filled up the buffer in between
1091                                  * getting the timeout and arriving
1092                                  * here, so we don't need to rotate.
1093                                  */
1094                                 break;
1095
1096                         if (d->bd_slen == 0) {
1097                                 bpf_release_d(d);
1098                                 lck_mtx_unlock(bpf_mlock);
1099                                 return (0);
1100                         }
1101                         ROTATE_BUFFERS(d);
1102                         break;
1103                 }
1104         }
1105         /*
1106          * At this point, we know we have something in the hold slot.
1107          */
1108
1109         /*
1110          * Set the hold buffer read. So we do not
1111          * rotate the buffers until the hold buffer
1112          * read is complete. Also to avoid issues resulting
1113          * from page faults during disk sleep (<rdar://problem/13436396>).
1114          */
1115         d->bd_hbuf_read = 1;
1116         hbuf = d->bd_hbuf;
1117         hbuf_len = d->bd_hlen;
1118         flags = d->bd_flags;
1119         lck_mtx_unlock(bpf_mlock);
1120
1121 #ifdef __APPLE__
1122         /*
1123          * Before we move data to userland, we fill out the extended
1124          * header fields.
1125          */
1126         if (flags & BPF_EXTENDED_HDR) {
1127                 char *p;
1128
1129                 p = hbuf;
1130                 while (p < hbuf + hbuf_len) {
1131                         struct bpf_hdr_ext *ehp;
1132                         uint32_t flowid;
1133                         struct so_procinfo soprocinfo;
1134                         int found = 0;
1135
1136                         ehp = (struct bpf_hdr_ext *)(void *)p;
1137                         if ((flowid = ehp->bh_flowid)) {
1138                                 if (ehp->bh_proto == IPPROTO_TCP)
1139                                         found = inp_findinpcb_procinfo(&tcbinfo,
1140                                             flowid, &soprocinfo);
1141                                 else if (ehp->bh_proto == IPPROTO_UDP)
1142                                         found = inp_findinpcb_procinfo(&udbinfo,
1143                                             flowid, &soprocinfo);
1144                                 if (found == 1) {
1145                                         ehp->bh_pid = soprocinfo.spi_pid;
1146                                         proc_name(ehp->bh_pid, ehp->bh_comm, MAXCOMLEN);
1147                                 }
1148                                 ehp->bh_flowid = 0;
1149                         }
1150                         if (flags & BPF_FINALIZE_PKTAP) {
1151                                 struct pktap_header *pktaphdr;
1152
1153                                 pktaphdr = (struct pktap_header *)(void *)
1154                                     (p + BPF_WORDALIGN(ehp->bh_hdrlen));
1155
1156                                 if (pktaphdr->pth_flags & PTH_FLAG_DELAY_PKTAP)
1157                                         pktap_finalize_proc_info(pktaphdr);
1158
1159                                 if (pktaphdr->pth_flags & PTH_FLAG_TSTAMP) {
1160                                         ehp->bh_tstamp.tv_sec =
1161                                                 pktaphdr->pth_tstamp.tv_sec;
1162                                         ehp->bh_tstamp.tv_usec =
1163                                                 pktaphdr->pth_tstamp.tv_usec;
1164                                 }
1165                         }
1166                         p += BPF_WORDALIGN(ehp->bh_hdrlen + ehp->bh_caplen);
1167                 }
1168         } else if (flags & BPF_FINALIZE_PKTAP) {
1169                 char *p;
1170
1171                 p = hbuf;
1172                 while (p < hbuf + hbuf_len) {
1173                         struct bpf_hdr *hp;
1174                         struct pktap_header *pktaphdr;
1175
1176                         hp = (struct bpf_hdr *)(void *)p;
1177                         pktaphdr = (struct pktap_header *)(void *)
1178                             (p + BPF_WORDALIGN(hp->bh_hdrlen));
1179
1180                         if (pktaphdr->pth_flags & PTH_FLAG_DELAY_PKTAP)
1181                                 pktap_finalize_proc_info(pktaphdr);
1182
1183                         if (pktaphdr->pth_flags & PTH_FLAG_TSTAMP) {
1184                                 hp->bh_tstamp.tv_sec =
1185                                         pktaphdr->pth_tstamp.tv_sec;
1186                                 hp->bh_tstamp.tv_usec =
1187                                         pktaphdr->pth_tstamp.tv_usec;
1188                         }
1189
1190                         p += BPF_WORDALIGN(hp->bh_hdrlen + hp->bh_caplen);
1191                 }
1192         }
1193 #endif
1194
1195         /*
1196          * Move data from hold buffer into user space.
1197          * We know the entire buffer is transferred since
1198          * we checked above that the read buffer is bpf_bufsize bytes.
1199          */
1200         error = UIOMOVE(hbuf, hbuf_len, UIO_READ, uio);
1201
1202         lck_mtx_lock(bpf_mlock);
1203         /*
1204          * Make sure device is still opened
1205          */
1206         if ((d->bd_flags & BPF_CLOSING) != 0) {
1207                 bpf_release_d(d);
1208                 lck_mtx_unlock(bpf_mlock);
1209                 return (ENXIO);
1210         }
1211
1212         d->bd_hbuf_read = 0;
1213         d->bd_fbuf = d->bd_hbuf;
1214         d->bd_hbuf = NULL;
1215         d->bd_hlen = 0;
1216         d->bd_hcnt = 0;
1217         wakeup((caddr_t)d);
1218
1219         bpf_release_d(d);
1220         lck_mtx_unlock(bpf_mlock);
1221         return (error);
1222
1223 }
1224
1225
1226 /*
1227  * If there are processes sleeping on this descriptor, wake them up.
1228  */
1229 static void
1230 bpf_wakeup(struct bpf_d *d)
1231 {
1232         if (d->bd_state == BPF_WAITING) {
1233                 bpf_stop_timer(d);
1234                 d->bd_state = BPF_IDLE;
1235         }
1236         wakeup((caddr_t)d);
1237         if (d->bd_async && d->bd_sig && d->bd_sigio)
1238                 pgsigio(d->bd_sigio, d->bd_sig);
1239
1240         selwakeup(&d->bd_sel);
1241         if ((d->bd_flags & BPF_KNOTE))
1242                 KNOTE(&d->bd_sel.si_note, 1);
1243 }
1244
1245
1246 static void
1247 bpf_timed_out(void *arg, __unused void *dummy)
1248 {
1249         struct bpf_d *d = (struct bpf_d *)arg;
1250
1251         lck_mtx_lock(bpf_mlock);
1252         if (d->bd_state == BPF_WAITING) {
1253                 /*
1254                  * There's a select or kqueue waiting for this; if there's
1255                  * now stuff to read, wake it up.
1256                  */
1257                 d->bd_state = BPF_TIMED_OUT;
1258                 if (d->bd_slen != 0)
1259                         bpf_wakeup(d);
1260         } else if (d->bd_state == BPF_DRAINING) {
1261                 /*
1262                  * A close is waiting for this to finish.
1263                  * Mark it as finished, and wake the close up.
1264                  */
1265                 d->bd_state = BPF_IDLE;
1266                 bpf_wakeup(d);
1267         }
1268         lck_mtx_unlock(bpf_mlock);
1269 }
1270
1271
1272
1273
1274
1275 /* keep in sync with bpf_movein above: */
1276 #define MAX_DATALINK_HDR_LEN    (sizeof(struct firewire_header))
1277
1278 int
1279 bpfwrite(dev_t dev, struct uio *uio, __unused int ioflag)
1280 {
1281         struct bpf_d *d;
1282         struct ifnet *ifp;
1283         struct mbuf *m = NULL;
1284         int error;
1285         char              dst_buf[SOCKADDR_HDR_LEN + MAX_DATALINK_HDR_LEN];
1286         int datlen = 0;
1287         int bif_dlt;
1288         int bd_hdrcmplt;
1289
1290         lck_mtx_lock(bpf_mlock);
1291
1292         d = bpf_dtab[minor(dev)];
1293         if (d == 0 || d == (void *)1 || (d->bd_flags & BPF_CLOSING) != 0) {
1294                 lck_mtx_unlock(bpf_mlock);
1295                 return (ENXIO);
1296         }
1297
1298         bpf_acquire_d(d);
1299
1300         if (d->bd_bif == 0) {
1301                 bpf_release_d(d);
1302                 lck_mtx_unlock(bpf_mlock);
1303                 return (ENXIO);
1304         }
1305
1306         ifp = d->bd_bif->bif_ifp;
1307
1308         if ((ifp->if_flags & IFF_UP) == 0) {
1309                 bpf_release_d(d);
1310                 lck_mtx_unlock(bpf_mlock);
1311                 return (ENETDOWN);
1312         }
1313         if (uio_resid(uio) == 0) {
1314                 bpf_release_d(d);
1315                 lck_mtx_unlock(bpf_mlock);
1316                 return (0);
1317         }
1318         ((struct sockaddr *)dst_buf)->sa_len = sizeof(dst_buf);
1319
1320         /*
1321          * fix for PR-6849527
1322          * geting variables onto stack before dropping lock for bpf_movein()
1323          */
1324         bif_dlt = (int)d->bd_bif->bif_dlt;
1325         bd_hdrcmplt  = d->bd_hdrcmplt;
1326
1327         /* bpf_movein allocating mbufs; drop lock */
1328         lck_mtx_unlock(bpf_mlock);
1329
1330         error = bpf_movein(uio, bif_dlt, &m,
1331         bd_hdrcmplt ? NULL : (struct sockaddr *)dst_buf,
1332         &datlen);
1333
1334         /* take the lock again */
1335         lck_mtx_lock(bpf_mlock);
1336         if (error) {
1337                 bpf_release_d(d);
1338                 lck_mtx_unlock(bpf_mlock);
1339                 return (error);
1340         }
1341
1342         /* verify the device is still open */
1343         if ((d->bd_flags & BPF_CLOSING) != 0) {
1344                 bpf_release_d(d);
1345                 lck_mtx_unlock(bpf_mlock);
1346                 m_freem(m);
1347                 return (ENXIO);
1348         }
1349
1350         if (d->bd_bif == NULL) {
1351                 bpf_release_d(d);
1352                 lck_mtx_unlock(bpf_mlock);
1353                 m_free(m);
1354                 return (ENXIO);
1355         }
1356
1357         if ((unsigned)datlen > ifp->if_mtu) {
1358                 bpf_release_d(d);
1359                 lck_mtx_unlock(bpf_mlock);
1360                 m_freem(m);
1361                 return (EMSGSIZE);
1362         }
1363
1364
1365 #if CONFIG_MACF_NET
1366         mac_mbuf_label_associate_bpfdesc(d, m);
1367 #endif
1368
1369         bpf_set_packet_service_class(m, d->bd_traffic_class);
1370
1371         lck_mtx_unlock(bpf_mlock);
1372
1373         /*
1374          * The driver frees the mbuf.
1375          */
1376         if (d->bd_hdrcmplt) {
1377                 if (d->bd_bif->bif_send)
1378                         error = d->bd_bif->bif_send(ifp, d->bd_bif->bif_dlt, m);
1379                 else
1380                         error = dlil_output(ifp, 0, m, NULL, NULL, 1, NULL);
1381         } else {
1382                 error = dlil_output(ifp, PF_INET, m, NULL,
1383                     (struct sockaddr *)dst_buf, 0, NULL);
1384         }
1385
1386         lck_mtx_lock(bpf_mlock);
1387         bpf_release_d(d);
1388         lck_mtx_unlock(bpf_mlock);
1389
1390         return (error);
1391 }
1392
1393 /*
1394  * Reset a descriptor by flushing its packet buffer and clearing the
1395  * receive and drop counts.
1396  */
1397 static void
1398 reset_d(struct bpf_d *d)
1399 {
1400         if (d->bd_hbuf_read)
1401                 panic("resetting buffers during read");
1402
1403         if (d->bd_hbuf) {
1404                 /* Free the hold buffer. */
1405                 d->bd_fbuf = d->bd_hbuf;
1406                 d->bd_hbuf = NULL;
1407         }
1408         d->bd_slen = 0;
1409         d->bd_hlen = 0;
1410         d->bd_scnt = 0;
1411         d->bd_hcnt = 0;
1412         d->bd_rcount = 0;
1413         d->bd_dcount = 0;
1414 }
1415
1416 /*
1417  *  FIONREAD            Check for read packet available.
1418  *  SIOCGIFADDR         Get interface address - convenient hook to driver.
1419  *  BIOCGBLEN           Get buffer len [for read()].
1420  *  BIOCSETF            Set ethernet read filter.
1421  *  BIOCFLUSH           Flush read packet buffer.
1422  *  BIOCPROMISC         Put interface into promiscuous mode.
1423  *  BIOCGDLT            Get link layer type.
1424  *  BIOCGETIF           Get interface name.
1425  *  BIOCSETIF           Set interface.
1426  *  BIOCSRTIMEOUT       Set read timeout.
1427  *  BIOCGRTIMEOUT       Get read timeout.
1428  *  BIOCGSTATS          Get packet stats.
1429  *  BIOCIMMEDIATE       Set immediate mode.
1430  *  BIOCVERSION         Get filter language version.
1431  *  BIOCGHDRCMPLT       Get "header already complete" flag
1432  *  BIOCSHDRCMPLT       Set "header already complete" flag
1433  *  BIOCGSEESENT        Get "see packets sent" flag
1434  *  BIOCSSEESENT        Set "see packets sent" flag
1435  *  BIOCSETTC           Set traffic class.
1436  *  BIOCGETTC           Get traffic class.
1437  *  BIOCSEXTHDR         Set "extended header" flag
1438  *  BIOCSHEADDROP       Drop head of the buffer if user is not reading
1439  *  BIOCGHEADDROP       Get "head-drop" flag
1440  */
1441 /* ARGSUSED */
1442 int
1443 bpfioctl(dev_t dev, u_long cmd, caddr_t addr, __unused int flags,
1444     struct proc *p)
1445 {
1446         struct bpf_d *d;
1447         int error = 0;
1448         u_int int_arg;
1449         struct ifreq ifr;
1450
1451         lck_mtx_lock(bpf_mlock);
1452
1453         d = bpf_dtab[minor(dev)];
1454         if (d == 0 || d == (void *)1 || (d->bd_flags & BPF_CLOSING) != 0) {
1455                 lck_mtx_unlock(bpf_mlock);
1456                 return (ENXIO);
1457         }
1458
1459         bpf_acquire_d(d);
1460
1461         if (d->bd_state == BPF_WAITING)
1462                 bpf_stop_timer(d);
1463         d->bd_state = BPF_IDLE;
1464
1465         switch (cmd) {
1466
1467         default:
1468                 error = EINVAL;
1469                 break;
1470
1471         /*
1472          * Check for read packet available.
1473          */
1474         case FIONREAD:                  /* int */
1475                 {
1476                         int n;
1477
1478                         n = d->bd_slen;
1479                         if (d->bd_hbuf && d->bd_hbuf_read == 0)
1480                                 n += d->bd_hlen;
1481
1482                         bcopy(&n, addr, sizeof (n));
1483                         break;
1484                 }
1485
1486         case SIOCGIFADDR:               /* struct ifreq */
1487                 {
1488                         struct ifnet *ifp;
1489
1490                         if (d->bd_bif == 0)
1491                                 error = EINVAL;
1492                         else {
1493                                 ifp = d->bd_bif->bif_ifp;
1494                                 error = ifnet_ioctl(ifp, 0, cmd, addr);
1495                         }
1496                         break;
1497                 }
1498
1499         /*
1500          * Get buffer len [for read()].
1501          */
1502         case BIOCGBLEN:                 /* u_int */
1503                 bcopy(&d->bd_bufsize, addr, sizeof (u_int));
1504                 break;
1505
1506         /*
1507          * Set buffer length.
1508          */
1509         case BIOCSBLEN:                 /* u_int */
1510                 if (d->bd_bif != 0)
1511                         error = EINVAL;
1512                 else {
1513                         u_int size;
1514
1515                         bcopy(addr, &size, sizeof (size));
1516
1517                         /*
1518                          * Allow larger buffer in head drop mode with the
1519                          * assumption the capture is in standby mode to
1520                          * keep a cache of recent traffic
1521                          */
1522                         if (d->bd_headdrop != 0 && size > 2 * bpf_maxbufsize)
1523                                 size = 2 * bpf_maxbufsize;
1524                         else if (size > bpf_maxbufsize)
1525                                 size = bpf_maxbufsize;
1526                         else if (size < BPF_MINBUFSIZE)
1527                                 size = BPF_MINBUFSIZE;
1528                         bcopy(&size, addr, sizeof (size));
1529                         d->bd_bufsize = size;
1530                 }
1531                 break;
1532
1533         /*
1534          * Set link layer read filter.
1535          */
1536         case BIOCSETF32:
1537         case BIOCSETFNR32: {            /* struct bpf_program32 */
1538                 struct bpf_program32 prg32;
1539
1540                 bcopy(addr, &prg32, sizeof (prg32));
1541                 error = bpf_setf(d, prg32.bf_len,
1542                     CAST_USER_ADDR_T(prg32.bf_insns), cmd);
1543                 break;
1544         }
1545
1546         case BIOCSETF64:
1547         case BIOCSETFNR64: {            /* struct bpf_program64 */
1548                 struct bpf_program64 prg64;
1549
1550                 bcopy(addr, &prg64, sizeof (prg64));
1551                 error = bpf_setf(d, prg64.bf_len, prg64.bf_insns, cmd);
1552                 break;
1553         }
1554
1555         /*
1556          * Flush read packet buffer.
1557          */
1558         case BIOCFLUSH:
1559                 while (d->bd_hbuf_read) {
1560                         msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
1561                 }
1562                 if ((d->bd_flags & BPF_CLOSING) != 0) {
1563                         error = ENXIO;
1564                         break;
1565                 }
1566                 reset_d(d);
1567                 break;
1568
1569         /*
1570          * Put interface into promiscuous mode.
1571          */
1572         case BIOCPROMISC:
1573                 if (d->bd_bif == 0) {
1574                         /*
1575                          * No interface attached yet.
1576                          */
1577                         error = EINVAL;
1578                         break;
1579                 }
1580                 if (d->bd_promisc == 0) {
1581                         lck_mtx_unlock(bpf_mlock);
1582                         error = ifnet_set_promiscuous(d->bd_bif->bif_ifp, 1);
1583                         lck_mtx_lock(bpf_mlock);
1584                         if (error == 0)
1585                                 d->bd_promisc = 1;
1586                 }
1587                 break;
1588
1589         /*
1590          * Get device parameters.
1591          */
1592         case BIOCGDLT:                  /* u_int */
1593                 if (d->bd_bif == 0)
1594                         error = EINVAL;
1595                 else
1596                         bcopy(&d->bd_bif->bif_dlt, addr, sizeof (u_int));
1597                 break;
1598
1599         /*
1600          * Get a list of supported data link types.
1601          */
1602         case BIOCGDLTLIST:              /* struct bpf_dltlist */
1603                 if (d->bd_bif == NULL) {
1604                         error = EINVAL;
1605                 } else {
1606                         error = bpf_getdltlist(d, addr, p);
1607                 }
1608                 break;
1609
1610         /*
1611          * Set data link type.
1612          */
1613         case BIOCSDLT:                  /* u_int */
1614                 if (d->bd_bif == NULL) {
1615                         error = EINVAL;
1616                 } else {
1617                         u_int dlt;
1618
1619                         bcopy(addr, &dlt, sizeof (dlt));
1620                         error = bpf_setdlt(d, dlt);
1621                 }
1622                 break;
1623
1624         /*
1625          * Get interface name.
1626          */
1627         case BIOCGETIF:                 /* struct ifreq */
1628                 if (d->bd_bif == 0)
1629                         error = EINVAL;
1630                 else {
1631                         struct ifnet *const ifp = d->bd_bif->bif_ifp;
1632
1633                         snprintf(((struct ifreq *)(void *)addr)->ifr_name,
1634                             sizeof (ifr.ifr_name), "%s", if_name(ifp));
1635                 }
1636                 break;
1637
1638         /*
1639          * Set interface.
1640          */
1641         case BIOCSETIF: {               /* struct ifreq */
1642                 ifnet_t ifp;
1643
1644                 bcopy(addr, &ifr, sizeof (ifr));
1645                 ifr.ifr_name[IFNAMSIZ - 1] = '\0';
1646                 ifp = ifunit(ifr.ifr_name);
1647                 if (ifp == NULL)
1648                         error = ENXIO;
1649                 else
1650                         error = bpf_setif(d, ifp, 0);
1651                 break;
1652         }
1653
1654         /*
1655          * Set read timeout.
1656          */
1657         case BIOCSRTIMEOUT32: {         /* struct user32_timeval */
1658                 struct user32_timeval _tv;
1659                 struct timeval tv;
1660
1661                 bcopy(addr, &_tv, sizeof (_tv));
1662                 tv.tv_sec  = _tv.tv_sec;
1663                 tv.tv_usec = _tv.tv_usec;
1664
1665                 /*
1666                  * Subtract 1 tick from tvtohz() since this isn't
1667                  * a one-shot timer.
1668                  */
1669                 if ((error = itimerfix(&tv)) == 0)
1670                         d->bd_rtout = tvtohz(&tv) - 1;
1671                 break;
1672         }
1673
1674         case BIOCSRTIMEOUT64: {         /* struct user64_timeval */
1675                 struct user64_timeval _tv;
1676                 struct timeval tv;
1677
1678                 bcopy(addr, &_tv, sizeof (_tv));
1679                 tv.tv_sec  = _tv.tv_sec;
1680                 tv.tv_usec = _tv.tv_usec;
1681
1682                 /*
1683                  * Subtract 1 tick from tvtohz() since this isn't
1684                  * a one-shot timer.
1685                  */
1686                 if ((error = itimerfix(&tv)) == 0)
1687                         d->bd_rtout = tvtohz(&tv) - 1;
1688                 break;
1689         }
1690
1691         /*
1692          * Get read timeout.
1693          */
1694         case BIOCGRTIMEOUT32: {         /* struct user32_timeval */
1695                 struct user32_timeval tv;
1696
1697                 bzero(&tv, sizeof (tv));
1698                 tv.tv_sec = d->bd_rtout / hz;
1699                 tv.tv_usec = (d->bd_rtout % hz) * tick;
1700                 bcopy(&tv, addr, sizeof (tv));
1701                 break;
1702         }
1703
1704         case BIOCGRTIMEOUT64: {         /* struct user64_timeval */
1705                 struct user64_timeval tv;
1706
1707                 bzero(&tv, sizeof (tv));
1708                 tv.tv_sec = d->bd_rtout / hz;
1709                 tv.tv_usec = (d->bd_rtout % hz) * tick;
1710                 bcopy(&tv, addr, sizeof (tv));
1711                 break;
1712         }
1713
1714         /*
1715          * Get packet stats.
1716          */
1717         case BIOCGSTATS: {              /* struct bpf_stat */
1718                 struct bpf_stat bs;
1719
1720                 bzero(&bs, sizeof (bs));
1721                 bs.bs_recv = d->bd_rcount;
1722                 bs.bs_drop = d->bd_dcount;
1723                 bcopy(&bs, addr, sizeof (bs));
1724                 break;
1725         }
1726
1727         /*
1728          * Set immediate mode.
1729          */
1730         case BIOCIMMEDIATE:             /* u_int */
1731                 d->bd_immediate = *(u_int *)(void *)addr;
1732                 break;
1733
1734         case BIOCVERSION: {             /* struct bpf_version */
1735                 struct bpf_version bv;
1736
1737                 bzero(&bv, sizeof (bv));
1738                 bv.bv_major = BPF_MAJOR_VERSION;
1739                 bv.bv_minor = BPF_MINOR_VERSION;
1740                 bcopy(&bv, addr, sizeof (bv));
1741                 break;
1742         }
1743
1744         /*
1745          * Get "header already complete" flag
1746          */
1747         case BIOCGHDRCMPLT:             /* u_int */
1748                 bcopy(&d->bd_hdrcmplt, addr, sizeof (u_int));
1749                 break;
1750
1751         /*
1752          * Set "header already complete" flag
1753          */
1754         case BIOCSHDRCMPLT:             /* u_int */
1755                 bcopy(addr, &int_arg, sizeof (int_arg));
1756                 d->bd_hdrcmplt = int_arg ? 1 : 0;
1757                 break;
1758
1759         /*
1760          * Get "see sent packets" flag
1761          */
1762         case BIOCGSEESENT:              /* u_int */
1763                 bcopy(&d->bd_seesent, addr, sizeof (u_int));
1764                 break;
1765
1766         /*
1767          * Set "see sent packets" flag
1768          */
1769         case BIOCSSEESENT:              /* u_int */
1770                 bcopy(addr, &d->bd_seesent, sizeof (u_int));
1771                 break;
1772
1773         /*
1774          * Set traffic service class
1775          */
1776         case BIOCSETTC: {               /* int */
1777                 int tc;
1778
1779                 bcopy(addr, &tc, sizeof (int));
1780                 error = bpf_set_traffic_class(d, tc);
1781                 break;
1782         }
1783
1784         /*
1785          * Get traffic service class
1786          */
1787         case BIOCGETTC:                 /* int */
1788                 bcopy(&d->bd_traffic_class, addr, sizeof (int));
1789                 break;
1790
1791         case FIONBIO:           /* Non-blocking I/O; int */
1792                 break;
1793
1794         case FIOASYNC:          /* Send signal on receive packets; int */
1795                 bcopy(addr, &d->bd_async, sizeof (int));
1796                 break;
1797 #ifndef __APPLE__
1798         case FIOSETOWN:
1799                 error = fsetown(*(int *)addr, &d->bd_sigio);
1800                 break;
1801
1802         case FIOGETOWN:
1803                 *(int *)addr = fgetown(d->bd_sigio);
1804                 break;
1805
1806         /* This is deprecated, FIOSETOWN should be used instead. */
1807         case TIOCSPGRP:
1808                 error = fsetown(-(*(int *)addr), &d->bd_sigio);
1809                 break;
1810
1811         /* This is deprecated, FIOGETOWN should be used instead. */
1812         case TIOCGPGRP:
1813                 *(int *)addr = -fgetown(d->bd_sigio);
1814                 break;
1815 #endif
1816         case BIOCSRSIG: {       /* Set receive signal; u_int */
1817                 u_int sig;
1818
1819                 bcopy(addr, &sig, sizeof (u_int));
1820
1821                 if (sig >= NSIG)
1822                         error = EINVAL;
1823                 else
1824                         d->bd_sig = sig;
1825                 break;
1826         }
1827         case BIOCGRSIG:                 /* u_int */
1828                 bcopy(&d->bd_sig, addr, sizeof (u_int));
1829                 break;
1830 #ifdef __APPLE__
1831         case BIOCSEXTHDR:               /* u_int */
1832                 bcopy(addr, &int_arg, sizeof (int_arg));
1833                 if (int_arg)
1834                         d->bd_flags |= BPF_EXTENDED_HDR;
1835                 else
1836                         d->bd_flags &= ~BPF_EXTENDED_HDR;
1837                 break;
1838
1839         case BIOCGIFATTACHCOUNT: {              /* struct ifreq */
1840                 ifnet_t ifp;
1841                 struct bpf_if *bp;
1842
1843                 bcopy(addr, &ifr, sizeof (ifr));
1844                 ifr.ifr_name[IFNAMSIZ - 1] = '\0';
1845                 ifp = ifunit(ifr.ifr_name);
1846                 if (ifp == NULL) {
1847                         error = ENXIO;
1848                         break;
1849                 }
1850                 ifr.ifr_intval = 0;
1851                 for (bp = bpf_iflist; bp != 0; bp = bp->bif_next) {
1852                         struct bpf_d *bpf_d;
1853
1854                         if (bp->bif_ifp == NULL || bp->bif_ifp != ifp)
1855                                 continue;
1856                         for (bpf_d = bp->bif_dlist; bpf_d; bpf_d = bpf_d->bd_next) {
1857                                 ifr.ifr_intval += 1;
1858                         }
1859                 }
1860                 bcopy(&ifr, addr, sizeof (ifr));
1861                 break;
1862         }
1863         case BIOCGWANTPKTAP:                    /* u_int */
1864                 int_arg = d->bd_flags & BPF_WANT_PKTAP ? 1 : 0;
1865                 bcopy(&int_arg, addr, sizeof (int_arg));
1866                 break;
1867
1868         case BIOCSWANTPKTAP:                    /* u_int */
1869                 bcopy(addr, &int_arg, sizeof (int_arg));
1870                 if (int_arg)
1871                         d->bd_flags |= BPF_WANT_PKTAP;
1872                 else
1873                         d->bd_flags &= ~BPF_WANT_PKTAP;
1874                 break;
1875 #endif
1876
1877         case BIOCSHEADDROP:
1878                 bcopy(addr, &int_arg, sizeof (int_arg));
1879                 d->bd_headdrop = int_arg ? 1 : 0;
1880                 break;
1881
1882         case BIOCGHEADDROP:
1883                 bcopy(&d->bd_headdrop, addr, sizeof (int));
1884                 break;
1885         }
1886
1887         bpf_release_d(d);
1888         lck_mtx_unlock(bpf_mlock);
1889
1890         return (error);
1891 }
1892
1893 /*
1894  * Set d's packet filter program to fp.  If this file already has a filter,
1895  * free it and replace it.  Returns EINVAL for bogus requests.
1896  */
1897 static int
1898 bpf_setf(struct bpf_d *d, u_int bf_len, user_addr_t bf_insns,
1899     u_long cmd)
1900 {
1901         struct bpf_insn *fcode, *old;
1902         u_int flen, size;
1903
1904         while (d->bd_hbuf_read)
1905                 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
1906
1907         if ((d->bd_flags & BPF_CLOSING) != 0)
1908                 return (ENXIO);
1909
1910         old = d->bd_filter;
1911         if (bf_insns == USER_ADDR_NULL) {
1912                 if (bf_len != 0)
1913                         return (EINVAL);
1914                 d->bd_filter = NULL;
1915                 reset_d(d);
1916                 if (old != 0)
1917                         FREE((caddr_t)old, M_DEVBUF);
1918                 return (0);
1919         }
1920         flen = bf_len;
1921         if (flen > BPF_MAXINSNS)
1922                 return (EINVAL);
1923
1924         size = flen * sizeof(struct bpf_insn);
1925         fcode = (struct bpf_insn *) _MALLOC(size, M_DEVBUF, M_WAIT);
1926 #ifdef __APPLE__
1927         if (fcode == NULL)
1928                 return (ENOBUFS);
1929 #endif
1930         if (copyin(bf_insns, (caddr_t)fcode, size) == 0 &&
1931             bpf_validate(fcode, (int)flen)) {
1932                 d->bd_filter = fcode;
1933
1934                 if (cmd == BIOCSETF32 || cmd == BIOCSETF64)
1935                         reset_d(d);
1936
1937                 if (old != 0)
1938                         FREE((caddr_t)old, M_DEVBUF);
1939
1940                 return (0);
1941         }
1942         FREE((caddr_t)fcode, M_DEVBUF);
1943         return (EINVAL);
1944 }
1945
1946 /*
1947  * Detach a file from its current interface (if attached at all) and attach
1948  * to the interface indicated by the name stored in ifr.
1949  * Return an errno or 0.
1950  */
1951 static int
1952 bpf_setif(struct bpf_d *d, ifnet_t theywant, u_int32_t dlt)
1953 {
1954         struct bpf_if *bp;
1955         int error;
1956
1957         while (d->bd_hbuf_read)
1958                 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
1959
1960         if ((d->bd_flags & BPF_CLOSING) != 0)
1961                 return (ENXIO);
1962
1963         /*
1964          * Look through attached interfaces for the named one.
1965          */
1966         for (bp = bpf_iflist; bp != 0; bp = bp->bif_next) {
1967                 struct ifnet *ifp = bp->bif_ifp;
1968
1969                 if (ifp == 0 || ifp != theywant || (dlt != 0 && dlt != bp->bif_dlt))
1970                         continue;
1971                 /*
1972                  * If the process knows how to deal with DLT_PKTAP, use it
1973                  * by default
1974                  */
1975                 if (dlt == 0 && bp->bif_dlt == DLT_PKTAP &&
1976                         !(d->bd_flags & BPF_WANT_PKTAP))
1977                         continue;
1978                 /*
1979                  * We found the requested interface.
1980                  * Allocate the packet buffers.
1981                  */
1982                 error = bpf_allocbufs(d);
1983                 if (error != 0)
1984                         return (error);
1985                 /*
1986                  * Detach if attached to something else.
1987                  */
1988                 if (bp != d->bd_bif) {
1989                         if (d->bd_bif != NULL) {
1990                                 if (bpf_detachd(d, 0) != 0)
1991                                         return (ENXIO);
1992                         }
1993                         if (bpf_attachd(d, bp) != 0)
1994                                 return (ENXIO);
1995                 }
1996                 reset_d(d);
1997                 return (0);
1998         }
1999         /* Not found. */
2000         return (ENXIO);
2001 }
2002
2003
2004
2005 /*
2006  * Get a list of available data link type of the interface.
2007  */
2008 static int
2009 bpf_getdltlist(struct bpf_d *d, caddr_t addr, struct proc *p)
2010 {
2011         u_int           n;
2012         int             error;
2013         struct ifnet    *ifp;
2014         struct bpf_if   *bp;
2015         user_addr_t     dlist;
2016         struct bpf_dltlist bfl;
2017
2018         bcopy(addr, &bfl, sizeof (bfl));
2019         if (proc_is64bit(p)) {
2020                 dlist = (user_addr_t)bfl.bfl_u.bflu_pad;
2021         } else {
2022                 dlist = CAST_USER_ADDR_T(bfl.bfl_u.bflu_list);
2023         }
2024
2025         ifp = d->bd_bif->bif_ifp;
2026         n = 0;
2027         error = 0;
2028
2029         for (bp = bpf_iflist; bp; bp = bp->bif_next) {
2030                 if (bp->bif_ifp != ifp)
2031                         continue;
2032                 /*
2033                  * Return DLT_PKTAP only to processes that know how to handle it
2034                  */
2035                 if (bp->bif_dlt == DLT_PKTAP && !(d->bd_flags & BPF_WANT_PKTAP))
2036                         continue;
2037                 if (dlist != USER_ADDR_NULL) {
2038                         if (n >= bfl.bfl_len) {
2039                                 return (ENOMEM);
2040                         }
2041                         error = copyout(&bp->bif_dlt, dlist,
2042                             sizeof (bp->bif_dlt));
2043                         if (error != 0)
2044                                 break;
2045                         dlist += sizeof (bp->bif_dlt);
2046                 }
2047                 n++;
2048         }
2049         bfl.bfl_len = n;
2050         bcopy(&bfl, addr, sizeof (bfl));
2051
2052         return (error);
2053 }
2054
2055 /*
2056  * Set the data link type of a BPF instance.
2057  */
2058 static int
2059 bpf_setdlt(struct bpf_d *d, uint32_t dlt)
2060 {
2061         int error, opromisc;
2062         struct ifnet *ifp;
2063         struct bpf_if *bp;
2064
2065         if (d->bd_bif->bif_dlt == dlt)
2066                 return (0);
2067
2068         while (d->bd_hbuf_read)
2069                 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
2070
2071         if ((d->bd_flags & BPF_CLOSING) != 0)
2072                 return (ENXIO);
2073
2074         ifp = d->bd_bif->bif_ifp;
2075         for (bp = bpf_iflist; bp; bp = bp->bif_next) {
2076                 if (bp->bif_ifp == ifp && bp->bif_dlt == dlt)
2077                         break;
2078         }
2079         if (bp != NULL) {
2080                 opromisc = d->bd_promisc;
2081                 if (bpf_detachd(d, 0) != 0)
2082                         return (ENXIO);
2083                 error = bpf_attachd(d, bp);
2084                 if (error) {
2085                         printf("bpf_setdlt: bpf_attachd %s%d failed (%d)\n",
2086                                 ifnet_name(bp->bif_ifp), ifnet_unit(bp->bif_ifp), error);
2087                         return error;
2088                 }
2089                 reset_d(d);
2090                 if (opromisc) {
2091                         lck_mtx_unlock(bpf_mlock);
2092                         error = ifnet_set_promiscuous(bp->bif_ifp, 1);
2093                         lck_mtx_lock(bpf_mlock);
2094                         if (error) {
2095                                 printf("%s: ifpromisc %s%d failed (%d)\n",
2096                                     __func__, ifnet_name(bp->bif_ifp),
2097                                     ifnet_unit(bp->bif_ifp), error);
2098                         } else {
2099                                 d->bd_promisc = 1;
2100                         }
2101                 }
2102         }
2103         return (bp == NULL ? EINVAL : 0);
2104 }
2105
2106 static int
2107 bpf_set_traffic_class(struct bpf_d *d, int tc)
2108 {
2109         int error = 0;
2110
2111         if (!SO_VALID_TC(tc))
2112                 error = EINVAL;
2113         else
2114                 d->bd_traffic_class = tc;
2115
2116         return (error);
2117 }
2118
2119 static void
2120 bpf_set_packet_service_class(struct mbuf *m, int tc)
2121 {
2122         if (!(m->m_flags & M_PKTHDR))
2123                 return;
2124
2125         VERIFY(SO_VALID_TC(tc));
2126         (void) m_set_service_class(m, so_tc2msc(tc));
2127 }
2128
2129 /*
2130  * Support for select()
2131  *
2132  * Return true iff the specific operation will not block indefinitely.
2133  * Otherwise, return false but make a note that a selwakeup() must be done.
2134  */
2135 int
2136 bpfselect(dev_t dev, int which, void * wql, struct proc *p)
2137 {
2138         struct bpf_d *d;
2139         int ret = 0;
2140
2141         lck_mtx_lock(bpf_mlock);
2142
2143         d = bpf_dtab[minor(dev)];
2144         if (d == 0 || d == (void *)1 || (d->bd_flags & BPF_CLOSING) != 0) {
2145                 lck_mtx_unlock(bpf_mlock);
2146                 return (ENXIO);
2147         }
2148
2149         bpf_acquire_d(d);
2150
2151         if (d->bd_bif == NULL) {
2152                 bpf_release_d(d);
2153                 lck_mtx_unlock(bpf_mlock);
2154                 return (ENXIO);
2155         }
2156
2157         while (d->bd_hbuf_read)
2158                 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
2159
2160         if ((d->bd_flags & BPF_CLOSING) != 0) {
2161                 bpf_release_d(d);
2162                 lck_mtx_unlock(bpf_mlock);
2163                 return (ENXIO);
2164         }
2165
2166         switch (which) {
2167                 case FREAD:
2168                         if (d->bd_hlen != 0 ||
2169                                         ((d->bd_immediate || d->bd_state == BPF_TIMED_OUT) &&
2170                                          d->bd_slen != 0))
2171                                 ret = 1; /* read has data to return */
2172                         else {
2173                                 /*
2174                                  * Read has no data to return.
2175                                  * Make the select wait, and start a timer if
2176                                  * necessary.
2177                                  */
2178                                 selrecord(p, &d->bd_sel, wql);
2179                                 bpf_start_timer(d);
2180                         }
2181                         break;
2182
2183                 case FWRITE:
2184                         ret = 1; /* can't determine whether a write would block */
2185                         break;
2186         }
2187
2188         bpf_release_d(d);
2189         lck_mtx_unlock(bpf_mlock);
2190
2191         return (ret);
2192 }
2193
2194
2195 /*
2196  * Support for kevent() system call.  Register EVFILT_READ filters and
2197  * reject all others.
2198  */
2199 int bpfkqfilter(dev_t dev, struct knote *kn);
2200 static void filt_bpfdetach(struct knote *);
2201 static int filt_bpfread(struct knote *, long);
2202 static int filt_bpftouch(struct knote *kn, struct kevent_internal_s *kev);
2203 static int filt_bpfprocess(struct knote *kn, struct filt_process_s *data, struct kevent_internal_s *kev);
2204
2205 struct filterops bpfread_filtops = {
2206         .f_isfd = 1,
2207         .f_detach = filt_bpfdetach,
2208         .f_event = filt_bpfread,
2209         .f_touch = filt_bpftouch,
2210         .f_process = filt_bpfprocess,
2211 };
2212
2213 static int
2214 filt_bpfread_common(struct knote *kn, struct bpf_d *d)
2215 {
2216         int ready = 0;
2217
2218         if (d->bd_immediate) {
2219                 /*
2220                  * If there's data in the hold buffer, it's the
2221                  * amount of data a read will return.
2222                  *
2223                  * If there's no data in the hold buffer, but
2224                  * there's data in the store buffer, a read will
2225                  * immediately rotate the store buffer to the
2226                  * hold buffer, the amount of data in the store
2227                  * buffer is the amount of data a read will
2228                  * return.
2229                  *
2230                  * If there's no data in either buffer, we're not
2231                  * ready to read.
2232                  */
2233                 kn->kn_data = ((d->bd_hlen == 0  || d->bd_hbuf_read)
2234                     ? d->bd_slen : d->bd_hlen);
2235                 int64_t lowwat = 1;
2236                 if (kn->kn_sfflags & NOTE_LOWAT)
2237                 {
2238                         if (kn->kn_sdata > d->bd_bufsize)
2239                                 lowwat = d->bd_bufsize;
2240                         else if (kn->kn_sdata > lowwat)
2241                                 lowwat = kn->kn_sdata;
2242                 }
2243                 ready = (kn->kn_data >= lowwat);
2244         } else {
2245                 /*
2246                  * If there's data in the hold buffer, it's the
2247                  * amount of data a read will return.
2248                  *
2249                  * If there's no data in the hold buffer, but
2250                  * there's data in the store buffer, if the
2251                  * timer has expired a read will immediately
2252                  * rotate the store buffer to the hold buffer,
2253                  * so the amount of data in the store buffer is
2254                  * the amount of data a read will return.
2255                  *
2256                  * If there's no data in either buffer, or there's
2257                  * no data in the hold buffer and the timer hasn't
2258                  * expired, we're not ready to read.
2259                  */
2260                 kn->kn_data = ((d->bd_hlen == 0 || d->bd_hbuf_read) && d->bd_state == BPF_TIMED_OUT ?
2261                                 d->bd_slen : d->bd_hlen);
2262                 ready = (kn->kn_data > 0);
2263         }
2264         if (!ready)
2265                 bpf_start_timer(d);
2266
2267         return (ready);
2268 }
2269
2270 int
2271 bpfkqfilter(dev_t dev, struct knote *kn)
2272 {
2273         struct bpf_d *d;
2274         int res;
2275
2276         /*
2277          * Is this device a bpf?
2278          */
2279         if (major(dev) != CDEV_MAJOR ||
2280             kn->kn_filter != EVFILT_READ) {
2281                 kn->kn_flags = EV_ERROR;
2282                 kn->kn_data = EINVAL;
2283                 return 0;
2284         }
2285
2286         lck_mtx_lock(bpf_mlock);
2287
2288         d = bpf_dtab[minor(dev)];
2289
2290         if (d == 0 ||
2291             d == (void *)1 ||
2292             d->bd_bif == NULL ||
2293             (d->bd_flags & BPF_CLOSING) != 0) {
2294                 lck_mtx_unlock(bpf_mlock);
2295                 kn->kn_flags = EV_ERROR;
2296                 kn->kn_data = ENXIO;
2297                 return 0;
2298         }
2299
2300         kn->kn_hook = d;
2301         kn->kn_filtid = EVFILTID_BPFREAD;
2302         KNOTE_ATTACH(&d->bd_sel.si_note, kn);
2303         d->bd_flags |= BPF_KNOTE;
2304
2305         /* capture the current state */
2306         res = filt_bpfread_common(kn, d);
2307
2308         lck_mtx_unlock(bpf_mlock);
2309
2310         return (res);
2311 }
2312
2313 static void
2314 filt_bpfdetach(struct knote *kn)
2315 {
2316         struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
2317
2318         lck_mtx_lock(bpf_mlock);
2319         if (d->bd_flags & BPF_KNOTE) {
2320                 KNOTE_DETACH(&d->bd_sel.si_note, kn);
2321                 d->bd_flags &= ~BPF_KNOTE;
2322         }
2323         lck_mtx_unlock(bpf_mlock);
2324 }
2325
2326 static int
2327 filt_bpfread(struct knote *kn, long hint)
2328 {
2329 #pragma unused(hint)
2330         struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
2331
2332         return filt_bpfread_common(kn, d);
2333 }
2334
2335 static int
2336 filt_bpftouch(struct knote *kn, struct kevent_internal_s *kev)
2337 {
2338         struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
2339         int res;
2340
2341         lck_mtx_lock(bpf_mlock);
2342
2343         /* save off the lowat threshold and flag */
2344         kn->kn_sdata = kev->data;
2345         kn->kn_sfflags = kev->fflags;
2346         if ((kn->kn_status & KN_UDATA_SPECIFIC) == 0)
2347                 kn->kn_udata = kev->udata;
2348
2349         /* output data will be re-generated here */
2350         res = filt_bpfread_common(kn, d);
2351
2352         lck_mtx_unlock(bpf_mlock);
2353
2354         return res;
2355 }
2356
2357 static int
2358 filt_bpfprocess(struct knote *kn, struct filt_process_s *data, struct kevent_internal_s *kev)
2359 {
2360 #pragma unused(data)
2361         struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
2362         int res;
2363
2364         lck_mtx_lock(bpf_mlock);
2365         res = filt_bpfread_common(kn, d);
2366         if (res) {
2367                 *kev = kn->kn_kevent;
2368         }
2369         lck_mtx_unlock(bpf_mlock);
2370
2371         return res;
2372 }
2373
2374 /*
2375  * Copy data from an mbuf chain into a buffer.  This code is derived
2376  * from m_copydata in sys/uipc_mbuf.c.
2377  */
2378 static void
2379 bpf_mcopy(const void *src_arg, void *dst_arg, size_t len)
2380 {
2381         struct mbuf *m = (struct mbuf *)(uintptr_t)(src_arg);
2382         u_int count;
2383         u_char *dst;
2384
2385         dst = dst_arg;
2386         while (len > 0) {
2387                 if (m == 0)
2388                         panic("bpf_mcopy");
2389                 count = min(m->m_len, len);
2390                 bcopy(mbuf_data(m), dst, count);
2391                 m = m->m_next;
2392                 dst += count;
2393                 len -= count;
2394         }
2395 }
2396
2397 static inline void
2398 bpf_tap_imp(
2399         ifnet_t         ifp,
2400         u_int32_t       dlt,
2401         mbuf_t          m,
2402         void*           hdr,
2403         size_t          hlen,
2404         int             outbound)
2405 {
2406         struct bpf_if *bp;
2407         struct mbuf *savedm = m;
2408
2409         /*
2410          * It's possible that we get here after the bpf descriptor has been
2411          * detached from the interface; in such a case we simply return.
2412          * Lock ordering is important since we can be called asynchronously
2413          * (from the IOKit) to process an inbound packet; when that happens
2414          * we would have been holding its "gateLock" and will be acquiring
2415          * "bpf_mlock" upon entering this routine.  Due to that, we release
2416          * "bpf_mlock" prior to calling ifnet_set_promiscuous (which will
2417          * acquire "gateLock" in the IOKit), in order to avoid a deadlock
2418          * when a ifnet_set_promiscuous request simultaneously collides with
2419          * an inbound packet being passed into the tap callback.
2420          */
2421         lck_mtx_lock(bpf_mlock);
2422         if (ifp->if_bpf == NULL) {
2423                 lck_mtx_unlock(bpf_mlock);
2424                 return;
2425         }
2426         bp = ifp->if_bpf;
2427         for (bp = ifp->if_bpf; bp && bp->bif_ifp == ifp &&
2428                  (dlt != 0 && bp->bif_dlt != dlt); bp = bp->bif_next)
2429                 ;
2430         if (bp && bp->bif_ifp == ifp && bp->bif_dlist != NULL) {
2431                 struct bpf_d    *d;
2432                 struct m_hdr    hack_hdr;
2433                 u_int   pktlen = 0;
2434                 u_int   slen = 0;
2435                 struct mbuf *m0;
2436
2437                 if (hdr) {
2438                         /*
2439                          * This is gross. We mock up an mbuf that points to the
2440                          * header buffer. This means we don't have to copy the
2441                          * header. A number of interfaces prepended headers just
2442                          * for bpf by allocating an mbuf on the stack. We want to
2443                          * give developers an easy way to prepend a header for bpf.
2444                          * Since a developer allocating an mbuf on the stack is bad,
2445                          * we do even worse here, allocating only a header to point
2446                          * to a buffer the developer supplied. This makes assumptions
2447                          * that bpf_filter and catchpacket will not look at anything
2448                          * in the mbuf other than the header. This was true at the
2449                          * time this code was written.
2450                          */
2451                         hack_hdr.mh_next = m;
2452                         hack_hdr.mh_nextpkt = NULL;
2453                         hack_hdr.mh_len = hlen;
2454                         hack_hdr.mh_data = hdr;
2455                         hack_hdr.mh_type = m->m_type;
2456                         hack_hdr.mh_flags = 0;
2457
2458                         __IGNORE_WCASTALIGN(m = (mbuf_t)&hack_hdr);
2459                 }
2460
2461                 for (m0 = m; m0 != 0; m0 = m0->m_next)
2462                         pktlen += m0->m_len;
2463
2464                 for (d = bp->bif_dlist; d; d = d->bd_next) {
2465                         if (outbound && !d->bd_seesent)
2466                                 continue;
2467                         ++d->bd_rcount;
2468                         slen = bpf_filter(d->bd_filter, (u_char *)m, pktlen, 0);
2469                         if (slen != 0) {
2470 #if CONFIG_MACF_NET
2471                                 if (mac_bpfdesc_check_receive(d, bp->bif_ifp) != 0)
2472                                         continue;
2473 #endif
2474                                 catchpacket(d, (u_char *)m, savedm, pktlen,
2475                                     slen, outbound, bpf_mcopy);
2476                         }
2477                 }
2478         }
2479         lck_mtx_unlock(bpf_mlock);
2480 }
2481
2482 void
2483 bpf_tap_out(
2484         ifnet_t         ifp,
2485         u_int32_t       dlt,
2486         mbuf_t          m,
2487         void*           hdr,
2488         size_t          hlen)
2489 {
2490         bpf_tap_imp(ifp, dlt, m, hdr, hlen, 1);
2491 }
2492
2493 void
2494 bpf_tap_in(
2495         ifnet_t         ifp,
2496         u_int32_t       dlt,
2497         mbuf_t          m,
2498         void*           hdr,
2499         size_t          hlen)
2500 {
2501         bpf_tap_imp(ifp, dlt, m, hdr, hlen, 0);
2502 }
2503
2504 /* Callback registered with Ethernet driver. */
2505 static int bpf_tap_callback(struct ifnet *ifp, struct mbuf *m)
2506 {
2507         bpf_tap_imp(ifp, 0, m, NULL, 0, mbuf_pkthdr_rcvif(m) == NULL);
2508
2509         return 0;
2510 }
2511
2512 /*
2513  * Move the packet data from interface memory (pkt) into the
2514  * store buffer.  Return 1 if it's time to wakeup a listener (buffer full),
2515  * otherwise 0.  "copy" is the routine called to do the actual data
2516  * transfer.  bcopy is passed in to copy contiguous chunks, while
2517  * bpf_mcopy is passed in to copy mbuf chains.  In the latter case,
2518  * pkt is really an mbuf.
2519  */
2520 static void
2521 catchpacket(struct bpf_d *d, u_char *pkt, struct mbuf *m, u_int pktlen,
2522         u_int snaplen, int outbound,
2523         void (*cpfn)(const void *, void *, size_t))
2524 {
2525         struct bpf_hdr *hp;
2526         struct bpf_hdr_ext *ehp;
2527         int totlen, curlen;
2528         int hdrlen, caplen;
2529         int do_wakeup = 0;
2530         u_char *payload;
2531         struct timeval tv;
2532         struct m_tag *mt = NULL;
2533         struct bpf_mtag *bt = NULL;
2534
2535         hdrlen = (d->bd_flags & BPF_EXTENDED_HDR) ? d->bd_bif->bif_exthdrlen :
2536             d->bd_bif->bif_hdrlen;
2537         /*
2538          * Figure out how many bytes to move.  If the packet is
2539          * greater or equal to the snapshot length, transfer that
2540          * much.  Otherwise, transfer the whole packet (unless
2541          * we hit the buffer size limit).
2542          */
2543         totlen = hdrlen + min(snaplen, pktlen);
2544         if (totlen > d->bd_bufsize)
2545                 totlen = d->bd_bufsize;
2546
2547         /*
2548          * Round up the end of the previous packet to the next longword.
2549          */
2550         curlen = BPF_WORDALIGN(d->bd_slen);
2551         if (curlen + totlen > d->bd_bufsize) {
2552                 /*
2553                  * This packet will overflow the storage buffer.
2554                  * Rotate the buffers if we can, then wakeup any
2555                  * pending reads.
2556                  *
2557                  * We cannot rotate buffers if a read is in progress
2558                  * so drop the packet
2559                  */
2560                 if (d->bd_hbuf_read) {
2561                         ++d->bd_dcount;
2562                         return;
2563                 }
2564
2565                 if (d->bd_fbuf == NULL) {
2566                         if (d->bd_headdrop == 0) {
2567                                 /*
2568                                  * We haven't completed the previous read yet,
2569                                  * so drop the packet.
2570                                  */
2571                                 ++d->bd_dcount;
2572                                 return;
2573                         }
2574                         /*
2575                          * Drop the hold buffer as it contains older packets
2576                          */
2577                         d->bd_dcount += d->bd_hcnt;
2578                         d->bd_fbuf = d->bd_hbuf;
2579                         ROTATE_BUFFERS(d);
2580                 } else {
2581                         ROTATE_BUFFERS(d);
2582                 }
2583                 do_wakeup = 1;
2584                 curlen = 0;
2585         }
2586         else if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT)
2587                 /*
2588                  * Immediate mode is set, or the read timeout has
2589                  * already expired during a select call. A packet
2590                  * arrived, so the reader should be woken up.
2591                  */
2592                 do_wakeup = 1;
2593
2594         /*
2595          * Append the bpf header.
2596          */
2597         microtime(&tv);
2598         if (d->bd_flags & BPF_EXTENDED_HDR) {
2599                 ehp = (struct bpf_hdr_ext *)(void *)(d->bd_sbuf + curlen);
2600                 memset(ehp, 0, sizeof(*ehp));
2601                 ehp->bh_tstamp.tv_sec = tv.tv_sec;
2602                 ehp->bh_tstamp.tv_usec = tv.tv_usec;
2603                 ehp->bh_datalen = pktlen;
2604                 ehp->bh_hdrlen = hdrlen;
2605                 ehp->bh_caplen = totlen - hdrlen;
2606                 mt = m_tag_locate(m, bpf_mtag_id, 0, NULL);
2607                 if (mt && mt->m_tag_len >= sizeof(*bt)) {
2608                         bt = (struct bpf_mtag *)(mt + 1);
2609                         ehp->bh_pid = bt->bt_pid;
2610                         strlcpy(ehp->bh_comm, bt->bt_comm,
2611                             sizeof(ehp->bh_comm));
2612                         ehp->bh_svc = so_svc2tc(bt->bt_svc);
2613                         if (bt->bt_direction == BPF_MTAG_DIR_OUT)
2614                                 ehp->bh_flags |= BPF_HDR_EXT_FLAGS_DIR_OUT;
2615                         else
2616                                 ehp->bh_flags |= BPF_HDR_EXT_FLAGS_DIR_IN;
2617                         m_tag_delete(m, mt);
2618                 } else if (outbound) {
2619                         /* only do lookups on non-raw INPCB */
2620                         if ((m->m_pkthdr.pkt_flags & (PKTF_FLOW_ID|
2621                             PKTF_FLOW_LOCALSRC|PKTF_FLOW_RAWSOCK)) ==
2622                             (PKTF_FLOW_ID|PKTF_FLOW_LOCALSRC) &&
2623                             m->m_pkthdr.pkt_flowsrc == FLOWSRC_INPCB) {
2624                                 ehp->bh_flowid = m->m_pkthdr.pkt_flowid;
2625                                 ehp->bh_proto = m->m_pkthdr.pkt_proto;
2626                         }
2627                         ehp->bh_svc = so_svc2tc(m->m_pkthdr.pkt_svc);
2628                         ehp->bh_flags |= BPF_HDR_EXT_FLAGS_DIR_OUT;
2629                         if (m->m_pkthdr.pkt_flags & PKTF_TCP_REXMT)
2630                                 ehp->bh_pktflags |= BPF_PKTFLAGS_TCP_REXMT;
2631                         if (m->m_pkthdr.pkt_flags & PKTF_START_SEQ)
2632                                 ehp->bh_pktflags |= BPF_PKTFLAGS_START_SEQ;
2633                         if (m->m_pkthdr.pkt_flags & PKTF_LAST_PKT)
2634                                 ehp->bh_pktflags |= BPF_PKTFLAGS_LAST_PKT;
2635                         if (m->m_pkthdr.pkt_flags & PKTF_VALID_UNSENT_DATA) {
2636                                 ehp->bh_unsent_bytes =
2637                                     m->m_pkthdr.bufstatus_if;
2638                                 ehp->bh_unsent_snd =
2639                                     m->m_pkthdr.bufstatus_sndbuf;
2640                         }
2641                 } else
2642                         ehp->bh_flags |= BPF_HDR_EXT_FLAGS_DIR_IN;
2643                 payload = (u_char *)ehp + hdrlen;
2644                 caplen = ehp->bh_caplen;
2645         } else {
2646                 hp = (struct bpf_hdr *)(void *)(d->bd_sbuf + curlen);
2647                 hp->bh_tstamp.tv_sec = tv.tv_sec;
2648                 hp->bh_tstamp.tv_usec = tv.tv_usec;
2649                 hp->bh_datalen = pktlen;
2650                 hp->bh_hdrlen = hdrlen;
2651                 hp->bh_caplen = totlen - hdrlen;
2652                 payload = (u_char *)hp + hdrlen;
2653                 caplen = hp->bh_caplen;
2654         }
2655         /*
2656          * Copy the packet data into the store buffer and update its length.
2657          */
2658         (*cpfn)(pkt, payload, caplen);
2659         d->bd_slen = curlen + totlen;
2660         d->bd_scnt += 1;
2661
2662         if (do_wakeup)
2663                 bpf_wakeup(d);
2664 }
2665
2666 /*
2667  * Initialize all nonzero fields of a descriptor.
2668  */
2669 static int
2670 bpf_allocbufs(struct bpf_d *d)
2671 {
2672         if (d->bd_sbuf != NULL) {
2673                 FREE(d->bd_sbuf, M_DEVBUF);
2674                 d->bd_sbuf = NULL;
2675         }
2676         if (d->bd_hbuf != NULL) {
2677                 FREE(d->bd_hbuf, M_DEVBUF);
2678                 d->bd_hbuf = NULL;
2679         }
2680         if (d->bd_fbuf != NULL) {
2681                 FREE(d->bd_fbuf, M_DEVBUF);
2682                 d->bd_fbuf = NULL;
2683         }
2684
2685         d->bd_fbuf = (caddr_t) _MALLOC(d->bd_bufsize, M_DEVBUF, M_WAIT);
2686         if (d->bd_fbuf == NULL)
2687                 return (ENOBUFS);
2688
2689         d->bd_sbuf = (caddr_t) _MALLOC(d->bd_bufsize, M_DEVBUF, M_WAIT);
2690         if (d->bd_sbuf == NULL) {
2691                 FREE(d->bd_fbuf, M_DEVBUF);
2692                 d->bd_fbuf = NULL;
2693                 return (ENOBUFS);
2694         }
2695         d->bd_slen = 0;
2696         d->bd_hlen = 0;
2697         d->bd_scnt = 0;
2698         d->bd_hcnt = 0;
2699         return (0);
2700 }
2701
2702 /*
2703  * Free buffers currently in use by a descriptor.
2704  * Called on close.
2705  */
2706 static void
2707 bpf_freed(struct bpf_d *d)
2708 {
2709         /*
2710          * We don't need to lock out interrupts since this descriptor has
2711          * been detached from its interface and it yet hasn't been marked
2712          * free.
2713          */
2714         if (d->bd_hbuf_read)
2715                 panic("bpf buffer freed during read");
2716
2717         if (d->bd_sbuf != 0) {
2718                 FREE(d->bd_sbuf, M_DEVBUF);
2719                 if (d->bd_hbuf != 0)
2720                         FREE(d->bd_hbuf, M_DEVBUF);
2721                 if (d->bd_fbuf != 0)
2722                         FREE(d->bd_fbuf, M_DEVBUF);
2723         }
2724         if (d->bd_filter)
2725                 FREE((caddr_t)d->bd_filter, M_DEVBUF);
2726 }
2727
2728 /*
2729  * Attach an interface to bpf.  driverp is a pointer to a (struct bpf_if *)
2730  * in the driver's softc; dlt is the link layer type; hdrlen is the fixed
2731  * size of the link header (variable length headers not yet supported).
2732  */
2733 void
2734 bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen)
2735 {
2736         bpf_attach(ifp, dlt, hdrlen, NULL, NULL);
2737 }
2738
2739 errno_t
2740 bpf_attach(
2741         ifnet_t                 ifp,
2742         u_int32_t               dlt,
2743         u_int32_t               hdrlen,
2744         bpf_send_func   send,
2745         bpf_tap_func    tap)
2746 {
2747         struct bpf_if *bp_new;
2748         struct bpf_if *bp_temp;
2749         struct bpf_if *bp_first = NULL;
2750
2751         bp_new = (struct bpf_if *) _MALLOC(sizeof(*bp_new), M_DEVBUF,
2752             M_WAIT | M_ZERO);
2753         if (bp_new == 0)
2754                 panic("bpfattach");
2755
2756         lck_mtx_lock(bpf_mlock);
2757
2758         /*
2759          * Check if this interface/dlt is already attached, record first
2760          * attachment for this interface.
2761          */
2762         for (bp_temp = bpf_iflist; bp_temp && (bp_temp->bif_ifp != ifp ||
2763                  bp_temp->bif_dlt != dlt); bp_temp = bp_temp->bif_next) {
2764                  if (bp_temp->bif_ifp == ifp && bp_first == NULL)
2765                         bp_first = bp_temp;
2766         }
2767
2768         if (bp_temp != NULL) {
2769                 printf("bpfattach - %s with dlt %d is already attached\n",
2770                         if_name(ifp), dlt);
2771                 FREE(bp_new, M_DEVBUF);
2772                 lck_mtx_unlock(bpf_mlock);
2773                 return EEXIST;
2774         }
2775
2776         bp_new->bif_ifp = ifp;
2777         bp_new->bif_dlt = dlt;
2778         bp_new->bif_send = send;
2779         bp_new->bif_tap = tap;
2780
2781         if (bp_first == NULL) {
2782                 /* No other entries for this ifp */
2783                 bp_new->bif_next = bpf_iflist;
2784                 bpf_iflist = bp_new;
2785         }
2786         else {
2787                 /* Add this after the first entry for this interface */
2788                 bp_new->bif_next = bp_first->bif_next;
2789                 bp_first->bif_next = bp_new;
2790         }
2791
2792         /*
2793          * Compute the length of the bpf header.  This is not necessarily
2794          * equal to SIZEOF_BPF_HDR because we want to insert spacing such
2795          * that the network layer header begins on a longword boundary (for
2796          * performance reasons and to alleviate alignment restrictions).
2797          */
2798         bp_new->bif_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen;
2799         bp_new->bif_exthdrlen = BPF_WORDALIGN(hdrlen +
2800             sizeof(struct bpf_hdr_ext)) - hdrlen;
2801
2802         /* Take a reference on the interface */
2803         ifnet_reference(ifp);
2804
2805         lck_mtx_unlock(bpf_mlock);
2806
2807 #ifndef __APPLE__
2808         if (bootverbose)
2809                 printf("bpf: %s attached\n", if_name(ifp));
2810 #endif
2811
2812         return 0;
2813 }
2814
2815 /*
2816  * Detach bpf from an interface.  This involves detaching each descriptor
2817  * associated with the interface, and leaving bd_bif NULL.  Notify each
2818  * descriptor as it's detached so that any sleepers wake up and get
2819  * ENXIO.
2820  */
2821 void
2822 bpfdetach(struct ifnet *ifp)
2823 {
2824         struct bpf_if   *bp, *bp_prev, *bp_next;
2825         struct bpf_d    *d;
2826
2827         if (bpf_debug != 0)
2828                 printf("%s: %s\n",
2829                     __func__, if_name(ifp));
2830
2831         lck_mtx_lock(bpf_mlock);
2832
2833         /*
2834          * Build the list of devices attached to that interface
2835          * that we need to free while keeping the lock to maintain
2836          * the integrity of the interface list
2837          */
2838         bp_prev = NULL;
2839         for (bp = bpf_iflist; bp != NULL; bp = bp_next) {
2840                 bp_next = bp->bif_next;
2841
2842                 if (ifp != bp->bif_ifp) {
2843                         bp_prev = bp;
2844                         continue;
2845                 }
2846                 /* Unlink from the interface list */
2847                 if (bp_prev)
2848                         bp_prev->bif_next = bp->bif_next;
2849                 else
2850                         bpf_iflist = bp->bif_next;
2851
2852                 /* Detach the devices attached to the interface */
2853                 while ((d = bp->bif_dlist) != NULL) {
2854                         /*
2855                          * Take an extra reference to prevent the device
2856                          * from being freed when bpf_detachd() releases
2857                          * the reference for the interface list
2858                          */
2859                         bpf_acquire_d(d);
2860                         bpf_detachd(d, 0);
2861                         bpf_wakeup(d);
2862                         bpf_release_d(d);
2863                 }
2864                 ifnet_release(ifp);
2865         }
2866
2867         lck_mtx_unlock(bpf_mlock);
2868 }
2869
2870 void
2871 bpf_init(__unused void *unused)
2872 {
2873 #ifdef __APPLE__
2874         int     i;
2875         int     maj;
2876
2877         if (bpf_devsw_installed == 0) {
2878                 bpf_devsw_installed = 1;
2879                 bpf_mlock_grp_attr = lck_grp_attr_alloc_init();
2880                 bpf_mlock_grp = lck_grp_alloc_init("bpf", bpf_mlock_grp_attr);
2881                 bpf_mlock_attr = lck_attr_alloc_init();
2882                 lck_mtx_init(bpf_mlock, bpf_mlock_grp, bpf_mlock_attr);
2883                 maj = cdevsw_add(CDEV_MAJOR, &bpf_cdevsw);
2884                 if (maj == -1) {
2885                         if (bpf_mlock_attr)
2886                                 lck_attr_free(bpf_mlock_attr);
2887                         if (bpf_mlock_grp)
2888                                 lck_grp_free(bpf_mlock_grp);
2889                         if (bpf_mlock_grp_attr)
2890                                 lck_grp_attr_free(bpf_mlock_grp_attr);
2891
2892                         bpf_mlock = NULL;
2893                         bpf_mlock_attr = NULL;
2894                         bpf_mlock_grp = NULL;
2895                         bpf_mlock_grp_attr = NULL;
2896                         bpf_devsw_installed = 0;
2897                         printf("bpf_init: failed to allocate a major number!\n");
2898                         return;
2899                 }
2900
2901                 for (i = 0 ; i < NBPFILTER; i++)
2902                         bpf_make_dev_t(maj);
2903
2904                 VERIFY(mbuf_tag_id_find(BPF_CONTROL_NAME, &bpf_mtag_id) == 0);
2905         }
2906 #else
2907         cdevsw_add(&bpf_cdevsw);
2908 #endif
2909 }
2910
2911 #ifndef __APPLE__
2912 SYSINIT(bpfdev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE+CDEV_MAJOR,bpf_drvinit,NULL)
2913 #endif
2914
2915 #if CONFIG_MACF_NET
2916 struct label *
2917 mac_bpfdesc_label_get(struct bpf_d *d)
2918 {
2919
2920         return (d->bd_label);
2921 }
2922
2923 void
2924 mac_bpfdesc_label_set(struct bpf_d *d, struct label *label)
2925 {
2926
2927         d->bd_label = label;
2928 }
2929 #endif