bsd/net/bpf.c

   1 /*
   2  * Copyright (c) 2000-2017 Apple Inc. All rights reserved.
   3  *
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. The rights granted to you under the License
  10  * may not be used to create, or enable the creation or redistribution of,
  11  * unlawful or unlicensed copies of an Apple operating system, or to
  12  * circumvent, violate, or enable the circumvention or violation of, any
  13  * terms of an Apple operating system software license agreement.
  14  *
  15  * Please obtain a copy of the License at
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17  *
  18  * The Original Code and all software distributed under the License are
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23  * Please see the License for the specific language governing rights and
  24  * limitations under the License.
  25  *
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27  */
  28 /*
  29  * Copyright (c) 1990, 1991, 1993
  30  *      The Regents of the University of California.  All rights reserved.
  31  *
  32  * This code is derived from the Stanford/CMU enet packet filter,
  33  * (net/enet.c) distributed as part of 4.3BSD, and code contributed
  34  * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
  35  * Berkeley Laboratory.
  36  *
  37  * Redistribution and use in source and binary forms, with or without
  38  * modification, are permitted provided that the following conditions
  39  * are met:
  40  * 1. Redistributions of source code must retain the above copyright
  41  *    notice, this list of conditions and the following disclaimer.
  42  * 2. Redistributions in binary form must reproduce the above copyright
  43  *    notice, this list of conditions and the following disclaimer in the
  44  *    documentation and/or other materials provided with the distribution.
  45  * 3. All advertising materials mentioning features or use of this software
  46  *    must display the following acknowledgement:
  47  *      This product includes software developed by the University of
  48  *      California, Berkeley and its contributors.
  49  * 4. Neither the name of the University nor the names of its contributors
  50  *    may be used to endorse or promote products derived from this software
  51  *    without specific prior written permission.
  52  *
  53  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  54  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  55  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  56  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  57  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  58  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  59  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  60  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  61  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  62  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  63  * SUCH DAMAGE.
  64  *
  65  *      @(#)bpf.c       8.2 (Berkeley) 3/28/94
  66  *
  67  * $FreeBSD: src/sys/net/bpf.c,v 1.59.2.5 2001/01/05 04:49:09 jdp Exp $
  68  */
  69 /*
  70  * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
  71  * support for mandatory and extensible security protections.  This notice
  72  * is included in support of clause 2.2 (b) of the Apple Public License,
  73  * Version 2.0.
  74  */
  75
  76 #include "bpf.h"
  77
  78 #ifndef __GNUC__
  79 #define inline
  80 #else
  81 #define inline __inline
  82 #endif
  83
  84 #include <sys/param.h>
  85 #include <sys/systm.h>
  86 #include <sys/conf.h>
  87 #include <sys/malloc.h>
  88 #include <sys/mbuf.h>
  89 #include <sys/time.h>
  90 #include <sys/proc.h>
  91 #include <sys/signalvar.h>
  92 #include <sys/filio.h>
  93 #include <sys/sockio.h>
  94 #include <sys/ttycom.h>
  95 #include <sys/filedesc.h>
  96 #include <sys/uio_internal.h>
  97 #include <sys/file_internal.h>
  98 #include <sys/event.h>
  99
 100 #include <sys/poll.h>
 101
 102 #include <sys/socket.h>
 103 #include <sys/socketvar.h>
 104 #include <sys/vnode.h>
 105
 106 #include <net/if.h>
 107 #include <net/bpf.h>
 108 #include <net/bpfdesc.h>
 109
 110 #include <netinet/in.h>
 111 #include <netinet/in_pcb.h>
 112 #include <netinet/in_var.h>
 113 #include <netinet/ip_var.h>
 114 #include <netinet/tcp.h>
 115 #include <netinet/tcp_var.h>
 116 #include <netinet/udp.h>
 117 #include <netinet/udp_var.h>
 118 #include <netinet/if_ether.h>
 119 #include <sys/kernel.h>
 120 #include <sys/sysctl.h>
 121 #include <net/firewire.h>
 122
 123 #include <miscfs/devfs/devfs.h>
 124 #include <net/dlil.h>
 125 #include <net/pktap.h>
 126
 127 #include <kern/locks.h>
 128 #include <kern/thread_call.h>
 129 #include <libkern/section_keywords.h>
 130
 131 #if CONFIG_MACF_NET
 132 #include <security/mac_framework.h>
 133 #endif /* MAC_NET */
 134
  135 extern int tvtohz(struct timeval *);
  136 /* Compile-time default for the bpf_bufsize tunable (the default read buffer size). */
  137 #define BPF_BUFSIZE 4096
  138 #define UIOMOVE(cp, len, code, uio) uiomove(cp, len, uio)
  139 /* UIOMOVE() accepts a 'code' argument but discards it: only cp, len and uio reach uiomove(). */
  140
  141 #define PRINET  26                      /* interruptible; priority passed to msleep() in bpfclose() */
  142 /* Copy-routine signature; presumably (source, destination, byte count) - confirm at the users. */
  143 typedef void (*pktcopyfunc_t)(const void *, void *, size_t);
 144
  145 /*
  146  * The default read buffer size is patchable; bpfopen() copies it into each new descriptor's bd_bufsize.
  147  */
  148 static unsigned int bpf_bufsize = BPF_BUFSIZE;
  149 SYSCTL_INT(_debug, OID_AUTO, bpf_bufsize, CTLFLAG_RW | CTLFLAG_LOCKED,
  150         &bpf_bufsize, 0, "");
  151 __private_extern__ unsigned int bpf_maxbufsize = BPF_MAXBUFSIZE;
  152 SYSCTL_INT(_debug, OID_AUTO, bpf_maxbufsize, CTLFLAG_RW | CTLFLAG_LOCKED,
  153         &bpf_maxbufsize, 0, "");
  154 static unsigned int bpf_maxdevices = 256;       /* bpf_make_dev_t() stops creating nodes at this count */
  155 SYSCTL_UINT(_debug, OID_AUTO, bpf_maxdevices, CTLFLAG_RW | CTLFLAG_LOCKED,
  156         &bpf_maxdevices, 0, "");
  157 /*
  158  * bpf_wantpktap controls the default visibility of DLT_PKTAP.
  159  * On OS X it is off by default, so a process needs to use the ioctl
  160  * BPF_WANT_PKTAP explicitly to be able to use DLT_PKTAP.
  161  */
  162 #if CONFIG_EMBEDDED
  163 static unsigned int bpf_wantpktap = 1;
  164 #else
  165 static unsigned int bpf_wantpktap = 0;
  166 #endif
  167 SYSCTL_UINT(_debug, OID_AUTO, bpf_wantpktap, CTLFLAG_RW | CTLFLAG_LOCKED,
  168         &bpf_wantpktap, 0, "");
  169 /* bpf_debug: a non-zero value enables the diagnostic printf()s guarded by it in this file. */
  170 static int bpf_debug = 0;
  171 SYSCTL_INT(_debug, OID_AUTO, bpf_debug, CTLFLAG_RW | CTLFLAG_LOCKED,
  172         &bpf_debug, 0, "");
 173
  174 /*
  175  *  bpf_iflist is the list of interfaces; each corresponds to an ifnet
  176  *  bpf_dtab holds pointers to the descriptors, indexed by minor device #
  177  */
  178 static struct bpf_if    *bpf_iflist;
  179 #ifdef __APPLE__
  180 /*
  181  * BSD now stores the bpf_d in the dev_t which is a struct
  182  * on their system. Our dev_t is an int, so we still store
  183  * the bpf_d in a separate table indexed by minor device #.
  184  *
  185  * The value stored in bpf_dtab[n] represents three states:
  186  *  0: device not opened
  187  *  1: device opening or closing
  188  *  other: device <n> opened with pointer to storage
  189  */
  190 static struct bpf_d     **bpf_dtab = NULL;
  191 static unsigned int bpf_dtab_size = 0;  /* number of slots allocated in bpf_dtab */
  192 static unsigned int     nbpfilter = 0;  /* number of bpf device nodes created so far */
  193 /* bpf_mlock is taken by bpfopen()/bpfclose() and must be held around bpf_acquire_d()/bpf_release_d(). */
  194 decl_lck_mtx_data(static, bpf_mlock_data);
  195 static lck_mtx_t                *bpf_mlock = &bpf_mlock_data;
  196 static lck_grp_t                *bpf_mlock_grp;
  197 static lck_grp_attr_t   *bpf_mlock_grp_attr;
  198 static lck_attr_t               *bpf_mlock_attr;
  199
  200 #endif /* __APPLE__ */
  201 /* Forward declarations of the file-local helpers. */
  202 static int      bpf_allocbufs(struct bpf_d *);
  203 static errno_t  bpf_attachd(struct bpf_d *d, struct bpf_if *bp);
  204 static int      bpf_detachd(struct bpf_d *d, int);
  205 static void     bpf_freed(struct bpf_d *);
  206 static int      bpf_movein(struct uio *, int,
  207                     struct mbuf **, struct sockaddr *, int *);
  208 static int      bpf_setif(struct bpf_d *, ifnet_t ifp);
  209 static void     bpf_timed_out(void *, void *);
  210 static void     bpf_wakeup(struct bpf_d *);
  211 static void     catchpacket(struct bpf_d *, struct bpf_packet *, u_int, int);
  212 static void     reset_d(struct bpf_d *);
  213 static int      bpf_setf(struct bpf_d *, u_int, user_addr_t, u_long);
  214 static int      bpf_getdltlist(struct bpf_d *, caddr_t, struct proc *);
  215 static int      bpf_setdlt(struct bpf_d *, u_int);
  216 static int      bpf_set_traffic_class(struct bpf_d *, int);
  217 static void     bpf_set_packet_service_class(struct mbuf *, int);
  218 /* Descriptor reference counting; both helpers assert that bpf_mlock is owned. */
  219 static void     bpf_acquire_d(struct bpf_d *);
  220 static void     bpf_release_d(struct bpf_d *);
  221
  222 static  int bpf_devsw_installed;
  223
  224 void bpf_init(void *unused);
  225 static int bpf_tap_callback(struct ifnet *ifp, struct mbuf *m);        /* handed to dlil_set_bpf_tap() */
 226
  227 /*
  228  * Darwin differs from BSD here, the following are static
  229  * on BSD and not static on Darwin.
  230  */
  231         d_open_t            bpfopen;
  232         d_close_t           bpfclose;
  233         d_read_t            bpfread;
  234         d_write_t           bpfwrite;
  235         ioctl_fcn_t         bpfioctl;
  236         select_fcn_t        bpfselect;
  237
  238 /* bpf_cdevsw lists the entry points declared above; the eno_* entries are presumably "not supported" stubs - confirm in sys/conf.h. */
  239 /* Darwin's cdevsw struct differs slightly from BSDs */
  240 #define CDEV_MAJOR 23   /* NOTE(review): presumably the major number requested for bpf_cdevsw - confirm at the use site */
  241 static struct cdevsw bpf_cdevsw = {
  242         /* open */          bpfopen,
  243         /* close */         bpfclose,
  244         /* read */          bpfread,
  245         /* write */         bpfwrite,
  246         /* ioctl */         bpfioctl,
  247         /* stop */          eno_stop,
  248         /* reset */         eno_reset,
  249         /* tty */           NULL,
  250         /* select */        bpfselect,
  251         /* mmap */          eno_mmap,
  252         /* strategy*/       eno_strat,
  253         /* getc */          eno_getc,
  254         /* putc */          eno_putc,
  255         /* type */          0
  256 };
 257
 258 #define SOCKADDR_HDR_LEN           offsetof(struct sockaddr, sa_data)
 259
 260 static int
 261 bpf_movein(struct uio *uio, int linktype, struct mbuf **mp, struct sockaddr *sockp, int *datlen)
 262 {
 263         struct mbuf *m;
 264         int error;
 265         int len;
 266         uint8_t sa_family;
 267         int hlen;
 268
 269         switch (linktype) {
 270
 271 #if SLIP
 272         case DLT_SLIP:
 273                 sa_family = AF_INET;
 274                 hlen = 0;
 275                 break;
 276 #endif /* SLIP */
 277
 278         case DLT_EN10MB:
 279                 sa_family = AF_UNSPEC;
 280                 /* XXX Would MAXLINKHDR be better? */
 281                 hlen = sizeof(struct ether_header);
 282                 break;
 283
 284 #if FDDI
 285         case DLT_FDDI:
 286         #if defined(__FreeBSD__) || defined(__bsdi__)
 287                 sa_family = AF_IMPLINK;
 288                 hlen = 0;
 289         #else
 290                 sa_family = AF_UNSPEC;
 291                 /* XXX 4(FORMAC)+6(dst)+6(src)+3(LLC)+5(SNAP) */
 292                 hlen = 24;
 293         #endif
 294                 break;
 295 #endif /* FDDI */
 296
 297         case DLT_RAW:
 298         case DLT_NULL:
 299                 sa_family = AF_UNSPEC;
 300                 hlen = 0;
 301                 break;
 302
 303         #ifdef __FreeBSD__
 304         case DLT_ATM_RFC1483:
 305                 /*
 306                  * en atm driver requires 4-byte atm pseudo header.
 307                  * though it isn't standard, vpi:vci needs to be
 308                  * specified anyway.
 309                  */
 310                 sa_family = AF_UNSPEC;
 311                 hlen = 12;      /* XXX 4(ATM_PH) + 3(LLC) + 5(SNAP) */
 312                 break;
 313         #endif
 314
 315         case DLT_PPP:
 316                 sa_family = AF_UNSPEC;
 317                 hlen = 4;       /* This should match PPP_HDRLEN */
 318                 break;
 319
 320         case DLT_APPLE_IP_OVER_IEEE1394:
 321                 sa_family = AF_UNSPEC;
 322                 hlen = sizeof(struct firewire_header);
 323                 break;
 324
 325         case DLT_IEEE802_11:            /* IEEE 802.11 wireless */
 326                 sa_family = AF_IEEE80211;
 327                 hlen = 0;
 328                 break;
 329
 330         case DLT_IEEE802_11_RADIO:
 331                 sa_family = AF_IEEE80211;
 332                 hlen = 0;
 333                 break;
 334
 335         default:
 336                 return (EIO);
 337         }
 338
 339         // LP64todo - fix this!
 340         len = uio_resid(uio);
 341         *datlen = len - hlen;
 342         if ((unsigned)len > MCLBYTES)
 343                 return (EIO);
 344
 345         if (sockp) {
 346                 /*
 347                  * Build a sockaddr based on the data link layer type.
 348                  * We do this at this level because the ethernet header
 349                  * is copied directly into the data field of the sockaddr.
 350                  * In the case of SLIP, there is no header and the packet
 351                  * is forwarded as is.
 352                  * Also, we are careful to leave room at the front of the mbuf
 353                  * for the link level header.
 354                  */
 355                 if ((hlen + SOCKADDR_HDR_LEN) > sockp->sa_len) {
 356                         return (EIO);
 357                 }
 358                 sockp->sa_family = sa_family;
 359         } else {
 360                 /*
 361                  * We're directly sending the packet data supplied by
 362                  * the user; we don't need to make room for the link
 363                  * header, and don't need the header length value any
 364                  * more, so set it to 0.
 365                  */
 366                 hlen = 0;
 367         }
 368
 369         MGETHDR(m, M_WAIT, MT_DATA);
 370         if (m == 0)
 371                 return (ENOBUFS);
 372         if ((unsigned)len > MHLEN) {
 373                 MCLGET(m, M_WAIT);
 374                 if ((m->m_flags & M_EXT) == 0) {
 375                         error = ENOBUFS;
 376                         goto bad;
 377                 }
 378         }
 379         m->m_pkthdr.len = m->m_len = len;
 380         m->m_pkthdr.rcvif = NULL;
 381         *mp = m;
 382
 383         /*
 384          * Make room for link header.
 385          */
 386         if (hlen != 0) {
 387                 m->m_pkthdr.len -= hlen;
 388                 m->m_len -= hlen;
 389                 m->m_data += hlen; /* XXX */
 390                 error = UIOMOVE((caddr_t)sockp->sa_data, hlen, UIO_WRITE, uio);
 391                 if (error)
 392                         goto bad;
 393         }
 394         error = UIOMOVE(mtod(m, caddr_t), len - hlen, UIO_WRITE, uio);
 395         if (error)
 396                 goto bad;
 397
 398         /* Check for multicast destination */
 399         switch (linktype) {
 400                 case DLT_EN10MB: {
 401                         struct ether_header *eh = mtod(m, struct ether_header *);
 402
 403                         if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
 404                                 if (_ether_cmp(etherbroadcastaddr, eh->ether_dhost) == 0)
 405                                         m->m_flags |= M_BCAST;
 406                                 else
 407                                         m->m_flags |= M_MCAST;
 408                         }
 409                         break;
 410                 }
 411         }
 412
 413         return 0;
 414  bad:
 415         m_freem(m);
 416         return (error);
 417 }
 418
 419 #ifdef __APPLE__
 420
 421 /*
 422  * The dynamic addition of a new device node must block all processes that
 423  * are opening the last device so that no process will get an unexpected
 424  * ENOENT
 425  */
 426 static void
 427 bpf_make_dev_t(int maj)
 428 {
 429         static int              bpf_growing = 0;
 430         unsigned int    cur_size = nbpfilter, i;
 431
 432         if (nbpfilter >= bpf_maxdevices)
 433                 return;
 434
 435         while (bpf_growing) {
 436                 /* Wait until new device has been created */
 437                 (void)tsleep((caddr_t)&bpf_growing, PZERO, "bpf_growing", 0);
 438         }
 439         if (nbpfilter > cur_size) {
 440                 /* other thread grew it already */
 441                 return;
 442         }
 443         bpf_growing = 1;
 444
 445         /* need to grow bpf_dtab first */
 446         if (nbpfilter == bpf_dtab_size) {
 447                 int new_dtab_size;
 448                 struct bpf_d **new_dtab = NULL;
 449                 struct bpf_d **old_dtab = NULL;
 450
 451                 new_dtab_size = bpf_dtab_size + NBPFILTER;
 452                 new_dtab = (struct bpf_d **)_MALLOC(sizeof(struct bpf_d *) * new_dtab_size, M_DEVBUF, M_WAIT);
 453                 if (new_dtab == 0) {
 454                         printf("bpf_make_dev_t: malloc bpf_dtab failed\n");
 455                         goto done;
 456                 }
 457                 if (bpf_dtab) {
 458                         bcopy(bpf_dtab, new_dtab,
 459                                   sizeof(struct bpf_d *) * bpf_dtab_size);
 460                 }
 461                 bzero(new_dtab + bpf_dtab_size,
 462                           sizeof(struct bpf_d *) * NBPFILTER);
 463                 old_dtab = bpf_dtab;
 464                 bpf_dtab = new_dtab;
 465                 bpf_dtab_size = new_dtab_size;
 466                 if (old_dtab != NULL)
 467                         _FREE(old_dtab, M_DEVBUF);
 468         }
 469         i = nbpfilter++;
 470         (void) devfs_make_node(makedev(maj, i),
 471                                 DEVFS_CHAR, UID_ROOT, GID_WHEEL, 0600,
 472                                 "bpf%d", i);
 473 done:
 474         bpf_growing = 0;
 475         wakeup((caddr_t)&bpf_growing);
 476 }
 477
 478 #endif
 479
 480 /*
 481  * Attach file to the bpf interface, i.e. make d listen on bp.
 482  */
 483 static errno_t
 484 bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
 485 {
 486         int first = bp->bif_dlist == NULL;
 487         int     error = 0;
 488
 489         /*
 490          * Point d at bp, and add d to the interface's list of listeners.
 491          * Finally, point the driver's bpf cookie at the interface so
 492          * it will divert packets to bpf.
 493          */
 494         d->bd_bif = bp;
 495         d->bd_next = bp->bif_dlist;
 496         bp->bif_dlist = d;
 497
 498         /*
 499          * Take a reference on the device even if an error is returned
 500          * because we keep the device in the interface's list of listeners
 501          */
 502         bpf_acquire_d(d);
 503
 504         if (first) {
 505                 /* Find the default bpf entry for this ifp */
 506                 if (bp->bif_ifp->if_bpf == NULL) {
 507                         struct bpf_if   *tmp, *primary = NULL;
 508
 509                         for (tmp = bpf_iflist; tmp; tmp = tmp->bif_next) {
 510                                 if (tmp->bif_ifp == bp->bif_ifp) {
 511                                         primary = tmp;
 512                                         break;
 513                                 }
 514                         }
 515                         bp->bif_ifp->if_bpf = primary;
 516                 }
 517                 /* Only call dlil_set_bpf_tap for primary dlt */
 518                 if (bp->bif_ifp->if_bpf == bp)
 519                         dlil_set_bpf_tap(bp->bif_ifp, BPF_TAP_INPUT_OUTPUT, bpf_tap_callback);
 520
 521                 if (bp->bif_tap != NULL)
 522                         error = bp->bif_tap(bp->bif_ifp, bp->bif_dlt, BPF_TAP_INPUT_OUTPUT);
 523         }
 524
 525         /*
 526          * Reset the detach flags in case we previously detached an interface
 527          */
 528         d->bd_flags &= ~(BPF_DETACHING | BPF_DETACHED);
 529
 530         if (bp->bif_dlt == DLT_PKTAP) {
 531                 d->bd_flags |= BPF_FINALIZE_PKTAP;
 532         } else {
 533                 d->bd_flags &= ~BPF_FINALIZE_PKTAP;
 534         }
 535         return error;
 536 }
 537
 538 /*
 539  * Detach a file from its interface.
 540  *
 541  * Return 1 if was closed by some thread, 0 otherwise
 542  */
 543 static int
 544 bpf_detachd(struct bpf_d *d, int closing)
 545 {
 546         struct bpf_d **p;
 547         struct bpf_if *bp;
 548         struct ifnet  *ifp;
 549
 550         /*
 551          * Some other thread already detached
 552          */
 553         if ((d->bd_flags & (BPF_DETACHED | BPF_DETACHING)) != 0)
 554                 goto done;
 555         /*
 556          * This thread is doing the detach
 557          */
 558         d->bd_flags |= BPF_DETACHING;
 559
 560         ifp = d->bd_bif->bif_ifp;
 561         bp = d->bd_bif;
 562
 563         if (bpf_debug != 0)
 564                 printf("%s: %llx %s%s\n",
 565                     __func__, (uint64_t)VM_KERNEL_ADDRPERM(d),
 566                     if_name(ifp), closing ? " closing" : "");
 567
 568         /* Remove d from the interface's descriptor list. */
 569         p = &bp->bif_dlist;
 570         while (*p != d) {
 571                 p = &(*p)->bd_next;
 572                 if (*p == 0)
 573                         panic("bpf_detachd: descriptor not in list");
 574         }
 575         *p = (*p)->bd_next;
 576         if (bp->bif_dlist == 0) {
 577                 /*
 578                  * Let the driver know that there are no more listeners.
 579                  */
 580                 /* Only call dlil_set_bpf_tap for primary dlt */
 581                 if (bp->bif_ifp->if_bpf == bp)
 582                         dlil_set_bpf_tap(ifp, BPF_TAP_DISABLE, NULL);
 583                 if (bp->bif_tap)
 584                         bp->bif_tap(ifp, bp->bif_dlt, BPF_TAP_DISABLE);
 585
 586                 for (bp = bpf_iflist; bp; bp = bp->bif_next)
 587                         if (bp->bif_ifp == ifp && bp->bif_dlist != 0)
 588                                 break;
 589                 if (bp == NULL)
 590                         ifp->if_bpf = NULL;
 591         }
 592         d->bd_bif = NULL;
 593         /*
 594          * Check if this descriptor had requested promiscuous mode.
 595          * If so, turn it off.
 596          */
 597         if (d->bd_promisc) {
 598                 d->bd_promisc = 0;
 599                 lck_mtx_unlock(bpf_mlock);
 600                 if (ifnet_set_promiscuous(ifp, 0)) {
 601                         /*
 602                          * Something is really wrong if we were able to put
 603                          * the driver into promiscuous mode, but can't
 604                          * take it out.
 605                          * Most likely the network interface is gone.
 606                          */
 607                         printf("%s: ifnet_set_promiscuous failed\n", __func__);
 608                 }
 609                 lck_mtx_lock(bpf_mlock);
 610         }
 611
 612         /*
 613          * Wake up other thread that are waiting for this thread to finish
 614          * detaching
 615          */
 616         d->bd_flags &= ~BPF_DETACHING;
 617         d->bd_flags |= BPF_DETACHED;
 618         /*
 619          * Note that We've kept the reference because we may have dropped
 620          * the lock when turning off promiscuous mode
 621          */
 622         bpf_release_d(d);
 623
 624 done:
 625         /*
 626          * When closing makes sure no other thread refer to the bpf_d
 627          */
 628         if (bpf_debug != 0)
 629                 printf("%s: %llx done\n",
 630                     __func__, (uint64_t)VM_KERNEL_ADDRPERM(d));
 631         /*
 632          * Let the caller know the bpf_d is closed
 633          */
 634         if ((d->bd_flags & BPF_CLOSING))
 635                 return (1);
 636         else
 637                 return (0);
 638 }
 639
 640
 641 /*
 642  * Start asynchronous timer, if necessary.
 643  * Must be called with bpf_mlock held.
 644  */
 645 static void
 646 bpf_start_timer(struct bpf_d *d)
 647 {
 648         uint64_t deadline;
 649         struct timeval tv;
 650
 651         if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
 652                 tv.tv_sec = d->bd_rtout / hz;
 653                 tv.tv_usec = (d->bd_rtout % hz) * tick;
 654
 655                 clock_interval_to_deadline(
 656                     (uint64_t)tv.tv_sec * USEC_PER_SEC + tv.tv_usec,
 657                     NSEC_PER_USEC, &deadline);
 658                 /*
 659                  * The state is BPF_IDLE, so the timer hasn't
 660                  * been started yet, and hasn't gone off yet;
 661                  * there is no thread call scheduled, so this
 662                  * won't change the schedule.
 663                  *
 664                  * XXX - what if, by the time it gets entered,
 665                  * the deadline has already passed?
 666                  */
 667                 thread_call_enter_delayed(d->bd_thread_call, deadline);
 668                 d->bd_state = BPF_WAITING;
 669         }
 670 }
 671
 672 /*
 673  * Cancel asynchronous timer.
 674  * Must be called with bpf_mlock held.
 675  */
 676 static boolean_t
 677 bpf_stop_timer(struct bpf_d *d)
 678 {
 679         /*
 680          * If the timer has already gone off, this does nothing.
 681          * Our caller is expected to set d->bd_state to BPF_IDLE,
 682          * with the bpf_mlock, after we are called. bpf_timed_out()
 683          * also grabs bpf_mlock, so, if the timer has gone off and
 684          * bpf_timed_out() hasn't finished, it's waiting for the
 685          * lock; when this thread releases the lock, it will
 686          * find the state is BPF_IDLE, and just release the
 687          * lock and return.
 688          */
 689         return (thread_call_cancel(d->bd_thread_call));
 690 }
 691
 692 void
 693 bpf_acquire_d(struct bpf_d *d)
 694 {
 695         void *lr_saved =  __builtin_return_address(0);
 696
 697         LCK_MTX_ASSERT(bpf_mlock, LCK_MTX_ASSERT_OWNED);
 698
 699         d->bd_refcnt += 1;
 700
 701         d->bd_ref_lr[d->bd_next_ref_lr] = lr_saved;
 702         d->bd_next_ref_lr = (d->bd_next_ref_lr + 1) % BPF_REF_HIST;
 703 }
 704
 705 void
 706 bpf_release_d(struct bpf_d *d)
 707 {
 708         void *lr_saved =  __builtin_return_address(0);
 709
 710         LCK_MTX_ASSERT(bpf_mlock, LCK_MTX_ASSERT_OWNED);
 711
 712         if (d->bd_refcnt <= 0)
 713                 panic("%s: %p refcnt <= 0", __func__, d);
 714
 715         d->bd_refcnt -= 1;
 716
 717         d->bd_unref_lr[d->bd_next_unref_lr] = lr_saved;
 718         d->bd_next_unref_lr = (d->bd_next_unref_lr + 1) % BPF_REF_HIST;
 719
 720         if (d->bd_refcnt == 0) {
 721                 /* Assert the device is detached */
 722                 if ((d->bd_flags & BPF_DETACHED) == 0)
 723                         panic("%s: %p BPF_DETACHED not set", __func__, d);
 724
 725                 _FREE(d, M_DEVBUF);
 726         }
 727 }
 728
 729 /*
 730  * Open ethernet device.  Returns ENXIO for illegal minor device number,
 731  * EBUSY if file is open by another process.
 732  */
 733 /* ARGSUSED */
 734 int
 735 bpfopen(dev_t dev, int flags, __unused int fmt,
 736         __unused struct proc *p)
 737 {
 738         struct bpf_d *d;
 739
 740         lck_mtx_lock(bpf_mlock);
 741         if ((unsigned int) minor(dev) >= nbpfilter) {
 742                 lck_mtx_unlock(bpf_mlock);
 743                 return (ENXIO);
 744         }
 745         /*
 746          * New device nodes are created on demand when opening the last one.
 747          * The programming model is for processes to loop on the minor starting at 0
 748          * as long as EBUSY is returned. The loop stops when either the open succeeds or
 749          * an error other that EBUSY is returned. That means that bpf_make_dev_t() must
 750          * block all processes that are opening the last  node. If not all
 751          * processes are blocked, they could unexpectedly get ENOENT and abort their
 752          * opening loop.
 753          */
 754         if ((unsigned int) minor(dev) == (nbpfilter - 1))
 755                 bpf_make_dev_t(major(dev));
 756
 757         /*
 758          * Each minor can be opened by only one process.  If the requested
 759          * minor is in use, return EBUSY.
 760          *
 761          * Important: bpfopen() and bpfclose() have to check and set the status of a device
 762          * in the same lockin context otherwise the device may be leaked because the vnode use count
 763          * will be unpextectly greater than 1 when close() is called.
 764          */
 765         if (bpf_dtab[minor(dev)] == 0) {
 766                 bpf_dtab[minor(dev)] = (void *)1;       /* Mark opening */
 767         } else {
 768                 lck_mtx_unlock(bpf_mlock);
 769                 return (EBUSY);
 770         }
 771         d = (struct bpf_d *)_MALLOC(sizeof(struct bpf_d), M_DEVBUF,
 772             M_WAIT | M_ZERO);
 773         if (d == NULL) {
 774                 /* this really is a catastrophic failure */
 775                 printf("bpfopen: malloc bpf_d failed\n");
 776                 bpf_dtab[minor(dev)] = NULL;
 777                 lck_mtx_unlock(bpf_mlock);
 778                 return ENOMEM;
 779         }
 780
 781         /* Mark "in use" and do most initialization. */
 782         bpf_acquire_d(d);
 783         d->bd_bufsize = bpf_bufsize;
 784         d->bd_sig = SIGIO;
 785         d->bd_seesent = 1;
 786         d->bd_oflags = flags;
 787         d->bd_state = BPF_IDLE;
 788         d->bd_traffic_class = SO_TC_BE;
 789         d->bd_flags |= BPF_DETACHED;
 790         if (bpf_wantpktap)
 791                 d->bd_flags |= BPF_WANT_PKTAP;
 792         else
 793                 d->bd_flags &= ~BPF_WANT_PKTAP;
 794         d->bd_thread_call = thread_call_allocate(bpf_timed_out, d);
 795         if (d->bd_thread_call == NULL) {
 796                 printf("bpfopen: malloc thread call failed\n");
 797                 bpf_dtab[minor(dev)] = NULL;
 798                 bpf_release_d(d);
 799                 lck_mtx_unlock(bpf_mlock);
 800
 801                 return (ENOMEM);
 802         }
 803 #if CONFIG_MACF_NET
 804         mac_bpfdesc_label_init(d);
 805         mac_bpfdesc_label_associate(kauth_cred_get(), d);
 806 #endif
 807         bpf_dtab[minor(dev)] = d;                               /* Mark opened */
 808         lck_mtx_unlock(bpf_mlock);
 809
 810         return (0);
 811 }
 812
 813 /*
 814  * Close the descriptor by detaching it from its interface,
 815  * deallocating its buffers, and marking it free.
 816  */
 817 /* ARGSUSED */
 818 int
 819 bpfclose(dev_t dev, __unused int flags, __unused int fmt,
 820          __unused struct proc *p)
 821 {
 822         struct bpf_d *d;
 823
 824         /* Take BPF lock to ensure no other thread is using the device */
 825         lck_mtx_lock(bpf_mlock);
 826
 827         d = bpf_dtab[minor(dev)];
 828         if (d == 0 || d == (void *)1) {
 829                 lck_mtx_unlock(bpf_mlock);
 830                 return (ENXIO);
 831         }
 832
 833         /*
 834          * Other threads may call bpd_detachd() if we drop the bpf_mlock
 835          */
 836         d->bd_flags |= BPF_CLOSING;
 837
 838         if (bpf_debug != 0)
 839                 printf("%s: %llx\n",
 840                     __func__, (uint64_t)VM_KERNEL_ADDRPERM(d));
 841
 842         bpf_dtab[minor(dev)] = (void *)1;               /* Mark closing */
 843
 844         /*
 845          * Deal with any in-progress timeouts.
 846          */
 847         switch (d->bd_state) {
 848                 case BPF_IDLE:
 849                         /*
 850                          * Not waiting for a timeout, and no timeout happened.
 851                          */
 852                         break;
 853
 854                 case BPF_WAITING:
 855                         /*
 856                          * Waiting for a timeout.
 857                          * Cancel any timer that has yet to go off,
 858                          * and mark the state as "closing".
 859                          * Then drop the lock to allow any timers that
 860                          * *have* gone off to run to completion, and wait
 861                          * for them to finish.
 862                          */
 863                         if (!bpf_stop_timer(d)) {
 864                                 /*
 865                                  * There was no pending call, so the call must
 866                                  * have been in progress. Wait for the call to
 867                                  * complete; we have to drop the lock while
 868                                  * waiting. to let the in-progrss call complete
 869                                  */
 870                                 d->bd_state = BPF_DRAINING;
 871                                 while (d->bd_state == BPF_DRAINING)
 872                                         msleep((caddr_t)d, bpf_mlock, PRINET,
 873                                                         "bpfdraining", NULL);
 874                         }
 875                         d->bd_state = BPF_IDLE;
 876                         break;
 877
 878                 case BPF_TIMED_OUT:
 879                         /*
 880                          * Timer went off, and the timeout routine finished.
 881                          */
 882                         d->bd_state = BPF_IDLE;
 883                         break;
 884
 885                 case BPF_DRAINING:
 886                         /*
 887                          * Another thread is blocked on a close waiting for
 888                          * a timeout to finish.
 889                          * This "shouldn't happen", as the first thread to enter
 890                          * bpfclose() will set bpf_dtab[minor(dev)] to 1, and
 891                          * all subsequent threads should see that and fail with
 892                          * ENXIO.
 893                          */
 894                         panic("Two threads blocked in a BPF close");
 895                         break;
 896         }
 897
 898         if (d->bd_bif)
 899                 bpf_detachd(d, 1);
 900         selthreadclear(&d->bd_sel);
 901 #if CONFIG_MACF_NET
 902         mac_bpfdesc_label_destroy(d);
 903 #endif
 904         thread_call_free(d->bd_thread_call);
 905
 906         while (d->bd_hbuf_read)
 907                 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
 908
 909         bpf_freed(d);
 910
 911         /* Mark free in same context as bpfopen comes to check */
 912         bpf_dtab[minor(dev)] = NULL;                    /* Mark closed */
 913
 914         bpf_release_d(d);
 915
 916         lck_mtx_unlock(bpf_mlock);
 917
 918         return (0);
 919 }
 920
 921
 922 #define BPF_SLEEP bpf_sleep
 923
 924 static int
 925 bpf_sleep(struct bpf_d *d, int pri, const char *wmesg, int timo)
 926 {
 927         u_int64_t abstime = 0;
 928
 929         if(timo)
 930                 clock_interval_to_deadline(timo, NSEC_PER_SEC / hz, &abstime);
 931
 932         return msleep1((caddr_t)d, bpf_mlock, pri, wmesg, abstime);
 933 }
 934
 935 /*
 936  * Rotate the packet buffers in descriptor d.  Move the store buffer
 937  * into the hold slot, and the free buffer into the store slot.
 938  * Zero the length of the new store buffer.
 939  */
 940 #define ROTATE_BUFFERS(d) \
 941         if (d->bd_hbuf_read) \
 942                 panic("rotating bpf buffers during read"); \
 943         (d)->bd_hbuf = (d)->bd_sbuf; \
 944         (d)->bd_hlen = (d)->bd_slen; \
 945         (d)->bd_hcnt = (d)->bd_scnt; \
 946         (d)->bd_sbuf = (d)->bd_fbuf; \
 947         (d)->bd_slen = 0; \
 948         (d)->bd_scnt = 0; \
 949         (d)->bd_fbuf = NULL;
 950 /*
 951  *  bpfread - read next chunk of packets from buffers
 952  */
 953 int
 954 bpfread(dev_t dev, struct uio *uio, int ioflag)
 955 {
 956         struct bpf_d *d;
 957         caddr_t hbuf;
 958         int timed_out, hbuf_len;
 959         int error;
 960         int flags;
 961
 962         lck_mtx_lock(bpf_mlock);
 963
 964         d = bpf_dtab[minor(dev)];
 965         if (d == 0 || d == (void *)1 || (d->bd_flags & BPF_CLOSING) != 0) {
 966                 lck_mtx_unlock(bpf_mlock);
 967                 return (ENXIO);
 968         }
 969
 970         bpf_acquire_d(d);
 971
 972         /*
 973          * Restrict application to use a buffer the same size as
 974          * as kernel buffers.
 975          */
 976         if (uio_resid(uio) != d->bd_bufsize) {
 977                 bpf_release_d(d);
 978                 lck_mtx_unlock(bpf_mlock);
 979                 return (EINVAL);
 980         }
 981
 982         if (d->bd_state == BPF_WAITING)
 983                 bpf_stop_timer(d);
 984
 985         timed_out = (d->bd_state == BPF_TIMED_OUT);
 986         d->bd_state = BPF_IDLE;
 987
 988         while (d->bd_hbuf_read)
 989                 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
 990
 991         if ((d->bd_flags & BPF_CLOSING) != 0) {
 992                 bpf_release_d(d);
 993                 lck_mtx_unlock(bpf_mlock);
 994                 return (ENXIO);
 995         }
 996         /*
 997          * If the hold buffer is empty, then do a timed sleep, which
 998          * ends when the timeout expires or when enough packets
 999          * have arrived to fill the store buffer.
1000          */
1001         while (d->bd_hbuf == 0) {
1002                 if ((d->bd_immediate || timed_out || (ioflag & IO_NDELAY))
1003                         && d->bd_slen != 0) {
1004                         /*
1005                          * We're in immediate mode, or are reading
1006                          * in non-blocking mode, or a timer was
1007                          * started before the read (e.g., by select()
1008                          * or poll()) and has expired and a packet(s)
1009                          * either arrived since the previous
1010                          * read or arrived while we were asleep.
1011                          * Rotate the buffers and return what's here.
1012                          */
1013                         ROTATE_BUFFERS(d);
1014                         break;
1015                 }
1016
1017                 /*
1018                  * No data is available, check to see if the bpf device
1019                  * is still pointed at a real interface.  If not, return
1020                  * ENXIO so that the userland process knows to rebind
1021                  * it before using it again.
1022                  */
1023                 if (d->bd_bif == NULL) {
1024                         bpf_release_d(d);
1025                         lck_mtx_unlock(bpf_mlock);
1026                         return (ENXIO);
1027                 }
1028                 if (ioflag & IO_NDELAY) {
1029                         bpf_release_d(d);
1030                         lck_mtx_unlock(bpf_mlock);
1031                         return (EWOULDBLOCK);
1032                 }
1033                 error = BPF_SLEEP(d, PRINET|PCATCH, "bpf",
1034                                   d->bd_rtout);
1035                 /*
1036                  * Make sure device is still opened
1037                  */
1038                 if ((d->bd_flags & BPF_CLOSING) != 0) {
1039                         bpf_release_d(d);
1040                         lck_mtx_unlock(bpf_mlock);
1041                         return (ENXIO);
1042                 }
1043
1044                 while (d->bd_hbuf_read)
1045                         msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
1046
1047                 if ((d->bd_flags & BPF_CLOSING) != 0) {
1048                         bpf_release_d(d);
1049                         lck_mtx_unlock(bpf_mlock);
1050                         return (ENXIO);
1051                 }
1052
1053                 if (error == EINTR || error == ERESTART) {
1054                         if (d->bd_hbuf != NULL) {
1055                                 /*
1056                                  * Because we msleep, the hold buffer might
1057                                  * be filled when we wake up.  Avoid rotating
1058                                  * in this case.
1059                                  */
1060                                 break;
1061                         }
1062                         if (d->bd_slen != 0) {
1063                                 /*
1064                                  * Sometimes we may be interrupted often and
1065                                  * the sleep above will not timeout.
1066                                  * Regardless, we should rotate the buffers
1067                                  * if there's any new data pending and
1068                                  * return it.
1069                                  */
1070                                 ROTATE_BUFFERS(d);
1071                                 break;
1072                         }
1073                         bpf_release_d(d);
1074                         lck_mtx_unlock(bpf_mlock);
1075                         if (error == ERESTART) {
1076                                 printf("%s: %llx ERESTART to EINTR\n",
1077                                     __func__, (uint64_t)VM_KERNEL_ADDRPERM(d));
1078                                 error = EINTR;
1079                         }
1080                         return (error);
1081                 }
1082                 if (error == EWOULDBLOCK) {
1083                         /*
1084                          * On a timeout, return what's in the buffer,
1085                          * which may be nothing.  If there is something
1086                          * in the store buffer, we can rotate the buffers.
1087                          */
1088                         if (d->bd_hbuf)
1089                                 /*
1090                                  * We filled up the buffer in between
1091                                  * getting the timeout and arriving
1092                                  * here, so we don't need to rotate.
1093                                  */
1094                                 break;
1095
1096                         if (d->bd_slen == 0) {
1097                                 bpf_release_d(d);
1098                                 lck_mtx_unlock(bpf_mlock);
1099                                 return (0);
1100                         }
1101                         ROTATE_BUFFERS(d);
1102                         break;
1103                 }
1104         }
1105         /*
1106          * At this point, we know we have something in the hold slot.
1107          */
1108
1109         /*
1110          * Set the hold buffer read. So we do not
1111          * rotate the buffers until the hold buffer
1112          * read is complete. Also to avoid issues resulting
1113          * from page faults during disk sleep (<rdar://problem/13436396>).
1114          */
1115         d->bd_hbuf_read = 1;
1116         hbuf = d->bd_hbuf;
1117         hbuf_len = d->bd_hlen;
1118         flags = d->bd_flags;
1119         lck_mtx_unlock(bpf_mlock);
1120
1121 #ifdef __APPLE__
1122         /*
1123          * Before we move data to userland, we fill out the extended
1124          * header fields.
1125          */
1126         if (flags & BPF_EXTENDED_HDR) {
1127                 char *p;
1128
1129                 p = hbuf;
1130                 while (p < hbuf + hbuf_len) {
1131                         struct bpf_hdr_ext *ehp;
1132                         uint32_t flowid;
1133                         struct so_procinfo soprocinfo;
1134                         int found = 0;
1135
1136                         ehp = (struct bpf_hdr_ext *)(void *)p;
1137                         if ((flowid = ehp->bh_flowid)) {
1138                                 if (ehp->bh_proto == IPPROTO_TCP)
1139                                         found = inp_findinpcb_procinfo(&tcbinfo,
1140                                             flowid, &soprocinfo);
1141                                 else if (ehp->bh_proto == IPPROTO_UDP)
1142                                         found = inp_findinpcb_procinfo(&udbinfo,
1143                                             flowid, &soprocinfo);
1144                                 if (found == 1) {
1145                                         ehp->bh_pid = soprocinfo.spi_pid;
1146                                         proc_name(ehp->bh_pid, ehp->bh_comm, MAXCOMLEN);
1147                                 }
1148                                 ehp->bh_flowid = 0;
1149                         }
1150
1151                         if (flags & BPF_FINALIZE_PKTAP) {
1152                                 struct pktap_header *pktaphdr;
1153
1154                                 pktaphdr = (struct pktap_header *)(void *)
1155                                     (p + BPF_WORDALIGN(ehp->bh_hdrlen));
1156
1157                                 if (pktaphdr->pth_flags & PTH_FLAG_DELAY_PKTAP)
1158                                         pktap_finalize_proc_info(pktaphdr);
1159
1160                                 if (pktaphdr->pth_flags & PTH_FLAG_TSTAMP) {
1161                                         ehp->bh_tstamp.tv_sec =
1162                                                 pktaphdr->pth_tstamp.tv_sec;
1163                                         ehp->bh_tstamp.tv_usec =
1164                                                 pktaphdr->pth_tstamp.tv_usec;
1165                                 }
1166                         }
1167                         p += BPF_WORDALIGN(ehp->bh_hdrlen + ehp->bh_caplen);
1168                 }
1169         } else if (flags & BPF_FINALIZE_PKTAP) {
1170                 char *p;
1171
1172                 p = hbuf;
1173                 while (p < hbuf + hbuf_len) {
1174                         struct bpf_hdr *hp;
1175                         struct pktap_header *pktaphdr;
1176
1177                         hp = (struct bpf_hdr *)(void *)p;
1178                         pktaphdr = (struct pktap_header *)(void *)
1179                             (p + BPF_WORDALIGN(hp->bh_hdrlen));
1180
1181                         if (pktaphdr->pth_flags & PTH_FLAG_DELAY_PKTAP)
1182                                 pktap_finalize_proc_info(pktaphdr);
1183
1184                         if (pktaphdr->pth_flags & PTH_FLAG_TSTAMP) {
1185                                 hp->bh_tstamp.tv_sec =
1186                                         pktaphdr->pth_tstamp.tv_sec;
1187                                 hp->bh_tstamp.tv_usec =
1188                                         pktaphdr->pth_tstamp.tv_usec;
1189                         }
1190
1191                         p += BPF_WORDALIGN(hp->bh_hdrlen + hp->bh_caplen);
1192                 }
1193         }
1194 #endif
1195
1196         /*
1197          * Move data from hold buffer into user space.
1198          * We know the entire buffer is transferred since
1199          * we checked above that the read buffer is bpf_bufsize bytes.
1200          */
1201         error = UIOMOVE(hbuf, hbuf_len, UIO_READ, uio);
1202
1203         lck_mtx_lock(bpf_mlock);
1204         /*
1205          * Make sure device is still opened
1206          */
1207         if ((d->bd_flags & BPF_CLOSING) != 0) {
1208                 bpf_release_d(d);
1209                 lck_mtx_unlock(bpf_mlock);
1210                 return (ENXIO);
1211         }
1212
1213         d->bd_hbuf_read = 0;
1214         d->bd_fbuf = d->bd_hbuf;
1215         d->bd_hbuf = NULL;
1216         d->bd_hlen = 0;
1217         d->bd_hcnt = 0;
1218         wakeup((caddr_t)d);
1219
1220         bpf_release_d(d);
1221         lck_mtx_unlock(bpf_mlock);
1222         return (error);
1223
1224 }
1225
1226
1227 /*
1228  * If there are processes sleeping on this descriptor, wake them up.
1229  */
1230 static void
1231 bpf_wakeup(struct bpf_d *d)
1232 {
1233         if (d->bd_state == BPF_WAITING) {
1234                 bpf_stop_timer(d);
1235                 d->bd_state = BPF_IDLE;
1236         }
1237         wakeup((caddr_t)d);
1238         if (d->bd_async && d->bd_sig && d->bd_sigio)
1239                 pgsigio(d->bd_sigio, d->bd_sig);
1240
1241         selwakeup(&d->bd_sel);
1242         if ((d->bd_flags & BPF_KNOTE))
1243                 KNOTE(&d->bd_sel.si_note, 1);
1244 }
1245
1246
1247 static void
1248 bpf_timed_out(void *arg, __unused void *dummy)
1249 {
1250         struct bpf_d *d = (struct bpf_d *)arg;
1251
1252         lck_mtx_lock(bpf_mlock);
1253         if (d->bd_state == BPF_WAITING) {
1254                 /*
1255                  * There's a select or kqueue waiting for this; if there's
1256                  * now stuff to read, wake it up.
1257                  */
1258                 d->bd_state = BPF_TIMED_OUT;
1259                 if (d->bd_slen != 0)
1260                         bpf_wakeup(d);
1261         } else if (d->bd_state == BPF_DRAINING) {
1262                 /*
1263                  * A close is waiting for this to finish.
1264                  * Mark it as finished, and wake the close up.
1265                  */
1266                 d->bd_state = BPF_IDLE;
1267                 bpf_wakeup(d);
1268         }
1269         lck_mtx_unlock(bpf_mlock);
1270 }
1271
1272
1273
1274
1275
1276 /* keep in sync with bpf_movein above: */
1277 #define MAX_DATALINK_HDR_LEN    (sizeof(struct firewire_header))
1278
1279 int
1280 bpfwrite(dev_t dev, struct uio *uio, __unused int ioflag)
1281 {
1282         struct bpf_d *d;
1283         struct ifnet *ifp;
1284         struct mbuf *m = NULL;
1285         int error;
1286         char              dst_buf[SOCKADDR_HDR_LEN + MAX_DATALINK_HDR_LEN];
1287         int datlen = 0;
1288         int bif_dlt;
1289         int bd_hdrcmplt;
1290
1291         lck_mtx_lock(bpf_mlock);
1292
1293         d = bpf_dtab[minor(dev)];
1294         if (d == 0 || d == (void *)1 || (d->bd_flags & BPF_CLOSING) != 0) {
1295                 lck_mtx_unlock(bpf_mlock);
1296                 return (ENXIO);
1297         }
1298
1299         bpf_acquire_d(d);
1300
1301         if (d->bd_bif == 0) {
1302                 bpf_release_d(d);
1303                 lck_mtx_unlock(bpf_mlock);
1304                 return (ENXIO);
1305         }
1306
1307         ifp = d->bd_bif->bif_ifp;
1308
1309         if ((ifp->if_flags & IFF_UP) == 0) {
1310                 bpf_release_d(d);
1311                 lck_mtx_unlock(bpf_mlock);
1312                 return (ENETDOWN);
1313         }
1314         if (uio_resid(uio) == 0) {
1315                 bpf_release_d(d);
1316                 lck_mtx_unlock(bpf_mlock);
1317                 return (0);
1318         }
1319         ((struct sockaddr *)dst_buf)->sa_len = sizeof(dst_buf);
1320
1321         /*
1322          * fix for PR-6849527
1323          * geting variables onto stack before dropping lock for bpf_movein()
1324          */
1325         bif_dlt = (int)d->bd_bif->bif_dlt;
1326         bd_hdrcmplt  = d->bd_hdrcmplt;
1327
1328         /* bpf_movein allocating mbufs; drop lock */
1329         lck_mtx_unlock(bpf_mlock);
1330
1331         error = bpf_movein(uio, bif_dlt, &m,
1332         bd_hdrcmplt ? NULL : (struct sockaddr *)dst_buf,
1333         &datlen);
1334
1335         /* take the lock again */
1336         lck_mtx_lock(bpf_mlock);
1337         if (error) {
1338                 bpf_release_d(d);
1339                 lck_mtx_unlock(bpf_mlock);
1340                 return (error);
1341         }
1342
1343         /* verify the device is still open */
1344         if ((d->bd_flags & BPF_CLOSING) != 0) {
1345                 bpf_release_d(d);
1346                 lck_mtx_unlock(bpf_mlock);
1347                 m_freem(m);
1348                 return (ENXIO);
1349         }
1350
1351         if (d->bd_bif == NULL) {
1352                 bpf_release_d(d);
1353                 lck_mtx_unlock(bpf_mlock);
1354                 m_free(m);
1355                 return (ENXIO);
1356         }
1357
1358         if ((unsigned)datlen > ifp->if_mtu) {
1359                 bpf_release_d(d);
1360                 lck_mtx_unlock(bpf_mlock);
1361                 m_freem(m);
1362                 return (EMSGSIZE);
1363         }
1364
1365
1366 #if CONFIG_MACF_NET
1367         mac_mbuf_label_associate_bpfdesc(d, m);
1368 #endif
1369
1370         bpf_set_packet_service_class(m, d->bd_traffic_class);
1371
1372         lck_mtx_unlock(bpf_mlock);
1373
1374         /*
1375          * The driver frees the mbuf.
1376          */
1377         if (d->bd_hdrcmplt) {
1378                 if (d->bd_bif->bif_send)
1379                         error = d->bd_bif->bif_send(ifp, d->bd_bif->bif_dlt, m);
1380                 else
1381                         error = dlil_output(ifp, 0, m, NULL, NULL, 1, NULL);
1382         } else {
1383                 error = dlil_output(ifp, PF_INET, m, NULL,
1384                     (struct sockaddr *)dst_buf, 0, NULL);
1385         }
1386
1387         lck_mtx_lock(bpf_mlock);
1388         bpf_release_d(d);
1389         lck_mtx_unlock(bpf_mlock);
1390
1391         return (error);
1392 }
1393
1394 /*
1395  * Reset a descriptor by flushing its packet buffer and clearing the
1396  * receive and drop counts.
1397  */
1398 static void
1399 reset_d(struct bpf_d *d)
1400 {
1401         if (d->bd_hbuf_read)
1402                 panic("resetting buffers during read");
1403
1404         if (d->bd_hbuf) {
1405                 /* Free the hold buffer. */
1406                 d->bd_fbuf = d->bd_hbuf;
1407                 d->bd_hbuf = NULL;
1408         }
1409         d->bd_slen = 0;
1410         d->bd_hlen = 0;
1411         d->bd_scnt = 0;
1412         d->bd_hcnt = 0;
1413         d->bd_rcount = 0;
1414         d->bd_dcount = 0;
1415 }
1416
1417 /*
1418  *  FIONREAD            Check for read packet available.
1419  *  SIOCGIFADDR         Get interface address - convenient hook to driver.
1420  *  BIOCGBLEN           Get buffer len [for read()].
1421  *  BIOCSETF            Set ethernet read filter.
1422  *  BIOCFLUSH           Flush read packet buffer.
1423  *  BIOCPROMISC         Put interface into promiscuous mode.
1424  *  BIOCGDLT            Get link layer type.
1425  *  BIOCGETIF           Get interface name.
1426  *  BIOCSETIF           Set interface.
1427  *  BIOCSRTIMEOUT       Set read timeout.
1428  *  BIOCGRTIMEOUT       Get read timeout.
1429  *  BIOCGSTATS          Get packet stats.
1430  *  BIOCIMMEDIATE       Set immediate mode.
1431  *  BIOCVERSION         Get filter language version.
1432  *  BIOCGHDRCMPLT       Get "header already complete" flag
1433  *  BIOCSHDRCMPLT       Set "header already complete" flag
1434  *  BIOCGSEESENT        Get "see packets sent" flag
1435  *  BIOCSSEESENT        Set "see packets sent" flag
1436  *  BIOCSETTC           Set traffic class.
1437  *  BIOCGETTC           Get traffic class.
1438  *  BIOCSEXTHDR         Set "extended header" flag
1439  *  BIOCSHEADDROP       Drop head of the buffer if user is not reading
1440  *  BIOCGHEADDROP       Get "head-drop" flag
1441  */
1442 /* ARGSUSED */
1443 int
1444 bpfioctl(dev_t dev, u_long cmd, caddr_t addr, __unused int flags,
1445     struct proc *p)
1446 {
1447         struct bpf_d *d;
1448         int error = 0;
1449         u_int int_arg;
1450         struct ifreq ifr;
1451
1452         lck_mtx_lock(bpf_mlock);
1453
1454         d = bpf_dtab[minor(dev)];
1455         if (d == 0 || d == (void *)1 || (d->bd_flags & BPF_CLOSING) != 0) {
1456                 lck_mtx_unlock(bpf_mlock);
1457                 return (ENXIO);
1458         }
1459
1460         bpf_acquire_d(d);
1461
1462         if (d->bd_state == BPF_WAITING)
1463                 bpf_stop_timer(d);
1464         d->bd_state = BPF_IDLE;
1465
1466         switch (cmd) {
1467
1468         default:
1469                 error = EINVAL;
1470                 break;
1471
1472         /*
1473          * Check for read packet available.
1474          */
1475         case FIONREAD:                  /* int */
1476                 {
1477                         int n;
1478
1479                         n = d->bd_slen;
1480                         if (d->bd_hbuf && d->bd_hbuf_read == 0)
1481                                 n += d->bd_hlen;
1482
1483                         bcopy(&n, addr, sizeof (n));
1484                         break;
1485                 }
1486
1487         case SIOCGIFADDR:               /* struct ifreq */
1488                 {
1489                         struct ifnet *ifp;
1490
1491                         if (d->bd_bif == 0)
1492                                 error = EINVAL;
1493                         else {
1494                                 ifp = d->bd_bif->bif_ifp;
1495                                 error = ifnet_ioctl(ifp, 0, cmd, addr);
1496                         }
1497                         break;
1498                 }
1499
1500         /*
1501          * Get buffer len [for read()].
1502          */
1503         case BIOCGBLEN:                 /* u_int */
1504                 bcopy(&d->bd_bufsize, addr, sizeof (u_int));
1505                 break;
1506
1507         /*
1508          * Set buffer length.
1509          */
1510         case BIOCSBLEN:                 /* u_int */
1511                 if (d->bd_bif != 0)
1512                         error = EINVAL;
1513                 else {
1514                         u_int size;
1515
1516                         bcopy(addr, &size, sizeof (size));
1517
1518                         /*
1519                          * Allow larger buffer in head drop mode with the
1520                          * assumption the capture is in standby mode to
1521                          * keep a cache of recent traffic
1522                          */
1523                         if (d->bd_headdrop != 0 && size > 2 * bpf_maxbufsize)
1524                                 size = 2 * bpf_maxbufsize;
1525                         else if (size > bpf_maxbufsize)
1526                                 size = bpf_maxbufsize;
1527                         else if (size < BPF_MINBUFSIZE)
1528                                 size = BPF_MINBUFSIZE;
1529                         bcopy(&size, addr, sizeof (size));
1530                         d->bd_bufsize = size;
1531                 }
1532                 break;
1533
1534         /*
1535          * Set link layer read filter.
1536          */
1537         case BIOCSETF32:
1538         case BIOCSETFNR32: {            /* struct bpf_program32 */
1539                 struct bpf_program32 prg32;
1540
1541                 bcopy(addr, &prg32, sizeof (prg32));
1542                 error = bpf_setf(d, prg32.bf_len,
1543                     CAST_USER_ADDR_T(prg32.bf_insns), cmd);
1544                 break;
1545         }
1546
1547         case BIOCSETF64:
1548         case BIOCSETFNR64: {            /* struct bpf_program64 */
1549                 struct bpf_program64 prg64;
1550
1551                 bcopy(addr, &prg64, sizeof (prg64));
1552                 error = bpf_setf(d, prg64.bf_len, prg64.bf_insns, cmd);
1553                 break;
1554         }
1555
1556         /*
1557          * Flush read packet buffer.
1558          */
1559         case BIOCFLUSH:
1560                 while (d->bd_hbuf_read) {
1561                         msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
1562                 }
1563                 if ((d->bd_flags & BPF_CLOSING) != 0) {
1564                         error = ENXIO;
1565                         break;
1566                 }
1567                 reset_d(d);
1568                 break;
1569
1570         /*
1571          * Put interface into promiscuous mode.
1572          */
1573         case BIOCPROMISC:
1574                 if (d->bd_bif == 0) {
1575                         /*
1576                          * No interface attached yet.
1577                          */
1578                         error = EINVAL;
1579                         break;
1580                 }
1581                 if (d->bd_promisc == 0) {
1582                         lck_mtx_unlock(bpf_mlock);
1583                         error = ifnet_set_promiscuous(d->bd_bif->bif_ifp, 1);
1584                         lck_mtx_lock(bpf_mlock);
1585                         if (error == 0)
1586                                 d->bd_promisc = 1;
1587                 }
1588                 break;
1589
1590         /*
1591          * Get device parameters.
1592          */
1593         case BIOCGDLT:                  /* u_int */
1594                 if (d->bd_bif == 0)
1595                         error = EINVAL;
1596                 else
1597                         bcopy(&d->bd_bif->bif_dlt, addr, sizeof (u_int));
1598                 break;
1599
1600         /*
1601          * Get a list of supported data link types.
1602          */
1603         case BIOCGDLTLIST:              /* struct bpf_dltlist */
1604                 if (d->bd_bif == NULL) {
1605                         error = EINVAL;
1606                 } else {
1607                         error = bpf_getdltlist(d, addr, p);
1608                 }
1609                 break;
1610
1611         /*
1612          * Set data link type.
1613          */
1614         case BIOCSDLT:                  /* u_int */
1615                 if (d->bd_bif == NULL) {
1616                         error = EINVAL;
1617                 } else {
1618                         u_int dlt;
1619
1620                         bcopy(addr, &dlt, sizeof (dlt));
1621
1622                         if (dlt == DLT_PKTAP && !(d->bd_flags & BPF_WANT_PKTAP)) {
1623                                 printf("BIOCSDLT downgrade DLT_PKTAP to DLT_RAW\n");
1624                                 dlt = DLT_RAW;
1625                         }
1626                         error = bpf_setdlt(d, dlt);
1627                 }
1628                 break;
1629
1630         /*
1631          * Get interface name.
1632          */
1633         case BIOCGETIF:                 /* struct ifreq */
1634                 if (d->bd_bif == 0)
1635                         error = EINVAL;
1636                 else {
1637                         struct ifnet *const ifp = d->bd_bif->bif_ifp;
1638
1639                         snprintf(((struct ifreq *)(void *)addr)->ifr_name,
1640                             sizeof (ifr.ifr_name), "%s", if_name(ifp));
1641                 }
1642                 break;
1643
1644         /*
1645          * Set interface.
1646          */
1647         case BIOCSETIF: {               /* struct ifreq */
1648                 ifnet_t ifp;
1649
1650                 bcopy(addr, &ifr, sizeof (ifr));
1651                 ifr.ifr_name[IFNAMSIZ - 1] = '\0';
1652                 ifp = ifunit(ifr.ifr_name);
1653                 if (ifp == NULL)
1654                         error = ENXIO;
1655                 else
1656                         error = bpf_setif(d, ifp);
1657                 break;
1658         }
1659
1660         /*
1661          * Set read timeout.
1662          */
1663         case BIOCSRTIMEOUT32: {         /* struct user32_timeval */
1664                 struct user32_timeval _tv;
1665                 struct timeval tv;
1666
1667                 bcopy(addr, &_tv, sizeof (_tv));
1668                 tv.tv_sec  = _tv.tv_sec;
1669                 tv.tv_usec = _tv.tv_usec;
1670
1671                 /*
1672                  * Subtract 1 tick from tvtohz() since this isn't
1673                  * a one-shot timer.
1674                  */
1675                 if ((error = itimerfix(&tv)) == 0)
1676                         d->bd_rtout = tvtohz(&tv) - 1;
1677                 break;
1678         }
1679
1680         case BIOCSRTIMEOUT64: {         /* struct user64_timeval */
1681                 struct user64_timeval _tv;
1682                 struct timeval tv;
1683
1684                 bcopy(addr, &_tv, sizeof (_tv));
1685                 tv.tv_sec  = _tv.tv_sec;
1686                 tv.tv_usec = _tv.tv_usec;
1687
1688                 /*
1689                  * Subtract 1 tick from tvtohz() since this isn't
1690                  * a one-shot timer.
1691                  */
1692                 if ((error = itimerfix(&tv)) == 0)
1693                         d->bd_rtout = tvtohz(&tv) - 1;
1694                 break;
1695         }
1696
1697         /*
1698          * Get read timeout.
1699          */
1700         case BIOCGRTIMEOUT32: {         /* struct user32_timeval */
1701                 struct user32_timeval tv;
1702
1703                 bzero(&tv, sizeof (tv));
1704                 tv.tv_sec = d->bd_rtout / hz;
1705                 tv.tv_usec = (d->bd_rtout % hz) * tick;
1706                 bcopy(&tv, addr, sizeof (tv));
1707                 break;
1708         }
1709
1710         case BIOCGRTIMEOUT64: {         /* struct user64_timeval */
1711                 struct user64_timeval tv;
1712
1713                 bzero(&tv, sizeof (tv));
1714                 tv.tv_sec = d->bd_rtout / hz;
1715                 tv.tv_usec = (d->bd_rtout % hz) * tick;
1716                 bcopy(&tv, addr, sizeof (tv));
1717                 break;
1718         }
1719
1720         /*
1721          * Get packet stats.
1722          */
1723         case BIOCGSTATS: {              /* struct bpf_stat */
1724                 struct bpf_stat bs;
1725
1726                 bzero(&bs, sizeof (bs));
1727                 bs.bs_recv = d->bd_rcount;
1728                 bs.bs_drop = d->bd_dcount;
1729                 bcopy(&bs, addr, sizeof (bs));
1730                 break;
1731         }
1732
1733         /*
1734          * Set immediate mode.
1735          */
1736         case BIOCIMMEDIATE:             /* u_int */
1737                 d->bd_immediate = *(u_int *)(void *)addr;
1738                 break;
1739
1740         case BIOCVERSION: {             /* struct bpf_version */
1741                 struct bpf_version bv;
1742
1743                 bzero(&bv, sizeof (bv));
1744                 bv.bv_major = BPF_MAJOR_VERSION;
1745                 bv.bv_minor = BPF_MINOR_VERSION;
1746                 bcopy(&bv, addr, sizeof (bv));
1747                 break;
1748         }
1749
1750         /*
1751          * Get "header already complete" flag
1752          */
1753         case BIOCGHDRCMPLT:             /* u_int */
1754                 bcopy(&d->bd_hdrcmplt, addr, sizeof (u_int));
1755                 break;
1756
1757         /*
1758          * Set "header already complete" flag
1759          */
1760         case BIOCSHDRCMPLT:             /* u_int */
1761                 bcopy(addr, &int_arg, sizeof (int_arg));
1762                 d->bd_hdrcmplt = int_arg ? 1 : 0;
1763                 break;
1764
1765         /*
1766          * Get "see sent packets" flag
1767          */
1768         case BIOCGSEESENT:              /* u_int */
1769                 bcopy(&d->bd_seesent, addr, sizeof (u_int));
1770                 break;
1771
1772         /*
1773          * Set "see sent packets" flag
1774          */
1775         case BIOCSSEESENT:              /* u_int */
1776                 bcopy(addr, &d->bd_seesent, sizeof (u_int));
1777                 break;
1778
1779         /*
1780          * Set traffic service class
1781          */
1782         case BIOCSETTC: {               /* int */
1783                 int tc;
1784
1785                 bcopy(addr, &tc, sizeof (int));
1786                 error = bpf_set_traffic_class(d, tc);
1787                 break;
1788         }
1789
1790         /*
1791          * Get traffic service class
1792          */
1793         case BIOCGETTC:                 /* int */
1794                 bcopy(&d->bd_traffic_class, addr, sizeof (int));
1795                 break;
1796
1797         case FIONBIO:           /* Non-blocking I/O; int */
1798                 break;
1799
1800         case FIOASYNC:          /* Send signal on receive packets; int */
1801                 bcopy(addr, &d->bd_async, sizeof (int));
1802                 break;
1803 #ifndef __APPLE__
1804         case FIOSETOWN:
1805                 error = fsetown(*(int *)addr, &d->bd_sigio);
1806                 break;
1807
1808         case FIOGETOWN:
1809                 *(int *)addr = fgetown(d->bd_sigio);
1810                 break;
1811
1812         /* This is deprecated, FIOSETOWN should be used instead. */
1813         case TIOCSPGRP:
1814                 error = fsetown(-(*(int *)addr), &d->bd_sigio);
1815                 break;
1816
1817         /* This is deprecated, FIOGETOWN should be used instead. */
1818         case TIOCGPGRP:
1819                 *(int *)addr = -fgetown(d->bd_sigio);
1820                 break;
1821 #endif
1822         case BIOCSRSIG: {       /* Set receive signal; u_int */
1823                 u_int sig;
1824
1825                 bcopy(addr, &sig, sizeof (u_int));
1826
1827                 if (sig >= NSIG)
1828                         error = EINVAL;
1829                 else
1830                         d->bd_sig = sig;
1831                 break;
1832         }
1833         case BIOCGRSIG:                 /* u_int */
1834                 bcopy(&d->bd_sig, addr, sizeof (u_int));
1835                 break;
1836 #ifdef __APPLE__
1837         case BIOCSEXTHDR:               /* u_int */
1838                 bcopy(addr, &int_arg, sizeof (int_arg));
1839                 if (int_arg)
1840                         d->bd_flags |= BPF_EXTENDED_HDR;
1841                 else
1842                         d->bd_flags &= ~BPF_EXTENDED_HDR;
1843                 break;
1844
1845         case BIOCGIFATTACHCOUNT: {              /* struct ifreq */
1846                 ifnet_t ifp;
1847                 struct bpf_if *bp;
1848
1849                 bcopy(addr, &ifr, sizeof (ifr));
1850                 ifr.ifr_name[IFNAMSIZ - 1] = '\0';
1851                 ifp = ifunit(ifr.ifr_name);
1852                 if (ifp == NULL) {
1853                         error = ENXIO;
1854                         break;
1855                 }
1856                 ifr.ifr_intval = 0;
1857                 for (bp = bpf_iflist; bp != 0; bp = bp->bif_next) {
1858                         struct bpf_d *bpf_d;
1859
1860                         if (bp->bif_ifp == NULL || bp->bif_ifp != ifp)
1861                                 continue;
1862                         for (bpf_d = bp->bif_dlist; bpf_d; bpf_d = bpf_d->bd_next) {
1863                                 ifr.ifr_intval += 1;
1864                         }
1865                 }
1866                 bcopy(&ifr, addr, sizeof (ifr));
1867                 break;
1868         }
1869         case BIOCGWANTPKTAP:                    /* u_int */
1870                 int_arg = d->bd_flags & BPF_WANT_PKTAP ? 1 : 0;
1871                 bcopy(&int_arg, addr, sizeof (int_arg));
1872                 break;
1873
1874         case BIOCSWANTPKTAP:                    /* u_int */
1875                 bcopy(addr, &int_arg, sizeof (int_arg));
1876                 if (int_arg)
1877                         d->bd_flags |= BPF_WANT_PKTAP;
1878                 else
1879                         d->bd_flags &= ~BPF_WANT_PKTAP;
1880                 break;
1881 #endif
1882
1883         case BIOCSHEADDROP:
1884                 bcopy(addr, &int_arg, sizeof (int_arg));
1885                 d->bd_headdrop = int_arg ? 1 : 0;
1886                 break;
1887
1888         case BIOCGHEADDROP:
1889                 bcopy(&d->bd_headdrop, addr, sizeof (int));
1890                 break;
1891         }
1892
1893         bpf_release_d(d);
1894         lck_mtx_unlock(bpf_mlock);
1895
1896         return (error);
1897 }
1898
1899 /*
1900  * Set d's packet filter program to fp.  If this file already has a filter,
1901  * free it and replace it.  Returns EINVAL for bogus requests.
1902  */
1903 static int
1904 bpf_setf(struct bpf_d *d, u_int bf_len, user_addr_t bf_insns,
1905     u_long cmd)
1906 {
1907         struct bpf_insn *fcode, *old;
1908         u_int flen, size;
1909
1910         while (d->bd_hbuf_read)
1911                 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
1912
1913         if ((d->bd_flags & BPF_CLOSING) != 0)
1914                 return (ENXIO);
1915
1916         old = d->bd_filter;
1917         if (bf_insns == USER_ADDR_NULL) {
1918                 if (bf_len != 0)
1919                         return (EINVAL);
1920                 d->bd_filter = NULL;
1921                 reset_d(d);
1922                 if (old != 0)
1923                         FREE((caddr_t)old, M_DEVBUF);
1924                 return (0);
1925         }
1926         flen = bf_len;
1927         if (flen > BPF_MAXINSNS)
1928                 return (EINVAL);
1929
1930         size = flen * sizeof(struct bpf_insn);
1931         fcode = (struct bpf_insn *) _MALLOC(size, M_DEVBUF, M_WAIT);
1932 #ifdef __APPLE__
1933         if (fcode == NULL)
1934                 return (ENOBUFS);
1935 #endif
1936         if (copyin(bf_insns, (caddr_t)fcode, size) == 0 &&
1937             bpf_validate(fcode, (int)flen)) {
1938                 d->bd_filter = fcode;
1939
1940                 if (cmd == BIOCSETF32 || cmd == BIOCSETF64)
1941                         reset_d(d);
1942
1943                 if (old != 0)
1944                         FREE((caddr_t)old, M_DEVBUF);
1945
1946                 return (0);
1947         }
1948         FREE((caddr_t)fcode, M_DEVBUF);
1949         return (EINVAL);
1950 }
1951
1952 /*
1953  * Detach a file from its current interface (if attached at all) and attach
1954  * to the interface indicated by the name stored in ifr.
1955  * Return an errno or 0.
1956  */
1957 static int
1958 bpf_setif(struct bpf_d *d, ifnet_t theywant)
1959 {
1960         struct bpf_if *bp;
1961         int error;
1962
1963         while (d->bd_hbuf_read)
1964                 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
1965
1966         if ((d->bd_flags & BPF_CLOSING) != 0)
1967                 return (ENXIO);
1968
1969         /*
1970          * Look through attached interfaces for the named one.
1971          */
1972         for (bp = bpf_iflist; bp != 0; bp = bp->bif_next) {
1973                 struct ifnet *ifp = bp->bif_ifp;
1974
1975                 if (ifp == 0 || ifp != theywant)
1976                         continue;
1977                 /*
1978                  * Do not use DLT_PKTAP, unless requested explicitly
1979                  */
1980                 if (bp->bif_dlt == DLT_PKTAP && !(d->bd_flags & BPF_WANT_PKTAP))
1981                         continue;
1982                 /*
1983                  * Skip the coprocessor interface
1984                  */
1985                 if (!intcoproc_unrestricted && IFNET_IS_INTCOPROC(ifp))
1986                         continue;
1987                 /*
1988                  * We found the requested interface.
1989                  * Allocate the packet buffers.
1990                  */
1991                 error = bpf_allocbufs(d);
1992                 if (error != 0)
1993                         return (error);
1994                 /*
1995                  * Detach if attached to something else.
1996                  */
1997                 if (bp != d->bd_bif) {
1998                         if (d->bd_bif != NULL) {
1999                                 if (bpf_detachd(d, 0) != 0)
2000                                         return (ENXIO);
2001                         }
2002                         if (bpf_attachd(d, bp) != 0)
2003                                 return (ENXIO);
2004                 }
2005                 reset_d(d);
2006                 return (0);
2007         }
2008         /* Not found. */
2009         return (ENXIO);
2010 }
2011
2012
2013
2014 /*
2015  * Get a list of available data link type of the interface.
2016  */
2017 static int
2018 bpf_getdltlist(struct bpf_d *d, caddr_t addr, struct proc *p)
2019 {
2020         u_int           n;
2021         int             error;
2022         struct ifnet    *ifp;
2023         struct bpf_if   *bp;
2024         user_addr_t     dlist;
2025         struct bpf_dltlist bfl;
2026
2027         bcopy(addr, &bfl, sizeof (bfl));
2028         if (proc_is64bit(p)) {
2029                 dlist = (user_addr_t)bfl.bfl_u.bflu_pad;
2030         } else {
2031                 dlist = CAST_USER_ADDR_T(bfl.bfl_u.bflu_list);
2032         }
2033
2034         ifp = d->bd_bif->bif_ifp;
2035         n = 0;
2036         error = 0;
2037
2038         for (bp = bpf_iflist; bp; bp = bp->bif_next) {
2039                 if (bp->bif_ifp != ifp)
2040                         continue;
2041                 /*
2042                  * Do not use DLT_PKTAP, unless requested explicitly
2043                  */
2044                 if (bp->bif_dlt == DLT_PKTAP && !(d->bd_flags & BPF_WANT_PKTAP))
2045                         continue;
2046                 if (dlist != USER_ADDR_NULL) {
2047                         if (n >= bfl.bfl_len) {
2048                                 return (ENOMEM);
2049                         }
2050                         error = copyout(&bp->bif_dlt, dlist,
2051                             sizeof (bp->bif_dlt));
2052                         if (error != 0)
2053                                 break;
2054                         dlist += sizeof (bp->bif_dlt);
2055                 }
2056                 n++;
2057         }
2058         bfl.bfl_len = n;
2059         bcopy(&bfl, addr, sizeof (bfl));
2060
2061         return (error);
2062 }
2063
2064 /*
2065  * Set the data link type of a BPF instance.
2066  */
2067 static int
2068 bpf_setdlt(struct bpf_d *d, uint32_t dlt)
2069 {
2070         int error, opromisc;
2071         struct ifnet *ifp;
2072         struct bpf_if *bp;
2073
2074         if (d->bd_bif->bif_dlt == dlt)
2075                 return (0);
2076
2077         while (d->bd_hbuf_read)
2078                 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
2079
2080         if ((d->bd_flags & BPF_CLOSING) != 0)
2081                 return (ENXIO);
2082
2083         ifp = d->bd_bif->bif_ifp;
2084         for (bp = bpf_iflist; bp; bp = bp->bif_next) {
2085                 if (bp->bif_ifp == ifp && bp->bif_dlt == dlt) {
2086                         /*
2087                          * Do not use DLT_PKTAP, unless requested explicitly
2088                          */
2089                         if (bp->bif_dlt == DLT_PKTAP && !(d->bd_flags & BPF_WANT_PKTAP)) {
2090                                 continue;
2091                         }
2092                         break;
2093                 }
2094         }
2095         if (bp != NULL) {
2096                 opromisc = d->bd_promisc;
2097                 if (bpf_detachd(d, 0) != 0)
2098                         return (ENXIO);
2099                 error = bpf_attachd(d, bp);
2100                 if (error) {
2101                         printf("bpf_setdlt: bpf_attachd %s%d failed (%d)\n",
2102                                 ifnet_name(bp->bif_ifp), ifnet_unit(bp->bif_ifp), error);
2103                         return error;
2104                 }
2105                 reset_d(d);
2106                 if (opromisc) {
2107                         lck_mtx_unlock(bpf_mlock);
2108                         error = ifnet_set_promiscuous(bp->bif_ifp, 1);
2109                         lck_mtx_lock(bpf_mlock);
2110                         if (error) {
2111                                 printf("%s: ifpromisc %s%d failed (%d)\n",
2112                                     __func__, ifnet_name(bp->bif_ifp),
2113                                     ifnet_unit(bp->bif_ifp), error);
2114                         } else {
2115                                 d->bd_promisc = 1;
2116                         }
2117                 }
2118         }
2119         return (bp == NULL ? EINVAL : 0);
2120 }
2121
2122 static int
2123 bpf_set_traffic_class(struct bpf_d *d, int tc)
2124 {
2125         int error = 0;
2126
2127         if (!SO_VALID_TC(tc))
2128                 error = EINVAL;
2129         else
2130                 d->bd_traffic_class = tc;
2131
2132         return (error);
2133 }
2134
2135 static void
2136 bpf_set_packet_service_class(struct mbuf *m, int tc)
2137 {
2138         if (!(m->m_flags & M_PKTHDR))
2139                 return;
2140
2141         VERIFY(SO_VALID_TC(tc));
2142         (void) m_set_service_class(m, so_tc2msc(tc));
2143 }
2144
2145 /*
2146  * Support for select()
2147  *
2148  * Return true iff the specific operation will not block indefinitely.
2149  * Otherwise, return false but make a note that a selwakeup() must be done.
2150  */
2151 int
2152 bpfselect(dev_t dev, int which, void * wql, struct proc *p)
2153 {
2154         struct bpf_d *d;
2155         int ret = 0;
2156
2157         lck_mtx_lock(bpf_mlock);
2158
2159         d = bpf_dtab[minor(dev)];
2160         if (d == 0 || d == (void *)1 || (d->bd_flags & BPF_CLOSING) != 0) {
2161                 lck_mtx_unlock(bpf_mlock);
2162                 return (ENXIO);
2163         }
2164
2165         bpf_acquire_d(d);
2166
2167         if (d->bd_bif == NULL) {
2168                 bpf_release_d(d);
2169                 lck_mtx_unlock(bpf_mlock);
2170                 return (ENXIO);
2171         }
2172
2173         while (d->bd_hbuf_read)
2174                 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
2175
2176         if ((d->bd_flags & BPF_CLOSING) != 0) {
2177                 bpf_release_d(d);
2178                 lck_mtx_unlock(bpf_mlock);
2179                 return (ENXIO);
2180         }
2181
2182         switch (which) {
2183                 case FREAD:
2184                         if (d->bd_hlen != 0 ||
2185                                         ((d->bd_immediate || d->bd_state == BPF_TIMED_OUT) &&
2186                                          d->bd_slen != 0))
2187                                 ret = 1; /* read has data to return */
2188                         else {
2189                                 /*
2190                                  * Read has no data to return.
2191                                  * Make the select wait, and start a timer if
2192                                  * necessary.
2193                                  */
2194                                 selrecord(p, &d->bd_sel, wql);
2195                                 bpf_start_timer(d);
2196                         }
2197                         break;
2198
2199                 case FWRITE:
2200                         ret = 1; /* can't determine whether a write would block */
2201                         break;
2202         }
2203
2204         bpf_release_d(d);
2205         lck_mtx_unlock(bpf_mlock);
2206
2207         return (ret);
2208 }
2209
2210
2211 /*
2212  * Support for kevent() system call.  Register EVFILT_READ filters and
2213  * reject all others.
2214  */
2215 int bpfkqfilter(dev_t dev, struct knote *kn);
2216 static void filt_bpfdetach(struct knote *);
2217 static int filt_bpfread(struct knote *, long);
2218 static int filt_bpftouch(struct knote *kn, struct kevent_internal_s *kev);
2219 static int filt_bpfprocess(struct knote *kn, struct filt_process_s *data, struct kevent_internal_s *kev);
2220
2221 SECURITY_READ_ONLY_EARLY(struct filterops) bpfread_filtops = {
2222         .f_isfd = 1,
2223         .f_detach = filt_bpfdetach,
2224         .f_event = filt_bpfread,
2225         .f_touch = filt_bpftouch,
2226         .f_process = filt_bpfprocess,
2227 };
2228
2229 static int
2230 filt_bpfread_common(struct knote *kn, struct bpf_d *d)
2231 {
2232         int ready = 0;
2233
2234         if (d->bd_immediate) {
2235                 /*
2236                  * If there's data in the hold buffer, it's the
2237                  * amount of data a read will return.
2238                  *
2239                  * If there's no data in the hold buffer, but
2240                  * there's data in the store buffer, a read will
2241                  * immediately rotate the store buffer to the
2242                  * hold buffer, the amount of data in the store
2243                  * buffer is the amount of data a read will
2244                  * return.
2245                  *
2246                  * If there's no data in either buffer, we're not
2247                  * ready to read.
2248                  */
2249                 kn->kn_data = ((d->bd_hlen == 0  || d->bd_hbuf_read)
2250                     ? d->bd_slen : d->bd_hlen);
2251                 int64_t lowwat = 1;
2252                 if (kn->kn_sfflags & NOTE_LOWAT)
2253                 {
2254                         if (kn->kn_sdata > d->bd_bufsize)
2255                                 lowwat = d->bd_bufsize;
2256                         else if (kn->kn_sdata > lowwat)
2257                                 lowwat = kn->kn_sdata;
2258                 }
2259                 ready = (kn->kn_data >= lowwat);
2260         } else {
2261                 /*
2262                  * If there's data in the hold buffer, it's the
2263                  * amount of data a read will return.
2264                  *
2265                  * If there's no data in the hold buffer, but
2266                  * there's data in the store buffer, if the
2267                  * timer has expired a read will immediately
2268                  * rotate the store buffer to the hold buffer,
2269                  * so the amount of data in the store buffer is
2270                  * the amount of data a read will return.
2271                  *
2272                  * If there's no data in either buffer, or there's
2273                  * no data in the hold buffer and the timer hasn't
2274                  * expired, we're not ready to read.
2275                  */
2276                 kn->kn_data = ((d->bd_hlen == 0 || d->bd_hbuf_read) && d->bd_state == BPF_TIMED_OUT ?
2277                                 d->bd_slen : d->bd_hlen);
2278                 ready = (kn->kn_data > 0);
2279         }
2280         if (!ready)
2281                 bpf_start_timer(d);
2282
2283         return (ready);
2284 }
2285
2286 int
2287 bpfkqfilter(dev_t dev, struct knote *kn)
2288 {
2289         struct bpf_d *d;
2290         int res;
2291
2292         /*
2293          * Is this device a bpf?
2294          */
2295         if (major(dev) != CDEV_MAJOR ||
2296             kn->kn_filter != EVFILT_READ) {
2297                 kn->kn_flags = EV_ERROR;
2298                 kn->kn_data = EINVAL;
2299                 return 0;
2300         }
2301
2302         lck_mtx_lock(bpf_mlock);
2303
2304         d = bpf_dtab[minor(dev)];
2305
2306         if (d == 0 ||
2307             d == (void *)1 ||
2308             d->bd_bif == NULL ||
2309             (d->bd_flags & BPF_CLOSING) != 0) {
2310                 lck_mtx_unlock(bpf_mlock);
2311                 kn->kn_flags = EV_ERROR;
2312                 kn->kn_data = ENXIO;
2313                 return 0;
2314         }
2315
2316         kn->kn_hook = d;
2317         kn->kn_filtid = EVFILTID_BPFREAD;
2318         KNOTE_ATTACH(&d->bd_sel.si_note, kn);
2319         d->bd_flags |= BPF_KNOTE;
2320
2321         /* capture the current state */
2322         res = filt_bpfread_common(kn, d);
2323
2324         lck_mtx_unlock(bpf_mlock);
2325
2326         return (res);
2327 }
2328
2329 static void
2330 filt_bpfdetach(struct knote *kn)
2331 {
2332         struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
2333
2334         lck_mtx_lock(bpf_mlock);
2335         if (d->bd_flags & BPF_KNOTE) {
2336                 KNOTE_DETACH(&d->bd_sel.si_note, kn);
2337                 d->bd_flags &= ~BPF_KNOTE;
2338         }
2339         lck_mtx_unlock(bpf_mlock);
2340 }
2341
2342 static int
2343 filt_bpfread(struct knote *kn, long hint)
2344 {
2345 #pragma unused(hint)
2346         struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
2347
2348         return filt_bpfread_common(kn, d);
2349 }
2350
2351 static int
2352 filt_bpftouch(struct knote *kn, struct kevent_internal_s *kev)
2353 {
2354         struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
2355         int res;
2356
2357         lck_mtx_lock(bpf_mlock);
2358
2359         /* save off the lowat threshold and flag */
2360         kn->kn_sdata = kev->data;
2361         kn->kn_sfflags = kev->fflags;
2362         if ((kn->kn_status & KN_UDATA_SPECIFIC) == 0)
2363                 kn->kn_udata = kev->udata;
2364
2365         /* output data will be re-generated here */
2366         res = filt_bpfread_common(kn, d);
2367
2368         lck_mtx_unlock(bpf_mlock);
2369
2370         return res;
2371 }
2372
2373 static int
2374 filt_bpfprocess(struct knote *kn, struct filt_process_s *data, struct kevent_internal_s *kev)
2375 {
2376 #pragma unused(data)
2377         struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
2378         int res;
2379
2380         lck_mtx_lock(bpf_mlock);
2381         res = filt_bpfread_common(kn, d);
2382         if (res) {
2383                 *kev = kn->kn_kevent;
2384         }
2385         lck_mtx_unlock(bpf_mlock);
2386
2387         return res;
2388 }
2389
2390 /*
2391  * Copy data from an mbuf chain into a buffer.  This code is derived
2392  * from m_copydata in kern/uipc_mbuf.c.
2393  */
2394 static void
2395 bpf_mcopy(struct mbuf * m, void *dst_arg, size_t len)
2396 {
2397         u_int count;
2398         u_char *dst;
2399
2400         dst = dst_arg;
2401         while (len > 0) {
2402                 if (m == 0)
2403                         panic("bpf_mcopy");
2404                 count = min(m->m_len, len);
2405                 bcopy(mbuf_data(m), dst, count);
2406                 m = m->m_next;
2407                 dst += count;
2408                 len -= count;
2409         }
2410 }
2411
2412 static inline void
2413 bpf_tap_imp(
2414         ifnet_t         ifp,
2415         u_int32_t       dlt,
2416         struct bpf_packet *bpf_pkt,
2417         int             outbound)
2418 {
2419         struct bpf_d    *d;
2420         u_int slen;
2421         struct bpf_if *bp;
2422
2423         /*
2424          * It's possible that we get here after the bpf descriptor has been
2425          * detached from the interface; in such a case we simply return.
2426          * Lock ordering is important since we can be called asynchronously
2427          * (from IOKit) to process an inbound packet; when that happens
2428          * we would have been holding its "gateLock" and will be acquiring
2429          * "bpf_mlock" upon entering this routine.  Due to that, we release
2430          * "bpf_mlock" prior to calling ifnet_set_promiscuous (which will
2431          * acquire "gateLock" in the IOKit), in order to avoid a deadlock
2432          * when a ifnet_set_promiscuous request simultaneously collides with
2433          * an inbound packet being passed into the tap callback.
2434          */
2435         lck_mtx_lock(bpf_mlock);
2436         if (ifp->if_bpf == NULL) {
2437                 lck_mtx_unlock(bpf_mlock);
2438                 return;
2439         }
2440         for (bp = ifp->if_bpf; bp != NULL; bp = bp->bif_next) {
2441                 if (bp->bif_ifp != ifp) {
2442                         /* wrong interface */
2443                         bp = NULL;
2444                         break;
2445                 }
2446                 if (dlt == 0 || bp->bif_dlt == dlt) {
2447                         /* tapping default DLT or DLT matches */
2448                         break;
2449                 }
2450         }
2451         if (bp == NULL) {
2452                 goto done;
2453         }
2454         for (d = bp->bif_dlist; d; d = d->bd_next) {
2455                 if (outbound && !d->bd_seesent)
2456                         continue;
2457                 ++d->bd_rcount;
2458                 slen = bpf_filter(d->bd_filter, (u_char *)bpf_pkt,
2459                                   bpf_pkt->bpfp_total_length, 0);
2460                 if (slen != 0) {
2461 #if CONFIG_MACF_NET
2462                         if (mac_bpfdesc_check_receive(d, bp->bif_ifp) != 0)
2463                                 continue;
2464 #endif
2465                         catchpacket(d, bpf_pkt, slen, outbound);
2466                 }
2467         }
2468
2469  done:
2470         lck_mtx_unlock(bpf_mlock);
2471 }
2472
2473 static inline void
2474 bpf_tap_mbuf(
2475         ifnet_t         ifp,
2476         u_int32_t       dlt,
2477         mbuf_t          m,
2478         void*           hdr,
2479         size_t          hlen,
2480         int             outbound)
2481 {
2482         struct bpf_packet bpf_pkt;
2483         struct mbuf *m0;
2484
2485         if (ifp->if_bpf == NULL) {
2486                 /* quickly check without taking lock */
2487                 return;
2488         }
2489         bpf_pkt.bpfp_type = BPF_PACKET_TYPE_MBUF;
2490         bpf_pkt.bpfp_mbuf = m;
2491         bpf_pkt.bpfp_total_length = 0;
2492         for (m0 = m; m0 != NULL; m0 = m0->m_next)
2493                 bpf_pkt.bpfp_total_length += m0->m_len;
2494         bpf_pkt.bpfp_header = hdr;
2495         if (hdr != NULL) {
2496                 bpf_pkt.bpfp_total_length += hlen;
2497                 bpf_pkt.bpfp_header_length = hlen;
2498         } else {
2499                 bpf_pkt.bpfp_header_length = 0;
2500         }
2501         bpf_tap_imp(ifp, dlt, &bpf_pkt, outbound);
2502 }
2503
2504 void
2505 bpf_tap_out(
2506         ifnet_t         ifp,
2507         u_int32_t       dlt,
2508         mbuf_t          m,
2509         void*           hdr,
2510         size_t          hlen)
2511 {
2512         bpf_tap_mbuf(ifp, dlt, m, hdr, hlen, 1);
2513 }
2514
2515 void
2516 bpf_tap_in(
2517         ifnet_t         ifp,
2518         u_int32_t       dlt,
2519         mbuf_t          m,
2520         void*           hdr,
2521         size_t          hlen)
2522 {
2523         bpf_tap_mbuf(ifp, dlt, m, hdr, hlen, 0);
2524 }
2525
2526 /* Callback registered with Ethernet driver. */
2527 static int bpf_tap_callback(struct ifnet *ifp, struct mbuf *m)
2528 {
2529         bpf_tap_mbuf(ifp, 0, m, NULL, 0, mbuf_pkthdr_rcvif(m) == NULL);
2530
2531         return 0;
2532 }
2533
2534
2535 static void
2536 copy_bpf_packet(struct bpf_packet * pkt, void * dst, size_t len)
2537 {
2538         /* copy the optional header */
2539         if (pkt->bpfp_header_length != 0) {
2540                 size_t  count = min(len, pkt->bpfp_header_length);
2541                 bcopy(pkt->bpfp_header, dst, count);
2542                 len -= count;
2543                 dst += count;
2544         }
2545         if (len == 0) {
2546                 /* nothing past the header */
2547                 return;
2548         }
2549         /* copy the packet */
2550         switch (pkt->bpfp_type) {
2551         case BPF_PACKET_TYPE_MBUF:
2552                 bpf_mcopy(pkt->bpfp_mbuf, dst, len);
2553                 break;
2554         default:
2555                 break;
2556         }
2557 }
2558
2559 /*
2560  * Move the packet data from interface memory (pkt) into the
2561  * store buffer.  Nothing is returned (the function is void); whether a
2562  * listener needs to be woken up is tracked locally (see do_wakeup).
2563  */
2564 static void
2565 catchpacket(struct bpf_d *d, struct bpf_packet * pkt,
2566         u_int snaplen, int outbound)
2567 {
2568         struct bpf_hdr *hp;
2569         struct bpf_hdr_ext *ehp;
2570         int totlen, curlen;
2571         int hdrlen, caplen;
2572         int do_wakeup = 0;
2573         u_char *payload;
2574         struct timeval tv;
2575
2576         hdrlen = (d->bd_flags & BPF_EXTENDED_HDR) ? d->bd_bif->bif_exthdrlen :
2577             d->bd_bif->bif_hdrlen;
2578         /*
2579          * Figure out how many bytes to move.  If the packet is
2580          * greater or equal to the snapshot length, transfer that
2581          * much.  Otherwise, transfer the whole packet (unless
2582          * we hit the buffer size limit).
2583          */
2584         totlen = hdrlen + min(snaplen, pkt->bpfp_total_length);
2585         if (totlen > d->bd_bufsize)
2586                 totlen = d->bd_bufsize;
2587
2588         /*
2589          * Round up the end of the previous packet to the next longword.
2590          */
2591         curlen = BPF_WORDALIGN(d->bd_slen);
2592         if (curlen + totlen > d->bd_bufsize) {
2593                 /*
2594                  * This packet will overflow the storage buffer.
2595                  * Rotate the buffers if we can, then wakeup any
2596                  * pending reads.
2597                  *
2598                  * We cannot rotate buffers if a read is in progress
2599                  * so drop the packet
2600                  */
2601                 if (d->bd_hbuf_read) {
2602                         ++d->bd_dcount;
2603                         return;
2604                 }
2605
2606                 if (d->bd_fbuf == NULL) {
2607                         if (d->bd_headdrop == 0) {
2608                                 /*
2609                                  * We haven't completed the previous read yet,
2610                                  * so drop the packet.
2611                                  */
2612                                 ++d->bd_dcount;
2613                                 return;
2614                         }
2615                         /*
2616                          * Drop the hold buffer as it contains older packets
2617                          */
2618                         d->bd_dcount += d->bd_hcnt;
2619                         d->bd_fbuf = d->bd_hbuf;
2620                         ROTATE_BUFFERS(d);
2621                 } else {
2622                         ROTATE_BUFFERS(d);
2623                 }
2624                 do_wakeup = 1;
2625                 curlen = 0;
2626         }
2627         else if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT)
2628                 /*
2629                  * Immediate mode is set, or the read timeout has
2630                  * already expired during a select call. A packet
2631                  * arrived, so the reader should be woken up.
2632                  */
2633                 do_wakeup = 1;
2634
2635         /*
2636          * Append the bpf header.
2637          */
2638         microtime(&tv);
2639         if (d->bd_flags & BPF_EXTENDED_HDR) {
2640                 struct mbuf *m;
2641
2642                 m = (pkt->bpfp_type == BPF_PACKET_TYPE_MBUF)
2643                         ? pkt->bpfp_mbuf : NULL;
2644                 ehp = (struct bpf_hdr_ext *)(void *)(d->bd_sbuf + curlen);
2645                 memset(ehp, 0, sizeof(*ehp));
2646                 ehp->bh_tstamp.tv_sec = tv.tv_sec;
2647                 ehp->bh_tstamp.tv_usec = tv.tv_usec;
2648
2649                 ehp->bh_datalen = pkt->bpfp_total_length;
2650                 ehp->bh_hdrlen = hdrlen;
2651                 caplen = ehp->bh_caplen = totlen - hdrlen;
2652                 if (m == NULL) {
2653                         if (outbound) {
2654                                 ehp->bh_flags |= BPF_HDR_EXT_FLAGS_DIR_OUT;
2655                         } else {
2656                                 ehp->bh_flags |= BPF_HDR_EXT_FLAGS_DIR_IN;
2657                         }
2658                 } else if (outbound) {
2659                         ehp->bh_flags |= BPF_HDR_EXT_FLAGS_DIR_OUT;
2660
2661                         /* only do lookups on non-raw INPCB */
2662                         if ((m->m_pkthdr.pkt_flags & (PKTF_FLOW_ID|
2663                             PKTF_FLOW_LOCALSRC|PKTF_FLOW_RAWSOCK)) ==
2664                             (PKTF_FLOW_ID|PKTF_FLOW_LOCALSRC) &&
2665                             m->m_pkthdr.pkt_flowsrc == FLOWSRC_INPCB) {
2666                                 ehp->bh_flowid = m->m_pkthdr.pkt_flowid;
2667                                 ehp->bh_proto = m->m_pkthdr.pkt_proto;
2668                         }
2669                         ehp->bh_svc = so_svc2tc(m->m_pkthdr.pkt_svc);
2670                         if (m->m_pkthdr.pkt_flags & PKTF_TCP_REXMT)
2671                                 ehp->bh_pktflags |= BPF_PKTFLAGS_TCP_REXMT;
2672                         if (m->m_pkthdr.pkt_flags & PKTF_START_SEQ)
2673                                 ehp->bh_pktflags |= BPF_PKTFLAGS_START_SEQ;
2674                         if (m->m_pkthdr.pkt_flags & PKTF_LAST_PKT)
2675                                 ehp->bh_pktflags |= BPF_PKTFLAGS_LAST_PKT;
2676                         if (m->m_pkthdr.pkt_flags & PKTF_VALID_UNSENT_DATA) {
2677                                 ehp->bh_unsent_bytes =
2678                                     m->m_pkthdr.bufstatus_if;
2679                                 ehp->bh_unsent_snd =
2680                                     m->m_pkthdr.bufstatus_sndbuf;
2681                         }
2682                 } else
2683                         ehp->bh_flags |= BPF_HDR_EXT_FLAGS_DIR_IN;
2684                 payload = (u_char *)ehp + hdrlen;
2685         } else {
2686                 hp = (struct bpf_hdr *)(void *)(d->bd_sbuf + curlen);
2687                 hp->bh_tstamp.tv_sec = tv.tv_sec;
2688                 hp->bh_tstamp.tv_usec = tv.tv_usec;
2689                 hp->bh_datalen = pkt->bpfp_total_length;
2690                 hp->bh_hdrlen = hdrlen;
2691                 caplen = hp->bh_caplen = totlen - hdrlen;
2692                 payload = (u_char *)hp + hdrlen;
2693         }
2694         /*
2695          * Copy the packet data into the store buffer and update its length.
2696          */
2697         copy_bpf_packet(pkt, payload, caplen);
2698         d->bd_slen = curlen + totlen;
2699         d->bd_scnt += 1;
2700
2701         if (do_wakeup)
2702                 bpf_wakeup(d);
2703 }
2704
2705 /*
2706  * Initialize all nonzero fields of a descriptor.
2707  */
2708 static int
2709 bpf_allocbufs(struct bpf_d *d)
2710 {
2711         if (d->bd_sbuf != NULL) {
2712                 FREE(d->bd_sbuf, M_DEVBUF);
2713                 d->bd_sbuf = NULL;
2714         }
2715         if (d->bd_hbuf != NULL) {
2716                 FREE(d->bd_hbuf, M_DEVBUF);
2717                 d->bd_hbuf = NULL;
2718         }
2719         if (d->bd_fbuf != NULL) {
2720                 FREE(d->bd_fbuf, M_DEVBUF);
2721                 d->bd_fbuf = NULL;
2722         }
2723
2724         d->bd_fbuf = (caddr_t) _MALLOC(d->bd_bufsize, M_DEVBUF, M_WAIT);
2725         if (d->bd_fbuf == NULL)
2726                 return (ENOBUFS);
2727
2728         d->bd_sbuf = (caddr_t) _MALLOC(d->bd_bufsize, M_DEVBUF, M_WAIT);
2729         if (d->bd_sbuf == NULL) {
2730                 FREE(d->bd_fbuf, M_DEVBUF);
2731                 d->bd_fbuf = NULL;
2732                 return (ENOBUFS);
2733         }
2734         d->bd_slen = 0;
2735         d->bd_hlen = 0;
2736         d->bd_scnt = 0;
2737         d->bd_hcnt = 0;
2738         return (0);
2739 }
2740
2741 /*
2742  * Free buffers currently in use by a descriptor.
2743  * Called on close.
2744  */
2745 static void
2746 bpf_freed(struct bpf_d *d)
2747 {
2748         /*
2749          * We don't need to lock out interrupts since this descriptor has
2750          * been detached from its interface and it yet hasn't been marked
2751          * free.
2752          */
2753         if (d->bd_hbuf_read)
2754                 panic("bpf buffer freed during read");
2755
2756         if (d->bd_sbuf != 0) {
2757                 FREE(d->bd_sbuf, M_DEVBUF);
2758                 if (d->bd_hbuf != 0)
2759                         FREE(d->bd_hbuf, M_DEVBUF);
2760                 if (d->bd_fbuf != 0)
2761                         FREE(d->bd_fbuf, M_DEVBUF);
2762         }
2763         if (d->bd_filter)
2764                 FREE((caddr_t)d->bd_filter, M_DEVBUF);
2765 }
2766
2767 /*
2768  * Attach an interface to bpf.  driverp is a pointer to a (struct bpf_if *)
2769  * in the driver's softc; dlt is the link layer type; hdrlen is the fixed
2770  * size of the link header (variable length headers not yet supported).
2771  */
2772 void
2773 bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen)
2774 {
2775         bpf_attach(ifp, dlt, hdrlen, NULL, NULL);
2776 }
2777
2778 errno_t
2779 bpf_attach(
2780         ifnet_t                 ifp,
2781         u_int32_t               dlt,
2782         u_int32_t               hdrlen,
2783         bpf_send_func   send,
2784         bpf_tap_func    tap)
2785 {
2786         struct bpf_if *bp;
2787         struct bpf_if *bp_new;
2788         struct bpf_if *bp_before_first = NULL;
2789         struct bpf_if *bp_first = NULL;
2790         struct bpf_if *bp_last = NULL;
2791         boolean_t found;
2792
2793         bp_new = (struct bpf_if *) _MALLOC(sizeof(*bp_new), M_DEVBUF,
2794             M_WAIT | M_ZERO);
2795         if (bp_new == 0)
2796                 panic("bpfattach");
2797
2798         lck_mtx_lock(bpf_mlock);
2799
2800         /*
2801          * Check if this interface/dlt is already attached. Remember the
2802          * first and last attachment for this interface, as well as the
2803          * element before the first attachment.
2804          */
2805         found = FALSE;
2806         for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
2807                 if (bp->bif_ifp != ifp) {
2808                         if (bp_first != NULL) {
2809                                 /* no more elements for this interface */
2810                                 break;
2811                         }
2812                         bp_before_first = bp;
2813                 } else {
2814                         if (bp->bif_dlt == dlt) {
2815                                 found = TRUE;
2816                                 break;
2817                         }
2818                         if (bp_first == NULL) {
2819                                 bp_first = bp;
2820                         }
2821                         bp_last = bp;
2822                 }
2823         }
2824         if (found) {
2825                 lck_mtx_unlock(bpf_mlock);
2826                 printf("bpfattach - %s with dlt %d is already attached\n",
2827                         if_name(ifp), dlt);
2828                 FREE(bp_new, M_DEVBUF);
2829                 return EEXIST;
2830         }
2831
2832         bp_new->bif_ifp = ifp;
2833         bp_new->bif_dlt = dlt;
2834         bp_new->bif_send = send;
2835         bp_new->bif_tap = tap;
2836
2837         if (bp_first == NULL) {
2838                 /* No other entries for this ifp */
2839                 bp_new->bif_next = bpf_iflist;
2840                 bpf_iflist = bp_new;
2841         }
2842         else {
2843                 if (ifnet_type(ifp) == IFT_ETHER && dlt == DLT_EN10MB) {
2844                         /* Make this the first entry for this interface */
2845                         if (bp_before_first != NULL) {
2846                                 /*  point the previous to us */
2847                                 bp_before_first->bif_next = bp_new;
2848                         } else {
2849                                 /* we're the new head */
2850                                 bpf_iflist = bp_new;
2851                         }
2852                         bp_new->bif_next = bp_first;
2853                 } else {
2854                         /* Add this after the last entry for this interface */
2855                         bp_new->bif_next = bp_last->bif_next;
2856                         bp_last->bif_next = bp_new;
2857                 }
2858         }
2859
2860         /*
2861          * Compute the length of the bpf header.  This is not necessarily
2862          * equal to SIZEOF_BPF_HDR because we want to insert spacing such
2863          * that the network layer header begins on a longword boundary (for
2864          * performance reasons and to alleviate alignment restrictions).
2865          */
2866         bp_new->bif_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen;
2867         bp_new->bif_exthdrlen = BPF_WORDALIGN(hdrlen +
2868             sizeof(struct bpf_hdr_ext)) - hdrlen;
2869
2870         /* Take a reference on the interface */
2871         ifnet_reference(ifp);
2872
2873         lck_mtx_unlock(bpf_mlock);
2874
2875 #ifndef __APPLE__
2876         if (bootverbose)
2877                 printf("bpf: %s attached\n", if_name(ifp));
2878 #endif
2879
2880         return 0;
2881 }
2882
2883 /*
2884  * Detach bpf from an interface.  This involves detaching each descriptor
2885  * associated with the interface, and leaving bd_bif NULL.  Notify each
2886  * descriptor as it's detached so that any sleepers wake up and get
2887  * ENXIO.
2888  */
2889 void
2890 bpfdetach(struct ifnet *ifp)
2891 {
2892         struct bpf_if   *bp, *bp_prev, *bp_next;
2893         struct bpf_d    *d;
2894
2895         if (bpf_debug != 0)
2896                 printf("%s: %s\n", __func__, if_name(ifp));
2897
2898         lck_mtx_lock(bpf_mlock);
2899
2900         /*
2901          * Build the list of devices attached to that interface
2902          * that we need to free while keeping the lock to maintain
2903          * the integrity of the interface list
2904          */
2905         bp_prev = NULL;
2906         for (bp = bpf_iflist; bp != NULL; bp = bp_next) {
2907                 bp_next = bp->bif_next;
2908
2909                 if (ifp != bp->bif_ifp) {
2910                         bp_prev = bp;
2911                         continue;
2912                 }
2913                 /* Unlink from the interface list */
2914                 if (bp_prev)
2915                         bp_prev->bif_next = bp->bif_next;
2916                 else
2917                         bpf_iflist = bp->bif_next;
2918
2919                 /* Detach the devices attached to the interface */
2920                 while ((d = bp->bif_dlist) != NULL) {
2921                         /*
2922                          * Take an extra reference to prevent the device
2923                          * from being freed when bpf_detachd() releases
2924                          * the reference for the interface list
2925                          */
2926                         bpf_acquire_d(d);
2927                         bpf_detachd(d, 0);
2928                         bpf_wakeup(d);
2929                         bpf_release_d(d);
2930                 }
2931                 ifnet_release(ifp);
2932         }
2933
2934         lck_mtx_unlock(bpf_mlock);
2935 }
2936
2937 void
2938 bpf_init(__unused void *unused)
2939 {
2940 #ifdef __APPLE__
2941         int     i;
2942         int     maj;
2943
2944         if (bpf_devsw_installed == 0) {
2945                 bpf_devsw_installed = 1;
2946                 bpf_mlock_grp_attr = lck_grp_attr_alloc_init();
2947                 bpf_mlock_grp = lck_grp_alloc_init("bpf", bpf_mlock_grp_attr);
2948                 bpf_mlock_attr = lck_attr_alloc_init();
2949                 lck_mtx_init(bpf_mlock, bpf_mlock_grp, bpf_mlock_attr);
2950                 maj = cdevsw_add(CDEV_MAJOR, &bpf_cdevsw);
2951                 if (maj == -1) {
2952                         if (bpf_mlock_attr)
2953                                 lck_attr_free(bpf_mlock_attr);
2954                         if (bpf_mlock_grp)
2955                                 lck_grp_free(bpf_mlock_grp);
2956                         if (bpf_mlock_grp_attr)
2957                                 lck_grp_attr_free(bpf_mlock_grp_attr);
2958
2959                         bpf_mlock = NULL;
2960                         bpf_mlock_attr = NULL;
2961                         bpf_mlock_grp = NULL;
2962                         bpf_mlock_grp_attr = NULL;
2963                         bpf_devsw_installed = 0;
2964                         printf("bpf_init: failed to allocate a major number!\n");
2965                         return;
2966                 }
2967
2968                 for (i = 0 ; i < NBPFILTER; i++)
2969                         bpf_make_dev_t(maj);
2970         }
2971 #else
2972         cdevsw_add(&bpf_cdevsw);
2973 #endif
2974 }
2975
2976 #ifndef __APPLE__
2977 SYSINIT(bpfdev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE+CDEV_MAJOR,bpf_drvinit,NULL)
2978 #endif
2979
2980 #if CONFIG_MACF_NET
2981 struct label *
2982 mac_bpfdesc_label_get(struct bpf_d *d)
2983 {
2984
2985         return (d->bd_label);
2986 }
2987
2988 void
2989 mac_bpfdesc_label_set(struct bpf_d *d, struct label *label)
2990 {
2991
2992         d->bd_label = label;
2993 }
2994 #endif