1 /*
2 * Copyright (c) 2000-2017 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Copyright (c) 1990, 1991, 1993
30 * The Regents of the University of California. All rights reserved.
31 *
32 * This code is derived from the Stanford/CMU enet packet filter,
33 * (net/enet.c) distributed as part of 4.3BSD, and code contributed
34 * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
35 * Berkeley Laboratory.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 * must display the following acknowledgement:
47 * This product includes software developed by the University of
48 * California, Berkeley and its contributors.
49 * 4. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * @(#)bpf.c 8.2 (Berkeley) 3/28/94
66 *
67 * $FreeBSD: src/sys/net/bpf.c,v 1.59.2.5 2001/01/05 04:49:09 jdp Exp $
68 */
69 /*
70 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
71 * support for mandatory and extensible security protections. This notice
72 * is included in support of clause 2.2 (b) of the Apple Public License,
73 * Version 2.0.
74 */
75
76 #include "bpf.h"
77
78 #ifndef __GNUC__
79 #define inline
80 #else
81 #define inline __inline
82 #endif
83
84 #include <sys/param.h>
85 #include <sys/systm.h>
86 #include <sys/conf.h>
87 #include <sys/malloc.h>
88 #include <sys/mbuf.h>
89 #include <sys/time.h>
90 #include <sys/proc.h>
91 #include <sys/signalvar.h>
92 #include <sys/filio.h>
93 #include <sys/sockio.h>
94 #include <sys/ttycom.h>
95 #include <sys/filedesc.h>
96 #include <sys/uio_internal.h>
97 #include <sys/file_internal.h>
98 #include <sys/event.h>
99
100 #include <sys/poll.h>
101
102 #include <sys/socket.h>
103 #include <sys/socketvar.h>
104 #include <sys/vnode.h>
105
106 #include <net/if.h>
107 #include <net/bpf.h>
108 #include <net/bpfdesc.h>
109
110 #include <netinet/in.h>
111 #include <netinet/in_pcb.h>
112 #include <netinet/in_var.h>
113 #include <netinet/ip_var.h>
114 #include <netinet/tcp.h>
115 #include <netinet/tcp_var.h>
116 #include <netinet/udp.h>
117 #include <netinet/udp_var.h>
118 #include <netinet/if_ether.h>
119 #include <sys/kernel.h>
120 #include <sys/sysctl.h>
121 #include <net/firewire.h>
122
123 #include <miscfs/devfs/devfs.h>
124 #include <net/dlil.h>
125 #include <net/pktap.h>
126
127 #include <kern/locks.h>
128 #include <kern/thread_call.h>
129 #include <libkern/section_keywords.h>
130
131 #if CONFIG_MACF_NET
132 #include <security/mac_framework.h>
133 #endif /* CONFIG_MACF_NET */
134
135 extern int tvtohz(struct timeval *);
136
137 #define BPF_BUFSIZE 4096
138 #define UIOMOVE(cp, len, code, uio) uiomove(cp, len, uio)
139
140
141 #define PRINET 26 /* interruptible */
142
143 typedef void (*pktcopyfunc_t)(const void *, void *, size_t);
144
145 /*
146 * The default read buffer size is patchable.
147 */
148 static unsigned int bpf_bufsize = BPF_BUFSIZE;
149 SYSCTL_INT(_debug, OID_AUTO, bpf_bufsize, CTLFLAG_RW | CTLFLAG_LOCKED,
150 &bpf_bufsize, 0, "");
151 __private_extern__ unsigned int bpf_maxbufsize = BPF_MAXBUFSIZE;
152 SYSCTL_INT(_debug, OID_AUTO, bpf_maxbufsize, CTLFLAG_RW | CTLFLAG_LOCKED,
153 &bpf_maxbufsize, 0, "");
154 static unsigned int bpf_maxdevices = 256;
155 SYSCTL_UINT(_debug, OID_AUTO, bpf_maxdevices, CTLFLAG_RW | CTLFLAG_LOCKED,
156 &bpf_maxdevices, 0, "");
157 /*
158 * bpf_wantpktap controls the default visibility of DLT_PKTAP.
159 * On OS X it is off by default, so a process needs to issue the
160 * BIOCSWANTPKTAP ioctl explicitly to be able to use DLT_PKTAP.
161 */
162 #if CONFIG_EMBEDDED
163 static unsigned int bpf_wantpktap = 1;
164 #else
165 static unsigned int bpf_wantpktap = 0;
166 #endif
167 SYSCTL_UINT(_debug, OID_AUTO, bpf_wantpktap, CTLFLAG_RW | CTLFLAG_LOCKED,
168 &bpf_wantpktap, 0, "");
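/*
 * Illustrative sketch (not part of the original source): a userland process
 * that wants DLT_PKTAP opts in with the BIOCSWANTPKTAP ioctl before
 * selecting the data link type; "fd" is assumed to be an already-open
 * /dev/bpfN descriptor:
 *
 *	u_int want = 1;
 *
 *	if (ioctl(fd, BIOCSWANTPKTAP, &want) == -1)
 *		err(1, "BIOCSWANTPKTAP");
 */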
169
170 static int bpf_debug = 0;
171 SYSCTL_INT(_debug, OID_AUTO, bpf_debug, CTLFLAG_RW | CTLFLAG_LOCKED,
172 &bpf_debug, 0, "");
173
174 /*
175 * bpf_iflist is the list of interfaces; each corresponds to an ifnet
176 * bpf_dtab holds pointer to the descriptors, indexed by minor device #
177 */
178 static struct bpf_if *bpf_iflist;
179 #ifdef __APPLE__
180 /*
181 * BSD now stores the bpf_d in the dev_t which is a struct
182 * on their system. Our dev_t is an int, so we still store
183 * the bpf_d in a separate table indexed by minor device #.
184 *
185 * The value stored in bpf_dtab[n] represents one of three states:
186 * 0: device not opened
187 * 1: device opening or closing
188 * other: device <n> opened with pointer to storage
189 */
190 static struct bpf_d **bpf_dtab = NULL;
191 static unsigned int bpf_dtab_size = 0;
192 static unsigned int nbpfilter = 0;
193
194 decl_lck_mtx_data(static, bpf_mlock_data);
195 static lck_mtx_t *bpf_mlock = &bpf_mlock_data;
196 static lck_grp_t *bpf_mlock_grp;
197 static lck_grp_attr_t *bpf_mlock_grp_attr;
198 static lck_attr_t *bpf_mlock_attr;
199
200 #endif /* __APPLE__ */
201
202 static int bpf_allocbufs(struct bpf_d *);
203 static errno_t bpf_attachd(struct bpf_d *d, struct bpf_if *bp);
204 static int bpf_detachd(struct bpf_d *d, int);
205 static void bpf_freed(struct bpf_d *);
206 static int bpf_movein(struct uio *, int,
207 struct mbuf **, struct sockaddr *, int *);
208 static int bpf_setif(struct bpf_d *, ifnet_t ifp);
209 static void bpf_timed_out(void *, void *);
210 static void bpf_wakeup(struct bpf_d *);
211 static void catchpacket(struct bpf_d *, struct bpf_packet *, u_int, int);
212 static void reset_d(struct bpf_d *);
213 static int bpf_setf(struct bpf_d *, u_int, user_addr_t, u_long);
214 static int bpf_getdltlist(struct bpf_d *, caddr_t, struct proc *);
215 static int bpf_setdlt(struct bpf_d *, u_int);
216 static int bpf_set_traffic_class(struct bpf_d *, int);
217 static void bpf_set_packet_service_class(struct mbuf *, int);
218
219 static void bpf_acquire_d(struct bpf_d *);
220 static void bpf_release_d(struct bpf_d *);
221
222 static int bpf_devsw_installed;
223
224 void bpf_init(void *unused);
225 static int bpf_tap_callback(struct ifnet *ifp, struct mbuf *m);
226
227 /*
228 * Darwin differs from BSD here: the following are static
229 * on BSD but not static on Darwin.
230 */
231 d_open_t bpfopen;
232 d_close_t bpfclose;
233 d_read_t bpfread;
234 d_write_t bpfwrite;
235 ioctl_fcn_t bpfioctl;
236 select_fcn_t bpfselect;
237
238
239 /* Darwin's cdevsw struct differs slightly from BSDs */
240 #define CDEV_MAJOR 23
241 static struct cdevsw bpf_cdevsw = {
242 /* open */ bpfopen,
243 /* close */ bpfclose,
244 /* read */ bpfread,
245 /* write */ bpfwrite,
246 /* ioctl */ bpfioctl,
247 /* stop */ eno_stop,
248 /* reset */ eno_reset,
249 /* tty */ NULL,
250 /* select */ bpfselect,
251 /* mmap */ eno_mmap,
252 /* strategy*/ eno_strat,
253 /* getc */ eno_getc,
254 /* putc */ eno_putc,
255 /* type */ 0
256 };
257
258 #define SOCKADDR_HDR_LEN offsetof(struct sockaddr, sa_data)
259
260 static int
261 bpf_movein(struct uio *uio, int linktype, struct mbuf **mp, struct sockaddr *sockp, int *datlen)
262 {
263 struct mbuf *m;
264 int error;
265 int len;
266 uint8_t sa_family;
267 int hlen;
268
269 switch (linktype) {
270
271 #if SLIP
272 case DLT_SLIP:
273 sa_family = AF_INET;
274 hlen = 0;
275 break;
276 #endif /* SLIP */
277
278 case DLT_EN10MB:
279 sa_family = AF_UNSPEC;
280 /* XXX Would MAXLINKHDR be better? */
281 hlen = sizeof(struct ether_header);
282 break;
283
284 #if FDDI
285 case DLT_FDDI:
286 #if defined(__FreeBSD__) || defined(__bsdi__)
287 sa_family = AF_IMPLINK;
288 hlen = 0;
289 #else
290 sa_family = AF_UNSPEC;
291 /* XXX 4(FORMAC)+6(dst)+6(src)+3(LLC)+5(SNAP) */
292 hlen = 24;
293 #endif
294 break;
295 #endif /* FDDI */
296
297 case DLT_RAW:
298 case DLT_NULL:
299 sa_family = AF_UNSPEC;
300 hlen = 0;
301 break;
302
303 #ifdef __FreeBSD__
304 case DLT_ATM_RFC1483:
305 /*
306 * en atm driver requires 4-byte atm pseudo header.
307 * though it isn't standard, vpi:vci needs to be
308 * specified anyway.
309 */
310 sa_family = AF_UNSPEC;
311 hlen = 12; /* XXX 4(ATM_PH) + 3(LLC) + 5(SNAP) */
312 break;
313 #endif
314
315 case DLT_PPP:
316 sa_family = AF_UNSPEC;
317 hlen = 4; /* This should match PPP_HDRLEN */
318 break;
319
320 case DLT_APPLE_IP_OVER_IEEE1394:
321 sa_family = AF_UNSPEC;
322 hlen = sizeof(struct firewire_header);
323 break;
324
325 case DLT_IEEE802_11: /* IEEE 802.11 wireless */
326 sa_family = AF_IEEE80211;
327 hlen = 0;
328 break;
329
330 case DLT_IEEE802_11_RADIO:
331 sa_family = AF_IEEE80211;
332 hlen = 0;
333 break;
334
335 default:
336 return (EIO);
337 }
338
339 // LP64todo - fix this!
340 len = uio_resid(uio);
341 *datlen = len - hlen;
342 if ((unsigned)len > MCLBYTES)
343 return (EIO);
344
345 if (sockp) {
346 /*
347 * Build a sockaddr based on the data link layer type.
348 * We do this at this level because the ethernet header
349 * is copied directly into the data field of the sockaddr.
350 * In the case of SLIP, there is no header and the packet
351 * is forwarded as is.
352 * Also, we are careful to leave room at the front of the mbuf
353 * for the link level header.
354 */
355 if ((hlen + SOCKADDR_HDR_LEN) > sockp->sa_len) {
356 return (EIO);
357 }
358 sockp->sa_family = sa_family;
359 } else {
360 /*
361 * We're directly sending the packet data supplied by
362 * the user; we don't need to make room for the link
363 * header, and don't need the header length value any
364 * more, so set it to 0.
365 */
366 hlen = 0;
367 }
368
369 MGETHDR(m, M_WAIT, MT_DATA);
370 if (m == 0)
371 return (ENOBUFS);
372 if ((unsigned)len > MHLEN) {
373 MCLGET(m, M_WAIT);
374 if ((m->m_flags & M_EXT) == 0) {
375 error = ENOBUFS;
376 goto bad;
377 }
378 }
379 m->m_pkthdr.len = m->m_len = len;
380 m->m_pkthdr.rcvif = NULL;
381 *mp = m;
382
383 /*
384 * Make room for link header.
385 */
386 if (hlen != 0) {
387 m->m_pkthdr.len -= hlen;
388 m->m_len -= hlen;
389 m->m_data += hlen; /* XXX */
390 error = UIOMOVE((caddr_t)sockp->sa_data, hlen, UIO_WRITE, uio);
391 if (error)
392 goto bad;
393 }
394 error = UIOMOVE(mtod(m, caddr_t), len - hlen, UIO_WRITE, uio);
395 if (error)
396 goto bad;
397
398 /* Check for multicast destination */
399 switch (linktype) {
400 case DLT_EN10MB: {
401 struct ether_header *eh = mtod(m, struct ether_header *);
402
403 if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
404 if (_ether_cmp(etherbroadcastaddr, eh->ether_dhost) == 0)
405 m->m_flags |= M_BCAST;
406 else
407 m->m_flags |= M_MCAST;
408 }
409 break;
410 }
411 }
412
413 return 0;
414 bad:
415 m_freem(m);
416 return (error);
417 }
418
419 #ifdef __APPLE__
420
421 /*
422 * The dynamic addition of a new device node must block all processes that
423 * are opening the last device so that no process will get an unexpected
424 * ENOENT
425 */
426 static void
427 bpf_make_dev_t(int maj)
428 {
429 static int bpf_growing = 0;
430 unsigned int cur_size = nbpfilter, i;
431
432 if (nbpfilter >= bpf_maxdevices)
433 return;
434
435 while (bpf_growing) {
436 /* Wait until new device has been created */
437 (void)tsleep((caddr_t)&bpf_growing, PZERO, "bpf_growing", 0);
438 }
439 if (nbpfilter > cur_size) {
440 /* other thread grew it already */
441 return;
442 }
443 bpf_growing = 1;
444
445 /* need to grow bpf_dtab first */
446 if (nbpfilter == bpf_dtab_size) {
447 int new_dtab_size;
448 struct bpf_d **new_dtab = NULL;
449 struct bpf_d **old_dtab = NULL;
450
451 new_dtab_size = bpf_dtab_size + NBPFILTER;
452 new_dtab = (struct bpf_d **)_MALLOC(sizeof(struct bpf_d *) * new_dtab_size, M_DEVBUF, M_WAIT);
453 if (new_dtab == 0) {
454 printf("bpf_make_dev_t: malloc bpf_dtab failed\n");
455 goto done;
456 }
457 if (bpf_dtab) {
458 bcopy(bpf_dtab, new_dtab,
459 sizeof(struct bpf_d *) * bpf_dtab_size);
460 }
461 bzero(new_dtab + bpf_dtab_size,
462 sizeof(struct bpf_d *) * NBPFILTER);
463 old_dtab = bpf_dtab;
464 bpf_dtab = new_dtab;
465 bpf_dtab_size = new_dtab_size;
466 if (old_dtab != NULL)
467 _FREE(old_dtab, M_DEVBUF);
468 }
469 i = nbpfilter++;
470 (void) devfs_make_node(makedev(maj, i),
471 DEVFS_CHAR, UID_ROOT, GID_WHEEL, 0600,
472 "bpf%d", i);
473 done:
474 bpf_growing = 0;
475 wakeup((caddr_t)&bpf_growing);
476 }
477
478 #endif
479
480 /*
481 * Attach file to the bpf interface, i.e. make d listen on bp.
482 */
483 static errno_t
484 bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
485 {
486 int first = bp->bif_dlist == NULL;
487 int error = 0;
488
489 /*
490 * Point d at bp, and add d to the interface's list of listeners.
491 * Finally, point the driver's bpf cookie at the interface so
492 * it will divert packets to bpf.
493 */
494 d->bd_bif = bp;
495 d->bd_next = bp->bif_dlist;
496 bp->bif_dlist = d;
497
498 /*
499 * Take a reference on the device even if an error is returned
500 * because we keep the device in the interface's list of listeners
501 */
502 bpf_acquire_d(d);
503
504 if (first) {
505 /* Find the default bpf entry for this ifp */
506 if (bp->bif_ifp->if_bpf == NULL) {
507 struct bpf_if *tmp, *primary = NULL;
508
509 for (tmp = bpf_iflist; tmp; tmp = tmp->bif_next) {
510 if (tmp->bif_ifp == bp->bif_ifp) {
511 primary = tmp;
512 break;
513 }
514 }
515 bp->bif_ifp->if_bpf = primary;
516 }
517 /* Only call dlil_set_bpf_tap for primary dlt */
518 if (bp->bif_ifp->if_bpf == bp)
519 dlil_set_bpf_tap(bp->bif_ifp, BPF_TAP_INPUT_OUTPUT, bpf_tap_callback);
520
521 if (bp->bif_tap != NULL)
522 error = bp->bif_tap(bp->bif_ifp, bp->bif_dlt, BPF_TAP_INPUT_OUTPUT);
523 }
524
525 /*
526 * Reset the detach flags in case we previously detached an interface
527 */
528 d->bd_flags &= ~(BPF_DETACHING | BPF_DETACHED);
529
530 if (bp->bif_dlt == DLT_PKTAP) {
531 d->bd_flags |= BPF_FINALIZE_PKTAP;
532 } else {
533 d->bd_flags &= ~BPF_FINALIZE_PKTAP;
534 }
535 return error;
536 }
537
538 /*
539 * Detach a file from its interface.
540 *
541 * Return 1 if it was closed by some thread, 0 otherwise
542 */
543 static int
544 bpf_detachd(struct bpf_d *d, int closing)
545 {
546 struct bpf_d **p;
547 struct bpf_if *bp;
548 struct ifnet *ifp;
549
550 int bpf_closed = d->bd_flags & BPF_CLOSING;
551 /*
552 * Some other thread already detached
553 */
554 if ((d->bd_flags & (BPF_DETACHED | BPF_DETACHING)) != 0)
555 goto done;
556 /*
557 * This thread is doing the detach
558 */
559 d->bd_flags |= BPF_DETACHING;
560
561 ifp = d->bd_bif->bif_ifp;
562 bp = d->bd_bif;
563
564 if (bpf_debug != 0)
565 printf("%s: %llx %s%s\n",
566 __func__, (uint64_t)VM_KERNEL_ADDRPERM(d),
567 if_name(ifp), closing ? " closing" : "");
568
569 /* Remove d from the interface's descriptor list. */
570 p = &bp->bif_dlist;
571 while (*p != d) {
572 p = &(*p)->bd_next;
573 if (*p == 0)
574 panic("bpf_detachd: descriptor not in list");
575 }
576 *p = (*p)->bd_next;
577 if (bp->bif_dlist == 0) {
578 /*
579 * Let the driver know that there are no more listeners.
580 */
581 /* Only call dlil_set_bpf_tap for primary dlt */
582 if (bp->bif_ifp->if_bpf == bp)
583 dlil_set_bpf_tap(ifp, BPF_TAP_DISABLE, NULL);
584 if (bp->bif_tap)
585 bp->bif_tap(ifp, bp->bif_dlt, BPF_TAP_DISABLE);
586
587 for (bp = bpf_iflist; bp; bp = bp->bif_next)
588 if (bp->bif_ifp == ifp && bp->bif_dlist != 0)
589 break;
590 if (bp == NULL)
591 ifp->if_bpf = NULL;
592 }
593 d->bd_bif = NULL;
594 /*
595 * Check if this descriptor had requested promiscuous mode.
596 * If so, turn it off.
597 */
598 if (d->bd_promisc) {
599 d->bd_promisc = 0;
600 lck_mtx_unlock(bpf_mlock);
601 if (ifnet_set_promiscuous(ifp, 0)) {
602 /*
603 * Something is really wrong if we were able to put
604 * the driver into promiscuous mode, but can't
605 * take it out.
606 * Most likely the network interface is gone.
607 */
608 printf("%s: ifnet_set_promiscuous failed\n", __func__);
609 }
610 lck_mtx_lock(bpf_mlock);
611 }
612
613 /*
614 * Wake up other threads that are waiting for this thread to finish
615 * detaching
616 */
617 d->bd_flags &= ~BPF_DETACHING;
618 d->bd_flags |= BPF_DETACHED;
619
620 /* Refresh the local variable as d could have been modified */
621 bpf_closed = d->bd_flags & BPF_CLOSING;
622 /*
623 * Note that we've kept the reference because we may have dropped
624 * the lock when turning off promiscuous mode
625 */
626 bpf_release_d(d);
627
628 done:
629 /*
630 * When closing, make sure no other thread refers to the bpf_d
631 */
632 if (bpf_debug != 0)
633 printf("%s: %llx done\n",
634 __func__, (uint64_t)VM_KERNEL_ADDRPERM(d));
635 /*
636 * Let the caller know the bpf_d is closed
637 */
638 if (bpf_closed)
639 return (1);
640 else
641 return (0);
642 }
643
644
645 /*
646 * Start asynchronous timer, if necessary.
647 * Must be called with bpf_mlock held.
648 */
649 static void
650 bpf_start_timer(struct bpf_d *d)
651 {
652 uint64_t deadline;
653 struct timeval tv;
654
655 if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
656 tv.tv_sec = d->bd_rtout / hz;
657 tv.tv_usec = (d->bd_rtout % hz) * tick;
658
659 clock_interval_to_deadline(
660 (uint64_t)tv.tv_sec * USEC_PER_SEC + tv.tv_usec,
661 NSEC_PER_USEC, &deadline);
662 /*
663 * The state is BPF_IDLE, so the timer hasn't
664 * been started yet, and hasn't gone off yet;
665 * there is no thread call scheduled, so this
666 * won't change the schedule.
667 *
668 * XXX - what if, by the time it gets entered,
669 * the deadline has already passed?
670 */
671 thread_call_enter_delayed(d->bd_thread_call, deadline);
672 d->bd_state = BPF_WAITING;
673 }
674 }
675
676 /*
677 * Cancel asynchronous timer.
678 * Must be called with bpf_mlock held.
679 */
680 static boolean_t
681 bpf_stop_timer(struct bpf_d *d)
682 {
683 /*
684 * If the timer has already gone off, this does nothing.
685 * Our caller is expected to set d->bd_state to BPF_IDLE,
686 * with the bpf_mlock, after we are called. bpf_timed_out()
687 * also grabs bpf_mlock, so, if the timer has gone off and
688 * bpf_timed_out() hasn't finished, it's waiting for the
689 * lock; when this thread releases the lock, it will
690 * find the state is BPF_IDLE, and just release the
691 * lock and return.
692 */
693 return (thread_call_cancel(d->bd_thread_call));
694 }
695
696 void
697 bpf_acquire_d(struct bpf_d *d)
698 {
699 void *lr_saved = __builtin_return_address(0);
700
701 LCK_MTX_ASSERT(bpf_mlock, LCK_MTX_ASSERT_OWNED);
702
703 d->bd_refcnt += 1;
704
705 d->bd_ref_lr[d->bd_next_ref_lr] = lr_saved;
706 d->bd_next_ref_lr = (d->bd_next_ref_lr + 1) % BPF_REF_HIST;
707 }
708
709 void
710 bpf_release_d(struct bpf_d *d)
711 {
712 void *lr_saved = __builtin_return_address(0);
713
714 LCK_MTX_ASSERT(bpf_mlock, LCK_MTX_ASSERT_OWNED);
715
716 if (d->bd_refcnt <= 0)
717 panic("%s: %p refcnt <= 0", __func__, d);
718
719 d->bd_refcnt -= 1;
720
721 d->bd_unref_lr[d->bd_next_unref_lr] = lr_saved;
722 d->bd_next_unref_lr = (d->bd_next_unref_lr + 1) % BPF_REF_HIST;
723
724 if (d->bd_refcnt == 0) {
725 /* Assert the device is detached */
726 if ((d->bd_flags & BPF_DETACHED) == 0)
727 panic("%s: %p BPF_DETACHED not set", __func__, d);
728
729 _FREE(d, M_DEVBUF);
730 }
731 }
732
733 /*
734 * Open ethernet device. Returns ENXIO for illegal minor device number,
735 * EBUSY if file is open by another process.
736 */
737 /* ARGSUSED */
738 int
739 bpfopen(dev_t dev, int flags, __unused int fmt,
740 __unused struct proc *p)
741 {
742 struct bpf_d *d;
743
744 lck_mtx_lock(bpf_mlock);
745 if ((unsigned int) minor(dev) >= nbpfilter) {
746 lck_mtx_unlock(bpf_mlock);
747 return (ENXIO);
748 }
749 /*
750 * New device nodes are created on demand when opening the last one.
751 * The programming model is for processes to loop on the minor starting at 0
752 * as long as EBUSY is returned. The loop stops when either the open succeeds or
753 * an error other than EBUSY is returned. That means that bpf_make_dev_t() must
754 * block all processes that are opening the last node. If not all
755 * processes are blocked, they could unexpectedly get ENOENT and abort their
756 * opening loop.
757 */
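/*
 * Illustrative sketch (not part of the original source) of that userland
 * programming model; the bound of 256 matches the default bpf_maxdevices,
 * and the loop stops on any error other than EBUSY:
 *
 *	int fd = -1;
 *	char name[16];
 *
 *	for (int i = 0; i < 256 && fd == -1; i++) {
 *		snprintf(name, sizeof (name), "/dev/bpf%d", i);
 *		fd = open(name, O_RDWR);
 *		if (fd == -1 && errno != EBUSY)
 *			break;
 *	}
 */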
758 if ((unsigned int) minor(dev) == (nbpfilter - 1))
759 bpf_make_dev_t(major(dev));
760
761 /*
762 * Each minor can be opened by only one process. If the requested
763 * minor is in use, return EBUSY.
764 *
765 * Important: bpfopen() and bpfclose() have to check and set the status of a device
766 * in the same locking context, otherwise the device may be leaked because the vnode use count
767 * will be unexpectedly greater than 1 when close() is called.
768 */
769 if (bpf_dtab[minor(dev)] == 0) {
770 bpf_dtab[minor(dev)] = (void *)1; /* Mark opening */
771 } else {
772 lck_mtx_unlock(bpf_mlock);
773 return (EBUSY);
774 }
775 d = (struct bpf_d *)_MALLOC(sizeof(struct bpf_d), M_DEVBUF,
776 M_WAIT | M_ZERO);
777 if (d == NULL) {
778 /* this really is a catastrophic failure */
779 printf("bpfopen: malloc bpf_d failed\n");
780 bpf_dtab[minor(dev)] = NULL;
781 lck_mtx_unlock(bpf_mlock);
782 return ENOMEM;
783 }
784
785 /* Mark "in use" and do most initialization. */
786 bpf_acquire_d(d);
787 d->bd_bufsize = bpf_bufsize;
788 d->bd_sig = SIGIO;
789 d->bd_seesent = 1;
790 d->bd_oflags = flags;
791 d->bd_state = BPF_IDLE;
792 d->bd_traffic_class = SO_TC_BE;
793 d->bd_flags |= BPF_DETACHED;
794 if (bpf_wantpktap)
795 d->bd_flags |= BPF_WANT_PKTAP;
796 else
797 d->bd_flags &= ~BPF_WANT_PKTAP;
798 d->bd_thread_call = thread_call_allocate(bpf_timed_out, d);
799 if (d->bd_thread_call == NULL) {
800 printf("bpfopen: malloc thread call failed\n");
801 bpf_dtab[minor(dev)] = NULL;
802 bpf_release_d(d);
803 lck_mtx_unlock(bpf_mlock);
804
805 return (ENOMEM);
806 }
807 #if CONFIG_MACF_NET
808 mac_bpfdesc_label_init(d);
809 mac_bpfdesc_label_associate(kauth_cred_get(), d);
810 #endif
811 bpf_dtab[minor(dev)] = d; /* Mark opened */
812 lck_mtx_unlock(bpf_mlock);
813
814 return (0);
815 }
816
817 /*
818 * Close the descriptor by detaching it from its interface,
819 * deallocating its buffers, and marking it free.
820 */
821 /* ARGSUSED */
822 int
823 bpfclose(dev_t dev, __unused int flags, __unused int fmt,
824 __unused struct proc *p)
825 {
826 struct bpf_d *d;
827
828 /* Take BPF lock to ensure no other thread is using the device */
829 lck_mtx_lock(bpf_mlock);
830
831 d = bpf_dtab[minor(dev)];
832 if (d == 0 || d == (void *)1) {
833 lck_mtx_unlock(bpf_mlock);
834 return (ENXIO);
835 }
836
837 /*
838 * Other threads may call bpd_detachd() if we drop the bpf_mlock
839 */
840 d->bd_flags |= BPF_CLOSING;
841
842 if (bpf_debug != 0)
843 printf("%s: %llx\n",
844 __func__, (uint64_t)VM_KERNEL_ADDRPERM(d));
845
846 bpf_dtab[minor(dev)] = (void *)1; /* Mark closing */
847
848 /*
849 * Deal with any in-progress timeouts.
850 */
851 switch (d->bd_state) {
852 case BPF_IDLE:
853 /*
854 * Not waiting for a timeout, and no timeout happened.
855 */
856 break;
857
858 case BPF_WAITING:
859 /*
860 * Waiting for a timeout.
861 * Cancel any timer that has yet to go off,
862 * and mark the state as "closing".
863 * Then drop the lock to allow any timers that
864 * *have* gone off to run to completion, and wait
865 * for them to finish.
866 */
867 if (!bpf_stop_timer(d)) {
868 /*
869 * There was no pending call, so the call must
870 * have been in progress. Wait for the call to
871 * complete; we have to drop the lock while
872 * waiting, to let the in-progress call complete.
873 */
874 d->bd_state = BPF_DRAINING;
875 while (d->bd_state == BPF_DRAINING)
876 msleep((caddr_t)d, bpf_mlock, PRINET,
877 "bpfdraining", NULL);
878 }
879 d->bd_state = BPF_IDLE;
880 break;
881
882 case BPF_TIMED_OUT:
883 /*
884 * Timer went off, and the timeout routine finished.
885 */
886 d->bd_state = BPF_IDLE;
887 break;
888
889 case BPF_DRAINING:
890 /*
891 * Another thread is blocked on a close waiting for
892 * a timeout to finish.
893 * This "shouldn't happen", as the first thread to enter
894 * bpfclose() will set bpf_dtab[minor(dev)] to 1, and
895 * all subsequent threads should see that and fail with
896 * ENXIO.
897 */
898 panic("Two threads blocked in a BPF close");
899 break;
900 }
901
902 if (d->bd_bif)
903 bpf_detachd(d, 1);
904 selthreadclear(&d->bd_sel);
905 #if CONFIG_MACF_NET
906 mac_bpfdesc_label_destroy(d);
907 #endif
908 thread_call_free(d->bd_thread_call);
909
910 while (d->bd_hbuf_read)
911 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
912
913 bpf_freed(d);
914
915 /* Mark free in same context as bpfopen comes to check */
916 bpf_dtab[minor(dev)] = NULL; /* Mark closed */
917
918 bpf_release_d(d);
919
920 lck_mtx_unlock(bpf_mlock);
921
922 return (0);
923 }
924
925
926 #define BPF_SLEEP bpf_sleep
927
928 static int
929 bpf_sleep(struct bpf_d *d, int pri, const char *wmesg, int timo)
930 {
931 u_int64_t abstime = 0;
932
933 if(timo)
934 clock_interval_to_deadline(timo, NSEC_PER_SEC / hz, &abstime);
935
936 return msleep1((caddr_t)d, bpf_mlock, pri, wmesg, abstime);
937 }
938
939 /*
940 * Rotate the packet buffers in descriptor d. Move the store buffer
941 * into the hold slot, and the free buffer into the store slot.
942 * Zero the length of the new store buffer.
943 */
944 #define ROTATE_BUFFERS(d) \
945 if (d->bd_hbuf_read) \
946 panic("rotating bpf buffers during read"); \
947 (d)->bd_hbuf = (d)->bd_sbuf; \
948 (d)->bd_hlen = (d)->bd_slen; \
949 (d)->bd_hcnt = (d)->bd_scnt; \
950 (d)->bd_sbuf = (d)->bd_fbuf; \
951 (d)->bd_slen = 0; \
952 (d)->bd_scnt = 0; \
953 (d)->bd_fbuf = NULL;
954 /*
955 * bpfread - read next chunk of packets from buffers
956 */
957 int
958 bpfread(dev_t dev, struct uio *uio, int ioflag)
959 {
960 struct bpf_d *d;
961 caddr_t hbuf;
962 int timed_out, hbuf_len;
963 int error;
964 int flags;
965
966 lck_mtx_lock(bpf_mlock);
967
968 d = bpf_dtab[minor(dev)];
969 if (d == 0 || d == (void *)1 || (d->bd_flags & BPF_CLOSING) != 0) {
970 lck_mtx_unlock(bpf_mlock);
971 return (ENXIO);
972 }
973
974 bpf_acquire_d(d);
975
976 /*
977 * Restrict application to use a buffer the same size as
978 * the kernel buffers.
979 */
980 if (uio_resid(uio) != d->bd_bufsize) {
981 bpf_release_d(d);
982 lck_mtx_unlock(bpf_mlock);
983 return (EINVAL);
984 }
985
986 if (d->bd_state == BPF_WAITING)
987 bpf_stop_timer(d);
988
989 timed_out = (d->bd_state == BPF_TIMED_OUT);
990 d->bd_state = BPF_IDLE;
991
992 while (d->bd_hbuf_read)
993 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
994
995 if ((d->bd_flags & BPF_CLOSING) != 0) {
996 bpf_release_d(d);
997 lck_mtx_unlock(bpf_mlock);
998 return (ENXIO);
999 }
1000 /*
1001 * If the hold buffer is empty, then do a timed sleep, which
1002 * ends when the timeout expires or when enough packets
1003 * have arrived to fill the store buffer.
1004 */
1005 while (d->bd_hbuf == 0) {
1006 if ((d->bd_immediate || timed_out || (ioflag & IO_NDELAY))
1007 && d->bd_slen != 0) {
1008 /*
1009 * We're in immediate mode, or are reading
1010 * in non-blocking mode, or a timer was
1011 * started before the read (e.g., by select()
1012 * or poll()) and has expired and a packet(s)
1013 * either arrived since the previous
1014 * read or arrived while we were asleep.
1015 * Rotate the buffers and return what's here.
1016 */
1017 ROTATE_BUFFERS(d);
1018 break;
1019 }
1020
1021 /*
1022 * No data is available, check to see if the bpf device
1023 * is still pointed at a real interface. If not, return
1024 * ENXIO so that the userland process knows to rebind
1025 * it before using it again.
1026 */
1027 if (d->bd_bif == NULL) {
1028 bpf_release_d(d);
1029 lck_mtx_unlock(bpf_mlock);
1030 return (ENXIO);
1031 }
1032 if (ioflag & IO_NDELAY) {
1033 bpf_release_d(d);
1034 lck_mtx_unlock(bpf_mlock);
1035 return (EWOULDBLOCK);
1036 }
1037 error = BPF_SLEEP(d, PRINET|PCATCH, "bpf",
1038 d->bd_rtout);
1039 /*
1040 * Make sure device is still opened
1041 */
1042 if ((d->bd_flags & BPF_CLOSING) != 0) {
1043 bpf_release_d(d);
1044 lck_mtx_unlock(bpf_mlock);
1045 return (ENXIO);
1046 }
1047
1048 while (d->bd_hbuf_read)
1049 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
1050
1051 if ((d->bd_flags & BPF_CLOSING) != 0) {
1052 bpf_release_d(d);
1053 lck_mtx_unlock(bpf_mlock);
1054 return (ENXIO);
1055 }
1056
1057 if (error == EINTR || error == ERESTART) {
1058 if (d->bd_hbuf != NULL) {
1059 /*
1060 * Because we msleep, the hold buffer might
1061 * be filled when we wake up. Avoid rotating
1062 * in this case.
1063 */
1064 break;
1065 }
1066 if (d->bd_slen != 0) {
1067 /*
1068 * Sometimes we may be interrupted often and
1069 * the sleep above will not timeout.
1070 * Regardless, we should rotate the buffers
1071 * if there's any new data pending and
1072 * return it.
1073 */
1074 ROTATE_BUFFERS(d);
1075 break;
1076 }
1077 bpf_release_d(d);
1078 lck_mtx_unlock(bpf_mlock);
1079 if (error == ERESTART) {
1080 printf("%s: %llx ERESTART to EINTR\n",
1081 __func__, (uint64_t)VM_KERNEL_ADDRPERM(d));
1082 error = EINTR;
1083 }
1084 return (error);
1085 }
1086 if (error == EWOULDBLOCK) {
1087 /*
1088 * On a timeout, return what's in the buffer,
1089 * which may be nothing. If there is something
1090 * in the store buffer, we can rotate the buffers.
1091 */
1092 if (d->bd_hbuf)
1093 /*
1094 * We filled up the buffer in between
1095 * getting the timeout and arriving
1096 * here, so we don't need to rotate.
1097 */
1098 break;
1099
1100 if (d->bd_slen == 0) {
1101 bpf_release_d(d);
1102 lck_mtx_unlock(bpf_mlock);
1103 return (0);
1104 }
1105 ROTATE_BUFFERS(d);
1106 break;
1107 }
1108 }
1109 /*
1110 * At this point, we know we have something in the hold slot.
1111 */
1112
1113 /*
1114 * Set the hold buffer read flag so we do not
1115 * rotate the buffers until the hold buffer
1116 * read is complete, and to avoid issues resulting
1117 * from page faults during disk sleep (<rdar://problem/13436396>).
1118 */
1119 d->bd_hbuf_read = 1;
1120 hbuf = d->bd_hbuf;
1121 hbuf_len = d->bd_hlen;
1122 flags = d->bd_flags;
1123 lck_mtx_unlock(bpf_mlock);
1124
1125 #ifdef __APPLE__
1126 /*
1127 * Before we move data to userland, we fill out the extended
1128 * header fields.
1129 */
1130 if (flags & BPF_EXTENDED_HDR) {
1131 char *p;
1132
1133 p = hbuf;
1134 while (p < hbuf + hbuf_len) {
1135 struct bpf_hdr_ext *ehp;
1136 uint32_t flowid;
1137 struct so_procinfo soprocinfo;
1138 int found = 0;
1139
1140 ehp = (struct bpf_hdr_ext *)(void *)p;
1141 if ((flowid = ehp->bh_flowid)) {
1142 if (ehp->bh_proto == IPPROTO_TCP)
1143 found = inp_findinpcb_procinfo(&tcbinfo,
1144 flowid, &soprocinfo);
1145 else if (ehp->bh_proto == IPPROTO_UDP)
1146 found = inp_findinpcb_procinfo(&udbinfo,
1147 flowid, &soprocinfo);
1148 if (found == 1) {
1149 ehp->bh_pid = soprocinfo.spi_pid;
1150 proc_name(ehp->bh_pid, ehp->bh_comm, MAXCOMLEN);
1151 }
1152 ehp->bh_flowid = 0;
1153 }
1154
1155 if (flags & BPF_FINALIZE_PKTAP) {
1156 struct pktap_header *pktaphdr;
1157
1158 pktaphdr = (struct pktap_header *)(void *)
1159 (p + BPF_WORDALIGN(ehp->bh_hdrlen));
1160
1161 if (pktaphdr->pth_flags & PTH_FLAG_DELAY_PKTAP)
1162 pktap_finalize_proc_info(pktaphdr);
1163
1164 if (pktaphdr->pth_flags & PTH_FLAG_TSTAMP) {
1165 ehp->bh_tstamp.tv_sec =
1166 pktaphdr->pth_tstamp.tv_sec;
1167 ehp->bh_tstamp.tv_usec =
1168 pktaphdr->pth_tstamp.tv_usec;
1169 }
1170 }
1171 p += BPF_WORDALIGN(ehp->bh_hdrlen + ehp->bh_caplen);
1172 }
1173 } else if (flags & BPF_FINALIZE_PKTAP) {
1174 char *p;
1175
1176 p = hbuf;
1177 while (p < hbuf + hbuf_len) {
1178 struct bpf_hdr *hp;
1179 struct pktap_header *pktaphdr;
1180
1181 hp = (struct bpf_hdr *)(void *)p;
1182 pktaphdr = (struct pktap_header *)(void *)
1183 (p + BPF_WORDALIGN(hp->bh_hdrlen));
1184
1185 if (pktaphdr->pth_flags & PTH_FLAG_DELAY_PKTAP)
1186 pktap_finalize_proc_info(pktaphdr);
1187
1188 if (pktaphdr->pth_flags & PTH_FLAG_TSTAMP) {
1189 hp->bh_tstamp.tv_sec =
1190 pktaphdr->pth_tstamp.tv_sec;
1191 hp->bh_tstamp.tv_usec =
1192 pktaphdr->pth_tstamp.tv_usec;
1193 }
1194
1195 p += BPF_WORDALIGN(hp->bh_hdrlen + hp->bh_caplen);
1196 }
1197 }
1198 #endif
1199
1200 /*
1201 * Move data from hold buffer into user space.
1202 * We know the entire buffer is transferred since
1203 * we checked above that the read buffer is bpf_bufsize bytes.
1204 */
1205 error = UIOMOVE(hbuf, hbuf_len, UIO_READ, uio);
1206
1207 lck_mtx_lock(bpf_mlock);
1208 /*
1209 * Make sure device is still opened
1210 */
1211 if ((d->bd_flags & BPF_CLOSING) != 0) {
1212 bpf_release_d(d);
1213 lck_mtx_unlock(bpf_mlock);
1214 return (ENXIO);
1215 }
1216
1217 d->bd_hbuf_read = 0;
1218 d->bd_fbuf = d->bd_hbuf;
1219 d->bd_hbuf = NULL;
1220 d->bd_hlen = 0;
1221 d->bd_hcnt = 0;
1222 wakeup((caddr_t)d);
1223
1224 bpf_release_d(d);
1225 lck_mtx_unlock(bpf_mlock);
1226 return (error);
1227
1228 }
1229
1230
1231 /*
1232 * If there are processes sleeping on this descriptor, wake them up.
1233 */
1234 static void
1235 bpf_wakeup(struct bpf_d *d)
1236 {
1237 if (d->bd_state == BPF_WAITING) {
1238 bpf_stop_timer(d);
1239 d->bd_state = BPF_IDLE;
1240 }
1241 wakeup((caddr_t)d);
1242 if (d->bd_async && d->bd_sig && d->bd_sigio)
1243 pgsigio(d->bd_sigio, d->bd_sig);
1244
1245 selwakeup(&d->bd_sel);
1246 if ((d->bd_flags & BPF_KNOTE))
1247 KNOTE(&d->bd_sel.si_note, 1);
1248 }
1249
1250
1251 static void
1252 bpf_timed_out(void *arg, __unused void *dummy)
1253 {
1254 struct bpf_d *d = (struct bpf_d *)arg;
1255
1256 lck_mtx_lock(bpf_mlock);
1257 if (d->bd_state == BPF_WAITING) {
1258 /*
1259 * There's a select or kqueue waiting for this; if there's
1260 * now stuff to read, wake it up.
1261 */
1262 d->bd_state = BPF_TIMED_OUT;
1263 if (d->bd_slen != 0)
1264 bpf_wakeup(d);
1265 } else if (d->bd_state == BPF_DRAINING) {
1266 /*
1267 * A close is waiting for this to finish.
1268 * Mark it as finished, and wake the close up.
1269 */
1270 d->bd_state = BPF_IDLE;
1271 bpf_wakeup(d);
1272 }
1273 lck_mtx_unlock(bpf_mlock);
1274 }
1275
1276
1277
1278
1279
1280 /* keep in sync with bpf_movein above: */
1281 #define MAX_DATALINK_HDR_LEN (sizeof(struct firewire_header))
1282
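/*
 * Illustrative sketch (not part of the original source): with the
 * "header already complete" flag set (BIOCSHDRCMPLT), the caller writes a
 * complete link-layer frame that is transmitted as-is; without it,
 * bpf_movein() below copies the leading link-level header into a sockaddr
 * (dst_buf) and the frame is rebuilt on output. "frame" and "frame_len"
 * are hypothetical userland variables:
 *
 *	u_int on = 1;
 *
 *	ioctl(fd, BIOCSHDRCMPLT, &on);
 *	write(fd, frame, frame_len);
 */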
1283 int
1284 bpfwrite(dev_t dev, struct uio *uio, __unused int ioflag)
1285 {
1286 struct bpf_d *d;
1287 struct ifnet *ifp;
1288 struct mbuf *m = NULL;
1289 int error;
1290 char dst_buf[SOCKADDR_HDR_LEN + MAX_DATALINK_HDR_LEN];
1291 int datlen = 0;
1292 int bif_dlt;
1293 int bd_hdrcmplt;
1294
1295 lck_mtx_lock(bpf_mlock);
1296
1297 d = bpf_dtab[minor(dev)];
1298 if (d == 0 || d == (void *)1 || (d->bd_flags & BPF_CLOSING) != 0) {
1299 lck_mtx_unlock(bpf_mlock);
1300 return (ENXIO);
1301 }
1302
1303 bpf_acquire_d(d);
1304
1305 if (d->bd_bif == 0) {
1306 bpf_release_d(d);
1307 lck_mtx_unlock(bpf_mlock);
1308 return (ENXIO);
1309 }
1310
1311 ifp = d->bd_bif->bif_ifp;
1312
1313 if ((ifp->if_flags & IFF_UP) == 0) {
1314 bpf_release_d(d);
1315 lck_mtx_unlock(bpf_mlock);
1316 return (ENETDOWN);
1317 }
1318 if (uio_resid(uio) == 0) {
1319 bpf_release_d(d);
1320 lck_mtx_unlock(bpf_mlock);
1321 return (0);
1322 }
1323 ((struct sockaddr *)dst_buf)->sa_len = sizeof(dst_buf);
1324
1325 /*
1326 * fix for PR-6849527
1327 * getting variables onto the stack before dropping the lock for bpf_movein()
1328 */
1329 bif_dlt = (int)d->bd_bif->bif_dlt;
1330 bd_hdrcmplt = d->bd_hdrcmplt;
1331
1332 /* bpf_movein() allocates mbufs; drop the lock */
1333 lck_mtx_unlock(bpf_mlock);
1334
1335 error = bpf_movein(uio, bif_dlt, &m,
1336 bd_hdrcmplt ? NULL : (struct sockaddr *)dst_buf,
1337 &datlen);
1338
1339 /* take the lock again */
1340 lck_mtx_lock(bpf_mlock);
1341 if (error) {
1342 bpf_release_d(d);
1343 lck_mtx_unlock(bpf_mlock);
1344 return (error);
1345 }
1346
1347 /* verify the device is still open */
1348 if ((d->bd_flags & BPF_CLOSING) != 0) {
1349 bpf_release_d(d);
1350 lck_mtx_unlock(bpf_mlock);
1351 m_freem(m);
1352 return (ENXIO);
1353 }
1354
1355 if (d->bd_bif == NULL) {
1356 bpf_release_d(d);
1357 lck_mtx_unlock(bpf_mlock);
1358 m_free(m);
1359 return (ENXIO);
1360 }
1361
1362 if ((unsigned)datlen > ifp->if_mtu) {
1363 bpf_release_d(d);
1364 lck_mtx_unlock(bpf_mlock);
1365 m_freem(m);
1366 return (EMSGSIZE);
1367 }
1368
1369
1370 #if CONFIG_MACF_NET
1371 mac_mbuf_label_associate_bpfdesc(d, m);
1372 #endif
1373
1374 bpf_set_packet_service_class(m, d->bd_traffic_class);
1375
1376 lck_mtx_unlock(bpf_mlock);
1377
1378 /*
1379 * The driver frees the mbuf.
1380 */
1381 if (d->bd_hdrcmplt) {
1382 if (d->bd_bif->bif_send)
1383 error = d->bd_bif->bif_send(ifp, d->bd_bif->bif_dlt, m);
1384 else
1385 error = dlil_output(ifp, 0, m, NULL, NULL, 1, NULL);
1386 } else {
1387 error = dlil_output(ifp, PF_INET, m, NULL,
1388 (struct sockaddr *)dst_buf, 0, NULL);
1389 }
1390
1391 lck_mtx_lock(bpf_mlock);
1392 bpf_release_d(d);
1393 lck_mtx_unlock(bpf_mlock);
1394
1395 return (error);
1396 }
1397
1398 /*
1399 * Reset a descriptor by flushing its packet buffer and clearing the
1400 * receive and drop counts.
1401 */
1402 static void
1403 reset_d(struct bpf_d *d)
1404 {
1405 if (d->bd_hbuf_read)
1406 panic("resetting buffers during read");
1407
1408 if (d->bd_hbuf) {
1409 /* Free the hold buffer. */
1410 d->bd_fbuf = d->bd_hbuf;
1411 d->bd_hbuf = NULL;
1412 }
1413 d->bd_slen = 0;
1414 d->bd_hlen = 0;
1415 d->bd_scnt = 0;
1416 d->bd_hcnt = 0;
1417 d->bd_rcount = 0;
1418 d->bd_dcount = 0;
1419 }
1420
1421 /*
1422 * FIONREAD Check for read packet available.
1423 * SIOCGIFADDR Get interface address - convenient hook to driver.
1424 * BIOCGBLEN Get buffer len [for read()].
1425 * BIOCSETF Set ethernet read filter.
1426 * BIOCFLUSH Flush read packet buffer.
1427 * BIOCPROMISC Put interface into promiscuous mode.
1428 * BIOCGDLT Get link layer type.
1429 * BIOCGETIF Get interface name.
1430 * BIOCSETIF Set interface.
1431 * BIOCSRTIMEOUT Set read timeout.
1432 * BIOCGRTIMEOUT Get read timeout.
1433 * BIOCGSTATS Get packet stats.
1434 * BIOCIMMEDIATE Set immediate mode.
1435 * BIOCVERSION Get filter language version.
1436 * BIOCGHDRCMPLT Get "header already complete" flag
1437 * BIOCSHDRCMPLT Set "header already complete" flag
1438 * BIOCGSEESENT Get "see packets sent" flag
1439 * BIOCSSEESENT Set "see packets sent" flag
1440 * BIOCSETTC Set traffic class.
1441 * BIOCGETTC Get traffic class.
1442 * BIOCSEXTHDR Set "extended header" flag
1443 * BIOCSHEADDROP Drop head of the buffer if user is not reading
1444 * BIOCGHEADDROP Get "head-drop" flag
1445 */
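/*
 * Illustrative sketch (not part of the original source): a minimal userland
 * capture setup using these ioctls; "fd" is an open /dev/bpfN descriptor,
 * "en0" is only an example interface name, and error handling is omitted:
 *
 *	struct ifreq ifr;
 *	u_int bufsize, on = 1;
 *
 *	ioctl(fd, BIOCGBLEN, &bufsize);
 *	strlcpy(ifr.ifr_name, "en0", sizeof (ifr.ifr_name));
 *	ioctl(fd, BIOCSETIF, &ifr);
 *	ioctl(fd, BIOCIMMEDIATE, &on);
 *
 * Subsequent read() calls must use a buffer of exactly "bufsize" bytes
 * (see the check in bpfread() above).
 */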
1446 /* ARGSUSED */
1447 int
1448 bpfioctl(dev_t dev, u_long cmd, caddr_t addr, __unused int flags,
1449 struct proc *p)
1450 {
1451 struct bpf_d *d;
1452 int error = 0;
1453 u_int int_arg;
1454 struct ifreq ifr;
1455
1456 lck_mtx_lock(bpf_mlock);
1457
1458 d = bpf_dtab[minor(dev)];
1459 if (d == 0 || d == (void *)1 || (d->bd_flags & BPF_CLOSING) != 0) {
1460 lck_mtx_unlock(bpf_mlock);
1461 return (ENXIO);
1462 }
1463
1464 bpf_acquire_d(d);
1465
1466 if (d->bd_state == BPF_WAITING)
1467 bpf_stop_timer(d);
1468 d->bd_state = BPF_IDLE;
1469
1470 switch (cmd) {
1471
1472 default:
1473 error = EINVAL;
1474 break;
1475
1476 /*
1477 * Check for read packet available.
1478 */
1479 case FIONREAD: /* int */
1480 {
1481 int n;
1482
1483 n = d->bd_slen;
1484 if (d->bd_hbuf && d->bd_hbuf_read == 0)
1485 n += d->bd_hlen;
1486
1487 bcopy(&n, addr, sizeof (n));
1488 break;
1489 }
1490
1491 case SIOCGIFADDR: /* struct ifreq */
1492 {
1493 struct ifnet *ifp;
1494
1495 if (d->bd_bif == 0)
1496 error = EINVAL;
1497 else {
1498 ifp = d->bd_bif->bif_ifp;
1499 error = ifnet_ioctl(ifp, 0, cmd, addr);
1500 }
1501 break;
1502 }
1503
1504 /*
1505 * Get buffer len [for read()].
1506 */
1507 case BIOCGBLEN: /* u_int */
1508 bcopy(&d->bd_bufsize, addr, sizeof (u_int));
1509 break;
1510
1511 /*
1512 * Set buffer length.
1513 */
1514 case BIOCSBLEN: /* u_int */
1515 if (d->bd_bif != 0 || (d->bd_flags & BPF_DETACHING))
1516 error = EINVAL;
1517 else {
1518 u_int size;
1519
1520 bcopy(addr, &size, sizeof (size));
1521
1522 /*
1523 * Allow larger buffer in head drop mode with the
1524 * assumption the capture is in standby mode to
1525 * keep a cache of recent traffic
1526 */
1527 if (d->bd_headdrop != 0 && size > 2 * bpf_maxbufsize)
1528 size = 2 * bpf_maxbufsize;
1529 else if (size > bpf_maxbufsize)
1530 size = bpf_maxbufsize;
1531 else if (size < BPF_MINBUFSIZE)
1532 size = BPF_MINBUFSIZE;
1533 bcopy(&size, addr, sizeof (size));
1534 d->bd_bufsize = size;
1535 }
1536 break;
1537
1538 /*
1539 * Set link layer read filter.
1540 */
1541 case BIOCSETF32:
1542 case BIOCSETFNR32: { /* struct bpf_program32 */
1543 struct bpf_program32 prg32;
1544
1545 bcopy(addr, &prg32, sizeof (prg32));
1546 error = bpf_setf(d, prg32.bf_len,
1547 CAST_USER_ADDR_T(prg32.bf_insns), cmd);
1548 break;
1549 }
1550
1551 case BIOCSETF64:
1552 case BIOCSETFNR64: { /* struct bpf_program64 */
1553 struct bpf_program64 prg64;
1554
1555 bcopy(addr, &prg64, sizeof (prg64));
1556 error = bpf_setf(d, prg64.bf_len, prg64.bf_insns, cmd);
1557 break;
1558 }
1559
1560 /*
1561 * Flush read packet buffer.
1562 */
1563 case BIOCFLUSH:
1564 while (d->bd_hbuf_read) {
1565 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
1566 }
1567 if ((d->bd_flags & BPF_CLOSING) != 0) {
1568 error = ENXIO;
1569 break;
1570 }
1571 reset_d(d);
1572 break;
1573
1574 /*
1575 * Put interface into promiscuous mode.
1576 */
1577 case BIOCPROMISC:
1578 if (d->bd_bif == 0) {
1579 /*
1580 * No interface attached yet.
1581 */
1582 error = EINVAL;
1583 break;
1584 }
1585 if (d->bd_promisc == 0) {
1586 lck_mtx_unlock(bpf_mlock);
1587 error = ifnet_set_promiscuous(d->bd_bif->bif_ifp, 1);
1588 lck_mtx_lock(bpf_mlock);
1589 if (error == 0)
1590 d->bd_promisc = 1;
1591 }
1592 break;
1593
1594 /*
1595 * Get device parameters.
1596 */
1597 case BIOCGDLT: /* u_int */
1598 if (d->bd_bif == 0)
1599 error = EINVAL;
1600 else
1601 bcopy(&d->bd_bif->bif_dlt, addr, sizeof (u_int));
1602 break;
1603
1604 /*
1605 * Get a list of supported data link types.
1606 */
1607 case BIOCGDLTLIST: /* struct bpf_dltlist */
1608 if (d->bd_bif == NULL) {
1609 error = EINVAL;
1610 } else {
1611 error = bpf_getdltlist(d, addr, p);
1612 }
1613 break;
1614
1615 /*
1616 * Set data link type.
1617 */
1618 case BIOCSDLT: /* u_int */
1619 if (d->bd_bif == NULL) {
1620 error = EINVAL;
1621 } else {
1622 u_int dlt;
1623
1624 bcopy(addr, &dlt, sizeof (dlt));
1625
1626 if (dlt == DLT_PKTAP && !(d->bd_flags & BPF_WANT_PKTAP)) {
1627 printf("BIOCSDLT downgrade DLT_PKTAP to DLT_RAW\n");
1628 dlt = DLT_RAW;
1629 }
1630 error = bpf_setdlt(d, dlt);
1631 }
1632 break;
1633
1634 /*
1635 * Get interface name.
1636 */
1637 case BIOCGETIF: /* struct ifreq */
1638 if (d->bd_bif == 0)
1639 error = EINVAL;
1640 else {
1641 struct ifnet *const ifp = d->bd_bif->bif_ifp;
1642
1643 snprintf(((struct ifreq *)(void *)addr)->ifr_name,
1644 sizeof (ifr.ifr_name), "%s", if_name(ifp));
1645 }
1646 break;
1647
1648 /*
1649 * Set interface.
1650 */
1651 case BIOCSETIF: { /* struct ifreq */
1652 ifnet_t ifp;
1653
1654 bcopy(addr, &ifr, sizeof (ifr));
1655 ifr.ifr_name[IFNAMSIZ - 1] = '\0';
1656 ifp = ifunit(ifr.ifr_name);
1657 if (ifp == NULL)
1658 error = ENXIO;
1659 else
1660 error = bpf_setif(d, ifp);
1661 break;
1662 }
1663
1664 /*
1665 * Set read timeout.
1666 */
1667 case BIOCSRTIMEOUT32: { /* struct user32_timeval */
1668 struct user32_timeval _tv;
1669 struct timeval tv;
1670
1671 bcopy(addr, &_tv, sizeof (_tv));
1672 tv.tv_sec = _tv.tv_sec;
1673 tv.tv_usec = _tv.tv_usec;
1674
1675 /*
1676 * Subtract 1 tick from tvtohz() since this isn't
1677 * a one-shot timer.
1678 */
1679 if ((error = itimerfix(&tv)) == 0)
1680 d->bd_rtout = tvtohz(&tv) - 1;
1681 break;
1682 }
1683
1684 case BIOCSRTIMEOUT64: { /* struct user64_timeval */
1685 struct user64_timeval _tv;
1686 struct timeval tv;
1687
1688 bcopy(addr, &_tv, sizeof (_tv));
1689 tv.tv_sec = _tv.tv_sec;
1690 tv.tv_usec = _tv.tv_usec;
1691
1692 /*
1693 * Subtract 1 tick from tvtohz() since this isn't
1694 * a one-shot timer.
1695 */
1696 if ((error = itimerfix(&tv)) == 0)
1697 d->bd_rtout = tvtohz(&tv) - 1;
1698 break;
1699 }
1700
1701 /*
1702 * Get read timeout.
1703 */
1704 case BIOCGRTIMEOUT32: { /* struct user32_timeval */
1705 struct user32_timeval tv;
1706
1707 bzero(&tv, sizeof (tv));
1708 tv.tv_sec = d->bd_rtout / hz;
1709 tv.tv_usec = (d->bd_rtout % hz) * tick;
1710 bcopy(&tv, addr, sizeof (tv));
1711 break;
1712 }
1713
1714 case BIOCGRTIMEOUT64: { /* struct user64_timeval */
1715 struct user64_timeval tv;
1716
1717 bzero(&tv, sizeof (tv));
1718 tv.tv_sec = d->bd_rtout / hz;
1719 tv.tv_usec = (d->bd_rtout % hz) * tick;
1720 bcopy(&tv, addr, sizeof (tv));
1721 break;
1722 }
1723
1724 /*
1725 * Get packet stats.
1726 */
1727 case BIOCGSTATS: { /* struct bpf_stat */
1728 struct bpf_stat bs;
1729
1730 bzero(&bs, sizeof (bs));
1731 bs.bs_recv = d->bd_rcount;
1732 bs.bs_drop = d->bd_dcount;
1733 bcopy(&bs, addr, sizeof (bs));
1734 break;
1735 }
1736
1737 /*
1738 * Set immediate mode.
1739 */
1740 case BIOCIMMEDIATE: /* u_int */
1741 d->bd_immediate = *(u_int *)(void *)addr;
1742 break;
1743
1744 case BIOCVERSION: { /* struct bpf_version */
1745 struct bpf_version bv;
1746
1747 bzero(&bv, sizeof (bv));
1748 bv.bv_major = BPF_MAJOR_VERSION;
1749 bv.bv_minor = BPF_MINOR_VERSION;
1750 bcopy(&bv, addr, sizeof (bv));
1751 break;
1752 }
1753
1754 /*
1755 * Get "header already complete" flag
1756 */
1757 case BIOCGHDRCMPLT: /* u_int */
1758 bcopy(&d->bd_hdrcmplt, addr, sizeof (u_int));
1759 break;
1760
1761 /*
1762 * Set "header already complete" flag
1763 */
1764 case BIOCSHDRCMPLT: /* u_int */
1765 bcopy(addr, &int_arg, sizeof (int_arg));
1766 d->bd_hdrcmplt = int_arg ? 1 : 0;
1767 break;
1768
1769 /*
1770 * Get "see sent packets" flag
1771 */
1772 case BIOCGSEESENT: /* u_int */
1773 bcopy(&d->bd_seesent, addr, sizeof (u_int));
1774 break;
1775
1776 /*
1777 * Set "see sent packets" flag
1778 */
1779 case BIOCSSEESENT: /* u_int */
1780 bcopy(addr, &d->bd_seesent, sizeof (u_int));
1781 break;
1782
1783 /*
1784 * Set traffic service class
1785 */
1786 case BIOCSETTC: { /* int */
1787 int tc;
1788
1789 bcopy(addr, &tc, sizeof (int));
1790 error = bpf_set_traffic_class(d, tc);
1791 break;
1792 }
1793
1794 /*
1795 * Get traffic service class
1796 */
1797 case BIOCGETTC: /* int */
1798 bcopy(&d->bd_traffic_class, addr, sizeof (int));
1799 break;
1800
1801 case FIONBIO: /* Non-blocking I/O; int */
1802 break;
1803
1804 case FIOASYNC: /* Send signal on receive packets; int */
1805 bcopy(addr, &d->bd_async, sizeof (int));
1806 break;
1807 #ifndef __APPLE__
1808 case FIOSETOWN:
1809 error = fsetown(*(int *)addr, &d->bd_sigio);
1810 break;
1811
1812 case FIOGETOWN:
1813 *(int *)addr = fgetown(d->bd_sigio);
1814 break;
1815
1816 /* This is deprecated, FIOSETOWN should be used instead. */
1817 case TIOCSPGRP:
1818 error = fsetown(-(*(int *)addr), &d->bd_sigio);
1819 break;
1820
1821 /* This is deprecated, FIOGETOWN should be used instead. */
1822 case TIOCGPGRP:
1823 *(int *)addr = -fgetown(d->bd_sigio);
1824 break;
1825 #endif
1826 case BIOCSRSIG: { /* Set receive signal; u_int */
1827 u_int sig;
1828
1829 bcopy(addr, &sig, sizeof (u_int));
1830
1831 if (sig >= NSIG)
1832 error = EINVAL;
1833 else
1834 d->bd_sig = sig;
1835 break;
1836 }
1837 case BIOCGRSIG: /* u_int */
1838 bcopy(&d->bd_sig, addr, sizeof (u_int));
1839 break;
1840 #ifdef __APPLE__
1841 case BIOCSEXTHDR: /* u_int */
1842 bcopy(addr, &int_arg, sizeof (int_arg));
1843 if (int_arg)
1844 d->bd_flags |= BPF_EXTENDED_HDR;
1845 else
1846 d->bd_flags &= ~BPF_EXTENDED_HDR;
1847 break;
1848
1849 case BIOCGIFATTACHCOUNT: { /* struct ifreq */
1850 ifnet_t ifp;
1851 struct bpf_if *bp;
1852
1853 bcopy(addr, &ifr, sizeof (ifr));
1854 ifr.ifr_name[IFNAMSIZ - 1] = '\0';
1855 ifp = ifunit(ifr.ifr_name);
1856 if (ifp == NULL) {
1857 error = ENXIO;
1858 break;
1859 }
1860 ifr.ifr_intval = 0;
1861 for (bp = bpf_iflist; bp != 0; bp = bp->bif_next) {
1862 struct bpf_d *bpf_d;
1863
1864 if (bp->bif_ifp == NULL || bp->bif_ifp != ifp)
1865 continue;
1866 for (bpf_d = bp->bif_dlist; bpf_d; bpf_d = bpf_d->bd_next) {
1867 ifr.ifr_intval += 1;
1868 }
1869 }
1870 bcopy(&ifr, addr, sizeof (ifr));
1871 break;
1872 }
1873 case BIOCGWANTPKTAP: /* u_int */
1874 int_arg = d->bd_flags & BPF_WANT_PKTAP ? 1 : 0;
1875 bcopy(&int_arg, addr, sizeof (int_arg));
1876 break;
1877
1878 case BIOCSWANTPKTAP: /* u_int */
1879 bcopy(addr, &int_arg, sizeof (int_arg));
1880 if (int_arg)
1881 d->bd_flags |= BPF_WANT_PKTAP;
1882 else
1883 d->bd_flags &= ~BPF_WANT_PKTAP;
1884 break;
1885 #endif
1886
1887 case BIOCSHEADDROP:
1888 bcopy(addr, &int_arg, sizeof (int_arg));
1889 d->bd_headdrop = int_arg ? 1 : 0;
1890 break;
1891
1892 case BIOCGHEADDROP:
1893 bcopy(&d->bd_headdrop, addr, sizeof (int));
1894 break;
1895 }
1896
1897 bpf_release_d(d);
1898 lck_mtx_unlock(bpf_mlock);
1899
1900 return (error);
1901 }
1902
1903 /*
1904 * Set d's packet filter program to fp. If this file already has a filter,
1905 * free it and replace it. Returns EINVAL for bogus requests.
1906 */
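/*
 * Illustrative sketch (not part of the original source): userland installs
 * a filter with BIOCSETF and a struct bpf_program; for example, a single
 * accept-everything instruction ("fd" is an open /dev/bpfN descriptor):
 *
 *	struct bpf_insn insns[] = {
 *		BPF_STMT(BPF_RET + BPF_K, (u_int)-1),
 *	};
 *	struct bpf_program prog = { 1, insns };
 *
 *	if (ioctl(fd, BIOCSETF, &prog) == -1)
 *		err(1, "BIOCSETF");
 */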
1907 static int
1908 bpf_setf(struct bpf_d *d, u_int bf_len, user_addr_t bf_insns,
1909 u_long cmd)
1910 {
1911 struct bpf_insn *fcode, *old;
1912 u_int flen, size;
1913
1914 while (d->bd_hbuf_read)
1915 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
1916
1917 if ((d->bd_flags & BPF_CLOSING) != 0)
1918 return (ENXIO);
1919
1920 old = d->bd_filter;
1921 if (bf_insns == USER_ADDR_NULL) {
1922 if (bf_len != 0)
1923 return (EINVAL);
1924 d->bd_filter = NULL;
1925 reset_d(d);
1926 if (old != 0)
1927 FREE((caddr_t)old, M_DEVBUF);
1928 return (0);
1929 }
1930 flen = bf_len;
1931 if (flen > BPF_MAXINSNS)
1932 return (EINVAL);
1933
1934 size = flen * sizeof(struct bpf_insn);
1935 fcode = (struct bpf_insn *) _MALLOC(size, M_DEVBUF, M_WAIT);
1936 #ifdef __APPLE__
1937 if (fcode == NULL)
1938 return (ENOBUFS);
1939 #endif
1940 if (copyin(bf_insns, (caddr_t)fcode, size) == 0 &&
1941 bpf_validate(fcode, (int)flen)) {
1942 d->bd_filter = fcode;
1943
1944 if (cmd == BIOCSETF32 || cmd == BIOCSETF64)
1945 reset_d(d);
1946
1947 if (old != 0)
1948 FREE((caddr_t)old, M_DEVBUF);
1949
1950 return (0);
1951 }
1952 FREE((caddr_t)fcode, M_DEVBUF);
1953 return (EINVAL);
1954 }
1955
1956 /*
1957 * Detach a file from its current interface (if attached at all) and attach
1958 * to the interface indicated by the name stored in ifr.
1959 * Return an errno or 0.
1960 */
1961 static int
1962 bpf_setif(struct bpf_d *d, ifnet_t theywant)
1963 {
1964 struct bpf_if *bp;
1965 int error;
1966
1967 while (d->bd_hbuf_read)
1968 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
1969
1970 if ((d->bd_flags & BPF_CLOSING) != 0)
1971 return (ENXIO);
1972
1973 /*
1974 * Look through attached interfaces for the named one.
1975 */
1976 for (bp = bpf_iflist; bp != 0; bp = bp->bif_next) {
1977 struct ifnet *ifp = bp->bif_ifp;
1978
1979 if (ifp == 0 || ifp != theywant)
1980 continue;
1981 /*
1982 * Do not use DLT_PKTAP, unless requested explicitly
1983 */
1984 if (bp->bif_dlt == DLT_PKTAP && !(d->bd_flags & BPF_WANT_PKTAP))
1985 continue;
1986 /*
1987 * Skip the coprocessor interface
1988 */
1989 if (!intcoproc_unrestricted && IFNET_IS_INTCOPROC(ifp))
1990 continue;
1991 /*
1992 * We found the requested interface.
1993 * Allocate the packet buffers.
1994 */
1995 error = bpf_allocbufs(d);
1996 if (error != 0)
1997 return (error);
1998 /*
1999 * Detach if attached to something else.
2000 */
2001 if (bp != d->bd_bif) {
2002 if (d->bd_bif != NULL) {
2003 if (bpf_detachd(d, 0) != 0)
2004 return (ENXIO);
2005 }
2006 if (bpf_attachd(d, bp) != 0)
2007 return (ENXIO);
2008 }
2009 reset_d(d);
2010 return (0);
2011 }
2012 /* Not found. */
2013 return (ENXIO);
2014 }
2015
2016
2017
2018 /*
2019 * Get a list of the available data link types of the interface.
2020 */
2021 static int
2022 bpf_getdltlist(struct bpf_d *d, caddr_t addr, struct proc *p)
2023 {
2024 u_int n;
2025 int error;
2026 struct ifnet *ifp;
2027 struct bpf_if *bp;
2028 user_addr_t dlist;
2029 struct bpf_dltlist bfl;
2030
2031 bcopy(addr, &bfl, sizeof (bfl));
2032 if (proc_is64bit(p)) {
2033 dlist = (user_addr_t)bfl.bfl_u.bflu_pad;
2034 } else {
2035 dlist = CAST_USER_ADDR_T(bfl.bfl_u.bflu_list);
2036 }
2037
2038 ifp = d->bd_bif->bif_ifp;
2039 n = 0;
2040 error = 0;
2041
2042 for (bp = bpf_iflist; bp; bp = bp->bif_next) {
2043 if (bp->bif_ifp != ifp)
2044 continue;
2045 /*
2046 * Do not use DLT_PKTAP, unless requested explicitly
2047 */
2048 if (bp->bif_dlt == DLT_PKTAP && !(d->bd_flags & BPF_WANT_PKTAP))
2049 continue;
2050 if (dlist != USER_ADDR_NULL) {
2051 if (n >= bfl.bfl_len) {
2052 return (ENOMEM);
2053 }
2054 error = copyout(&bp->bif_dlt, dlist,
2055 sizeof (bp->bif_dlt));
2056 if (error != 0)
2057 break;
2058 dlist += sizeof (bp->bif_dlt);
2059 }
2060 n++;
2061 }
2062 bfl.bfl_len = n;
2063 bcopy(&bfl, addr, sizeof (bfl));
2064
2065 return (error);
2066 }
2067
2068 /*
2069 * Set the data link type of a BPF instance.
2070 */
2071 static int
2072 bpf_setdlt(struct bpf_d *d, uint32_t dlt)
2073 {
2074 int error, opromisc;
2075 struct ifnet *ifp;
2076 struct bpf_if *bp;
2077
2078 if (d->bd_bif->bif_dlt == dlt)
2079 return (0);
2080
2081 while (d->bd_hbuf_read)
2082 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
2083
2084 if ((d->bd_flags & BPF_CLOSING) != 0)
2085 return (ENXIO);
2086
2087 ifp = d->bd_bif->bif_ifp;
2088 for (bp = bpf_iflist; bp; bp = bp->bif_next) {
2089 if (bp->bif_ifp == ifp && bp->bif_dlt == dlt) {
2090 /*
2091 * Do not use DLT_PKTAP, unless requested explicitly
2092 */
2093 if (bp->bif_dlt == DLT_PKTAP && !(d->bd_flags & BPF_WANT_PKTAP)) {
2094 continue;
2095 }
2096 break;
2097 }
2098 }
2099 if (bp != NULL) {
2100 opromisc = d->bd_promisc;
2101 if (bpf_detachd(d, 0) != 0)
2102 return (ENXIO);
2103 error = bpf_attachd(d, bp);
2104 if (error) {
2105 printf("bpf_setdlt: bpf_attachd %s%d failed (%d)\n",
2106 ifnet_name(bp->bif_ifp), ifnet_unit(bp->bif_ifp), error);
2107 return error;
2108 }
2109 reset_d(d);
2110 if (opromisc) {
2111 lck_mtx_unlock(bpf_mlock);
2112 error = ifnet_set_promiscuous(bp->bif_ifp, 1);
2113 lck_mtx_lock(bpf_mlock);
2114 if (error) {
2115 printf("%s: ifpromisc %s%d failed (%d)\n",
2116 __func__, ifnet_name(bp->bif_ifp),
2117 ifnet_unit(bp->bif_ifp), error);
2118 } else {
2119 d->bd_promisc = 1;
2120 }
2121 }
2122 }
2123 return (bp == NULL ? EINVAL : 0);
2124 }
2125
2126 static int
2127 bpf_set_traffic_class(struct bpf_d *d, int tc)
2128 {
2129 int error = 0;
2130
2131 if (!SO_VALID_TC(tc))
2132 error = EINVAL;
2133 else
2134 d->bd_traffic_class = tc;
2135
2136 return (error);
2137 }
2138
2139 static void
2140 bpf_set_packet_service_class(struct mbuf *m, int tc)
2141 {
2142 if (!(m->m_flags & M_PKTHDR))
2143 return;
2144
2145 VERIFY(SO_VALID_TC(tc));
2146 (void) m_set_service_class(m, so_tc2msc(tc));
2147 }
2148
2149 /*
2150 * Support for select()
2151 *
2152 * Return true iff the specific operation will not block indefinitely.
2153 * Otherwise, return false but make a note that a selwakeup() must be done.
2154 */
2155 int
2156 bpfselect(dev_t dev, int which, void * wql, struct proc *p)
2157 {
2158 struct bpf_d *d;
2159 int ret = 0;
2160
2161 lck_mtx_lock(bpf_mlock);
2162
2163 d = bpf_dtab[minor(dev)];
2164 if (d == 0 || d == (void *)1 || (d->bd_flags & BPF_CLOSING) != 0) {
2165 lck_mtx_unlock(bpf_mlock);
2166 return (ENXIO);
2167 }
2168
2169 bpf_acquire_d(d);
2170
2171 if (d->bd_bif == NULL) {
2172 bpf_release_d(d);
2173 lck_mtx_unlock(bpf_mlock);
2174 return (ENXIO);
2175 }
2176
2177 while (d->bd_hbuf_read)
2178 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
2179
2180 if ((d->bd_flags & BPF_CLOSING) != 0) {
2181 bpf_release_d(d);
2182 lck_mtx_unlock(bpf_mlock);
2183 return (ENXIO);
2184 }
2185
2186 switch (which) {
2187 case FREAD:
2188 if (d->bd_hlen != 0 ||
2189 ((d->bd_immediate || d->bd_state == BPF_TIMED_OUT) &&
2190 d->bd_slen != 0))
2191 ret = 1; /* read has data to return */
2192 else {
2193 /*
2194 * Read has no data to return.
2195 * Make the select wait, and start a timer if
2196 * necessary.
2197 */
2198 selrecord(p, &d->bd_sel, wql);
2199 bpf_start_timer(d);
2200 }
2201 break;
2202
2203 case FWRITE:
2204 ret = 1; /* can't determine whether a write would block */
2205 break;
2206 }
2207
2208 bpf_release_d(d);
2209 lck_mtx_unlock(bpf_mlock);
2210
2211 return (ret);
2212 }
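
/*
 * Illustrative userland sketch (not part of this file): blocking in
 * select() until the descriptor is readable, which is decided by
 * bpfselect() above.  With BIOCIMMEDIATE set, readability is reported
 * as soon as the store buffer holds any data; otherwise the caller
 * waits for a buffer rotation or for the read timeout (BIOCSRTIMEOUT)
 * to expire.  Error handling is elided; buf/buf_len are assumed to
 * match the buffer size reported by BIOCGBLEN.
 *
 *	u_int enable = 1;
 *	fd_set rfds;
 *
 *	(void) ioctl(bpf_fd, BIOCIMMEDIATE, &enable);
 *	FD_ZERO(&rfds);
 *	FD_SET(bpf_fd, &rfds);
 *	if (select(bpf_fd + 1, &rfds, NULL, NULL, NULL) > 0)
 *		(void) read(bpf_fd, buf, buf_len);
 */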
2213
2214
2215 /*
2216 * Support for kevent() system call. Register EVFILT_READ filters and
2217 * reject all others.
2218 */
2219 int bpfkqfilter(dev_t dev, struct knote *kn);
2220 static void filt_bpfdetach(struct knote *);
2221 static int filt_bpfread(struct knote *, long);
2222 static int filt_bpftouch(struct knote *kn, struct kevent_internal_s *kev);
2223 static int filt_bpfprocess(struct knote *kn, struct filt_process_s *data, struct kevent_internal_s *kev);
2224
2225 SECURITY_READ_ONLY_EARLY(struct filterops) bpfread_filtops = {
2226 .f_isfd = 1,
2227 .f_detach = filt_bpfdetach,
2228 .f_event = filt_bpfread,
2229 .f_touch = filt_bpftouch,
2230 .f_process = filt_bpfprocess,
2231 };
2232
2233 static int
2234 filt_bpfread_common(struct knote *kn, struct bpf_d *d)
2235 {
2236 int ready = 0;
2237
2238 if (d->bd_immediate) {
2239 /*
2240 * If there's data in the hold buffer, it's the
2241 * amount of data a read will return.
2242 *
2243 * If there's no data in the hold buffer, but
2244 * there's data in the store buffer, a read will
2245 * immediately rotate the store buffer to the
2246 * hold buffer, so the amount of data in the store
2247 * buffer is the amount of data a read will
2248 * return.
2249 *
2250 * If there's no data in either buffer, we're not
2251 * ready to read.
2252 */
2253 kn->kn_data = ((d->bd_hlen == 0 || d->bd_hbuf_read)
2254 ? d->bd_slen : d->bd_hlen);
2255 int64_t lowwat = 1;
2256 if (kn->kn_sfflags & NOTE_LOWAT)
2257 {
2258 if (kn->kn_sdata > d->bd_bufsize)
2259 lowwat = d->bd_bufsize;
2260 else if (kn->kn_sdata > lowwat)
2261 lowwat = kn->kn_sdata;
2262 }
2263 ready = (kn->kn_data >= lowwat);
2264 } else {
2265 /*
2266 * If there's data in the hold buffer, it's the
2267 * amount of data a read will return.
2268 *
2269 * If there's no data in the hold buffer, but
2270 * there's data in the store buffer, if the
2271 * timer has expired a read will immediately
2272 * rotate the store buffer to the hold buffer,
2273 * so the amount of data in the store buffer is
2274 * the amount of data a read will return.
2275 *
2276 * If there's no data in either buffer, or there's
2277 * no data in the hold buffer and the timer hasn't
2278 * expired, we're not ready to read.
2279 */
2280 kn->kn_data = ((d->bd_hlen == 0 || d->bd_hbuf_read) && d->bd_state == BPF_TIMED_OUT ?
2281 d->bd_slen : d->bd_hlen);
2282 ready = (kn->kn_data > 0);
2283 }
2284 if (!ready)
2285 bpf_start_timer(d);
2286
2287 return (ready);
2288 }
2289
2290 int
2291 bpfkqfilter(dev_t dev, struct knote *kn)
2292 {
2293 struct bpf_d *d;
2294 int res;
2295
2296 /*
2297 * Is this device a bpf?
2298 */
2299 if (major(dev) != CDEV_MAJOR ||
2300 kn->kn_filter != EVFILT_READ) {
2301 kn->kn_flags = EV_ERROR;
2302 kn->kn_data = EINVAL;
2303 return 0;
2304 }
2305
2306 lck_mtx_lock(bpf_mlock);
2307
2308 d = bpf_dtab[minor(dev)];
2309
2310 if (d == 0 ||
2311 d == (void *)1 ||
2312 d->bd_bif == NULL ||
2313 (d->bd_flags & BPF_CLOSING) != 0) {
2314 lck_mtx_unlock(bpf_mlock);
2315 kn->kn_flags = EV_ERROR;
2316 kn->kn_data = ENXIO;
2317 return 0;
2318 }
2319
2320 kn->kn_hook = d;
2321 kn->kn_filtid = EVFILTID_BPFREAD;
2322 KNOTE_ATTACH(&d->bd_sel.si_note, kn);
2323 d->bd_flags |= BPF_KNOTE;
2324
2325 /* capture the current state */
2326 res = filt_bpfread_common(kn, d);
2327
2328 lck_mtx_unlock(bpf_mlock);
2329
2330 return (res);
2331 }
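
/*
 * Illustrative userland sketch (not part of this file): registering an
 * EVFILT_READ filter on a bpf descriptor, which is attached by
 * bpfkqfilter() and evaluated by filt_bpfread_common() above.  With
 * NOTE_LOWAT, the data argument raises the readiness threshold in
 * immediate mode (capped at the descriptor's buffer size).  Error
 * handling is elided.
 *
 *	struct kevent kev;
 *	int kq = kqueue();
 *
 *	EV_SET(&kev, bpf_fd, EVFILT_READ, EV_ADD, NOTE_LOWAT, 1024, NULL);
 *	(void) kevent(kq, &kev, 1, NULL, 0, NULL);
 *	if (kevent(kq, NULL, 0, &kev, 1, NULL) > 0)
 *		(void) read(bpf_fd, buf, buf_len);
 */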
2332
2333 static void
2334 filt_bpfdetach(struct knote *kn)
2335 {
2336 struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
2337
2338 lck_mtx_lock(bpf_mlock);
2339 if (d->bd_flags & BPF_KNOTE) {
2340 KNOTE_DETACH(&d->bd_sel.si_note, kn);
2341 d->bd_flags &= ~BPF_KNOTE;
2342 }
2343 lck_mtx_unlock(bpf_mlock);
2344 }
2345
2346 static int
2347 filt_bpfread(struct knote *kn, long hint)
2348 {
2349 #pragma unused(hint)
2350 struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
2351
2352 return filt_bpfread_common(kn, d);
2353 }
2354
2355 static int
2356 filt_bpftouch(struct knote *kn, struct kevent_internal_s *kev)
2357 {
2358 struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
2359 int res;
2360
2361 lck_mtx_lock(bpf_mlock);
2362
2363 /* save off the lowat threshold and flag */
2364 kn->kn_sdata = kev->data;
2365 kn->kn_sfflags = kev->fflags;
2366 if ((kn->kn_status & KN_UDATA_SPECIFIC) == 0)
2367 kn->kn_udata = kev->udata;
2368
2369 /* output data will be re-generated here */
2370 res = filt_bpfread_common(kn, d);
2371
2372 lck_mtx_unlock(bpf_mlock);
2373
2374 return res;
2375 }
2376
2377 static int
2378 filt_bpfprocess(struct knote *kn, struct filt_process_s *data, struct kevent_internal_s *kev)
2379 {
2380 #pragma unused(data)
2381 struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
2382 int res;
2383
2384 lck_mtx_lock(bpf_mlock);
2385 res = filt_bpfread_common(kn, d);
2386 if (res) {
2387 *kev = kn->kn_kevent;
2388 }
2389 lck_mtx_unlock(bpf_mlock);
2390
2391 return res;
2392 }
2393
2394 /*
2395 * Copy data from an mbuf chain into a buffer. This code is derived
2396 * from m_copydata in kern/uipc_mbuf.c.
2397 */
2398 static void
2399 bpf_mcopy(struct mbuf * m, void *dst_arg, size_t len)
2400 {
2401 u_int count;
2402 u_char *dst;
2403
2404 dst = dst_arg;
2405 while (len > 0) {
2406 if (m == 0)
2407 panic("bpf_mcopy");
2408 count = min(m->m_len, len);
2409 bcopy(mbuf_data(m), dst, count);
2410 m = m->m_next;
2411 dst += count;
2412 len -= count;
2413 }
2414 }
2415
2416 static inline void
2417 bpf_tap_imp(
2418 ifnet_t ifp,
2419 u_int32_t dlt,
2420 struct bpf_packet *bpf_pkt,
2421 int outbound)
2422 {
2423 struct bpf_d *d;
2424 u_int slen;
2425 struct bpf_if *bp;
2426
2427 /*
2428 * It's possible that we get here after the bpf descriptor has been
2429 * detached from the interface; in such a case we simply return.
2430 * Lock ordering is important since we can be called asynchronously
2431 * (from IOKit) to process an inbound packet; when that happens
2432 * we would have been holding its "gateLock" and will be acquiring
2433 * "bpf_mlock" upon entering this routine. Due to that, we release
2434 * "bpf_mlock" prior to calling ifnet_set_promiscuous (which will
2435 * acquire "gateLock" in the IOKit), in order to avoid a deadlock
2436 * when an ifnet_set_promiscuous request collides with
2437 * an inbound packet being passed into the tap callback.
2438 */
2439 lck_mtx_lock(bpf_mlock);
2440 if (ifp->if_bpf == NULL) {
2441 lck_mtx_unlock(bpf_mlock);
2442 return;
2443 }
2444 for (bp = ifp->if_bpf; bp != NULL; bp = bp->bif_next) {
2445 if (bp->bif_ifp != ifp) {
2446 /* wrong interface */
2447 bp = NULL;
2448 break;
2449 }
2450 if (dlt == 0 || bp->bif_dlt == dlt) {
2451 /* tapping default DLT or DLT matches */
2452 break;
2453 }
2454 }
2455 if (bp == NULL) {
2456 goto done;
2457 }
2458 for (d = bp->bif_dlist; d; d = d->bd_next) {
2459 if (outbound && !d->bd_seesent)
2460 continue;
2461 ++d->bd_rcount;
2462 slen = bpf_filter(d->bd_filter, (u_char *)bpf_pkt,
2463 bpf_pkt->bpfp_total_length, 0);
2464 if (slen != 0) {
2465 #if CONFIG_MACF_NET
2466 if (mac_bpfdesc_check_receive(d, bp->bif_ifp) != 0)
2467 continue;
2468 #endif
2469 catchpacket(d, bpf_pkt, slen, outbound);
2470 }
2471 }
2472
2473 done:
2474 lck_mtx_unlock(bpf_mlock);
2475 }
2476
2477 static inline void
2478 bpf_tap_mbuf(
2479 ifnet_t ifp,
2480 u_int32_t dlt,
2481 mbuf_t m,
2482 void* hdr,
2483 size_t hlen,
2484 int outbound)
2485 {
2486 struct bpf_packet bpf_pkt;
2487 struct mbuf *m0;
2488
2489 if (ifp->if_bpf == NULL) {
2490 /* quickly check without taking lock */
2491 return;
2492 }
2493 bpf_pkt.bpfp_type = BPF_PACKET_TYPE_MBUF;
2494 bpf_pkt.bpfp_mbuf = m;
2495 bpf_pkt.bpfp_total_length = 0;
2496 for (m0 = m; m0 != NULL; m0 = m0->m_next)
2497 bpf_pkt.bpfp_total_length += m0->m_len;
2498 bpf_pkt.bpfp_header = hdr;
2499 if (hdr != NULL) {
2500 bpf_pkt.bpfp_total_length += hlen;
2501 bpf_pkt.bpfp_header_length = hlen;
2502 } else {
2503 bpf_pkt.bpfp_header_length = 0;
2504 }
2505 bpf_tap_imp(ifp, dlt, &bpf_pkt, outbound);
2506 }
2507
2508 void
2509 bpf_tap_out(
2510 ifnet_t ifp,
2511 u_int32_t dlt,
2512 mbuf_t m,
2513 void* hdr,
2514 size_t hlen)
2515 {
2516 bpf_tap_mbuf(ifp, dlt, m, hdr, hlen, 1);
2517 }
2518
2519 void
2520 bpf_tap_in(
2521 ifnet_t ifp,
2522 u_int32_t dlt,
2523 mbuf_t m,
2524 void* hdr,
2525 size_t hlen)
2526 {
2527 bpf_tap_mbuf(ifp, dlt, m, hdr, hlen, 0);
2528 }
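
/*
 * Illustrative sketch (not part of this file): a hypothetical driver
 * feeding its traffic to BPF through the bpf_tap_out()/bpf_tap_in()
 * entry points above.  Passing 0 as the DLT taps the interface's
 * default data link type; the optional header argument lets a driver
 * prepend link-layer framing that is not part of the mbuf chain.
 *
 *	// transmit path, before handing the packet to the hardware
 *	bpf_tap_out(ifp, 0, m, NULL, 0);
 *
 *	// receive path, before passing the packet up the stack
 *	bpf_tap_in(ifp, 0, m, NULL, 0);
 */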
2529
2530 /* Callback registered with Ethernet driver. */
2531 static int bpf_tap_callback(struct ifnet *ifp, struct mbuf *m)
2532 {
2533 bpf_tap_mbuf(ifp, 0, m, NULL, 0, mbuf_pkthdr_rcvif(m) == NULL);
2534
2535 return 0;
2536 }
2537
2538
2539 static void
2540 copy_bpf_packet(struct bpf_packet * pkt, void * dst, size_t len)
2541 {
2542 /* copy the optional header */
2543 if (pkt->bpfp_header_length != 0) {
2544 size_t count = min(len, pkt->bpfp_header_length);
2545 bcopy(pkt->bpfp_header, dst, count);
2546 len -= count;
2547 dst += count;
2548 }
2549 if (len == 0) {
2550 /* nothing past the header */
2551 return;
2552 }
2553 /* copy the packet */
2554 switch (pkt->bpfp_type) {
2555 case BPF_PACKET_TYPE_MBUF:
2556 bpf_mcopy(pkt->bpfp_mbuf, dst, len);
2557 break;
2558 default:
2559 break;
2560 }
2561 }
2562
2563 /*
2564 * Move the packet data from interface memory (pkt) into the
2565 * store buffer.  Wake up any pending reads when the buffers are
2566 * rotated, or when immediate mode is set or the read timeout has expired.
2567 */
2568 static void
2569 catchpacket(struct bpf_d *d, struct bpf_packet * pkt,
2570 u_int snaplen, int outbound)
2571 {
2572 struct bpf_hdr *hp;
2573 struct bpf_hdr_ext *ehp;
2574 int totlen, curlen;
2575 int hdrlen, caplen;
2576 int do_wakeup = 0;
2577 u_char *payload;
2578 struct timeval tv;
2579
2580 hdrlen = (d->bd_flags & BPF_EXTENDED_HDR) ? d->bd_bif->bif_exthdrlen :
2581 d->bd_bif->bif_hdrlen;
2582 /*
2583 * Figure out how many bytes to move. If the packet is
2584 * greater than or equal to the snapshot length, transfer that
2585 * much. Otherwise, transfer the whole packet (unless
2586 * we hit the buffer size limit).
2587 */
2588 totlen = hdrlen + min(snaplen, pkt->bpfp_total_length);
2589 if (totlen > d->bd_bufsize)
2590 totlen = d->bd_bufsize;
2591
2592 if (hdrlen > totlen)
2593 return;
2594
2595 /*
2596 * Round up the end of the previous packet to the next longword.
2597 */
2598 curlen = BPF_WORDALIGN(d->bd_slen);
2599 if (curlen + totlen > d->bd_bufsize) {
2600 /*
2601 * This packet will overflow the storage buffer.
2602 * Rotate the buffers if we can, then wakeup any
2603 * pending reads.
2604 *
2605 * We cannot rotate buffers if a read is in progress,
2606 * so drop the packet.
2607 */
2608 if (d->bd_hbuf_read) {
2609 ++d->bd_dcount;
2610 return;
2611 }
2612
2613 if (d->bd_fbuf == NULL) {
2614 if (d->bd_headdrop == 0) {
2615 /*
2616 * We haven't completed the previous read yet,
2617 * so drop the packet.
2618 */
2619 ++d->bd_dcount;
2620 return;
2621 }
2622 /*
2623 * Drop the hold buffer as it contains older packets
2624 */
2625 d->bd_dcount += d->bd_hcnt;
2626 d->bd_fbuf = d->bd_hbuf;
2627 ROTATE_BUFFERS(d);
2628 } else {
2629 ROTATE_BUFFERS(d);
2630 }
2631 do_wakeup = 1;
2632 curlen = 0;
2633 }
2634 else if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT)
2635 /*
2636 * Immediate mode is set, or the read timeout has
2637 * already expired during a select call. A packet
2638 * arrived, so the reader should be woken up.
2639 */
2640 do_wakeup = 1;
2641
2642 /*
2643 * Append the bpf header.
2644 */
2645 microtime(&tv);
2646 if (d->bd_flags & BPF_EXTENDED_HDR) {
2647 struct mbuf *m;
2648
2649 m = (pkt->bpfp_type == BPF_PACKET_TYPE_MBUF)
2650 ? pkt->bpfp_mbuf : NULL;
2651 ehp = (struct bpf_hdr_ext *)(void *)(d->bd_sbuf + curlen);
2652 memset(ehp, 0, sizeof(*ehp));
2653 ehp->bh_tstamp.tv_sec = tv.tv_sec;
2654 ehp->bh_tstamp.tv_usec = tv.tv_usec;
2655
2656 ehp->bh_datalen = pkt->bpfp_total_length;
2657 ehp->bh_hdrlen = hdrlen;
2658 caplen = ehp->bh_caplen = totlen - hdrlen;
2659 if (m == NULL) {
2660 if (outbound) {
2661 ehp->bh_flags |= BPF_HDR_EXT_FLAGS_DIR_OUT;
2662 } else {
2663 ehp->bh_flags |= BPF_HDR_EXT_FLAGS_DIR_IN;
2664 }
2665 } else if (outbound) {
2666 ehp->bh_flags |= BPF_HDR_EXT_FLAGS_DIR_OUT;
2667
2668 /* only do lookups on non-raw INPCB */
2669 if ((m->m_pkthdr.pkt_flags & (PKTF_FLOW_ID|
2670 PKTF_FLOW_LOCALSRC|PKTF_FLOW_RAWSOCK)) ==
2671 (PKTF_FLOW_ID|PKTF_FLOW_LOCALSRC) &&
2672 m->m_pkthdr.pkt_flowsrc == FLOWSRC_INPCB) {
2673 ehp->bh_flowid = m->m_pkthdr.pkt_flowid;
2674 ehp->bh_proto = m->m_pkthdr.pkt_proto;
2675 }
2676 ehp->bh_svc = so_svc2tc(m->m_pkthdr.pkt_svc);
2677 if (m->m_pkthdr.pkt_flags & PKTF_TCP_REXMT)
2678 ehp->bh_pktflags |= BPF_PKTFLAGS_TCP_REXMT;
2679 if (m->m_pkthdr.pkt_flags & PKTF_START_SEQ)
2680 ehp->bh_pktflags |= BPF_PKTFLAGS_START_SEQ;
2681 if (m->m_pkthdr.pkt_flags & PKTF_LAST_PKT)
2682 ehp->bh_pktflags |= BPF_PKTFLAGS_LAST_PKT;
2683 if (m->m_pkthdr.pkt_flags & PKTF_VALID_UNSENT_DATA) {
2684 ehp->bh_unsent_bytes =
2685 m->m_pkthdr.bufstatus_if;
2686 ehp->bh_unsent_snd =
2687 m->m_pkthdr.bufstatus_sndbuf;
2688 }
2689 } else
2690 ehp->bh_flags |= BPF_HDR_EXT_FLAGS_DIR_IN;
2691 payload = (u_char *)ehp + hdrlen;
2692 } else {
2693 hp = (struct bpf_hdr *)(void *)(d->bd_sbuf + curlen);
2694 hp->bh_tstamp.tv_sec = tv.tv_sec;
2695 hp->bh_tstamp.tv_usec = tv.tv_usec;
2696 hp->bh_datalen = pkt->bpfp_total_length;
2697 hp->bh_hdrlen = hdrlen;
2698 caplen = hp->bh_caplen = totlen - hdrlen;
2699 payload = (u_char *)hp + hdrlen;
2700 }
2701 /*
2702 * Copy the packet data into the store buffer and update its length.
2703 */
2704 copy_bpf_packet(pkt, payload, caplen);
2705 d->bd_slen = curlen + totlen;
2706 d->bd_scnt += 1;
2707
2708 if (do_wakeup)
2709 bpf_wakeup(d);
2710 }
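
/*
 * Illustrative userland sketch (not part of this file): walking the
 * records that catchpacket() lays down in the store buffer once they
 * are returned by read().  Each record starts with a bpf_hdr (or a
 * bpf_hdr_ext when extended headers are enabled for the descriptor)
 * and is padded to a BPF_WORDALIGN boundary, exactly as computed
 * above.  handle_packet() is a hypothetical consumer.
 *
 *	char *p = buf;
 *	ssize_t n = read(bpf_fd, buf, buf_len);
 *
 *	while (p < buf + n) {
 *		struct bpf_hdr *bh = (struct bpf_hdr *)(void *)p;
 *
 *		handle_packet((u_char *)p + bh->bh_hdrlen, bh->bh_caplen);
 *		p += BPF_WORDALIGN(bh->bh_hdrlen + bh->bh_caplen);
 *	}
 */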
2711
2712 /*
2713 * Allocate (or reallocate) the packet buffers of a descriptor.
2714 */
2715 static int
2716 bpf_allocbufs(struct bpf_d *d)
2717 {
2718 if (d->bd_sbuf != NULL) {
2719 FREE(d->bd_sbuf, M_DEVBUF);
2720 d->bd_sbuf = NULL;
2721 }
2722 if (d->bd_hbuf != NULL) {
2723 FREE(d->bd_hbuf, M_DEVBUF);
2724 d->bd_hbuf = NULL;
2725 }
2726 if (d->bd_fbuf != NULL) {
2727 FREE(d->bd_fbuf, M_DEVBUF);
2728 d->bd_fbuf = NULL;
2729 }
2730
2731 d->bd_fbuf = (caddr_t) _MALLOC(d->bd_bufsize, M_DEVBUF, M_WAIT);
2732 if (d->bd_fbuf == NULL)
2733 return (ENOBUFS);
2734
2735 d->bd_sbuf = (caddr_t) _MALLOC(d->bd_bufsize, M_DEVBUF, M_WAIT);
2736 if (d->bd_sbuf == NULL) {
2737 FREE(d->bd_fbuf, M_DEVBUF);
2738 d->bd_fbuf = NULL;
2739 return (ENOBUFS);
2740 }
2741 d->bd_slen = 0;
2742 d->bd_hlen = 0;
2743 d->bd_scnt = 0;
2744 d->bd_hcnt = 0;
2745 return (0);
2746 }
2747
2748 /*
2749 * Free buffers currently in use by a descriptor.
2750 * Called on close.
2751 */
2752 static void
2753 bpf_freed(struct bpf_d *d)
2754 {
2755 /*
2756 * We don't need to lock out interrupts since this descriptor has
2757 * been detached from its interface and has not yet been marked
2758 * free.
2759 */
2760 if (d->bd_hbuf_read)
2761 panic("bpf buffer freed during read");
2762
2763 if (d->bd_sbuf != 0) {
2764 FREE(d->bd_sbuf, M_DEVBUF);
2765 if (d->bd_hbuf != 0)
2766 FREE(d->bd_hbuf, M_DEVBUF);
2767 if (d->bd_fbuf != 0)
2768 FREE(d->bd_fbuf, M_DEVBUF);
2769 }
2770 if (d->bd_filter)
2771 FREE((caddr_t)d->bd_filter, M_DEVBUF);
2772 }
2773
2774 /*
2775 * Attach an interface to bpf.  ifp is the interface to attach; dlt is
2776 * the link layer type; hdrlen is the fixed size of the link header
2777 * (variable length headers not yet supported).
2778 */
2779 void
2780 bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen)
2781 {
2782 bpf_attach(ifp, dlt, hdrlen, NULL, NULL);
2783 }
2784
2785 errno_t
2786 bpf_attach(
2787 ifnet_t ifp,
2788 u_int32_t dlt,
2789 u_int32_t hdrlen,
2790 bpf_send_func send,
2791 bpf_tap_func tap)
2792 {
2793 struct bpf_if *bp;
2794 struct bpf_if *bp_new;
2795 struct bpf_if *bp_before_first = NULL;
2796 struct bpf_if *bp_first = NULL;
2797 struct bpf_if *bp_last = NULL;
2798 boolean_t found;
2799
2800 bp_new = (struct bpf_if *) _MALLOC(sizeof(*bp_new), M_DEVBUF,
2801 M_WAIT | M_ZERO);
2802 if (bp_new == 0)
2803 panic("bpfattach");
2804
2805 lck_mtx_lock(bpf_mlock);
2806
2807 /*
2808 * Check if this interface/dlt is already attached. Remember the
2809 * first and last attachment for this interface, as well as the
2810 * element before the first attachment.
2811 */
2812 found = FALSE;
2813 for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
2814 if (bp->bif_ifp != ifp) {
2815 if (bp_first != NULL) {
2816 /* no more elements for this interface */
2817 break;
2818 }
2819 bp_before_first = bp;
2820 } else {
2821 if (bp->bif_dlt == dlt) {
2822 found = TRUE;
2823 break;
2824 }
2825 if (bp_first == NULL) {
2826 bp_first = bp;
2827 }
2828 bp_last = bp;
2829 }
2830 }
2831 if (found) {
2832 lck_mtx_unlock(bpf_mlock);
2833 printf("bpfattach - %s with dlt %d is already attached\n",
2834 if_name(ifp), dlt);
2835 FREE(bp_new, M_DEVBUF);
2836 return EEXIST;
2837 }
2838
2839 bp_new->bif_ifp = ifp;
2840 bp_new->bif_dlt = dlt;
2841 bp_new->bif_send = send;
2842 bp_new->bif_tap = tap;
2843
2844 if (bp_first == NULL) {
2845 /* No other entries for this ifp */
2846 bp_new->bif_next = bpf_iflist;
2847 bpf_iflist = bp_new;
2848 }
2849 else {
2850 if (ifnet_type(ifp) == IFT_ETHER && dlt == DLT_EN10MB) {
2851 /* Make this the first entry for this interface */
2852 if (bp_before_first != NULL) {
2853 /* point the previous to us */
2854 bp_before_first->bif_next = bp_new;
2855 } else {
2856 /* we're the new head */
2857 bpf_iflist = bp_new;
2858 }
2859 bp_new->bif_next = bp_first;
2860 } else {
2861 /* Add this after the last entry for this interface */
2862 bp_new->bif_next = bp_last->bif_next;
2863 bp_last->bif_next = bp_new;
2864 }
2865 }
2866
2867 /*
2868 * Compute the length of the bpf header. This is not necessarily
2869 * equal to SIZEOF_BPF_HDR because we want to insert spacing such
2870 * that the network layer header begins on a longword boundary (for
2871 * performance reasons and to alleviate alignment restrictions).
2872 */
2873 bp_new->bif_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen;
2874 bp_new->bif_exthdrlen = BPF_WORDALIGN(hdrlen +
2875 sizeof(struct bpf_hdr_ext)) - hdrlen;
2876
2877 /* Take a reference on the interface */
2878 ifnet_reference(ifp);
2879
2880 lck_mtx_unlock(bpf_mlock);
2881
2882 #ifndef __APPLE__
2883 if (bootverbose)
2884 printf("bpf: %s attached\n", if_name(ifp));
2885 #endif
2886
2887 return 0;
2888 }
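
/*
 * Illustrative sketch (not part of this file): how a hypothetical
 * Ethernet-like driver would register with BPF using the entry points
 * above.  The DLT and header length are assumptions for an Ethernet
 * interface; my_bpf_send/my_bpf_tap stand in for optional
 * driver-supplied callbacks used for writes to the device and for
 * tap on/off notifications.
 *
 *	// legacy form: default DLT, no callbacks
 *	bpfattach(ifp, DLT_EN10MB, sizeof (struct ether_header));
 *
 *	// extended form with optional callbacks
 *	(void) bpf_attach(ifp, DLT_EN10MB, sizeof (struct ether_header),
 *	    my_bpf_send, my_bpf_tap);
 */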
2889
2890 /*
2891 * Detach bpf from an interface. This involves detaching each descriptor
2892 * associated with the interface, and leaving bd_bif NULL. Notify each
2893 * descriptor as it's detached so that any sleepers wake up and get
2894 * ENXIO.
2895 */
2896 void
2897 bpfdetach(struct ifnet *ifp)
2898 {
2899 struct bpf_if *bp, *bp_prev, *bp_next;
2900 struct bpf_d *d;
2901
2902 if (bpf_debug != 0)
2903 printf("%s: %s\n", __func__, if_name(ifp));
2904
2905 lck_mtx_lock(bpf_mlock);
2906
2907 /*
2908 * Detach the descriptors attached to this interface and unlink its
2909 * bpf_if entries, keeping the lock held throughout to maintain
2910 * the integrity of the interface list.
2911 */
2912 bp_prev = NULL;
2913 for (bp = bpf_iflist; bp != NULL; bp = bp_next) {
2914 bp_next = bp->bif_next;
2915
2916 if (ifp != bp->bif_ifp) {
2917 bp_prev = bp;
2918 continue;
2919 }
2920 /* Unlink from the interface list */
2921 if (bp_prev)
2922 bp_prev->bif_next = bp->bif_next;
2923 else
2924 bpf_iflist = bp->bif_next;
2925
2926 /* Detach the devices attached to the interface */
2927 while ((d = bp->bif_dlist) != NULL) {
2928 /*
2929 * Take an extra reference to prevent the device
2930 * from being freed when bpf_detachd() releases
2931 * the reference for the interface list
2932 */
2933 bpf_acquire_d(d);
2934 bpf_detachd(d, 0);
2935 bpf_wakeup(d);
2936 bpf_release_d(d);
2937 }
2938 ifnet_release(ifp);
2939 }
2940
2941 lck_mtx_unlock(bpf_mlock);
2942 }
2943
2944 void
2945 bpf_init(__unused void *unused)
2946 {
2947 #ifdef __APPLE__
2948 int i;
2949 int maj;
2950
2951 if (bpf_devsw_installed == 0) {
2952 bpf_devsw_installed = 1;
2953 bpf_mlock_grp_attr = lck_grp_attr_alloc_init();
2954 bpf_mlock_grp = lck_grp_alloc_init("bpf", bpf_mlock_grp_attr);
2955 bpf_mlock_attr = lck_attr_alloc_init();
2956 lck_mtx_init(bpf_mlock, bpf_mlock_grp, bpf_mlock_attr);
2957 maj = cdevsw_add(CDEV_MAJOR, &bpf_cdevsw);
2958 if (maj == -1) {
2959 if (bpf_mlock_attr)
2960 lck_attr_free(bpf_mlock_attr);
2961 if (bpf_mlock_grp)
2962 lck_grp_free(bpf_mlock_grp);
2963 if (bpf_mlock_grp_attr)
2964 lck_grp_attr_free(bpf_mlock_grp_attr);
2965
2966 bpf_mlock = NULL;
2967 bpf_mlock_attr = NULL;
2968 bpf_mlock_grp = NULL;
2969 bpf_mlock_grp_attr = NULL;
2970 bpf_devsw_installed = 0;
2971 printf("bpf_init: failed to allocate a major number!\n");
2972 return;
2973 }
2974
2975 for (i = 0 ; i < NBPFILTER; i++)
2976 bpf_make_dev_t(maj);
2977 }
2978 #else
2979 cdevsw_add(&bpf_cdevsw);
2980 #endif
2981 }
2982
2983 #ifndef __APPLE__
2984 SYSINIT(bpfdev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE+CDEV_MAJOR,bpf_drvinit,NULL)
2985 #endif
2986
2987 #if CONFIG_MACF_NET
2988 struct label *
2989 mac_bpfdesc_label_get(struct bpf_d *d)
2990 {
2991
2992 return (d->bd_label);
2993 }
2994
2995 void
2996 mac_bpfdesc_label_set(struct bpf_d *d, struct label *label)
2997 {
2998
2999 d->bd_label = label;
3000 }
3001 #endif