bsd/net/bpf.c

   1 /*
   2  * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
   3  *
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. The rights granted to you under the License
  10  * may not be used to create, or enable the creation or redistribution of,
  11  * unlawful or unlicensed copies of an Apple operating system, or to
  12  * circumvent, violate, or enable the circumvention or violation of, any
  13  * terms of an Apple operating system software license agreement.
  14  *
  15  * Please obtain a copy of the License at
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17  *
  18  * The Original Code and all software distributed under the License are
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23  * Please see the License for the specific language governing rights and
  24  * limitations under the License.
  25  *
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27  */
  28 /*
  29  * Copyright (c) 1990, 1991, 1993
  30  *      The Regents of the University of California.  All rights reserved.
  31  *
  32  * This code is derived from the Stanford/CMU enet packet filter,
  33  * (net/enet.c) distributed as part of 4.3BSD, and code contributed
  34  * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
  35  * Berkeley Laboratory.
  36  *
  37  * Redistribution and use in source and binary forms, with or without
  38  * modification, are permitted provided that the following conditions
  39  * are met:
  40  * 1. Redistributions of source code must retain the above copyright
  41  *    notice, this list of conditions and the following disclaimer.
  42  * 2. Redistributions in binary form must reproduce the above copyright
  43  *    notice, this list of conditions and the following disclaimer in the
  44  *    documentation and/or other materials provided with the distribution.
  45  * 3. All advertising materials mentioning features or use of this software
  46  *    must display the following acknowledgement:
  47  *      This product includes software developed by the University of
  48  *      California, Berkeley and its contributors.
  49  * 4. Neither the name of the University nor the names of its contributors
  50  *    may be used to endorse or promote products derived from this software
  51  *    without specific prior written permission.
  52  *
  53  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  54  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  55  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  56  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  57  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  58  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  59  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  60  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  61  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  62  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  63  * SUCH DAMAGE.
  64  *
  65  *      @(#)bpf.c       8.2 (Berkeley) 3/28/94
  66  *
  67  * $FreeBSD: src/sys/net/bpf.c,v 1.59.2.5 2001/01/05 04:49:09 jdp Exp $
  68  */
  69 /*
  70  * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
  71  * support for mandatory and extensible security protections.  This notice
  72  * is included in support of clause 2.2 (b) of the Apple Public License,
  73  * Version 2.0.
  74  */
  75
  76 #include "bpf.h"
  77
  78 #ifndef __GNUC__
  79 #define inline
  80 #else
  81 #define inline __inline
  82 #endif
  83
  84 #include <sys/param.h>
  85 #include <sys/systm.h>
  86 #include <sys/conf.h>
  87 #include <sys/malloc.h>
  88 #include <sys/mbuf.h>
  89 #include <sys/time.h>
  90 #include <sys/proc.h>
  91 #include <sys/signalvar.h>
  92 #include <sys/filio.h>
  93 #include <sys/sockio.h>
  94 #include <sys/ttycom.h>
  95 #include <sys/filedesc.h>
  96 #include <sys/uio_internal.h>
  97 #include <sys/file_internal.h>
  98 #include <sys/event.h>
  99
 100 #include <sys/poll.h>
 101
 102 #include <sys/socket.h>
 103 #include <sys/socketvar.h>
 104 #include <sys/vnode.h>
 105
 106 #include <net/if.h>
 107 #include <net/bpf.h>
 108 #include <net/bpfdesc.h>
 109
 110 #include <netinet/in.h>
 111 #include <netinet/in_pcb.h>
 112 #include <netinet/in_var.h>
 113 #include <netinet/ip_var.h>
 114 #include <netinet/tcp.h>
 115 #include <netinet/tcp_var.h>
 116 #include <netinet/udp.h>
 117 #include <netinet/udp_var.h>
 118 #include <netinet/if_ether.h>
 119 #include <sys/kernel.h>
 120 #include <sys/sysctl.h>
 121 #include <net/firewire.h>
 122
 123 #include <miscfs/devfs/devfs.h>
 124 #include <net/dlil.h>
 125
 126 #include <kern/locks.h>
 127 #include <kern/thread_call.h>
 128
 129 #if CONFIG_MACF_NET
 130 #include <security/mac_framework.h>
 131 #endif /* MAC_NET */
 132
 133 extern int tvtohz(struct timeval *);
 134
 135 #define BPF_BUFSIZE 4096
 136 #define UIOMOVE(cp, len, code, uio) uiomove(cp, len, uio)
 137
 138
 139 #define PRINET  26                      /* interruptible */
 140
 141 /*
 142  * The default read buffer size is patchable.
 143  */
 144 static unsigned int bpf_bufsize = BPF_BUFSIZE;
 145 SYSCTL_INT(_debug, OID_AUTO, bpf_bufsize, CTLFLAG_RW | CTLFLAG_LOCKED,
 146         &bpf_bufsize, 0, "");
 147 __private_extern__ unsigned int bpf_maxbufsize = BPF_MAXBUFSIZE;
 148 SYSCTL_INT(_debug, OID_AUTO, bpf_maxbufsize, CTLFLAG_RW | CTLFLAG_LOCKED,
 149         &bpf_maxbufsize, 0, "");
 150 static unsigned int bpf_maxdevices = 256;
 151 SYSCTL_UINT(_debug, OID_AUTO, bpf_maxdevices, CTLFLAG_RW | CTLFLAG_LOCKED,
 152         &bpf_maxdevices, 0, "");
 153
 154 /*
 155  *  bpf_iflist is the list of interfaces; each corresponds to an ifnet
 156  *  bpf_dtab holds pointer to the descriptors, indexed by minor device #
 157  */
 158 static struct bpf_if    *bpf_iflist;
 159 #ifdef __APPLE__
 160 /*
 161  * BSD now stores the bpf_d in the dev_t which is a struct
 162  * on their system. Our dev_t is an int, so we still store
 163  * the bpf_d in a separate table indexed by minor device #.
 164  *
 165  * The value stored in bpf_dtab[n] represent three states:
 166  *  0: device not opened
 167  *  1: device opening or closing
 168  *  other: device <n> opened with pointer to storage
 169  */
 170 static struct bpf_d     **bpf_dtab = NULL;
 171 static unsigned int bpf_dtab_size = 0;
 172 static unsigned int     nbpfilter = 0;
 173
 174 decl_lck_mtx_data(static, bpf_mlock_data);
 175 static lck_mtx_t                *bpf_mlock = &bpf_mlock_data;
 176 static lck_grp_t                *bpf_mlock_grp;
 177 static lck_grp_attr_t   *bpf_mlock_grp_attr;
 178 static lck_attr_t               *bpf_mlock_attr;
 179
 180 /*
 181  * Mark a descriptor free by making it point to itself.
 182  * This is probably cheaper than marking with a constant since
 183  * the address should be in a register anyway.
 184  */
 185 #endif /* __APPLE__ */
 186
 187 static int      bpf_allocbufs(struct bpf_d *);
 188 static errno_t  bpf_attachd(struct bpf_d *d, struct bpf_if *bp);
 189 static void     bpf_detachd(struct bpf_d *d);
 190 static void     bpf_freed(struct bpf_d *);
 191 static void     bpf_mcopy(const void *, void *, size_t);
 192 static int      bpf_movein(struct uio *, int,
 193                     struct mbuf **, struct sockaddr *, int *);
 194 static int      bpf_setif(struct bpf_d *, ifnet_t ifp, u_int32_t dlt);
 195 static void bpf_timed_out(void *, void *);
 196 static void bpf_wakeup(struct bpf_d *);
 197 static void     catchpacket(struct bpf_d *, u_char *, struct mbuf *, u_int,
 198                     u_int, int, void (*)(const void *, void *, size_t));
 199 static void     reset_d(struct bpf_d *);
 200 static int bpf_setf(struct bpf_d *, u_int bf_len, user_addr_t bf_insns);
 201 static int      bpf_getdltlist(struct bpf_d *, caddr_t, struct proc *);
 202 static int      bpf_setdlt(struct bpf_d *, u_int);
 203 static int      bpf_set_traffic_class(struct bpf_d *, int);
 204 static void     bpf_set_packet_service_class(struct mbuf *, int);
 205
 206 /*static  void *bpf_devfs_token[MAXBPFILTER];*/
 207
 208 static  int bpf_devsw_installed;
 209
 210 void bpf_init(void *unused);
 211 static int bpf_tap_callback(struct ifnet *ifp, struct mbuf *m);
 212
 213 /*
 214  * Darwin differs from BSD here, the following are static
 215  * on BSD and not static on Darwin.
 216  */
 217         d_open_t            bpfopen;
 218         d_close_t           bpfclose;
 219         d_read_t            bpfread;
 220         d_write_t           bpfwrite;
 221         ioctl_fcn_t         bpfioctl;
 222         select_fcn_t        bpfselect;
 223
 224
 225 /* Darwin's cdevsw struct differs slightly from BSDs */
 226 #define CDEV_MAJOR 23
 227 static struct cdevsw bpf_cdevsw = {
 228         /* open */          bpfopen,
 229         /* close */         bpfclose,
 230         /* read */          bpfread,
 231         /* write */         bpfwrite,
 232         /* ioctl */         bpfioctl,
 233         /* stop */          eno_stop,
 234         /* reset */         eno_reset,
 235         /* tty */           NULL,
 236         /* select */        bpfselect,
 237         /* mmap */          eno_mmap,
 238         /* strategy*/       eno_strat,
 239         /* getc */          eno_getc,
 240         /* putc */          eno_putc,
 241         /* type */          0
 242 };
 243
 244 #define SOCKADDR_HDR_LEN           offsetof(struct sockaddr, sa_data)
 245
 246 static int
 247 bpf_movein(struct uio *uio, int linktype, struct mbuf **mp, struct sockaddr *sockp, int *datlen)
 248 {
 249         struct mbuf *m;
 250         int error;
 251         int len;
 252         uint8_t sa_family;
 253         int hlen;
 254
 255         switch (linktype) {
 256
 257 #if SLIP
 258         case DLT_SLIP:
 259                 sa_family = AF_INET;
 260                 hlen = 0;
 261                 break;
 262 #endif /* SLIP */
 263
 264         case DLT_EN10MB:
 265                 sa_family = AF_UNSPEC;
 266                 /* XXX Would MAXLINKHDR be better? */
 267                 hlen = sizeof(struct ether_header);
 268                 break;
 269
 270 #if FDDI
 271         case DLT_FDDI:
 272         #if defined(__FreeBSD__) || defined(__bsdi__)
 273                 sa_family = AF_IMPLINK;
 274                 hlen = 0;
 275         #else
 276                 sa_family = AF_UNSPEC;
 277                 /* XXX 4(FORMAC)+6(dst)+6(src)+3(LLC)+5(SNAP) */
 278                 hlen = 24;
 279         #endif
 280                 break;
 281 #endif /* FDDI */
 282
 283         case DLT_RAW:
 284         case DLT_NULL:
 285                 sa_family = AF_UNSPEC;
 286                 hlen = 0;
 287                 break;
 288
 289         #ifdef __FreeBSD__
 290         case DLT_ATM_RFC1483:
 291                 /*
 292                  * en atm driver requires 4-byte atm pseudo header.
 293                  * though it isn't standard, vpi:vci needs to be
 294                  * specified anyway.
 295                  */
 296                 sa_family = AF_UNSPEC;
 297                 hlen = 12;      /* XXX 4(ATM_PH) + 3(LLC) + 5(SNAP) */
 298                 break;
 299         #endif
 300
 301         case DLT_PPP:
 302                 sa_family = AF_UNSPEC;
 303                 hlen = 4;       /* This should match PPP_HDRLEN */
 304                 break;
 305
 306         case DLT_APPLE_IP_OVER_IEEE1394:
 307                 sa_family = AF_UNSPEC;
 308                 hlen = sizeof(struct firewire_header);
 309                 break;
 310
 311         case DLT_IEEE802_11:            /* IEEE 802.11 wireless */
 312                 sa_family = AF_IEEE80211;
 313                 hlen = 0;
 314                 break;
 315
 316         case DLT_IEEE802_11_RADIO:
 317                 sa_family = AF_IEEE80211;
 318                 hlen = 0;
 319                 break;
 320
 321         default:
 322                 return (EIO);
 323         }
 324
 325         // LP64todo - fix this!
 326         len = uio_resid(uio);
 327         *datlen = len - hlen;
 328         if ((unsigned)len > MCLBYTES)
 329                 return (EIO);
 330
 331         if (sockp) {
 332                 /*
 333                  * Build a sockaddr based on the data link layer type.
 334                  * We do this at this level because the ethernet header
 335                  * is copied directly into the data field of the sockaddr.
 336                  * In the case of SLIP, there is no header and the packet
 337                  * is forwarded as is.
 338                  * Also, we are careful to leave room at the front of the mbuf
 339                  * for the link level header.
 340                  */
 341                 if ((hlen + SOCKADDR_HDR_LEN) > sockp->sa_len) {
 342                         return (EIO);
 343                 }
 344                 sockp->sa_family = sa_family;
 345         } else {
 346                 /*
 347                  * We're directly sending the packet data supplied by
 348                  * the user; we don't need to make room for the link
 349                  * header, and don't need the header length value any
 350                  * more, so set it to 0.
 351                  */
 352                 hlen = 0;
 353         }
 354
 355         MGETHDR(m, M_WAIT, MT_DATA);
 356         if (m == 0)
 357                 return (ENOBUFS);
 358         if ((unsigned)len > MHLEN) {
 359                 MCLGET(m, M_WAIT);
 360                 if ((m->m_flags & M_EXT) == 0) {
 361                         error = ENOBUFS;
 362                         goto bad;
 363                 }
 364         }
 365         m->m_pkthdr.len = m->m_len = len;
 366         m->m_pkthdr.rcvif = NULL;
 367         *mp = m;
 368
 369         /*
 370          * Make room for link header.
 371          */
 372         if (hlen != 0) {
 373                 m->m_pkthdr.len -= hlen;
 374                 m->m_len -= hlen;
 375                 m->m_data += hlen; /* XXX */
 376                 error = UIOMOVE((caddr_t)sockp->sa_data, hlen, UIO_WRITE, uio);
 377                 if (error)
 378                         goto bad;
 379         }
 380         error = UIOMOVE(mtod(m, caddr_t), len - hlen, UIO_WRITE, uio);
 381         if (error)
 382                 goto bad;
 383
 384         /* Check for multicast destination */
 385         switch (linktype) {
 386                 case DLT_EN10MB: {
 387                         struct ether_header *eh = mtod(m, struct ether_header *);
 388
 389                         if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
 390                                 if (_ether_cmp(etherbroadcastaddr, eh->ether_dhost) == 0)
 391                                         m->m_flags |= M_BCAST;
 392                                 else
 393                                         m->m_flags |= M_MCAST;
 394                         }
 395                         break;
 396                 }
 397         }
 398
 399         return 0;
 400  bad:
 401         m_freem(m);
 402         return (error);
 403 }
 404
 405 #ifdef __APPLE__
 406
 407 /*
 408  * The dynamic addition of a new device node must block all processes that are opening
 409  * the last device so that no process will get an unexpected ENOENT
 410  */
 411 static void
 412 bpf_make_dev_t(int maj)
 413 {
 414         static int              bpf_growing = 0;
 415         unsigned int    cur_size = nbpfilter, i;
 416
 417         if (nbpfilter >= bpf_maxdevices)
 418                 return;
 419
 420         while (bpf_growing) {
 421                 /* Wait until new device has been created */
 422                 (void)tsleep((caddr_t)&bpf_growing, PZERO, "bpf_growing", 0);
 423         }
 424         if (nbpfilter > cur_size) {
 425                 /* other thread grew it already */
 426                 return;
 427         }
 428         bpf_growing = 1;
 429
 430         /* need to grow bpf_dtab first */
 431         if (nbpfilter == bpf_dtab_size) {
 432                 int new_dtab_size;
 433                 struct bpf_d **new_dtab = NULL;
 434                 struct bpf_d **old_dtab = NULL;
 435
 436                 new_dtab_size = bpf_dtab_size + NBPFILTER;
 437                 new_dtab = (struct bpf_d **)_MALLOC(sizeof(struct bpf_d *) * new_dtab_size, M_DEVBUF, M_WAIT);
 438                 if (new_dtab == 0) {
 439                         printf("bpf_make_dev_t: malloc bpf_dtab failed\n");
 440                         goto done;
 441                 }
 442                 if (bpf_dtab) {
 443                         bcopy(bpf_dtab, new_dtab,
 444                                   sizeof(struct bpf_d *) * bpf_dtab_size);
 445                 }
 446                 bzero(new_dtab + bpf_dtab_size,
 447                           sizeof(struct bpf_d *) * NBPFILTER);
 448                 old_dtab = bpf_dtab;
 449                 bpf_dtab = new_dtab;
 450                 bpf_dtab_size = new_dtab_size;
 451                 if (old_dtab != NULL)
 452                         _FREE(old_dtab, M_DEVBUF);
 453         }
 454         i = nbpfilter++;
 455         (void) devfs_make_node(makedev(maj, i),
 456                                 DEVFS_CHAR, UID_ROOT, GID_WHEEL, 0600,
 457                                 "bpf%d", i);
 458 done:
 459         bpf_growing = 0;
 460         wakeup((caddr_t)&bpf_growing);
 461 }
 462
 463 #endif
 464
 465 /*
 466  * Attach file to the bpf interface, i.e. make d listen on bp.
 467  */
 468 static errno_t
 469 bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
 470 {
 471         int first = bp->bif_dlist == NULL;
 472         int     error = 0;
 473
 474         /*
 475          * Point d at bp, and add d to the interface's list of listeners.
 476          * Finally, point the driver's bpf cookie at the interface so
 477          * it will divert packets to bpf.
 478          */
 479         d->bd_bif = bp;
 480         d->bd_next = bp->bif_dlist;
 481         bp->bif_dlist = d;
 482
 483         if (first) {
 484                 /* Find the default bpf entry for this ifp */
 485                 if (bp->bif_ifp->if_bpf == NULL) {
 486                         struct bpf_if   *primary;
 487
 488                         for (primary = bpf_iflist; primary && primary->bif_ifp != bp->bif_ifp;
 489                                  primary = primary->bif_next)
 490                                 ;
 491
 492                         bp->bif_ifp->if_bpf = primary;
 493                 }
 494
 495                 /* Only call dlil_set_bpf_tap for primary dlt */
 496                 if (bp->bif_ifp->if_bpf == bp)
 497                         dlil_set_bpf_tap(bp->bif_ifp, BPF_TAP_INPUT_OUTPUT, bpf_tap_callback);
 498
 499                 if (bp->bif_tap)
 500                         error = bp->bif_tap(bp->bif_ifp, bp->bif_dlt, BPF_TAP_INPUT_OUTPUT);
 501         }
 502
 503         return error;
 504 }
 505
 506 /*
 507  * Detach a file from its interface.
 508  */
 509 static void
 510 bpf_detachd(struct bpf_d *d)
 511 {
 512         struct bpf_d **p;
 513         struct bpf_if *bp;
 514         struct ifnet  *ifp;
 515
 516         ifp = d->bd_bif->bif_ifp;
 517         bp = d->bd_bif;
 518
 519         /* Remove d from the interface's descriptor list. */
 520         p = &bp->bif_dlist;
 521         while (*p != d) {
 522                 p = &(*p)->bd_next;
 523                 if (*p == 0)
 524                         panic("bpf_detachd: descriptor not in list");
 525         }
 526         *p = (*p)->bd_next;
 527         if (bp->bif_dlist == 0) {
 528                 /*
 529                  * Let the driver know that there are no more listeners.
 530                  */
 531                 /* Only call dlil_set_bpf_tap for primary dlt */
 532                 if (bp->bif_ifp->if_bpf == bp)
 533                         dlil_set_bpf_tap(ifp, BPF_TAP_DISABLE, NULL);
 534                 if (bp->bif_tap)
 535                         bp->bif_tap(ifp, bp->bif_dlt, BPF_TAP_DISABLE);
 536
 537                 for (bp = bpf_iflist; bp; bp = bp->bif_next)
 538                         if (bp->bif_ifp == ifp && bp->bif_dlist != 0)
 539                                 break;
 540                 if (bp == NULL)
 541                         ifp->if_bpf = NULL;
 542         }
 543         d->bd_bif = NULL;
 544         /*
 545          * Check if this descriptor had requested promiscuous mode.
 546          * If so, turn it off.
 547          */
 548         if (d->bd_promisc) {
 549                 d->bd_promisc = 0;
 550                 lck_mtx_unlock(bpf_mlock);
 551                 if (ifnet_set_promiscuous(ifp, 0)) {
 552                         /*
 553                          * Something is really wrong if we were able to put
 554                          * the driver into promiscuous mode, but can't
 555                          * take it out.
 556                          * Most likely the network interface is gone.
 557                          */
 558                         printf("bpf: ifnet_set_promiscuous failed");
 559                 }
 560                 lck_mtx_lock(bpf_mlock);
 561         }
 562 }
 563
 564
 565 /*
 566  * Start asynchronous timer, if necessary.
 567  * Must be called with bpf_mlock held.
 568  */
 569 static void
 570 bpf_start_timer(struct bpf_d *d)
 571 {
 572         uint64_t deadline;
 573         struct timeval tv;
 574
 575         if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
 576                 tv.tv_sec = d->bd_rtout / hz;
 577                 tv.tv_usec = (d->bd_rtout % hz) * tick;
 578
 579                 clock_interval_to_deadline((uint64_t)tv.tv_sec * USEC_PER_SEC + tv.tv_usec,
 580                                 NSEC_PER_USEC,
 581                                 &deadline);
 582                 /*
 583                  * The state is BPF_IDLE, so the timer hasn't
 584                  * been started yet, and hasn't gone off yet;
 585                  * there is no thread call scheduled, so this
 586                  * won't change the schedule.
 587                  *
 588                  * XXX - what if, by the time it gets entered,
 589                  * the deadline has already passed?
 590                  */
 591                 thread_call_enter_delayed(d->bd_thread_call, deadline);
 592                 d->bd_state = BPF_WAITING;
 593         }
 594 }
 595
 596 /*
 597  * Cancel asynchronous timer.
 598  * Must be called with bpf_mlock held.
 599  */
 600 static boolean_t
 601 bpf_stop_timer(struct bpf_d *d)
 602 {
 603         /*
 604          * If the timer has already gone off, this does nothing.
 605          * Our caller is expected to set d->bd_state to BPF_IDLE,
 606          * with the bpf_mlock, after we are called. bpf_timed_out()
 607          * also grabs bpf_mlock, so, if the timer has gone off and
 608          * bpf_timed_out() hasn't finished, it's waiting for the
 609          * lock; when this thread releases the lock, it will
 610          * find the state is BPF_IDLE, and just release the
 611          * lock and return.
 612          */
 613         return (thread_call_cancel(d->bd_thread_call));
 614 }
 615
 616
 617
 618 /*
 619  * Open ethernet device.  Returns ENXIO for illegal minor device number,
 620  * EBUSY if file is open by another process.
 621  */
 622 /* ARGSUSED */
 623 int
 624 bpfopen(dev_t dev, int flags, __unused int fmt,
 625         __unused struct proc *p)
 626 {
 627         struct bpf_d *d;
 628
 629         lck_mtx_lock(bpf_mlock);
 630         if ((unsigned int) minor(dev) >= nbpfilter) {
 631                 lck_mtx_unlock(bpf_mlock);
 632                 return (ENXIO);
 633         }
 634         /*
 635          * New device nodes are created on demand when opening the last one.
 636          * The programming model is for processes to loop on the minor starting at 0
 637          * as long as EBUSY is returned. The loop stops when either the open succeeds or
 638          * an error other that EBUSY is returned. That means that bpf_make_dev_t() must
 639          * block all processes that are opening the last  node. If not all
 640          * processes are blocked, they could unexpectedly get ENOENT and abort their
 641          * opening loop.
 642          */
 643         if ((unsigned int) minor(dev) == (nbpfilter - 1))
 644                 bpf_make_dev_t(major(dev));
 645
 646         /*
 647          * Each minor can be opened by only one process.  If the requested
 648          * minor is in use, return EBUSY.
 649          *
 650          * Important: bpfopen() and bpfclose() have to check and set the status of a device
 651          * in the same lockin context otherwise the device may be leaked because the vnode use count
 652          * will be unpextectly greater than 1 when close() is called.
 653          */
 654         if (bpf_dtab[minor(dev)] == 0) {
 655                 bpf_dtab[minor(dev)] = (void *)1;       /* Mark opening */
 656         } else {
 657                 lck_mtx_unlock(bpf_mlock);
 658                 return (EBUSY);
 659         }
 660         d = (struct bpf_d *)_MALLOC(sizeof(struct bpf_d), M_DEVBUF, M_WAIT);
 661         if (d == NULL) {
 662                 /* this really is a catastrophic failure */
 663                 printf("bpfopen: malloc bpf_d failed\n");
 664                 bpf_dtab[minor(dev)] = NULL;
 665                 lck_mtx_unlock(bpf_mlock);
 666                 return ENOMEM;
 667         }
 668         bzero(d, sizeof(struct bpf_d));
 669
 670         /*
 671          * It is not necessary to take the BPF lock here because no other
 672          * thread can access the device until it is marked opened...
 673          */
 674
 675         /* Mark "in use" and do most initialization. */
 676         d->bd_bufsize = bpf_bufsize;
 677         d->bd_sig = SIGIO;
 678         d->bd_seesent = 1;
 679         d->bd_oflags = flags;
 680         d->bd_state = BPF_IDLE;
 681         d->bd_thread_call = thread_call_allocate(bpf_timed_out, d);
 682         d->bd_traffic_class = SO_TC_BE;
 683
 684         if (d->bd_thread_call == NULL) {
 685                 printf("bpfopen: malloc thread call failed\n");
 686                 bpf_dtab[minor(dev)] = NULL;
 687                 lck_mtx_unlock(bpf_mlock);
 688                 _FREE(d, M_DEVBUF);
 689                 return ENOMEM;
 690         }
 691 #if CONFIG_MACF_NET
 692         mac_bpfdesc_label_init(d);
 693         mac_bpfdesc_label_associate(kauth_cred_get(), d);
 694 #endif
 695         bpf_dtab[minor(dev)] = d;                               /* Mark opened */
 696         lck_mtx_unlock(bpf_mlock);
 697
 698         return (0);
 699 }
 700
 701 /*
 702  * Close the descriptor by detaching it from its interface,
 703  * deallocating its buffers, and marking it free.
 704  */
 705 /* ARGSUSED */
 706 int
 707 bpfclose(dev_t dev, __unused int flags, __unused int fmt,
 708          __unused struct proc *p)
 709 {
 710         struct bpf_d *d;
 711
 712         /* Take BPF lock to ensure no other thread is using the device */
 713         lck_mtx_lock(bpf_mlock);
 714
 715         d = bpf_dtab[minor(dev)];
 716         if (d == 0 || d == (void *)1) {
 717                 lck_mtx_unlock(bpf_mlock);
 718                 return (ENXIO);
 719         }
 720         bpf_dtab[minor(dev)] = (void *)1;               /* Mark closing */
 721
 722         /*
 723          * Deal with any in-progress timeouts.
 724          */
 725         switch (d->bd_state) {
 726                 case BPF_IDLE:
 727                         /*
 728                          * Not waiting for a timeout, and no timeout happened.
 729                          */
 730                         break;
 731
 732                 case BPF_WAITING:
 733                         /*
 734                          * Waiting for a timeout.
 735                          * Cancel any timer that has yet to go off,
 736                          * and mark the state as "closing".
 737                          * Then drop the lock to allow any timers that
 738                          * *have* gone off to run to completion, and wait
 739                          * for them to finish.
 740                          */
 741                         if (!bpf_stop_timer(d)) {
 742                                 /*
 743                                  * There was no pending call, so the call must
 744                                  * have been in progress. Wait for the call to
 745                                  * complete; we have to drop the lock while
 746                                  * waiting. to let the in-progrss call complete
 747                                  */
 748                                 d->bd_state = BPF_DRAINING;
 749                                 while (d->bd_state == BPF_DRAINING)
 750                                         msleep((caddr_t)d, bpf_mlock, PRINET,
 751                                                         "bpfdraining", NULL);
 752                         }
 753                         d->bd_state = BPF_IDLE;
 754                         break;
 755
 756                 case BPF_TIMED_OUT:
 757                         /*
 758                          * Timer went off, and the timeout routine finished.
 759                          */
 760                         d->bd_state = BPF_IDLE;
 761                         break;
 762
 763                 case BPF_DRAINING:
 764                         /*
 765                          * Another thread is blocked on a close waiting for
 766                          * a timeout to finish.
 767                          * This "shouldn't happen", as the first thread to enter
 768                          * bpfclose() will set bpf_dtab[minor(dev)] to 1, and
 769                          * all subsequent threads should see that and fail with
 770                          * ENXIO.
 771                          */
 772                         panic("Two threads blocked in a BPF close");
 773                         break;
 774         }
 775
 776         if (d->bd_bif)
 777                 bpf_detachd(d);
 778         selthreadclear(&d->bd_sel);
 779 #if CONFIG_MACF_NET
 780         mac_bpfdesc_label_destroy(d);
 781 #endif
 782         thread_call_free(d->bd_thread_call);
 783         bpf_freed(d);
 784
 785         /* Mark free in same context as bpfopen comes to check */
 786         bpf_dtab[minor(dev)] = NULL;                    /* Mark closed */
 787         lck_mtx_unlock(bpf_mlock);
 788
 789         _FREE(d, M_DEVBUF);
 790
 791         return (0);
 792 }
 793
 794
 795 #define BPF_SLEEP bpf_sleep
 796
 797 static int
 798 bpf_sleep(struct bpf_d *d, int pri, const char *wmesg, int timo)
 799 {
 800         u_int64_t abstime = 0;
 801
 802         if(timo)
 803                 clock_interval_to_deadline(timo, NSEC_PER_SEC / hz, &abstime);
 804
 805         return msleep1((caddr_t)d, bpf_mlock, pri, wmesg, abstime);
 806 }
 807
 808 static struct inpcb *
 809 bpf_findinpcb(struct inpcbinfo *pcbinfo, uint32_t flowhash)
 810 {
 811         struct inpcb *inp = NULL;
 812
 813         if (!flowhash) return (NULL);
 814
 815         lck_rw_lock_shared(pcbinfo->mtx);
 816         LIST_FOREACH(inp, pcbinfo->listhead, inp_list) {
 817                 if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) != WNT_STOPUSING) {
 818                         if (inp->inp_flowhash == flowhash)
 819                                 break;
 820                         in_pcb_checkstate(inp, WNT_RELEASE, 0);
 821                 }
 822         }
 823         lck_rw_done(pcbinfo->mtx);
 824
 825         return (inp);
 826 }
 827
 828 /*
 829  * Rotate the packet buffers in descriptor d.  Move the store buffer
 830  * into the hold slot, and the free buffer into the store slot.
 831  * Zero the length of the new store buffer.
 832  */
 833 #define ROTATE_BUFFERS(d) \
 834         (d)->bd_hbuf = (d)->bd_sbuf; \
 835         (d)->bd_hlen = (d)->bd_slen; \
 836         (d)->bd_sbuf = (d)->bd_fbuf; \
 837         (d)->bd_slen = 0; \
 838         (d)->bd_fbuf = NULL;
 839 /*
 840  *  bpfread - read next chunk of packets from buffers
 841  */
 842 int
 843 bpfread(dev_t dev, struct uio *uio, int ioflag)
 844 {
 845         struct bpf_d *d;
 846         int timed_out;
 847         int error;
 848
 849         lck_mtx_lock(bpf_mlock);
 850
 851         d = bpf_dtab[minor(dev)];
 852         if (d == 0 || d == (void *)1) {
 853                 lck_mtx_unlock(bpf_mlock);
 854                 return (ENXIO);
 855         }
 856
 857         /*
 858          * Restrict application to use a buffer the same size as
 859          * as kernel buffers.
 860          */
 861         if (uio_resid(uio) != d->bd_bufsize) {
 862                 lck_mtx_unlock(bpf_mlock);
 863                 return (EINVAL);
 864         }
 865
 866         if (d->bd_state == BPF_WAITING)
 867                 bpf_stop_timer(d);
 868
 869         timed_out = (d->bd_state == BPF_TIMED_OUT);
 870         d->bd_state = BPF_IDLE;
 871
 872         /*
 873          * If the hold buffer is empty, then do a timed sleep, which
 874          * ends when the timeout expires or when enough packets
 875          * have arrived to fill the store buffer.
 876          */
 877         while (d->bd_hbuf == 0) {
 878                 if ((d->bd_immediate || timed_out || (ioflag & IO_NDELAY))
 879                         && d->bd_slen != 0) {
 880                         /*
 881                          * We're in immediate mode, or are reading
 882                          * in non-blocking mode, or a timer was
 883                          * started before the read (e.g., by select()
 884                          * or poll()) and has expired and a packet(s)
 885                          * either arrived since the previous
 886                          * read or arrived while we were asleep.
 887                          * Rotate the buffers and return what's here.
 888                          */
 889                         ROTATE_BUFFERS(d);
 890                         break;
 891                 }
 892
 893                 /*
 894                  * No data is available, check to see if the bpf device
 895                  * is still pointed at a real interface.  If not, return
 896                  * ENXIO so that the userland process knows to rebind
 897                  * it before using it again.
 898                  */
 899                 if (d->bd_bif == NULL) {
 900                         lck_mtx_unlock(bpf_mlock);
 901                         return (ENXIO);
 902                 }
 903                 if (ioflag & IO_NDELAY) {
 904                         lck_mtx_unlock(bpf_mlock);
 905                         return (EWOULDBLOCK);
 906                 }
 907                 error = BPF_SLEEP(d, PRINET|PCATCH, "bpf",
 908                                   d->bd_rtout);
 909                 /*
 910                  * Make sure device is still opened
 911                  */
 912                 d = bpf_dtab[minor(dev)];
 913                 if (d == 0 || d == (void *)1) {
 914                         lck_mtx_unlock(bpf_mlock);
 915                         return (ENXIO);
 916                 }
 917                 if (error == EINTR || error == ERESTART) {
 918                         lck_mtx_unlock(bpf_mlock);
 919                         return (error);
 920                 }
 921                 if (error == EWOULDBLOCK) {
 922                         /*
 923                          * On a timeout, return what's in the buffer,
 924                          * which may be nothing.  If there is something
 925                          * in the store buffer, we can rotate the buffers.
 926                          */
 927                         if (d->bd_hbuf)
 928                                 /*
 929                                  * We filled up the buffer in between
 930                                  * getting the timeout and arriving
 931                                  * here, so we don't need to rotate.
 932                                  */
 933                                 break;
 934
 935                         if (d->bd_slen == 0) {
 936                                 lck_mtx_unlock(bpf_mlock);
 937                                 return (0);
 938                         }
 939                         ROTATE_BUFFERS(d);
 940                         break;
 941                 }
 942         }
 943         /*
 944          * At this point, we know we have something in the hold slot.
 945          */
 946
 947         /*
 948          * Before we move data to userland, we fill out the extended
 949          * header fields.
 950          */
 951         if (d->bd_extendedhdr) {
 952                 char *p;
 953
 954                 p = d->bd_hbuf;
 955                 while (p < d->bd_hbuf + d->bd_hlen) {
 956                         struct bpf_hdr_ext *ehp;
 957                         struct inpcb *inp;
 958                         uint32_t flowhash;
 959                         pid_t pid;
 960
 961                         ehp = (struct bpf_hdr_ext *)(void *)p;
 962                         if ((flowhash = ehp->bh_flowhash)) {
 963                                 if (ehp->bh_flags & BPF_HDR_EXT_FLAGS_TCP)
 964                                         inp = bpf_findinpcb(&tcbinfo, flowhash);
 965                                 else
 966                                         inp = bpf_findinpcb(&udbinfo, flowhash);
 967                                 if (inp) {
 968                                         socket_lock(inp->inp_socket, 0);
 969                                         pid = inp->inp_socket->last_pid;
 970                                         in_pcb_checkstate(inp, WNT_RELEASE, 1);
 971                                         socket_unlock(inp->inp_socket, 0);
 972                                         ehp->bh_pid = pid;
 973                                         proc_name(pid, ehp->bh_comm, MAXCOMLEN);
 974                                 }
 975                                 ehp->bh_flowhash = 0;
 976                         }
 977                         p += BPF_WORDALIGN(ehp->bh_hdrlen + ehp->bh_caplen);
 978                 }
 979         }
 980         /*
 981          * Move data from hold buffer into user space.
 982          * We know the entire buffer is transferred since
 983          * we checked above that the read buffer is bpf_bufsize bytes.
 984          */
 985         error = UIOMOVE(d->bd_hbuf, d->bd_hlen, UIO_READ, uio);
 986
 987         d->bd_fbuf = d->bd_hbuf;
 988         d->bd_hbuf = NULL;
 989         d->bd_hlen = 0;
 990         lck_mtx_unlock(bpf_mlock);
 991         return (error);
 992 }
 993
 994
 995 /*
 996  * If there are processes sleeping on this descriptor, wake them up.
 997  */
 998 static void
 999 bpf_wakeup(struct bpf_d *d)
1000 {
1001         if (d->bd_state == BPF_WAITING) {
1002                 bpf_stop_timer(d);
1003                 d->bd_state = BPF_IDLE;
1004         }
1005         wakeup((caddr_t)d);
1006         if (d->bd_async && d->bd_sig && d->bd_sigio)
1007                 pgsigio(d->bd_sigio, d->bd_sig);
1008
1009         selwakeup(&d->bd_sel);
1010         KNOTE(&d->bd_sel.si_note, 1);
1011 #ifndef __APPLE__
1012         /* XXX */
1013         d->bd_sel.si_pid = 0;
1014 #endif
1015 }
1016
1017
1018 static void
1019 bpf_timed_out(void *arg, __unused void *dummy)
1020 {
1021         struct bpf_d *d = (struct bpf_d *)arg;
1022
1023         lck_mtx_lock(bpf_mlock);
1024         if (d->bd_state == BPF_WAITING) {
1025                 /*
1026                  * There's a select or kqueue waiting for this; if there's
1027                  * now stuff to read, wake it up.
1028                  */
1029                 d->bd_state = BPF_TIMED_OUT;
1030                 if (d->bd_slen != 0)
1031                         bpf_wakeup(d);
1032         } else if (d->bd_state == BPF_DRAINING) {
1033                 /*
1034                  * A close is waiting for this to finish.
1035                  * Mark it as finished, and wake the close up.
1036                  */
1037                 d->bd_state = BPF_IDLE;
1038                 bpf_wakeup(d);
1039         }
1040         lck_mtx_unlock(bpf_mlock);
1041 }
1042
1043
1044
1045
1046
1047 /* keep in sync with bpf_movein above: */
1048 #define MAX_DATALINK_HDR_LEN    (sizeof(struct firewire_header))
1049
1050 int
1051 bpfwrite(dev_t dev, struct uio *uio, __unused int ioflag)
1052 {
1053         struct bpf_d *d;
1054         struct ifnet *ifp;
1055         struct mbuf *m = NULL;
1056         int error;
1057         char              dst_buf[SOCKADDR_HDR_LEN + MAX_DATALINK_HDR_LEN];
1058         int datlen = 0;
1059     int bif_dlt;
1060     int bd_hdrcmplt;
1061
1062         lck_mtx_lock(bpf_mlock);
1063
1064         d = bpf_dtab[minor(dev)];
1065         if (d == 0 || d == (void *)1) {
1066                 lck_mtx_unlock(bpf_mlock);
1067                 return (ENXIO);
1068         }
1069         if (d->bd_bif == 0) {
1070                 lck_mtx_unlock(bpf_mlock);
1071                 return (ENXIO);
1072         }
1073
1074         ifp = d->bd_bif->bif_ifp;
1075
1076         if ((ifp->if_flags & IFF_UP) == 0) {
1077                 lck_mtx_unlock(bpf_mlock);
1078                 return (ENETDOWN);
1079         }
1080         if (uio_resid(uio) == 0) {
1081                 lck_mtx_unlock(bpf_mlock);
1082                 return (0);
1083         }
1084         ((struct sockaddr *)dst_buf)->sa_len = sizeof(dst_buf);
1085
1086         /*
1087          * fix for PR-6849527
1088          * geting variables onto stack before dropping lock for bpf_movein()
1089          */
1090         bif_dlt = (int)d->bd_bif->bif_dlt;
1091         bd_hdrcmplt  = d->bd_hdrcmplt;
1092
1093         /* bpf_movein allocating mbufs; drop lock */
1094         lck_mtx_unlock(bpf_mlock);
1095
1096         error = bpf_movein(uio, bif_dlt, &m,
1097         bd_hdrcmplt ? NULL : (struct sockaddr *)dst_buf,
1098         &datlen);
1099
1100         if (error) {
1101                 return (error);
1102         }
1103
1104         /* taking the lock again and verifying whether device is open */
1105         lck_mtx_lock(bpf_mlock);
1106         d = bpf_dtab[minor(dev)];
1107         if (d == 0 || d == (void *)1) {
1108                 lck_mtx_unlock(bpf_mlock);
1109                 m_freem(m);
1110                 return (ENXIO);
1111         }
1112
1113         if (d->bd_bif == NULL) {
1114                 lck_mtx_unlock(bpf_mlock);
1115                 m_free(m);
1116                 return (ENXIO);
1117         }
1118
1119         if ((unsigned)datlen > ifp->if_mtu) {
1120                 lck_mtx_unlock(bpf_mlock);
1121                 m_freem(m);
1122                 return (EMSGSIZE);
1123         }
1124
1125
1126 #if CONFIG_MACF_NET
1127         mac_mbuf_label_associate_bpfdesc(d, m);
1128 #endif
1129
1130         bpf_set_packet_service_class(m, d->bd_traffic_class);
1131
1132         lck_mtx_unlock(bpf_mlock);
1133
1134         if (d->bd_hdrcmplt) {
1135                 if (d->bd_bif->bif_send)
1136                         error = d->bd_bif->bif_send(ifp, d->bd_bif->bif_dlt, m);
1137                 else
1138                         error = dlil_output(ifp, 0, m, NULL, NULL, 1, NULL);
1139         } else {
1140                 error = dlil_output(ifp, PF_INET, m, NULL,
1141                     (struct sockaddr *)dst_buf, 0, NULL);
1142         }
1143
1144         /*
1145          * The driver frees the mbuf.
1146          */
1147         return (error);
1148 }
1149
1150 /*
1151  * Reset a descriptor by flushing its packet buffer and clearing the
1152  * receive and drop counts.
1153  */
1154 static void
1155 reset_d(struct bpf_d *d)
1156 {
1157         if (d->bd_hbuf) {
1158                 /* Free the hold buffer. */
1159                 d->bd_fbuf = d->bd_hbuf;
1160                 d->bd_hbuf = NULL;
1161         }
1162         d->bd_slen = 0;
1163         d->bd_hlen = 0;
1164         d->bd_rcount = 0;
1165         d->bd_dcount = 0;
1166 }
1167
1168 /*
1169  *  FIONREAD            Check for read packet available.
1170  *  SIOCGIFADDR         Get interface address - convenient hook to driver.
1171  *  BIOCGBLEN           Get buffer len [for read()].
1172  *  BIOCSETF            Set ethernet read filter.
1173  *  BIOCFLUSH           Flush read packet buffer.
1174  *  BIOCPROMISC         Put interface into promiscuous mode.
1175  *  BIOCGDLT            Get link layer type.
1176  *  BIOCGETIF           Get interface name.
1177  *  BIOCSETIF           Set interface.
1178  *  BIOCSRTIMEOUT       Set read timeout.
1179  *  BIOCGRTIMEOUT       Get read timeout.
1180  *  BIOCGSTATS          Get packet stats.
1181  *  BIOCIMMEDIATE       Set immediate mode.
1182  *  BIOCVERSION         Get filter language version.
1183  *  BIOCGHDRCMPLT       Get "header already complete" flag
1184  *  BIOCSHDRCMPLT       Set "header already complete" flag
1185  *  BIOCGSEESENT        Get "see packets sent" flag
1186  *  BIOCSSEESENT        Set "see packets sent" flag
1187  *  BIOCSETTC           Set traffic class.
1188  *  BIOCGETTC           Get traffic class.
1189  *  BIOCSEXTHDR         Set "extended header" flag
1190  */
1191 /* ARGSUSED */
1192 int
1193 bpfioctl(dev_t dev, u_long cmd, caddr_t addr, __unused int flags,
1194     struct proc *p)
1195 {
1196         struct bpf_d *d;
1197         int error = 0, int_arg;
1198         struct ifreq ifr;
1199
1200         lck_mtx_lock(bpf_mlock);
1201
1202         d = bpf_dtab[minor(dev)];
1203         if (d == 0 || d == (void *)1) {
1204                 lck_mtx_unlock(bpf_mlock);
1205                 return (ENXIO);
1206         }
1207
1208         if (d->bd_state == BPF_WAITING)
1209                 bpf_stop_timer(d);
1210         d->bd_state = BPF_IDLE;
1211
1212         switch (cmd) {
1213
1214         default:
1215                 error = EINVAL;
1216                 break;
1217
1218         /*
1219          * Check for read packet available.
1220          */
1221         case FIONREAD:                  /* int */
1222                 {
1223                         int n;
1224
1225                         n = d->bd_slen;
1226                         if (d->bd_hbuf)
1227                                 n += d->bd_hlen;
1228
1229                         bcopy(&n, addr, sizeof (n));
1230                         break;
1231                 }
1232
1233         case SIOCGIFADDR:               /* struct ifreq */
1234                 {
1235                         struct ifnet *ifp;
1236
1237                         if (d->bd_bif == 0)
1238                                 error = EINVAL;
1239                         else {
1240                                 ifp = d->bd_bif->bif_ifp;
1241                                 error = ifnet_ioctl(ifp, 0, cmd, addr);
1242                         }
1243                         break;
1244                 }
1245
1246         /*
1247          * Get buffer len [for read()].
1248          */
1249         case BIOCGBLEN:                 /* u_int */
1250                 bcopy(&d->bd_bufsize, addr, sizeof (u_int));
1251                 break;
1252
1253         /*
1254          * Set buffer length.
1255          */
1256         case BIOCSBLEN:                 /* u_int */
1257                 if (d->bd_bif != 0)
1258                         error = EINVAL;
1259                 else {
1260                         u_int size;
1261
1262                         bcopy(addr, &size, sizeof (size));
1263
1264                         if (size > bpf_maxbufsize)
1265                                 size = bpf_maxbufsize;
1266                         else if (size < BPF_MINBUFSIZE)
1267                                 size = BPF_MINBUFSIZE;
1268                         bcopy(&size, addr, sizeof (size));
1269                         d->bd_bufsize = size;
1270                 }
1271                 break;
1272
1273         /*
1274          * Set link layer read filter.
1275          */
1276         case BIOCSETF32: {              /* struct bpf_program32 */
1277                 struct bpf_program32 prg32;
1278
1279                 bcopy(addr, &prg32, sizeof (prg32));
1280                 error = bpf_setf(d, prg32.bf_len,
1281                     CAST_USER_ADDR_T(prg32.bf_insns));
1282                 break;
1283         }
1284
1285         case BIOCSETF64: {              /* struct bpf_program64 */
1286                 struct bpf_program64 prg64;
1287
1288                 bcopy(addr, &prg64, sizeof (prg64));
1289                 error = bpf_setf(d, prg64.bf_len, prg64.bf_insns);
1290                 break;
1291         }
1292
1293         /*
1294          * Flush read packet buffer.
1295          */
1296         case BIOCFLUSH:
1297                 reset_d(d);
1298                 break;
1299
1300         /*
1301          * Put interface into promiscuous mode.
1302          */
1303         case BIOCPROMISC:
1304                 if (d->bd_bif == 0) {
1305                         /*
1306                          * No interface attached yet.
1307                          */
1308                         error = EINVAL;
1309                         break;
1310                 }
1311                 if (d->bd_promisc == 0) {
1312                         lck_mtx_unlock(bpf_mlock);
1313                         error = ifnet_set_promiscuous(d->bd_bif->bif_ifp, 1);
1314                         lck_mtx_lock(bpf_mlock);
1315                         if (error == 0)
1316                                 d->bd_promisc = 1;
1317                 }
1318                 break;
1319
1320         /*
1321          * Get device parameters.
1322          */
1323         case BIOCGDLT:                  /* u_int */
1324                 if (d->bd_bif == 0)
1325                         error = EINVAL;
1326                 else
1327                         bcopy(&d->bd_bif->bif_dlt, addr, sizeof (u_int));
1328                 break;
1329
1330         /*
1331          * Get a list of supported data link types.
1332          */
1333         case BIOCGDLTLIST:              /* struct bpf_dltlist */
1334                 if (d->bd_bif == NULL) {
1335                         error = EINVAL;
1336                 } else {
1337                         error = bpf_getdltlist(d, addr, p);
1338                 }
1339                 break;
1340
1341         /*
1342          * Set data link type.
1343          */
1344         case BIOCSDLT:                  /* u_int */
1345                 if (d->bd_bif == NULL) {
1346                         error = EINVAL;
1347                 } else {
1348                         u_int dlt;
1349
1350                         bcopy(addr, &dlt, sizeof (dlt));
1351                         error = bpf_setdlt(d, dlt);
1352                 }
1353                 break;
1354
1355         /*
1356          * Get interface name.
1357          */
1358         case BIOCGETIF:                 /* struct ifreq */
1359                 if (d->bd_bif == 0)
1360                         error = EINVAL;
1361                 else {
1362                         struct ifnet *const ifp = d->bd_bif->bif_ifp;
1363
1364                         snprintf(((struct ifreq *)(void *)addr)->ifr_name,
1365                             sizeof (ifr.ifr_name), "%s%d", ifp->if_name,
1366                             ifp->if_unit);
1367                 }
1368                 break;
1369
1370         /*
1371          * Set interface.
1372          */
1373         case BIOCSETIF: {               /* struct ifreq */
1374                 ifnet_t ifp;
1375
1376                 bcopy(addr, &ifr, sizeof (ifr));
1377                 ifr.ifr_name[IFNAMSIZ - 1] = '\0';
1378                 ifp = ifunit(ifr.ifr_name);
1379                 if (ifp == NULL)
1380                         error = ENXIO;
1381                 else
1382                         error = bpf_setif(d, ifp, 0);
1383                 break;
1384         }
1385
1386         /*
1387          * Set read timeout.
1388          */
1389         case BIOCSRTIMEOUT32: {         /* struct user32_timeval */
1390                 struct user32_timeval _tv;
1391                 struct timeval tv;
1392
1393                 bcopy(addr, &_tv, sizeof (_tv));
1394                 tv.tv_sec  = _tv.tv_sec;
1395                 tv.tv_usec = _tv.tv_usec;
1396
1397                 /*
1398                  * Subtract 1 tick from tvtohz() since this isn't
1399                  * a one-shot timer.
1400                  */
1401                 if ((error = itimerfix(&tv)) == 0)
1402                         d->bd_rtout = tvtohz(&tv) - 1;
1403                 break;
1404         }
1405
1406         case BIOCSRTIMEOUT64: {         /* struct user64_timeval */
1407                 struct user64_timeval _tv;
1408                 struct timeval tv;
1409
1410                 bcopy(addr, &_tv, sizeof (_tv));
1411                 tv.tv_sec  = _tv.tv_sec;
1412                 tv.tv_usec = _tv.tv_usec;
1413
1414                 /*
1415                  * Subtract 1 tick from tvtohz() since this isn't
1416                  * a one-shot timer.
1417                  */
1418                 if ((error = itimerfix(&tv)) == 0)
1419                         d->bd_rtout = tvtohz(&tv) - 1;
1420                 break;
1421         }
1422
1423         /*
1424          * Get read timeout.
1425          */
1426         case BIOCGRTIMEOUT32: {         /* struct user32_timeval */
1427                 struct user32_timeval tv;
1428
1429                 bzero(&tv, sizeof (tv));
1430                 tv.tv_sec = d->bd_rtout / hz;
1431                 tv.tv_usec = (d->bd_rtout % hz) * tick;
1432                 bcopy(&tv, addr, sizeof (tv));
1433                 break;
1434         }
1435
1436         case BIOCGRTIMEOUT64: {         /* struct user64_timeval */
1437                 struct user64_timeval tv;
1438
1439                 bzero(&tv, sizeof (tv));
1440                 tv.tv_sec = d->bd_rtout / hz;
1441                 tv.tv_usec = (d->bd_rtout % hz) * tick;
1442                 bcopy(&tv, addr, sizeof (tv));
1443                 break;
1444         }
1445
1446         /*
1447          * Get packet stats.
1448          */
1449         case BIOCGSTATS: {              /* struct bpf_stat */
1450                 struct bpf_stat bs;
1451
1452                 bzero(&bs, sizeof (bs));
1453                 bs.bs_recv = d->bd_rcount;
1454                 bs.bs_drop = d->bd_dcount;
1455                 bcopy(&bs, addr, sizeof (bs));
1456                 break;
1457         }
1458
1459         /*
1460          * Set immediate mode.
1461          */
1462         case BIOCIMMEDIATE:             /* u_int */
1463                 bcopy(addr, &d->bd_immediate, sizeof (u_int));
1464                 break;
1465
1466         case BIOCVERSION: {             /* struct bpf_version */
1467                 struct bpf_version bv;
1468
1469                 bzero(&bv, sizeof (bv));
1470                 bv.bv_major = BPF_MAJOR_VERSION;
1471                 bv.bv_minor = BPF_MINOR_VERSION;
1472                 bcopy(&bv, addr, sizeof (bv));
1473                 break;
1474         }
1475
1476         /*
1477          * Get "header already complete" flag
1478          */
1479         case BIOCGHDRCMPLT:             /* u_int */
1480                 bcopy(&d->bd_hdrcmplt, addr, sizeof (u_int));
1481                 break;
1482
1483         /*
1484          * Set "header already complete" flag
1485          */
1486         case BIOCSHDRCMPLT:             /* u_int */
1487                 bcopy(addr, &int_arg, sizeof (int_arg));
1488                 d->bd_hdrcmplt = int_arg ? 1 : 0;
1489                 break;
1490
1491         /*
1492          * Get "see sent packets" flag
1493          */
1494         case BIOCGSEESENT:              /* u_int */
1495                 bcopy(&d->bd_seesent, addr, sizeof (u_int));
1496                 break;
1497
1498         /*
1499          * Set "see sent packets" flag
1500          */
1501         case BIOCSSEESENT:              /* u_int */
1502                 bcopy(addr, &d->bd_seesent, sizeof (u_int));
1503                 break;
1504
1505         /*
1506          * Set traffic service class
1507          */
1508         case BIOCSETTC: {               /* int */
1509                 int tc;
1510
1511                 bcopy(addr, &tc, sizeof (int));
1512                 error = bpf_set_traffic_class(d, tc);
1513                 break;
1514         }
1515
1516         /*
1517          * Get traffic service class
1518          */
1519         case BIOCGETTC:                 /* int */
1520                 bcopy(&d->bd_traffic_class, addr, sizeof (int));
1521                 break;
1522
1523         case FIONBIO:           /* Non-blocking I/O; int */
1524                 break;
1525
1526         case FIOASYNC:          /* Send signal on receive packets; int */
1527                 bcopy(addr, &d->bd_async, sizeof (int));
1528                 break;
1529 #ifndef __APPLE__
1530         case FIOSETOWN:
1531                 error = fsetown(*(int *)addr, &d->bd_sigio);
1532                 break;
1533
1534         case FIOGETOWN:
1535                 *(int *)addr = fgetown(d->bd_sigio);
1536                 break;
1537
1538         /* This is deprecated, FIOSETOWN should be used instead. */
1539         case TIOCSPGRP:
1540                 error = fsetown(-(*(int *)addr), &d->bd_sigio);
1541                 break;
1542
1543         /* This is deprecated, FIOGETOWN should be used instead. */
1544         case TIOCGPGRP:
1545                 *(int *)addr = -fgetown(d->bd_sigio);
1546                 break;
1547 #endif
1548         case BIOCSRSIG: {       /* Set receive signal; u_int */
1549                 u_int sig;
1550
1551                 bcopy(addr, &sig, sizeof (u_int));
1552
1553                 if (sig >= NSIG)
1554                         error = EINVAL;
1555                 else
1556                         d->bd_sig = sig;
1557                 break;
1558         }
1559         case BIOCGRSIG:                 /* u_int */
1560                 bcopy(&d->bd_sig, addr, sizeof (u_int));
1561                 break;
1562         case BIOCSEXTHDR:
1563                 bcopy(addr, &d->bd_extendedhdr, sizeof (u_int));
1564                 break;
1565         }
1566
1567         lck_mtx_unlock(bpf_mlock);
1568
1569         return (error);
1570 }
1571
1572 /*
1573  * Set d's packet filter program to fp.  If this file already has a filter,
1574  * free it and replace it.  Returns EINVAL for bogus requests.
1575  */
1576 static int
1577 bpf_setf(struct bpf_d *d, u_int bf_len, user_addr_t bf_insns)
1578 {
1579         struct bpf_insn *fcode, *old;
1580         u_int flen, size;
1581
1582         old = d->bd_filter;
1583         if (bf_insns == USER_ADDR_NULL) {
1584                 if (bf_len != 0)
1585                         return (EINVAL);
1586                 d->bd_filter = NULL;
1587                 reset_d(d);
1588                 if (old != 0)
1589                         FREE((caddr_t)old, M_DEVBUF);
1590                 return (0);
1591         }
1592         flen = bf_len;
1593         if (flen > BPF_MAXINSNS)
1594                 return (EINVAL);
1595
1596         size = flen * sizeof(struct bpf_insn);
1597         fcode = (struct bpf_insn *) _MALLOC(size, M_DEVBUF, M_WAIT);
1598 #ifdef __APPLE__
1599         if (fcode == NULL)
1600                 return (ENOBUFS);
1601 #endif
1602         if (copyin(bf_insns, (caddr_t)fcode, size) == 0 &&
1603             bpf_validate(fcode, (int)flen)) {
1604                 d->bd_filter = fcode;
1605                 reset_d(d);
1606                 if (old != 0)
1607                         FREE((caddr_t)old, M_DEVBUF);
1608
1609                 return (0);
1610         }
1611         FREE((caddr_t)fcode, M_DEVBUF);
1612         return (EINVAL);
1613 }
1614
1615 /*
1616  * Detach a file from its current interface (if attached at all) and attach
1617  * to the interface indicated by the name stored in ifr.
1618  * Return an errno or 0.
1619  */
1620 static int
1621 bpf_setif(struct bpf_d *d, ifnet_t theywant, u_int32_t dlt)
1622 {
1623         struct bpf_if *bp;
1624         int error;
1625
1626         /*
1627          * Look through attached interfaces for the named one.
1628          */
1629         for (bp = bpf_iflist; bp != 0; bp = bp->bif_next) {
1630                 struct ifnet *ifp = bp->bif_ifp;
1631
1632                 if (ifp == 0 || ifp != theywant || (dlt != 0 && dlt != bp->bif_dlt))
1633                         continue;
1634                 /*
1635                  * We found the requested interface.
1636                  * Allocate the packet buffers if we need to.
1637                  * If we're already attached to requested interface,
1638                  * just flush the buffer.
1639                  */
1640                 if (d->bd_sbuf == 0) {
1641                         error = bpf_allocbufs(d);
1642                         if (error != 0)
1643                                 return (error);
1644                 }
1645                 if (bp != d->bd_bif) {
1646                         if (d->bd_bif)
1647                                 /*
1648                                  * Detach if attached to something else.
1649                                  */
1650                                 bpf_detachd(d);
1651
1652                         if (bpf_attachd(d, bp) != 0) {
1653                                 return ENXIO;
1654                         }
1655                 }
1656                 reset_d(d);
1657                 return (0);
1658         }
1659         /* Not found. */
1660         return (ENXIO);
1661 }
1662
1663
1664
1665 /*
1666  * Get a list of available data link type of the interface.
1667  */
1668 static int
1669 bpf_getdltlist(struct bpf_d *d, caddr_t addr, struct proc *p)
1670 {
1671         u_int           n;
1672         int             error;
1673         struct ifnet    *ifp;
1674         struct bpf_if   *bp;
1675         user_addr_t     dlist;
1676         struct bpf_dltlist bfl;
1677
1678         bcopy(addr, &bfl, sizeof (bfl));
1679         if (proc_is64bit(p)) {
1680                 dlist = (user_addr_t)bfl.bfl_u.bflu_pad;
1681         } else {
1682                 dlist = CAST_USER_ADDR_T(bfl.bfl_u.bflu_list);
1683         }
1684
1685         ifp = d->bd_bif->bif_ifp;
1686         n = 0;
1687         error = 0;
1688         for (bp = bpf_iflist; bp; bp = bp->bif_next) {
1689                 if (bp->bif_ifp != ifp)
1690                         continue;
1691                 if (dlist != USER_ADDR_NULL) {
1692                         if (n >= bfl.bfl_len) {
1693                                 return (ENOMEM);
1694                         }
1695                         error = copyout(&bp->bif_dlt, dlist,
1696                             sizeof (bp->bif_dlt));
1697                         if (error != 0)
1698                                 break;
1699                         dlist += sizeof (bp->bif_dlt);
1700                 }
1701                 n++;
1702         }
1703         bfl.bfl_len = n;
1704         bcopy(&bfl, addr, sizeof (bfl));
1705
1706         return (error);
1707 }
1708
1709 /*
1710  * Set the data link type of a BPF instance.
1711  */
1712 static int
1713 bpf_setdlt(struct bpf_d *d, uint32_t dlt)
1714
1715
1716 {
1717         int error, opromisc;
1718         struct ifnet *ifp;
1719         struct bpf_if *bp;
1720
1721         if (d->bd_bif->bif_dlt == dlt)
1722                 return (0);
1723         ifp = d->bd_bif->bif_ifp;
1724         for (bp = bpf_iflist; bp; bp = bp->bif_next) {
1725                 if (bp->bif_ifp == ifp && bp->bif_dlt == dlt)
1726                         break;
1727         }
1728         if (bp != NULL) {
1729                 opromisc = d->bd_promisc;
1730                 bpf_detachd(d);
1731                 error = bpf_attachd(d, bp);
1732                 if (error) {
1733                         printf("bpf_setdlt: bpf_attachd %s%d failed (%d)\n",
1734                                 ifnet_name(bp->bif_ifp), ifnet_unit(bp->bif_ifp), error);
1735                         return error;
1736                 }
1737                 reset_d(d);
1738                 if (opromisc) {
1739                         lck_mtx_unlock(bpf_mlock);
1740                         error = ifnet_set_promiscuous(bp->bif_ifp, 1);
1741                         lck_mtx_lock(bpf_mlock);
1742                         if (error)
1743                                 printf("bpf_setdlt: ifpromisc %s%d failed (%d)\n",
1744                                            ifnet_name(bp->bif_ifp), ifnet_unit(bp->bif_ifp), error);
1745                         else
1746                                 d->bd_promisc = 1;
1747                 }
1748         }
1749         return (bp == NULL ? EINVAL : 0);
1750 }
1751
1752 static int
1753 bpf_set_traffic_class(struct bpf_d *d, int tc)
1754 {
1755         int error = 0;
1756
1757         if (!SO_VALID_TC(tc))
1758                 error = EINVAL;
1759         else
1760                 d->bd_traffic_class = tc;
1761
1762         return (error);
1763 }
1764
1765 static void
1766 bpf_set_packet_service_class(struct mbuf *m, int tc)
1767 {
1768         if (!(m->m_flags & M_PKTHDR))
1769                 return;
1770
1771         VERIFY(SO_VALID_TC(tc));
1772         (void) m_set_service_class(m, so_tc2msc(tc));
1773 }
1774
1775 /*
1776  * Support for select()
1777  *
1778  * Return true iff the specific operation will not block indefinitely.
1779  * Otherwise, return false but make a note that a selwakeup() must be done.
1780  */
1781 int
1782 bpfselect(dev_t dev, int which, void * wql, struct proc *p)
1783 {
1784         struct bpf_d *d;
1785         int ret = 0;
1786
1787         lck_mtx_lock(bpf_mlock);
1788
1789         d = bpf_dtab[minor(dev)];
1790         if (d == 0 || d == (void *)1) {
1791                 lck_mtx_unlock(bpf_mlock);
1792                 return (ENXIO);
1793         }
1794
1795         if (d->bd_bif == NULL) {
1796                 lck_mtx_unlock(bpf_mlock);
1797                 return (ENXIO);
1798         }
1799
1800         switch (which) {
1801                 case FREAD:
1802                         if (d->bd_hlen != 0 ||
1803                                         ((d->bd_immediate || d->bd_state == BPF_TIMED_OUT) &&
1804                                          d->bd_slen != 0))
1805                                 ret = 1; /* read has data to return */
1806                         else {
1807                                 /*
1808                                  * Read has no data to return.
1809                                  * Make the select wait, and start a timer if
1810                                  * necessary.
1811                                  */
1812                                 selrecord(p, &d->bd_sel, wql);
1813                                 bpf_start_timer(d);
1814                         }
1815                         break;
1816
1817                 case FWRITE:
1818                         ret = 1; /* can't determine whether a write would block */
1819                         break;
1820         }
1821
1822         lck_mtx_unlock(bpf_mlock);
1823         return (ret);
1824 }
1825
1826
1827 /*
1828  * Support for kevent() system call.  Register EVFILT_READ filters and
1829  * reject all others.
1830  */
1831 int bpfkqfilter(dev_t dev, struct knote *kn);
1832 static void filt_bpfdetach(struct knote *);
1833 static int filt_bpfread(struct knote *, long);
1834
1835 static struct filterops bpfread_filtops = {
1836         .f_isfd = 1,
1837         .f_detach = filt_bpfdetach,
1838         .f_event = filt_bpfread,
1839 };
1840
1841 int
1842 bpfkqfilter(dev_t dev, struct knote *kn)
1843 {
1844         struct bpf_d *d;
1845
1846         /*
1847          * Is this device a bpf?
1848          */
1849         if (major(dev) != CDEV_MAJOR) {
1850                 return (EINVAL);
1851         }
1852
1853         if (kn->kn_filter != EVFILT_READ) {
1854                 return (EINVAL);
1855         }
1856
1857         lck_mtx_lock(bpf_mlock);
1858
1859         d = bpf_dtab[minor(dev)];
1860         if (d == 0 || d == (void *)1) {
1861                 lck_mtx_unlock(bpf_mlock);
1862                 return (ENXIO);
1863         }
1864
1865         if (d->bd_bif == NULL) {
1866                 lck_mtx_unlock(bpf_mlock);
1867                 return (ENXIO);
1868         }
1869
1870         kn->kn_hook = d;
1871         kn->kn_fop = &bpfread_filtops;
1872         KNOTE_ATTACH(&d->bd_sel.si_note, kn);
1873         lck_mtx_unlock(bpf_mlock);
1874         return 0;
1875 }
1876
1877 static void
1878 filt_bpfdetach(struct knote *kn)
1879 {
1880         struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
1881
1882         lck_mtx_lock(bpf_mlock);
1883         KNOTE_DETACH(&d->bd_sel.si_note, kn);
1884         lck_mtx_unlock(bpf_mlock);
1885 }
1886
1887 static int
1888 filt_bpfread(struct knote *kn, long hint)
1889 {
1890         struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
1891         int ready = 0;
1892
1893         if (hint == 0)
1894                 lck_mtx_lock(bpf_mlock);
1895
1896         if (d->bd_immediate) {
1897                 /*
1898                  * If there's data in the hold buffer, it's the
1899                  * amount of data a read will return.
1900                  *
1901                  * If there's no data in the hold buffer, but
1902                  * there's data in the store buffer, a read will
1903                  * immediately rotate the store buffer to the
1904                  * hold buffer, the amount of data in the store
1905                  * buffer is the amount of data a read will
1906                  * return.
1907                  *
1908                  * If there's no data in either buffer, we're not
1909                  * ready to read.
1910                  */
1911                 kn->kn_data = (d->bd_hlen == 0 ? d->bd_slen : d->bd_hlen);
1912                 int64_t lowwat = 1;
1913                 if (kn->kn_sfflags & NOTE_LOWAT)
1914                 {
1915                         if (kn->kn_sdata > d->bd_bufsize)
1916                                 lowwat = d->bd_bufsize;
1917                         else if (kn->kn_sdata > lowwat)
1918                                 lowwat = kn->kn_sdata;
1919                 }
1920                 ready = (kn->kn_data >= lowwat);
1921         } else {
1922                 /*
1923                  * If there's data in the hold buffer, it's the
1924                  * amount of data a read will return.
1925                  *
1926                  * If there's no data in the hold buffer, but
1927                  * there's data in the store buffer, if the
1928                  * timer has expired a read will immediately
1929                  * rotate the store buffer to the hold buffer,
1930                  * so the amount of data in the store buffer is
1931                  * the amount of data a read will return.
1932                  *
1933                  * If there's no data in either buffer, or there's
1934                  * no data in the hold buffer and the timer hasn't
1935                  * expired, we're not ready to read.
1936                  */
1937                 kn->kn_data = (d->bd_hlen == 0 && d->bd_state == BPF_TIMED_OUT ?
1938                                 d->bd_slen : d->bd_hlen);
1939                 ready = (kn->kn_data > 0);
1940         }
1941         if (!ready)
1942                 bpf_start_timer(d);
1943
1944         if (hint == 0)
1945                 lck_mtx_unlock(bpf_mlock);
1946         return (ready);
1947 }
1948
1949 /*
1950  * Copy data from an mbuf chain into a buffer.  This code is derived
1951  * from m_copydata in sys/uipc_mbuf.c.
1952  */
1953 static void
1954 bpf_mcopy(const void *src_arg, void *dst_arg, size_t len)
1955 {
1956         struct mbuf *m = (struct mbuf *)(uintptr_t)(src_arg);
1957         u_int count;
1958         u_char *dst;
1959
1960         dst = dst_arg;
1961         while (len > 0) {
1962                 if (m == 0)
1963                         panic("bpf_mcopy");
1964                 count = min(m->m_len, len);
1965                 bcopy(mbuf_data(m), dst, count);
1966                 m = m->m_next;
1967                 dst += count;
1968                 len -= count;
1969         }
1970 }
1971
1972 static inline void
1973 bpf_tap_imp(
1974         ifnet_t         ifp,
1975         u_int32_t       dlt,
1976         mbuf_t          m,
1977         void*           hdr,
1978         size_t          hlen,
1979         int             outbound)
1980 {
1981         struct bpf_if *bp;
1982         struct mbuf *savedm = m;
1983
1984         /*
1985          * It's possible that we get here after the bpf descriptor has been
1986          * detached from the interface; in such a case we simply return.
1987          * Lock ordering is important since we can be called asynchronously
1988          * (from the IOKit) to process an inbound packet; when that happens
1989          * we would have been holding its "gateLock" and will be acquiring
1990          * "bpf_mlock" upon entering this routine.  Due to that, we release
1991          * "bpf_mlock" prior to calling ifnet_set_promiscuous (which will
1992          * acquire "gateLock" in the IOKit), in order to avoid a deadlock
1993          * when a ifnet_set_promiscuous request simultaneously collides with
1994          * an inbound packet being passed into the tap callback.
1995          */
1996         lck_mtx_lock(bpf_mlock);
1997         if (ifp->if_bpf == NULL) {
1998                 lck_mtx_unlock(bpf_mlock);
1999                 return;
2000         }
2001         bp = ifp->if_bpf;
2002         for (bp = ifp->if_bpf; bp && bp->bif_ifp == ifp &&
2003                  (dlt != 0 && bp->bif_dlt != dlt); bp = bp->bif_next)
2004                 ;
2005         if (bp && bp->bif_ifp == ifp && bp->bif_dlist != NULL) {
2006                 struct bpf_d    *d;
2007                 struct m_hdr    hack_hdr;
2008                 u_int   pktlen = 0;
2009                 u_int   slen = 0;
2010                 struct mbuf *m0;
2011
2012                 if (hdr) {
2013                         /*
2014                          * This is gross. We mock up an mbuf that points to the
2015                          * header buffer. This means we don't have to copy the
2016                          * header. A number of interfaces prepended headers just
2017                          * for bpf by allocating an mbuf on the stack. We want to
2018                          * give developers an easy way to prepend a header for bpf.
2019                          * Since a developer allocating an mbuf on the stack is bad,
2020                          * we do even worse here, allocating only a header to point
2021                          * to a buffer the developer supplied. This makes assumptions
2022                          * that bpf_filter and catchpacket will not look at anything
2023                          * in the mbuf other than the header. This was true at the
2024                          * time this code was written.
2025                          */
2026                         hack_hdr.mh_next = m;
2027                         hack_hdr.mh_nextpkt = NULL;
2028                         hack_hdr.mh_len = hlen;
2029                         hack_hdr.mh_data = hdr;
2030                         hack_hdr.mh_type = m->m_type;
2031                         hack_hdr.mh_flags = 0;
2032
2033                         m = (mbuf_t)&hack_hdr;
2034                 }
2035
2036                 for (m0 = m; m0 != 0; m0 = m0->m_next)
2037                         pktlen += m0->m_len;
2038
2039                 for (d = bp->bif_dlist; d; d = d->bd_next) {
2040                         if (outbound && !d->bd_seesent)
2041                                 continue;
2042                         ++d->bd_rcount;
2043                         slen = bpf_filter(d->bd_filter, (u_char *)m, pktlen, 0);
2044                         if (slen != 0) {
2045 #if CONFIG_MACF_NET
2046                                 if (mac_bpfdesc_check_receive(d, bp->bif_ifp) != 0)
2047                                         continue;
2048 #endif
2049                                 catchpacket(d, (u_char *)m, savedm, pktlen,
2050                                     slen, outbound, bpf_mcopy);
2051                         }
2052                 }
2053         }
2054         lck_mtx_unlock(bpf_mlock);
2055 }
2056
2057 void
2058 bpf_tap_out(
2059         ifnet_t         ifp,
2060         u_int32_t       dlt,
2061         mbuf_t          m,
2062         void*           hdr,
2063         size_t          hlen)
2064 {
2065         bpf_tap_imp(ifp, dlt, m, hdr, hlen, 1);
2066 }
2067
2068 void
2069 bpf_tap_in(
2070         ifnet_t         ifp,
2071         u_int32_t       dlt,
2072         mbuf_t          m,
2073         void*           hdr,
2074         size_t          hlen)
2075 {
2076         bpf_tap_imp(ifp, dlt, m, hdr, hlen, 0);
2077 }
2078
2079 /* Callback registered with Ethernet driver. */
2080 static int bpf_tap_callback(struct ifnet *ifp, struct mbuf *m)
2081 {
2082         bpf_tap_imp(ifp, 0, m, NULL, 0, mbuf_pkthdr_rcvif(m) == NULL);
2083
2084         return 0;
2085 }
2086
2087 /*
2088  * Move the packet data from interface memory (pkt) into the
2089  * store buffer.  Return 1 if it's time to wakeup a listener (buffer full),
2090  * otherwise 0.  "copy" is the routine called to do the actual data
2091  * transfer.  bcopy is passed in to copy contiguous chunks, while
2092  * bpf_mcopy is passed in to copy mbuf chains.  In the latter case,
2093  * pkt is really an mbuf.
2094  */
2095 static void
2096 catchpacket(struct bpf_d *d, u_char *pkt, struct mbuf *m, u_int pktlen,
2097         u_int snaplen, int outbound,
2098         void (*cpfn)(const void *, void *, size_t))
2099 {
2100         struct bpf_hdr *hp;
2101         struct bpf_hdr_ext *ehp;
2102         int totlen, curlen;
2103         int hdrlen, caplen;
2104         int do_wakeup = 0;
2105         u_char *payload;
2106
2107         hdrlen = d->bd_extendedhdr ? d->bd_bif->bif_exthdrlen :
2108             d->bd_bif->bif_hdrlen;
2109         /*
2110          * Figure out how many bytes to move.  If the packet is
2111          * greater or equal to the snapshot length, transfer that
2112          * much.  Otherwise, transfer the whole packet (unless
2113          * we hit the buffer size limit).
2114          */
2115         totlen = hdrlen + min(snaplen, pktlen);
2116         if (totlen > d->bd_bufsize)
2117                 totlen = d->bd_bufsize;
2118
2119         /*
2120          * Round up the end of the previous packet to the next longword.
2121          */
2122         curlen = BPF_WORDALIGN(d->bd_slen);
2123         if (curlen + totlen > d->bd_bufsize) {
2124                 /*
2125                  * This packet will overflow the storage buffer.
2126                  * Rotate the buffers if we can, then wakeup any
2127                  * pending reads.
2128                  */
2129                 if (d->bd_fbuf == NULL) {
2130                         /*
2131                          * We haven't completed the previous read yet,
2132                          * so drop the packet.
2133                          */
2134                         ++d->bd_dcount;
2135                         return;
2136                 }
2137                 ROTATE_BUFFERS(d);
2138                 do_wakeup = 1;
2139                 curlen = 0;
2140         }
2141         else if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT)
2142                 /*
2143                  * Immediate mode is set, or the read timeout has
2144                  * already expired during a select call. A packet
2145                  * arrived, so the reader should be woken up.
2146                  */
2147                 do_wakeup = 1;
2148
2149         /*
2150          * Append the bpf header.
2151          */
2152         struct timeval tv;
2153         microtime(&tv);
2154         if (d->bd_extendedhdr) {
2155                 ehp = (struct bpf_hdr_ext *)(void *)(d->bd_sbuf + curlen);
2156                 memset(ehp, 0, sizeof(*ehp));
2157                 ehp->bh_tstamp.tv_sec = tv.tv_sec;
2158                 ehp->bh_tstamp.tv_usec = tv.tv_usec;
2159                 ehp->bh_datalen = pktlen;
2160                 ehp->bh_hdrlen = hdrlen;
2161                 ehp->bh_caplen = totlen - hdrlen;
2162                 if (outbound) {
2163                         if (m->m_pkthdr.m_fhflags & PF_TAG_FLOWHASH)
2164                                 ehp->bh_flowhash = m->m_pkthdr.m_flowhash;
2165                         ehp->bh_svc = so_svc2tc(m->m_pkthdr.svc);
2166                         ehp->bh_flags |= BPF_HDR_EXT_FLAGS_DIR_OUT;
2167                         if (m->m_pkthdr.m_fhflags & PF_TAG_TCP)
2168                                 ehp->bh_flags |= BPF_HDR_EXT_FLAGS_TCP;
2169                 } else
2170                         ehp->bh_flags |= BPF_HDR_EXT_FLAGS_DIR_IN;
2171                 payload = (u_char *)ehp + hdrlen;
2172                 caplen = ehp->bh_caplen;
2173         } else {
2174                 hp = (struct bpf_hdr *)(void *)(d->bd_sbuf + curlen);
2175                 hp->bh_tstamp.tv_sec = tv.tv_sec;
2176                 hp->bh_tstamp.tv_usec = tv.tv_usec;
2177                 hp->bh_datalen = pktlen;
2178                 hp->bh_hdrlen = hdrlen;
2179                 hp->bh_caplen = totlen - hdrlen;
2180                 payload = (u_char *)hp + hdrlen;
2181                 caplen = hp->bh_caplen;
2182         }
2183         /*
2184          * Copy the packet data into the store buffer and update its length.
2185          */
2186         (*cpfn)(pkt, payload, caplen);
2187         d->bd_slen = curlen + totlen;
2188
2189         if (do_wakeup)
2190                 bpf_wakeup(d);
2191 }
2192
2193 /*
2194  * Initialize all nonzero fields of a descriptor.
2195  */
2196 static int
2197 bpf_allocbufs(struct bpf_d *d)
2198 {
2199         d->bd_fbuf = (caddr_t) _MALLOC(d->bd_bufsize, M_DEVBUF, M_WAIT);
2200         if (d->bd_fbuf == 0)
2201                 return (ENOBUFS);
2202
2203         d->bd_sbuf = (caddr_t) _MALLOC(d->bd_bufsize, M_DEVBUF, M_WAIT);
2204         if (d->bd_sbuf == 0) {
2205                 FREE(d->bd_fbuf, M_DEVBUF);
2206                 return (ENOBUFS);
2207         }
2208         d->bd_slen = 0;
2209         d->bd_hlen = 0;
2210         return (0);
2211 }
2212
2213 /*
2214  * Free buffers currently in use by a descriptor.
2215  * Called on close.
2216  */
2217 static void
2218 bpf_freed(struct bpf_d *d)
2219 {
2220         /*
2221          * We don't need to lock out interrupts since this descriptor has
2222          * been detached from its interface and it yet hasn't been marked
2223          * free.
2224          */
2225         if (d->bd_sbuf != 0) {
2226                 FREE(d->bd_sbuf, M_DEVBUF);
2227                 if (d->bd_hbuf != 0)
2228                         FREE(d->bd_hbuf, M_DEVBUF);
2229                 if (d->bd_fbuf != 0)
2230                         FREE(d->bd_fbuf, M_DEVBUF);
2231         }
2232         if (d->bd_filter)
2233                 FREE((caddr_t)d->bd_filter, M_DEVBUF);
2234 }
2235
2236 /*
2237  * Attach an interface to bpf.  driverp is a pointer to a (struct bpf_if *)
2238  * in the driver's softc; dlt is the link layer type; hdrlen is the fixed
2239  * size of the link header (variable length headers not yet supported).
2240  */
2241 void
2242 bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen)
2243 {
2244         bpf_attach(ifp, dlt, hdrlen, NULL, NULL);
2245 }
2246
2247 errno_t
2248 bpf_attach(
2249         ifnet_t                 ifp,
2250         u_int32_t               dlt,
2251         u_int32_t               hdrlen,
2252         bpf_send_func   send,
2253         bpf_tap_func    tap)
2254 {
2255         struct bpf_if *bp_new;
2256         struct bpf_if *bp_temp;
2257         struct bpf_if *bp_first = NULL;
2258
2259         bp_new = (struct bpf_if *) _MALLOC(sizeof(*bp_new), M_DEVBUF, M_WAIT);
2260         if (bp_new == 0)
2261                 panic("bpfattach");
2262
2263         lck_mtx_lock(bpf_mlock);
2264
2265         /*
2266          * Check if this interface/dlt is already attached, record first
2267          * attachment for this interface.
2268          */
2269         for (bp_temp = bpf_iflist; bp_temp && (bp_temp->bif_ifp != ifp ||
2270                  bp_temp->bif_dlt != dlt); bp_temp = bp_temp->bif_next) {
2271                  if (bp_temp->bif_ifp == ifp && bp_first == NULL)
2272                         bp_first = bp_temp;
2273         }
2274
2275         if (bp_temp != NULL) {
2276                 printf("bpfattach - %s%d with dlt %d is already attached\n",
2277                         ifp->if_name, ifp->if_unit, dlt);
2278                 FREE(bp_new, M_DEVBUF);
2279                 lck_mtx_unlock(bpf_mlock);
2280                 return EEXIST;
2281         }
2282
2283         bzero(bp_new, sizeof(*bp_new));
2284         bp_new->bif_ifp = ifp;
2285         bp_new->bif_dlt = dlt;
2286         bp_new->bif_send = send;
2287         bp_new->bif_tap = tap;
2288
2289         if (bp_first == NULL) {
2290                 /* No other entries for this ifp */
2291                 bp_new->bif_next = bpf_iflist;
2292                 bpf_iflist = bp_new;
2293         }
2294         else {
2295                 /* Add this after the first entry for this interface */
2296                 bp_new->bif_next = bp_first->bif_next;
2297                 bp_first->bif_next = bp_new;
2298         }
2299
2300         /*
2301          * Compute the length of the bpf header.  This is not necessarily
2302          * equal to SIZEOF_BPF_HDR because we want to insert spacing such
2303          * that the network layer header begins on a longword boundary (for
2304          * performance reasons and to alleviate alignment restrictions).
2305          */
2306         bp_new->bif_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen;
2307         bp_new->bif_exthdrlen = BPF_WORDALIGN(hdrlen +
2308             sizeof(struct bpf_hdr_ext)) - hdrlen;
2309
2310         /* Take a reference on the interface */
2311         ifnet_reference(ifp);
2312
2313         lck_mtx_unlock(bpf_mlock);
2314
2315 #ifndef __APPLE__
2316         if (bootverbose)
2317                 printf("bpf: %s%d attached\n", ifp->if_name, ifp->if_unit);
2318 #endif
2319
2320         return 0;
2321 }
2322
2323 /*
2324  * Detach bpf from an interface.  This involves detaching each descriptor
2325  * associated with the interface, and leaving bd_bif NULL.  Notify each
2326  * descriptor as it's detached so that any sleepers wake up and get
2327  * ENXIO.
2328  */
2329 void
2330 bpfdetach(struct ifnet *ifp)
2331 {
2332         struct bpf_if   *bp, *bp_prev, *bp_next;
2333         struct bpf_if   *bp_free = NULL;
2334         struct bpf_d    *d;
2335
2336
2337         lck_mtx_lock(bpf_mlock);
2338
2339         /* Locate BPF interface information */
2340         bp_prev = NULL;
2341         for (bp = bpf_iflist; bp != NULL; bp = bp_next) {
2342                 bp_next = bp->bif_next;
2343                 if (ifp != bp->bif_ifp) {
2344                         bp_prev = bp;
2345                         continue;
2346                 }
2347
2348                 while ((d = bp->bif_dlist) != NULL) {
2349                         bpf_detachd(d);
2350                         bpf_wakeup(d);
2351                 }
2352
2353                 if (bp_prev) {
2354                         bp_prev->bif_next = bp->bif_next;
2355                 } else {
2356                         bpf_iflist = bp->bif_next;
2357                 }
2358
2359                 bp->bif_next = bp_free;
2360                 bp_free = bp;
2361
2362                 ifnet_release(ifp);
2363         }
2364
2365         lck_mtx_unlock(bpf_mlock);
2366
2367         FREE(bp, M_DEVBUF);
2368
2369 }
2370
2371 void
2372 bpf_init(__unused void *unused)
2373 {
2374 #ifdef __APPLE__
2375         int     i;
2376         int     maj;
2377
2378         if (bpf_devsw_installed == 0) {
2379                 bpf_devsw_installed = 1;
2380
2381         bpf_mlock_grp_attr = lck_grp_attr_alloc_init();
2382
2383         bpf_mlock_grp = lck_grp_alloc_init("bpf", bpf_mlock_grp_attr);
2384
2385         bpf_mlock_attr = lck_attr_alloc_init();
2386
2387         lck_mtx_init(bpf_mlock, bpf_mlock_grp, bpf_mlock_attr);
2388
2389                 maj = cdevsw_add(CDEV_MAJOR, &bpf_cdevsw);
2390                 if (maj == -1) {
2391                         if (bpf_mlock_attr)
2392                                 lck_attr_free(bpf_mlock_attr);
2393                         if (bpf_mlock_grp)
2394                                 lck_grp_free(bpf_mlock_grp);
2395                         if (bpf_mlock_grp_attr)
2396                                 lck_grp_attr_free(bpf_mlock_grp_attr);
2397
2398                         bpf_mlock = NULL;
2399                         bpf_mlock_attr = NULL;
2400                         bpf_mlock_grp = NULL;
2401                         bpf_mlock_grp_attr = NULL;
2402                         bpf_devsw_installed = 0;
2403                         printf("bpf_init: failed to allocate a major number!\n");
2404                         return;
2405                 }
2406
2407                 for (i = 0 ; i < NBPFILTER; i++)
2408                         bpf_make_dev_t(maj);
2409         }
2410 #else
2411         cdevsw_add(&bpf_cdevsw);
2412 #endif
2413 }
2414
2415 #ifndef __APPLE__
2416 SYSINIT(bpfdev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE+CDEV_MAJOR,bpf_drvinit,NULL)
2417 #endif
2418
2419 #if CONFIG_MACF_NET
2420 struct label *
2421 mac_bpfdesc_label_get(struct bpf_d *d)
2422 {
2423
2424         return (d->bd_label);
2425 }
2426
2427 void
2428 mac_bpfdesc_label_set(struct bpf_d *d, struct label *label)
2429 {
2430
2431         d->bd_label = label;
2432 }
2433 #endif