1c79356b 1/*
813fb2f6 2 * Copyright (c) 2000-2017 Apple Inc. All rights reserved.
5d5c5d0d 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
1c79356b 5 *
2d21ac55
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
8f6c56a5 14 *
2d21ac55
A
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5
A
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55
A
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
8f6c56a5 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
1c79356b
A
27 */
28/*
29 * Copyright (c) 1990, 1991, 1993
30 * The Regents of the University of California. All rights reserved.
31 *
32 * This code is derived from the Stanford/CMU enet packet filter,
33 * (net/enet.c) distributed as part of 4.3BSD, and code contributed
34 * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
35 * Berkeley Laboratory.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 * must display the following acknowledgement:
47 * This product includes software developed by the University of
48 * California, Berkeley and its contributors.
49 * 4. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * @(#)bpf.c 8.2 (Berkeley) 3/28/94
66 *
9bccf70c 67 * $FreeBSD: src/sys/net/bpf.c,v 1.59.2.5 2001/01/05 04:49:09 jdp Exp $
1c79356b 68 */
2d21ac55
A
69/*
70 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
71 * support for mandatory and extensible security protections. This notice
72 * is included in support of clause 2.2 (b) of the Apple Public License,
73 * Version 2.0.
74 */
1c79356b 75
9bccf70c 76#include "bpf.h"
1c79356b
A
77
78#ifndef __GNUC__
79#define inline
80#else
81#define inline __inline
82#endif
83
84#include <sys/param.h>
85#include <sys/systm.h>
86#include <sys/conf.h>
87#include <sys/malloc.h>
88#include <sys/mbuf.h>
89#include <sys/time.h>
90#include <sys/proc.h>
1c79356b
A
91#include <sys/signalvar.h>
92#include <sys/filio.h>
93#include <sys/sockio.h>
94#include <sys/ttycom.h>
95#include <sys/filedesc.h>
91447636 96#include <sys/uio_internal.h>
b0d623f7
A
97#include <sys/file_internal.h>
98#include <sys/event.h>
1c79356b 99
9bccf70c
A
100#include <sys/poll.h>
101
1c79356b 102#include <sys/socket.h>
316670eb 103#include <sys/socketvar.h>
1c79356b
A
104#include <sys/vnode.h>
105
106#include <net/if.h>
107#include <net/bpf.h>
108#include <net/bpfdesc.h>
109
110#include <netinet/in.h>
316670eb
A
111#include <netinet/in_pcb.h>
112#include <netinet/in_var.h>
113#include <netinet/ip_var.h>
114#include <netinet/tcp.h>
115#include <netinet/tcp_var.h>
116#include <netinet/udp.h>
117#include <netinet/udp_var.h>
1c79356b
A
118#include <netinet/if_ether.h>
119#include <sys/kernel.h>
120#include <sys/sysctl.h>
55e303ae 121#include <net/firewire.h>
1c79356b 122
1c79356b
A
123#include <miscfs/devfs/devfs.h>
124#include <net/dlil.h>
fe8ab488 125#include <net/pktap.h>
1c79356b 126
91447636 127#include <kern/locks.h>
6d2010ae 128#include <kern/thread_call.h>
91447636 129
2d21ac55
A
130#if CONFIG_MACF_NET
131#include <security/mac_framework.h>
132#endif /* MAC_NET */
91447636 133
2d21ac55 134extern int tvtohz(struct timeval *);
9bccf70c 135
1c79356b
A
136#define BPF_BUFSIZE 4096
137#define UIOMOVE(cp, len, code, uio) uiomove(cp, len, uio)
1c79356b 138
55e303ae 139
1c79356b
A
140#define PRINET 26 /* interruptible */
141
142/*
143 * The default read buffer size is patchable.
144 */
91447636 145static unsigned int bpf_bufsize = BPF_BUFSIZE;
6d2010ae 146SYSCTL_INT(_debug, OID_AUTO, bpf_bufsize, CTLFLAG_RW | CTLFLAG_LOCKED,
1c79356b 147 &bpf_bufsize, 0, "");
6d2010ae
A
148__private_extern__ unsigned int bpf_maxbufsize = BPF_MAXBUFSIZE;
149SYSCTL_INT(_debug, OID_AUTO, bpf_maxbufsize, CTLFLAG_RW | CTLFLAG_LOCKED,
9bccf70c 150 &bpf_maxbufsize, 0, "");
91447636 151static unsigned int bpf_maxdevices = 256;
6d2010ae 152SYSCTL_UINT(_debug, OID_AUTO, bpf_maxdevices, CTLFLAG_RW | CTLFLAG_LOCKED,
91447636 153 &bpf_maxdevices, 0, "");
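/*
 * Illustrative sketch (not part of this file): the knobs above can be
 * tuned from userland through sysctl(3).  Fragment assuming
 * <sys/sysctl.h> and <err.h>; "debug.bpf_maxbufsize" is the name the
 * SYSCTL_INT() above registers under the _debug node.
 */
#if 0 /* example only */
	unsigned int newmax = 1 << 21;	/* raise the BIOCSBLEN cap to 2 MiB */
	if (sysctlbyname("debug.bpf_maxbufsize", NULL, NULL,
	    &newmax, sizeof (newmax)) == -1)
		err(1, "sysctlbyname");
#endif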
fe8ab488
A
154/*
 155 * bpf_wantpktap controls the default visibility of DLT_PKTAP.
 156 * On OS X it is off by default, so a process needs to use the
 157 * BIOCSWANTPKTAP ioctl explicitly to be able to use DLT_PKTAP.
158 */
159static unsigned int bpf_wantpktap = 0;
160SYSCTL_UINT(_debug, OID_AUTO, bpf_wantpktap, CTLFLAG_RW | CTLFLAG_LOCKED,
161 &bpf_wantpktap, 0, "");
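/*
 * Illustrative sketch (not part of this file): a process opts in to
 * DLT_PKTAP with the BIOCSWANTPKTAP ioctl (handled in bpfioctl() below),
 * which sets BPF_WANT_PKTAP on the descriptor.  Fragment assuming bpf_fd
 * is an open /dev/bpfN descriptor and <err.h> for err(3).
 */
#if 0 /* example only */
	u_int want = 1;
	if (ioctl(bpf_fd, BIOCSWANTPKTAP, &want) == -1)
		err(1, "BIOCSWANTPKTAP");
#endif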
1c79356b 162
3e170ce0
A
163static int bpf_debug = 0;
164SYSCTL_INT(_debug, OID_AUTO, bpf_debug, CTLFLAG_RW | CTLFLAG_LOCKED,
165 &bpf_debug, 0, "");
166
1c79356b
A
167/*
168 * bpf_iflist is the list of interfaces; each corresponds to an ifnet
55e303ae 169 * bpf_dtab holds pointers to the descriptors, indexed by minor device #
1c79356b
A
170 */
171static struct bpf_if *bpf_iflist;
9bccf70c
A
172#ifdef __APPLE__
173/*
174 * BSD now stores the bpf_d in the dev_t which is a struct
175 * on their system. Our dev_t is an int, so we still store
176 * the bpf_d in a separate table indexed by minor device #.
91447636
A
177 *
 178 * The value stored in bpf_dtab[n] represents one of three states:
179 * 0: device not opened
180 * 1: device opening or closing
181 * other: device <n> opened with pointer to storage
9bccf70c 182 */
55e303ae 183static struct bpf_d **bpf_dtab = NULL;
91447636
A
184static unsigned int bpf_dtab_size = 0;
185static unsigned int nbpfilter = 0;
186
316670eb
A
187decl_lck_mtx_data(static, bpf_mlock_data);
188static lck_mtx_t *bpf_mlock = &bpf_mlock_data;
91447636
A
189static lck_grp_t *bpf_mlock_grp;
190static lck_grp_attr_t *bpf_mlock_grp_attr;
191static lck_attr_t *bpf_mlock_attr;
55e303ae 192
39236c6e 193static mbuf_tag_id_t bpf_mtag_id;
55e303ae 194#endif /* __APPLE__ */
1c79356b 195
91447636 196static int bpf_allocbufs(struct bpf_d *);
2d21ac55 197static errno_t bpf_attachd(struct bpf_d *d, struct bpf_if *bp);
3e170ce0 198static int bpf_detachd(struct bpf_d *d, int);
91447636
A
199static void bpf_freed(struct bpf_d *);
200static void bpf_mcopy(const void *, void *, size_t);
201static int bpf_movein(struct uio *, int,
202 struct mbuf **, struct sockaddr *, int *);
3e170ce0 203static int bpf_setif(struct bpf_d *, ifnet_t ifp, u_int32_t dlt);
39236c6e
A
204static void bpf_timed_out(void *, void *);
205static void bpf_wakeup(struct bpf_d *);
316670eb
A
206static void catchpacket(struct bpf_d *, u_char *, struct mbuf *, u_int,
207 u_int, int, void (*)(const void *, void *, size_t));
91447636 208static void reset_d(struct bpf_d *);
3e170ce0 209static int bpf_setf(struct bpf_d *, u_int, user_addr_t, u_long);
316670eb 210static int bpf_getdltlist(struct bpf_d *, caddr_t, struct proc *);
3e170ce0 211static int bpf_setdlt(struct bpf_d *, u_int);
316670eb
A
212static int bpf_set_traffic_class(struct bpf_d *, int);
213static void bpf_set_packet_service_class(struct mbuf *, int);
1c79356b 214
3e170ce0
A
215static void bpf_acquire_d(struct bpf_d *);
216static void bpf_release_d(struct bpf_d *);
55e303ae
A
217
218static int bpf_devsw_installed;
219
91447636 220void bpf_init(void *unused);
2d21ac55 221static int bpf_tap_callback(struct ifnet *ifp, struct mbuf *m);
55e303ae 222
9bccf70c
A
223/*
 224 * Darwin differs from BSD here: the following are static
225 * on BSD and not static on Darwin.
226 */
6d2010ae
A
227 d_open_t bpfopen;
228 d_close_t bpfclose;
229 d_read_t bpfread;
230 d_write_t bpfwrite;
316670eb
A
231 ioctl_fcn_t bpfioctl;
232 select_fcn_t bpfselect;
1c79356b 233
1c79356b 234
9bccf70c
A
 235/* Darwin's cdevsw struct differs slightly from BSD's */
236#define CDEV_MAJOR 23
1c79356b 237static struct cdevsw bpf_cdevsw = {
6d2010ae
A
238 /* open */ bpfopen,
239 /* close */ bpfclose,
240 /* read */ bpfread,
241 /* write */ bpfwrite,
242 /* ioctl */ bpfioctl,
316670eb
A
243 /* stop */ eno_stop,
244 /* reset */ eno_reset,
245 /* tty */ NULL,
246 /* select */ bpfselect,
247 /* mmap */ eno_mmap,
248 /* strategy*/ eno_strat,
249 /* getc */ eno_getc,
250 /* putc */ eno_putc,
251 /* type */ 0
1c79356b
A
252};
253
55e303ae 254#define SOCKADDR_HDR_LEN offsetof(struct sockaddr, sa_data)
9bccf70c 255
1c79356b 256static int
91447636 257bpf_movein(struct uio *uio, int linktype, struct mbuf **mp, struct sockaddr *sockp, int *datlen)
1c79356b
A
258{
259 struct mbuf *m;
260 int error;
261 int len;
2d21ac55 262 uint8_t sa_family;
1c79356b
A
263 int hlen;
264
2d21ac55 265 switch (linktype) {
91447636 266
2d21ac55
A
267#if SLIP
268 case DLT_SLIP:
269 sa_family = AF_INET;
270 hlen = 0;
271 break;
272#endif /* SLIP */
91447636 273
2d21ac55
A
274 case DLT_EN10MB:
275 sa_family = AF_UNSPEC;
276 /* XXX Would MAXLINKHDR be better? */
277 hlen = sizeof(struct ether_header);
278 break;
91447636 279
2d21ac55
A
280#if FDDI
281 case DLT_FDDI:
91447636 282 #if defined(__FreeBSD__) || defined(__bsdi__)
2d21ac55
A
283 sa_family = AF_IMPLINK;
284 hlen = 0;
91447636 285 #else
2d21ac55
A
286 sa_family = AF_UNSPEC;
287 /* XXX 4(FORMAC)+6(dst)+6(src)+3(LLC)+5(SNAP) */
288 hlen = 24;
91447636 289 #endif
2d21ac55
A
290 break;
291#endif /* FDDI */
91447636 292
2d21ac55
A
293 case DLT_RAW:
294 case DLT_NULL:
295 sa_family = AF_UNSPEC;
296 hlen = 0;
297 break;
91447636
A
298
299 #ifdef __FreeBSD__
2d21ac55
A
300 case DLT_ATM_RFC1483:
301 /*
302 * en atm driver requires 4-byte atm pseudo header.
303 * though it isn't standard, vpi:vci needs to be
304 * specified anyway.
305 */
306 sa_family = AF_UNSPEC;
307 hlen = 12; /* XXX 4(ATM_PH) + 3(LLC) + 5(SNAP) */
308 break;
91447636 309 #endif
2d21ac55
A
310
311 case DLT_PPP:
312 sa_family = AF_UNSPEC;
313 hlen = 4; /* This should match PPP_HDRLEN */
314 break;
91447636 315
2d21ac55
A
316 case DLT_APPLE_IP_OVER_IEEE1394:
317 sa_family = AF_UNSPEC;
318 hlen = sizeof(struct firewire_header);
319 break;
b0d623f7
A
320
321 case DLT_IEEE802_11: /* IEEE 802.11 wireless */
322 sa_family = AF_IEEE80211;
323 hlen = 0;
324 break;
316670eb 325
6d2010ae
A
326 case DLT_IEEE802_11_RADIO:
327 sa_family = AF_IEEE80211;
328 hlen = 0;
329 break;
b0d623f7 330
2d21ac55
A
331 default:
332 return (EIO);
55e303ae 333 }
2d21ac55 334
91447636
A
335 // LP64todo - fix this!
336 len = uio_resid(uio);
1c79356b
A
337 *datlen = len - hlen;
338 if ((unsigned)len > MCLBYTES)
339 return (EIO);
340
2d21ac55
A
341 if (sockp) {
342 /*
343 * Build a sockaddr based on the data link layer type.
344 * We do this at this level because the ethernet header
345 * is copied directly into the data field of the sockaddr.
346 * In the case of SLIP, there is no header and the packet
347 * is forwarded as is.
348 * Also, we are careful to leave room at the front of the mbuf
349 * for the link level header.
350 */
351 if ((hlen + SOCKADDR_HDR_LEN) > sockp->sa_len) {
352 return (EIO);
353 }
354 sockp->sa_family = sa_family;
355 } else {
356 /*
357 * We're directly sending the packet data supplied by
358 * the user; we don't need to make room for the link
359 * header, and don't need the header length value any
360 * more, so set it to 0.
361 */
362 hlen = 0;
363 }
364
1c79356b
A
365 MGETHDR(m, M_WAIT, MT_DATA);
366 if (m == 0)
367 return (ENOBUFS);
91447636 368 if ((unsigned)len > MHLEN) {
1c79356b
A
369 MCLGET(m, M_WAIT);
370 if ((m->m_flags & M_EXT) == 0) {
1c79356b
A
371 error = ENOBUFS;
372 goto bad;
373 }
374 }
375 m->m_pkthdr.len = m->m_len = len;
376 m->m_pkthdr.rcvif = NULL;
377 *mp = m;
6d2010ae 378
1c79356b
A
379 /*
380 * Make room for link header.
381 */
382 if (hlen != 0) {
383 m->m_pkthdr.len -= hlen;
384 m->m_len -= hlen;
1c79356b 385 m->m_data += hlen; /* XXX */
1c79356b
A
386 error = UIOMOVE((caddr_t)sockp->sa_data, hlen, UIO_WRITE, uio);
387 if (error)
388 goto bad;
389 }
390 error = UIOMOVE(mtod(m, caddr_t), len - hlen, UIO_WRITE, uio);
6d2010ae
A
391 if (error)
392 goto bad;
393
394 /* Check for multicast destination */
395 switch (linktype) {
396 case DLT_EN10MB: {
397 struct ether_header *eh = mtod(m, struct ether_header *);
398
399 if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
400 if (_ether_cmp(etherbroadcastaddr, eh->ether_dhost) == 0)
401 m->m_flags |= M_BCAST;
402 else
403 m->m_flags |= M_MCAST;
404 }
405 break;
406 }
407 }
408
409 return 0;
1c79356b
A
410 bad:
411 m_freem(m);
412 return (error);
413}
414
9bccf70c 415#ifdef __APPLE__
55e303ae
A
416
417/*
39236c6e
A
418 * The dynamic addition of a new device node must block all processes that
419 * are opening the last device so that no process will get an unexpected
420 * ENOENT
55e303ae 421 */
91447636
A
422static void
423bpf_make_dev_t(int maj)
55e303ae 424{
91447636
A
425 static int bpf_growing = 0;
426 unsigned int cur_size = nbpfilter, i;
55e303ae 427
91447636
A
428 if (nbpfilter >= bpf_maxdevices)
429 return;
55e303ae 430
91447636
A
431 while (bpf_growing) {
432 /* Wait until new device has been created */
433 (void)tsleep((caddr_t)&bpf_growing, PZERO, "bpf_growing", 0);
434 }
435 if (nbpfilter > cur_size) {
436 /* other thread grew it already */
437 return;
438 }
439 bpf_growing = 1;
55e303ae 440
91447636
A
441 /* need to grow bpf_dtab first */
442 if (nbpfilter == bpf_dtab_size) {
443 int new_dtab_size;
444 struct bpf_d **new_dtab = NULL;
445 struct bpf_d **old_dtab = NULL;
446
447 new_dtab_size = bpf_dtab_size + NBPFILTER;
448 new_dtab = (struct bpf_d **)_MALLOC(sizeof(struct bpf_d *) * new_dtab_size, M_DEVBUF, M_WAIT);
449 if (new_dtab == 0) {
450 printf("bpf_make_dev_t: malloc bpf_dtab failed\n");
451 goto done;
452 }
453 if (bpf_dtab) {
454 bcopy(bpf_dtab, new_dtab,
455 sizeof(struct bpf_d *) * bpf_dtab_size);
456 }
457 bzero(new_dtab + bpf_dtab_size,
458 sizeof(struct bpf_d *) * NBPFILTER);
459 old_dtab = bpf_dtab;
460 bpf_dtab = new_dtab;
461 bpf_dtab_size = new_dtab_size;
462 if (old_dtab != NULL)
463 _FREE(old_dtab, M_DEVBUF);
55e303ae 464 }
91447636
A
465 i = nbpfilter++;
466 (void) devfs_make_node(makedev(maj, i),
467 DEVFS_CHAR, UID_ROOT, GID_WHEEL, 0600,
468 "bpf%d", i);
469done:
470 bpf_growing = 0;
471 wakeup((caddr_t)&bpf_growing);
55e303ae
A
472}
473
9bccf70c 474#endif
1c79356b
A
475
476/*
477 * Attach file to the bpf interface, i.e. make d listen on bp.
1c79356b 478 */
2d21ac55 479static errno_t
91447636 480bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
1c79356b 481{
2d21ac55
A
482 int first = bp->bif_dlist == NULL;
483 int error = 0;
484
1c79356b
A
485 /*
486 * Point d at bp, and add d to the interface's list of listeners.
487 * Finally, point the driver's bpf cookie at the interface so
488 * it will divert packets to bpf.
489 */
490 d->bd_bif = bp;
491 d->bd_next = bp->bif_dlist;
492 bp->bif_dlist = d;
3e170ce0
A
493
494 /*
495 * Take a reference on the device even if an error is returned
496 * because we keep the device in the interface's list of listeners
497 */
498 bpf_acquire_d(d);
499
2d21ac55
A
500 if (first) {
501 /* Find the default bpf entry for this ifp */
502 if (bp->bif_ifp->if_bpf == NULL) {
fe8ab488 503 struct bpf_if *tmp, *primary = NULL;
2d21ac55 504
fe8ab488
A
505 for (tmp = bpf_iflist; tmp; tmp = tmp->bif_next) {
506 if (tmp->bif_ifp != bp->bif_ifp)
507 continue;
508 primary = tmp;
509 /*
510 * Make DLT_PKTAP only if process knows how
511 * to deal with it, otherwise find another one
512 */
513 if (tmp->bif_dlt == DLT_PKTAP &&
514 !(d->bd_flags & BPF_WANT_PKTAP))
515 continue;
516 break;
517 }
2d21ac55
A
518 bp->bif_ifp->if_bpf = primary;
519 }
520
521 /* Only call dlil_set_bpf_tap for primary dlt */
522 if (bp->bif_ifp->if_bpf == bp)
7e4a7d39 523 dlil_set_bpf_tap(bp->bif_ifp, BPF_TAP_INPUT_OUTPUT, bpf_tap_callback);
2d21ac55
A
524
525 if (bp->bif_tap)
7e4a7d39 526 error = bp->bif_tap(bp->bif_ifp, bp->bif_dlt, BPF_TAP_INPUT_OUTPUT);
2d21ac55 527 }
1c79356b 528
3e170ce0
A
529 /*
530 * Reset the detach flags in case we previously detached an interface
531 */
532 d->bd_flags &= ~(BPF_DETACHING | BPF_DETACHED);
533
fe8ab488
A
534 if (bp->bif_ifp->if_bpf != NULL &&
535 bp->bif_ifp->if_bpf->bif_dlt == DLT_PKTAP)
536 d->bd_flags |= BPF_FINALIZE_PKTAP;
537 else
538 d->bd_flags &= ~BPF_FINALIZE_PKTAP;
539
2d21ac55 540 return error;
1c79356b
A
541}
542
543/*
544 * Detach a file from its interface.
3e170ce0
A
545 *
 546 * Return 1 if it was closed by some thread, 0 otherwise
1c79356b 547 */
3e170ce0
A
548static int
549bpf_detachd(struct bpf_d *d, int closing)
1c79356b
A
550{
551 struct bpf_d **p;
552 struct bpf_if *bp;
553 struct ifnet *ifp;
554
3e170ce0
A
555 /*
556 * Some other thread already detached
557 */
558 if ((d->bd_flags & (BPF_DETACHED | BPF_DETACHING)) != 0)
559 goto done;
560 /*
561 * This thread is doing the detach
562 */
563 d->bd_flags |= BPF_DETACHING;
564
1c79356b 565 ifp = d->bd_bif->bif_ifp;
1c79356b 566 bp = d->bd_bif;
3e170ce0
A
567
568 if (bpf_debug != 0)
569 printf("%s: %llx %s%s\n",
570 __func__, (uint64_t)VM_KERNEL_ADDRPERM(d),
571 if_name(ifp), closing ? " closing" : "");
572
2d21ac55
A
573 /* Remove d from the interface's descriptor list. */
574 p = &bp->bif_dlist;
575 while (*p != d) {
576 p = &(*p)->bd_next;
577 if (*p == 0)
578 panic("bpf_detachd: descriptor not in list");
579 }
580 *p = (*p)->bd_next;
581 if (bp->bif_dlist == 0) {
582 /*
583 * Let the driver know that there are no more listeners.
584 */
585 /* Only call dlil_set_bpf_tap for primary dlt */
586 if (bp->bif_ifp->if_bpf == bp)
587 dlil_set_bpf_tap(ifp, BPF_TAP_DISABLE, NULL);
588 if (bp->bif_tap)
589 bp->bif_tap(ifp, bp->bif_dlt, BPF_TAP_DISABLE);
590
591 for (bp = bpf_iflist; bp; bp = bp->bif_next)
592 if (bp->bif_ifp == ifp && bp->bif_dlist != 0)
593 break;
594 if (bp == NULL)
595 ifp->if_bpf = NULL;
596 }
597 d->bd_bif = NULL;
1c79356b
A
598 /*
599 * Check if this descriptor had requested promiscuous mode.
600 * If so, turn it off.
601 */
602 if (d->bd_promisc) {
603 d->bd_promisc = 0;
2d21ac55
A
604 lck_mtx_unlock(bpf_mlock);
605 if (ifnet_set_promiscuous(ifp, 0)) {
1c79356b
A
606 /*
607 * Something is really wrong if we were able to put
608 * the driver into promiscuous mode, but can't
609 * take it out.
9bccf70c 610 * Most likely the network interface is gone.
1c79356b 611 */
3e170ce0 612 printf("%s: ifnet_set_promiscuous failed\n", __func__);
2d21ac55
A
613 }
614 lck_mtx_lock(bpf_mlock);
1c79356b 615 }
3e170ce0
A
616
617 /*
 618 * Wake up other threads that are waiting for this thread to finish
619 * detaching
620 */
621 d->bd_flags &= ~BPF_DETACHING;
622 d->bd_flags |= BPF_DETACHED;
623 /*
 624 * Note that we've kept the reference because we may have dropped
625 * the lock when turning off promiscuous mode
626 */
627 bpf_release_d(d);
628
629done:
630 /*
 631 * When closing, make sure no other thread refers to the bpf_d
632 */
633 if (bpf_debug != 0)
634 printf("%s: %llx done\n",
635 __func__, (uint64_t)VM_KERNEL_ADDRPERM(d));
636 /*
637 * Let the caller know the bpf_d is closed
638 */
639 if ((d->bd_flags & BPF_CLOSING))
640 return (1);
641 else
642 return (0);
1c79356b
A
643}
644
645
6d2010ae
A
646/*
647 * Start asynchronous timer, if necessary.
648 * Must be called with bpf_mlock held.
649 */
650static void
651bpf_start_timer(struct bpf_d *d)
652{
653 uint64_t deadline;
654 struct timeval tv;
655
656 if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
657 tv.tv_sec = d->bd_rtout / hz;
658 tv.tv_usec = (d->bd_rtout % hz) * tick;
659
39236c6e
A
660 clock_interval_to_deadline(
661 (uint64_t)tv.tv_sec * USEC_PER_SEC + tv.tv_usec,
662 NSEC_PER_USEC, &deadline);
6d2010ae
A
663 /*
664 * The state is BPF_IDLE, so the timer hasn't
665 * been started yet, and hasn't gone off yet;
666 * there is no thread call scheduled, so this
667 * won't change the schedule.
668 *
669 * XXX - what if, by the time it gets entered,
670 * the deadline has already passed?
671 */
672 thread_call_enter_delayed(d->bd_thread_call, deadline);
673 d->bd_state = BPF_WAITING;
674 }
675}
676
677/*
678 * Cancel asynchronous timer.
679 * Must be called with bpf_mlock held.
680 */
681static boolean_t
682bpf_stop_timer(struct bpf_d *d)
683{
684 /*
685 * If the timer has already gone off, this does nothing.
686 * Our caller is expected to set d->bd_state to BPF_IDLE,
687 * with the bpf_mlock, after we are called. bpf_timed_out()
688 * also grabs bpf_mlock, so, if the timer has gone off and
689 * bpf_timed_out() hasn't finished, it's waiting for the
690 * lock; when this thread releases the lock, it will
691 * find the state is BPF_IDLE, and just release the
692 * lock and return.
693 */
694 return (thread_call_cancel(d->bd_thread_call));
695}
696
3e170ce0
A
697void
698bpf_acquire_d(struct bpf_d *d)
699{
700 void *lr_saved = __builtin_return_address(0);
701
702 lck_mtx_assert(bpf_mlock, LCK_MTX_ASSERT_OWNED);
703
704 d->bd_refcnt += 1;
705
706 d->bd_ref_lr[d->bd_next_ref_lr] = lr_saved;
707 d->bd_next_ref_lr = (d->bd_next_ref_lr + 1) % BPF_REF_HIST;
708}
709
710void
711bpf_release_d(struct bpf_d *d)
712{
713 void *lr_saved = __builtin_return_address(0);
714
715 lck_mtx_assert(bpf_mlock, LCK_MTX_ASSERT_OWNED);
716
717 if (d->bd_refcnt <= 0)
718 panic("%s: %p refcnt <= 0", __func__, d);
719
720 d->bd_refcnt -= 1;
6d2010ae 721
3e170ce0
A
722 d->bd_unref_lr[d->bd_next_unref_lr] = lr_saved;
723 d->bd_next_unref_lr = (d->bd_next_unref_lr + 1) % BPF_REF_HIST;
724
725 if (d->bd_refcnt == 0) {
726 /* Assert the device is detached */
727 if ((d->bd_flags & BPF_DETACHED) == 0)
728 panic("%s: %p BPF_DETACHED not set", __func__, d);
729
730 _FREE(d, M_DEVBUF);
731 }
732}
6d2010ae 733
1c79356b
A
734/*
735 * Open ethernet device. Returns ENXIO for illegal minor device number,
736 * EBUSY if file is open by another process.
737 */
738/* ARGSUSED */
2d21ac55 739int
b0d623f7 740bpfopen(dev_t dev, int flags, __unused int fmt,
2d21ac55 741 __unused struct proc *p)
1c79356b 742{
2d21ac55 743 struct bpf_d *d;
1c79356b 744
2d21ac55
A
745 lck_mtx_lock(bpf_mlock);
746 if ((unsigned int) minor(dev) >= nbpfilter) {
747 lck_mtx_unlock(bpf_mlock);
1c79356b 748 return (ENXIO);
2d21ac55 749 }
91447636
A
750 /*
751 * New device nodes are created on demand when opening the last one.
752 * The programming model is for processes to loop on the minor starting at 0
753 * as long as EBUSY is returned. The loop stops when either the open succeeds or
 754 * an error other than EBUSY is returned. That means that bpf_make_dev_t() must
755 * block all processes that are opening the last node. If not all
756 * processes are blocked, they could unexpectedly get ENOENT and abort their
757 * opening loop.
758 */
759 if ((unsigned int) minor(dev) == (nbpfilter - 1))
760 bpf_make_dev_t(major(dev));
9bccf70c 761
1c79356b 762 /*
9bccf70c 763 * Each minor can be opened by only one process. If the requested
1c79356b 764 * minor is in use, return EBUSY.
91447636
A
765 *
766 * Important: bpfopen() and bpfclose() have to check and set the status of a device
 767 * in the same locking context, otherwise the device may be leaked because the vnode use count
 768 * will be unexpectedly greater than 1 when close() is called.
1c79356b 769 */
2d21ac55 770 if (bpf_dtab[minor(dev)] == 0) {
91447636 771 bpf_dtab[minor(dev)] = (void *)1; /* Mark opening */
2d21ac55
A
772 } else {
773 lck_mtx_unlock(bpf_mlock);
91447636 774 return (EBUSY);
2d21ac55 775 }
3e170ce0
A
776 d = (struct bpf_d *)_MALLOC(sizeof(struct bpf_d), M_DEVBUF,
777 M_WAIT | M_ZERO);
91447636
A
778 if (d == NULL) {
779 /* this really is a catastrophic failure */
780 printf("bpfopen: malloc bpf_d failed\n");
2d21ac55
A
781 bpf_dtab[minor(dev)] = NULL;
782 lck_mtx_unlock(bpf_mlock);
91447636 783 return ENOMEM;
1c79356b 784 }
3e170ce0 785
91447636 786 /* Mark "in use" and do most initialization. */
3e170ce0 787 bpf_acquire_d(d);
1c79356b
A
788 d->bd_bufsize = bpf_bufsize;
789 d->bd_sig = SIGIO;
9bccf70c 790 d->bd_seesent = 1;
b0d623f7 791 d->bd_oflags = flags;
6d2010ae 792 d->bd_state = BPF_IDLE;
316670eb 793 d->bd_traffic_class = SO_TC_BE;
3e170ce0 794 d->bd_flags |= BPF_DETACHED;
fe8ab488
A
795 if (bpf_wantpktap)
796 d->bd_flags |= BPF_WANT_PKTAP;
797 else
798 d->bd_flags &= ~BPF_WANT_PKTAP;
3e170ce0 799 d->bd_thread_call = thread_call_allocate(bpf_timed_out, d);
6d2010ae
A
800 if (d->bd_thread_call == NULL) {
801 printf("bpfopen: malloc thread call failed\n");
802 bpf_dtab[minor(dev)] = NULL;
3e170ce0 803 bpf_release_d(d);
6d2010ae 804 lck_mtx_unlock(bpf_mlock);
3e170ce0
A
805
806 return (ENOMEM);
6d2010ae 807 }
2d21ac55
A
808#if CONFIG_MACF_NET
809 mac_bpfdesc_label_init(d);
810 mac_bpfdesc_label_associate(kauth_cred_get(), d);
811#endif
91447636 812 bpf_dtab[minor(dev)] = d; /* Mark opened */
2d21ac55 813 lck_mtx_unlock(bpf_mlock);
55e303ae 814
1c79356b
A
815 return (0);
816}
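/*
 * Illustrative userland sketch (not part of this file): the open loop
 * described above.  Walk the minors from 0 while open(2) fails with
 * EBUSY; any other error ends the search.  Assumes <fcntl.h>, <errno.h>
 * and <stdio.h>.
 */
#if 0 /* example only */
static int
bpf_open_loop(void)
{
	char path[16];
	int i, fd;

	for (i = 0; i < 256; i++) {
		snprintf(path, sizeof (path), "/dev/bpf%d", i);
		fd = open(path, O_RDWR);
		if (fd >= 0)
			return (fd);		/* success */
		if (errno != EBUSY)
			return (-1);		/* real failure; give up */
	}
	return (-1);
}
#endif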
817
818/*
819 * Close the descriptor by detaching it from its interface,
820 * deallocating its buffers, and marking it free.
821 */
822/* ARGSUSED */
2d21ac55
A
823int
824bpfclose(dev_t dev, __unused int flags, __unused int fmt,
825 __unused struct proc *p)
1c79356b 826{
2d21ac55
A
827 struct bpf_d *d;
828
829 /* Take BPF lock to ensure no other thread is using the device */
830 lck_mtx_lock(bpf_mlock);
1c79356b 831
55e303ae 832 d = bpf_dtab[minor(dev)];
2d21ac55
A
833 if (d == 0 || d == (void *)1) {
834 lck_mtx_unlock(bpf_mlock);
91447636 835 return (ENXIO);
3e170ce0
A
836 }
837
838 /*
 839 * Other threads may call bpf_detachd() if we drop the bpf_mlock
840 */
841 d->bd_flags |= BPF_CLOSING;
842
843 if (bpf_debug != 0)
844 printf("%s: %llx\n",
845 __func__, (uint64_t)VM_KERNEL_ADDRPERM(d));
846
91447636 847 bpf_dtab[minor(dev)] = (void *)1; /* Mark closing */
55e303ae 848
6d2010ae
A
849 /*
850 * Deal with any in-progress timeouts.
851 */
852 switch (d->bd_state) {
853 case BPF_IDLE:
854 /*
855 * Not waiting for a timeout, and no timeout happened.
856 */
857 break;
858
859 case BPF_WAITING:
860 /*
861 * Waiting for a timeout.
862 * Cancel any timer that has yet to go off,
863 * and mark the state as "closing".
864 * Then drop the lock to allow any timers that
865 * *have* gone off to run to completion, and wait
866 * for them to finish.
867 */
868 if (!bpf_stop_timer(d)) {
869 /*
870 * There was no pending call, so the call must
871 * have been in progress. Wait for the call to
872 * complete; we have to drop the lock while
873 * waiting. to let the in-progrss call complete
874 */
875 d->bd_state = BPF_DRAINING;
876 while (d->bd_state == BPF_DRAINING)
877 msleep((caddr_t)d, bpf_mlock, PRINET,
878 "bpfdraining", NULL);
879 }
880 d->bd_state = BPF_IDLE;
881 break;
882
883 case BPF_TIMED_OUT:
884 /*
885 * Timer went off, and the timeout routine finished.
886 */
887 d->bd_state = BPF_IDLE;
888 break;
889
890 case BPF_DRAINING:
891 /*
892 * Another thread is blocked on a close waiting for
893 * a timeout to finish.
894 * This "shouldn't happen", as the first thread to enter
895 * bpfclose() will set bpf_dtab[minor(dev)] to 1, and
896 * all subsequent threads should see that and fail with
897 * ENXIO.
898 */
899 panic("Two threads blocked in a BPF close");
900 break;
901 }
902
1c79356b 903 if (d->bd_bif)
3e170ce0 904 bpf_detachd(d, 1);
0b4e3aa0 905 selthreadclear(&d->bd_sel);
2d21ac55
A
906#if CONFIG_MACF_NET
907 mac_bpfdesc_label_destroy(d);
908#endif
6d2010ae 909 thread_call_free(d->bd_thread_call);
39236c6e
A
910
911 while (d->bd_hbuf_read)
912 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
913
1c79356b 914 bpf_freed(d);
91447636 915
2d21ac55
A
916 /* Mark free in same context as bpfopen comes to check */
917 bpf_dtab[minor(dev)] = NULL; /* Mark closed */
3e170ce0
A
918
919 bpf_release_d(d);
920
91447636 921 lck_mtx_unlock(bpf_mlock);
3e170ce0 922
1c79356b
A
923 return (0);
924}
925
1c79356b 926
91447636 927#define BPF_SLEEP bpf_sleep
1c79356b 928
91447636
A
929static int
930bpf_sleep(struct bpf_d *d, int pri, const char *wmesg, int timo)
1c79356b 931{
6d2010ae 932 u_int64_t abstime = 0;
1c79356b 933
6d2010ae
A
 934 if (timo)
935 clock_interval_to_deadline(timo, NSEC_PER_SEC / hz, &abstime);
91447636 936
6d2010ae 937 return msleep1((caddr_t)d, bpf_mlock, pri, wmesg, abstime);
1c79356b 938}
1c79356b
A
939
940/*
941 * Rotate the packet buffers in descriptor d. Move the store buffer
942 * into the hold slot, and the free buffer into the store slot.
943 * Zero the length of the new store buffer.
944 */
945#define ROTATE_BUFFERS(d) \
39236c6e
A
946 if (d->bd_hbuf_read) \
947 panic("rotating bpf buffers during read"); \
1c79356b
A
948 (d)->bd_hbuf = (d)->bd_sbuf; \
949 (d)->bd_hlen = (d)->bd_slen; \
3e170ce0 950 (d)->bd_hcnt = (d)->bd_scnt; \
1c79356b
A
951 (d)->bd_sbuf = (d)->bd_fbuf; \
952 (d)->bd_slen = 0; \
3e170ce0 953 (d)->bd_scnt = 0; \
2d21ac55 954 (d)->bd_fbuf = NULL;
1c79356b
A
955/*
956 * bpfread - read next chunk of packets from buffers
957 */
2d21ac55 958int
91447636 959bpfread(dev_t dev, struct uio *uio, int ioflag)
1c79356b 960{
2d21ac55 961 struct bpf_d *d;
39236c6e
A
962 caddr_t hbuf;
963 int timed_out, hbuf_len;
1c79356b 964 int error;
fe8ab488 965 int flags;
2d21ac55
A
966
967 lck_mtx_lock(bpf_mlock);
1c79356b 968
55e303ae 969 d = bpf_dtab[minor(dev)];
3e170ce0 970 if (d == 0 || d == (void *)1 || (d->bd_flags & BPF_CLOSING) != 0) {
2d21ac55 971 lck_mtx_unlock(bpf_mlock);
91447636 972 return (ENXIO);
2d21ac55 973 }
55e303ae 974
3e170ce0
A
975 bpf_acquire_d(d);
976
1c79356b
A
977 /*
978 * Restrict application to use a buffer the same size as
 979 * the kernel buffers.
980 */
b0d623f7 981 if (uio_resid(uio) != d->bd_bufsize) {
3e170ce0 982 bpf_release_d(d);
91447636 983 lck_mtx_unlock(bpf_mlock);
1c79356b
A
984 return (EINVAL);
985 }
6d2010ae
A
986
987 if (d->bd_state == BPF_WAITING)
988 bpf_stop_timer(d);
989
990 timed_out = (d->bd_state == BPF_TIMED_OUT);
991 d->bd_state = BPF_IDLE;
1c79356b 992
39236c6e
A
993 while (d->bd_hbuf_read)
994 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
3e170ce0
A
995
996 if ((d->bd_flags & BPF_CLOSING) != 0) {
997 bpf_release_d(d);
39236c6e
A
998 lck_mtx_unlock(bpf_mlock);
999 return (ENXIO);
1000 }
1c79356b
A
1001 /*
1002 * If the hold buffer is empty, then do a timed sleep, which
1003 * ends when the timeout expires or when enough packets
1004 * have arrived to fill the store buffer.
1005 */
1006 while (d->bd_hbuf == 0) {
6d2010ae
A
1007 if ((d->bd_immediate || timed_out || (ioflag & IO_NDELAY))
1008 && d->bd_slen != 0) {
1c79356b 1009 /*
6d2010ae
A
1010 * We're in immediate mode, or are reading
1011 * in non-blocking mode, or a timer was
1012 * started before the read (e.g., by select()
1013 * or poll()) and has expired and a packet(s)
1014 * either arrived since the previous
1c79356b
A
1015 * read or arrived while we were asleep.
1016 * Rotate the buffers and return what's here.
1017 */
1018 ROTATE_BUFFERS(d);
1019 break;
1020 }
9bccf70c
A
1021
1022 /*
1023 * No data is available, check to see if the bpf device
1024 * is still pointed at a real interface. If not, return
1025 * ENXIO so that the userland process knows to rebind
1026 * it before using it again.
1027 */
1028 if (d->bd_bif == NULL) {
3e170ce0 1029 bpf_release_d(d);
91447636 1030 lck_mtx_unlock(bpf_mlock);
9bccf70c
A
1031 return (ENXIO);
1032 }
b0d623f7 1033 if (ioflag & IO_NDELAY) {
3e170ce0 1034 bpf_release_d(d);
b0d623f7
A
1035 lck_mtx_unlock(bpf_mlock);
1036 return (EWOULDBLOCK);
1037 }
1038 error = BPF_SLEEP(d, PRINET|PCATCH, "bpf",
1039 d->bd_rtout);
2d21ac55
A
1040 /*
1041 * Make sure device is still opened
1042 */
3e170ce0
A
1043 if ((d->bd_flags & BPF_CLOSING) != 0) {
1044 bpf_release_d(d);
2d21ac55
A
1045 lck_mtx_unlock(bpf_mlock);
1046 return (ENXIO);
1047 }
39236c6e
A
1048
1049 while (d->bd_hbuf_read)
1050 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
1051
3e170ce0
A
1052 if ((d->bd_flags & BPF_CLOSING) != 0) {
1053 bpf_release_d(d);
39236c6e
A
1054 lck_mtx_unlock(bpf_mlock);
1055 return (ENXIO);
1056 }
fe8ab488 1057
1c79356b 1058 if (error == EINTR || error == ERESTART) {
fe8ab488
A
1059 if (d->bd_hbuf) {
1060 /*
1061 * Because we msleep, the hold buffer might
1062 * be filled when we wake up. Avoid rotating
1063 * in this case.
1064 */
1065 break;
1066 }
39236c6e
A
1067 if (d->bd_slen) {
1068 /*
1069 * Sometimes we may be interrupted often and
1070 * the sleep above will not timeout.
1071 * Regardless, we should rotate the buffers
1072 * if there's any new data pending and
1073 * return it.
1074 */
1075 ROTATE_BUFFERS(d);
1076 break;
1077 }
3e170ce0 1078 bpf_release_d(d);
91447636 1079 lck_mtx_unlock(bpf_mlock);
1c79356b
A
1080 return (error);
1081 }
1082 if (error == EWOULDBLOCK) {
1083 /*
1084 * On a timeout, return what's in the buffer,
1085 * which may be nothing. If there is something
1086 * in the store buffer, we can rotate the buffers.
1087 */
1088 if (d->bd_hbuf)
1089 /*
1090 * We filled up the buffer in between
1091 * getting the timeout and arriving
1092 * here, so we don't need to rotate.
1093 */
1094 break;
1095
1096 if (d->bd_slen == 0) {
3e170ce0 1097 bpf_release_d(d);
91447636 1098 lck_mtx_unlock(bpf_mlock);
1c79356b
A
1099 return (0);
1100 }
1101 ROTATE_BUFFERS(d);
1102 break;
1103 }
1104 }
1105 /*
1106 * At this point, we know we have something in the hold slot.
1107 */
1c79356b 1108
fe8ab488
A
1109 /*
 1110 * Set the hold buffer read flag so we do not
 1111 * rotate the buffers until the hold buffer
 1112 * read is complete, and to avoid issues resulting
 1113 * from page faults during disk sleep (<rdar://problem/13436396>).
1114 */
1115 d->bd_hbuf_read = 1;
1116 hbuf = d->bd_hbuf;
1117 hbuf_len = d->bd_hlen;
1118 flags = d->bd_flags;
1119 lck_mtx_unlock(bpf_mlock);
1120
39236c6e 1121#ifdef __APPLE__
316670eb
A
1122 /*
1123 * Before we move data to userland, we fill out the extended
1124 * header fields.
1125 */
fe8ab488 1126 if (flags & BPF_EXTENDED_HDR) {
316670eb
A
1127 char *p;
1128
fe8ab488
A
1129 p = hbuf;
1130 while (p < hbuf + hbuf_len) {
316670eb 1131 struct bpf_hdr_ext *ehp;
39236c6e
A
1132 uint32_t flowid;
1133 struct so_procinfo soprocinfo;
1134 int found = 0;
316670eb
A
1135
1136 ehp = (struct bpf_hdr_ext *)(void *)p;
39236c6e
A
1137 if ((flowid = ehp->bh_flowid)) {
1138 if (ehp->bh_proto == IPPROTO_TCP)
1139 found = inp_findinpcb_procinfo(&tcbinfo,
1140 flowid, &soprocinfo);
1141 else if (ehp->bh_proto == IPPROTO_UDP)
1142 found = inp_findinpcb_procinfo(&udbinfo,
1143 flowid, &soprocinfo);
fe8ab488 1144 if (found == 1) {
39236c6e
A
1145 ehp->bh_pid = soprocinfo.spi_pid;
1146 proc_name(ehp->bh_pid, ehp->bh_comm, MAXCOMLEN);
316670eb 1147 }
39236c6e 1148 ehp->bh_flowid = 0;
316670eb 1149 }
fe8ab488
A
1150 if (flags & BPF_FINALIZE_PKTAP) {
1151 struct pktap_header *pktaphdr;
1152
1153 pktaphdr = (struct pktap_header *)(void *)
1154 (p + BPF_WORDALIGN(ehp->bh_hdrlen));
1155
1156 if (pktaphdr->pth_flags & PTH_FLAG_DELAY_PKTAP)
1157 pktap_finalize_proc_info(pktaphdr);
1158
1159 if (pktaphdr->pth_flags & PTH_FLAG_TSTAMP) {
1160 ehp->bh_tstamp.tv_sec =
1161 pktaphdr->pth_tstamp.tv_sec;
1162 ehp->bh_tstamp.tv_usec =
1163 pktaphdr->pth_tstamp.tv_usec;
1164 }
1165 }
316670eb
A
1166 p += BPF_WORDALIGN(ehp->bh_hdrlen + ehp->bh_caplen);
1167 }
fe8ab488
A
1168 } else if (flags & BPF_FINALIZE_PKTAP) {
1169 char *p;
1170
1171 p = hbuf;
1172 while (p < hbuf + hbuf_len) {
1173 struct bpf_hdr *hp;
1174 struct pktap_header *pktaphdr;
1175
1176 hp = (struct bpf_hdr *)(void *)p;
1177 pktaphdr = (struct pktap_header *)(void *)
1178 (p + BPF_WORDALIGN(hp->bh_hdrlen));
1179
1180 if (pktaphdr->pth_flags & PTH_FLAG_DELAY_PKTAP)
1181 pktap_finalize_proc_info(pktaphdr);
1182
1183 if (pktaphdr->pth_flags & PTH_FLAG_TSTAMP) {
1184 hp->bh_tstamp.tv_sec =
1185 pktaphdr->pth_tstamp.tv_sec;
1186 hp->bh_tstamp.tv_usec =
1187 pktaphdr->pth_tstamp.tv_usec;
1188 }
1189
1190 p += BPF_WORDALIGN(hp->bh_hdrlen + hp->bh_caplen);
1191 }
316670eb 1192 }
39236c6e 1193#endif
39236c6e 1194
1c79356b
A
1195 /*
1196 * Move data from hold buffer into user space.
1197 * We know the entire buffer is transferred since
1198 * we checked above that the read buffer is bpf_bufsize bytes.
1199 */
39236c6e
A
1200 error = UIOMOVE(hbuf, hbuf_len, UIO_READ, uio);
1201
1202 lck_mtx_lock(bpf_mlock);
1203 /*
 1204 * Make sure the device is still open
1205 */
3e170ce0
A
1206 if ((d->bd_flags & BPF_CLOSING) != 0) {
1207 bpf_release_d(d);
39236c6e
A
1208 lck_mtx_unlock(bpf_mlock);
1209 return (ENXIO);
1210 }
1211
1212 d->bd_hbuf_read = 0;
1c79356b 1213 d->bd_fbuf = d->bd_hbuf;
2d21ac55 1214 d->bd_hbuf = NULL;
1c79356b 1215 d->bd_hlen = 0;
3e170ce0 1216 d->bd_hcnt = 0;
39236c6e 1217 wakeup((caddr_t)d);
3e170ce0
A
1218
1219 bpf_release_d(d);
91447636 1220 lck_mtx_unlock(bpf_mlock);
1c79356b 1221 return (error);
39236c6e 1222
1c79356b
A
1223}
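/*
 * Illustrative userland sketch (not part of this file): consuming a
 * buffer returned by read(2) on the descriptor.  Records are laid out
 * back to back, each a struct bpf_hdr followed by the captured bytes,
 * advanced with BPF_WORDALIGN() exactly as the finalization loops above
 * walk them.  Fragment assuming buf holds buflen bytes from a successful
 * read; the read buffer must be exactly the size reported by BIOCGBLEN,
 * since bpfread() rejects any other size with EINVAL.  handle_packet()
 * is a hypothetical callback.
 */
#if 0 /* example only */
	char *p = buf;
	while (p < buf + buflen) {
		struct bpf_hdr *hp = (struct bpf_hdr *)(void *)p;

		handle_packet((u_char *)p + hp->bh_hdrlen, hp->bh_caplen);
		p += BPF_WORDALIGN(hp->bh_hdrlen + hp->bh_caplen);
	}
#endif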
1224
1225
1226/*
1227 * If there are processes sleeping on this descriptor, wake them up.
1228 */
91447636
A
1229static void
1230bpf_wakeup(struct bpf_d *d)
1c79356b 1231{
6d2010ae
A
1232 if (d->bd_state == BPF_WAITING) {
1233 bpf_stop_timer(d);
1234 d->bd_state = BPF_IDLE;
1235 }
1c79356b
A
1236 wakeup((caddr_t)d);
1237 if (d->bd_async && d->bd_sig && d->bd_sigio)
2d21ac55 1238 pgsigio(d->bd_sigio, d->bd_sig);
1c79356b 1239
1c79356b 1240 selwakeup(&d->bd_sel);
3e170ce0
A
1241 if ((d->bd_flags & BPF_KNOTE))
1242 KNOTE(&d->bd_sel.si_note, 1);
1c79356b
A
1243}
1244
6d2010ae
A
1245
1246static void
1247bpf_timed_out(void *arg, __unused void *dummy)
1248{
1249 struct bpf_d *d = (struct bpf_d *)arg;
1250
1251 lck_mtx_lock(bpf_mlock);
1252 if (d->bd_state == BPF_WAITING) {
1253 /*
1254 * There's a select or kqueue waiting for this; if there's
1255 * now stuff to read, wake it up.
1256 */
1257 d->bd_state = BPF_TIMED_OUT;
1258 if (d->bd_slen != 0)
1259 bpf_wakeup(d);
1260 } else if (d->bd_state == BPF_DRAINING) {
1261 /*
1262 * A close is waiting for this to finish.
1263 * Mark it as finished, and wake the close up.
1264 */
1265 d->bd_state = BPF_IDLE;
1266 bpf_wakeup(d);
1267 }
1268 lck_mtx_unlock(bpf_mlock);
1269}
1270
1271
1272
1273
1274
55e303ae
A
1275/* keep in sync with bpf_movein above: */
1276#define MAX_DATALINK_HDR_LEN (sizeof(struct firewire_header))
1277
2d21ac55 1278int
91447636 1279bpfwrite(dev_t dev, struct uio *uio, __unused int ioflag)
1c79356b 1280{
2d21ac55 1281 struct bpf_d *d;
1c79356b 1282 struct ifnet *ifp;
2d21ac55 1283 struct mbuf *m = NULL;
91447636 1284 int error;
55e303ae 1285 char dst_buf[SOCKADDR_HDR_LEN + MAX_DATALINK_HDR_LEN];
b0d623f7 1286 int datlen = 0;
39236c6e
A
1287 int bif_dlt;
1288 int bd_hdrcmplt;
1c79356b 1289
2d21ac55
A
1290 lck_mtx_lock(bpf_mlock);
1291
55e303ae 1292 d = bpf_dtab[minor(dev)];
3e170ce0 1293 if (d == 0 || d == (void *)1 || (d->bd_flags & BPF_CLOSING) != 0) {
2d21ac55 1294 lck_mtx_unlock(bpf_mlock);
91447636 1295 return (ENXIO);
2d21ac55 1296 }
3e170ce0
A
1297
1298 bpf_acquire_d(d);
1299
1c79356b 1300 if (d->bd_bif == 0) {
3e170ce0 1301 bpf_release_d(d);
91447636 1302 lck_mtx_unlock(bpf_mlock);
2d21ac55 1303 return (ENXIO);
1c79356b
A
1304 }
1305
1306 ifp = d->bd_bif->bif_ifp;
1307
6d2010ae 1308 if ((ifp->if_flags & IFF_UP) == 0) {
3e170ce0 1309 bpf_release_d(d);
6d2010ae
A
1310 lck_mtx_unlock(bpf_mlock);
1311 return (ENETDOWN);
1312 }
b0d623f7 1313 if (uio_resid(uio) == 0) {
3e170ce0 1314 bpf_release_d(d);
91447636 1315 lck_mtx_unlock(bpf_mlock);
2d21ac55 1316 return (0);
1c79356b 1317 }
55e303ae 1318 ((struct sockaddr *)dst_buf)->sa_len = sizeof(dst_buf);
6d2010ae 1319
316670eb
A
1320 /*
1321 * fix for PR-6849527
 1322 * getting variables onto stack before dropping lock for bpf_movein()
1323 */
1324 bif_dlt = (int)d->bd_bif->bif_dlt;
1325 bd_hdrcmplt = d->bd_hdrcmplt;
1326
6d2010ae 1327 /* bpf_movein allocating mbufs; drop lock */
316670eb 1328 lck_mtx_unlock(bpf_mlock);
6d2010ae
A
1329
1330 error = bpf_movein(uio, bif_dlt, &m,
316670eb
A
1331 bd_hdrcmplt ? NULL : (struct sockaddr *)dst_buf,
1332 &datlen);
1333
3e170ce0
A
1334 /* take the lock again */
1335 lck_mtx_lock(bpf_mlock);
316670eb 1336 if (error) {
3e170ce0
A
1337 bpf_release_d(d);
1338 lck_mtx_unlock(bpf_mlock);
2d21ac55 1339 return (error);
1c79356b
A
1340 }
1341
3e170ce0
A
1342 /* verify the device is still open */
1343 if ((d->bd_flags & BPF_CLOSING) != 0) {
1344 bpf_release_d(d);
91447636 1345 lck_mtx_unlock(bpf_mlock);
2d21ac55 1346 m_freem(m);
6d2010ae 1347 return (ENXIO);
2d21ac55 1348 }
6d2010ae
A
1349
1350 if (d->bd_bif == NULL) {
3e170ce0 1351 bpf_release_d(d);
6d2010ae
A
1352 lck_mtx_unlock(bpf_mlock);
1353 m_free(m);
1354 return (ENXIO);
1355 }
1356
1357 if ((unsigned)datlen > ifp->if_mtu) {
3e170ce0 1358 bpf_release_d(d);
2d21ac55
A
1359 lck_mtx_unlock(bpf_mlock);
1360 m_freem(m);
6d2010ae 1361 return (EMSGSIZE);
1c79356b
A
1362 }
1363
6d2010ae 1364
2d21ac55
A
1365#if CONFIG_MACF_NET
1366 mac_mbuf_label_associate_bpfdesc(d, m);
1367#endif
316670eb
A
1368
1369 bpf_set_packet_service_class(m, d->bd_traffic_class);
1370
91447636
A
1371 lck_mtx_unlock(bpf_mlock);
1372
3e170ce0
A
1373 /*
1374 * The driver frees the mbuf.
1375 */
55e303ae 1376 if (d->bd_hdrcmplt) {
2d21ac55
A
1377 if (d->bd_bif->bif_send)
1378 error = d->bd_bif->bif_send(ifp, d->bd_bif->bif_dlt, m);
1379 else
316670eb
A
1380 error = dlil_output(ifp, 0, m, NULL, NULL, 1, NULL);
1381 } else {
1382 error = dlil_output(ifp, PF_INET, m, NULL,
1383 (struct sockaddr *)dst_buf, 0, NULL);
91447636 1384 }
6d2010ae 1385
3e170ce0
A
1386 lck_mtx_lock(bpf_mlock);
1387 bpf_release_d(d);
1388 lck_mtx_unlock(bpf_mlock);
1389
1c79356b
A
1390 return (error);
1391}
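/*
 * Illustrative userland sketch (not part of this file): injecting a
 * frame through bpfwrite().  With BIOCSHDRCMPLT set, the caller supplies
 * the complete link-layer header and the mbuf is handed straight to the
 * driver.  Fragment assuming bpf_fd is bound with BIOCSETIF and that
 * frame/framelen hold a frame no longer than the interface MTU
 * (bpfwrite() returns EMSGSIZE otherwise).
 */
#if 0 /* example only */
	u_int hdrcmplt = 1;
	if (ioctl(bpf_fd, BIOCSHDRCMPLT, &hdrcmplt) == -1)
		err(1, "BIOCSHDRCMPLT");
	if (write(bpf_fd, frame, framelen) != (ssize_t)framelen)
		err(1, "write");
#endif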
1392
1393/*
1394 * Reset a descriptor by flushing its packet buffer and clearing the
2d21ac55 1395 * receive and drop counts.
1c79356b
A
1396 */
1397static void
91447636 1398reset_d(struct bpf_d *d)
1c79356b 1399{
39236c6e
A
1400 if (d->bd_hbuf_read)
1401 panic("resetting buffers during read");
1402
1c79356b
A
1403 if (d->bd_hbuf) {
1404 /* Free the hold buffer. */
1405 d->bd_fbuf = d->bd_hbuf;
2d21ac55 1406 d->bd_hbuf = NULL;
1c79356b
A
1407 }
1408 d->bd_slen = 0;
1409 d->bd_hlen = 0;
3e170ce0
A
1410 d->bd_scnt = 0;
1411 d->bd_hcnt = 0;
1c79356b
A
1412 d->bd_rcount = 0;
1413 d->bd_dcount = 0;
1414}
1415
1416/*
1417 * FIONREAD Check for read packet available.
1418 * SIOCGIFADDR Get interface address - convenient hook to driver.
1419 * BIOCGBLEN Get buffer len [for read()].
1420 * BIOCSETF Set ethernet read filter.
1421 * BIOCFLUSH Flush read packet buffer.
1422 * BIOCPROMISC Put interface into promiscuous mode.
1423 * BIOCGDLT Get link layer type.
1424 * BIOCGETIF Get interface name.
1425 * BIOCSETIF Set interface.
1426 * BIOCSRTIMEOUT Set read timeout.
1427 * BIOCGRTIMEOUT Get read timeout.
1428 * BIOCGSTATS Get packet stats.
1429 * BIOCIMMEDIATE Set immediate mode.
1430 * BIOCVERSION Get filter language version.
9bccf70c
A
1431 * BIOCGHDRCMPLT Get "header already complete" flag
1432 * BIOCSHDRCMPLT Set "header already complete" flag
1433 * BIOCGSEESENT Get "see packets sent" flag
1434 * BIOCSSEESENT Set "see packets sent" flag
316670eb
A
1435 * BIOCSETTC Set traffic class.
1436 * BIOCGETTC Get traffic class.
1437 * BIOCSEXTHDR Set "extended header" flag
3e170ce0
A
1438 * BIOCSHEADDROP Drop head of the buffer if user is not reading
1439 * BIOCGHEADDROP Get "head-drop" flag
1c79356b
A
1440 */
1441/* ARGSUSED */
9bccf70c 1442int
2d21ac55 1443bpfioctl(dev_t dev, u_long cmd, caddr_t addr, __unused int flags,
b0d623f7 1444 struct proc *p)
1c79356b 1445{
2d21ac55 1446 struct bpf_d *d;
fe8ab488
A
1447 int error = 0;
1448 u_int int_arg;
316670eb 1449 struct ifreq ifr;
2d21ac55
A
1450
1451 lck_mtx_lock(bpf_mlock);
1c79356b 1452
55e303ae 1453 d = bpf_dtab[minor(dev)];
3e170ce0 1454 if (d == 0 || d == (void *)1 || (d->bd_flags & BPF_CLOSING) != 0) {
2d21ac55 1455 lck_mtx_unlock(bpf_mlock);
91447636 1456 return (ENXIO);
2d21ac55 1457 }
1c79356b 1458
3e170ce0
A
1459 bpf_acquire_d(d);
1460
6d2010ae
A
1461 if (d->bd_state == BPF_WAITING)
1462 bpf_stop_timer(d);
1463 d->bd_state = BPF_IDLE;
1464
1c79356b
A
1465 switch (cmd) {
1466
1467 default:
1468 error = EINVAL;
1469 break;
1470
1471 /*
1472 * Check for read packet available.
1473 */
316670eb 1474 case FIONREAD: /* int */
1c79356b
A
1475 {
1476 int n;
1477
1c79356b 1478 n = d->bd_slen;
39236c6e 1479 if (d->bd_hbuf && d->bd_hbuf_read == 0)
1c79356b 1480 n += d->bd_hlen;
1c79356b 1481
316670eb 1482 bcopy(&n, addr, sizeof (n));
1c79356b
A
1483 break;
1484 }
1485
316670eb 1486 case SIOCGIFADDR: /* struct ifreq */
1c79356b
A
1487 {
1488 struct ifnet *ifp;
1489
1490 if (d->bd_bif == 0)
1491 error = EINVAL;
1492 else {
1493 ifp = d->bd_bif->bif_ifp;
2d21ac55 1494 error = ifnet_ioctl(ifp, 0, cmd, addr);
1c79356b
A
1495 }
1496 break;
1497 }
1498
1499 /*
1500 * Get buffer len [for read()].
1501 */
316670eb
A
1502 case BIOCGBLEN: /* u_int */
1503 bcopy(&d->bd_bufsize, addr, sizeof (u_int));
1c79356b
A
1504 break;
1505
1506 /*
1507 * Set buffer length.
1508 */
316670eb 1509 case BIOCSBLEN: /* u_int */
1c79356b
A
1510 if (d->bd_bif != 0)
1511 error = EINVAL;
1512 else {
316670eb
A
1513 u_int size;
1514
1515 bcopy(addr, &size, sizeof (size));
1c79356b 1516
813fb2f6
A
1517 /*
1518 * Allow larger buffer in head drop mode with the
 1519 * assumption that the capture is in standby mode to
1520 * keep a cache of recent traffic
1521 */
1522 if (d->bd_headdrop != 0 && size > 2 * bpf_maxbufsize)
1523 size = 2 * bpf_maxbufsize;
1524 else if (size > bpf_maxbufsize)
316670eb 1525 size = bpf_maxbufsize;
1c79356b 1526 else if (size < BPF_MINBUFSIZE)
316670eb
A
1527 size = BPF_MINBUFSIZE;
1528 bcopy(&size, addr, sizeof (size));
1c79356b
A
1529 d->bd_bufsize = size;
1530 }
1c79356b
A
1531 break;
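/*
 * Illustrative userland sketch (not part of this file): BIOCSBLEN must
 * be issued before BIOCSETIF (bd_bif must still be unset), and the
 * possibly clamped size is copied back out through the argument.
 */
#if 0 /* example only */
	u_int blen = 1 << 20;			/* request 1 MiB */
	if (ioctl(bpf_fd, BIOCSBLEN, &blen) == -1)
		err(1, "BIOCSBLEN");
	/* blen now holds the size actually granted */
#endif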
1532
1533 /*
1534 * Set link layer read filter.
1535 */
39236c6e
A
1536 case BIOCSETF32:
1537 case BIOCSETFNR32: { /* struct bpf_program32 */
316670eb
A
1538 struct bpf_program32 prg32;
1539
1540 bcopy(addr, &prg32, sizeof (prg32));
1541 error = bpf_setf(d, prg32.bf_len,
3e170ce0 1542 CAST_USER_ADDR_T(prg32.bf_insns), cmd);
1c79356b 1543 break;
2d21ac55 1544 }
b0d623f7 1545
39236c6e
A
1546 case BIOCSETF64:
1547 case BIOCSETFNR64: { /* struct bpf_program64 */
316670eb
A
1548 struct bpf_program64 prg64;
1549
1550 bcopy(addr, &prg64, sizeof (prg64));
3e170ce0 1551 error = bpf_setf(d, prg64.bf_len, prg64.bf_insns, cmd);
b0d623f7
A
1552 break;
1553 }
1554
1c79356b
A
1555 /*
1556 * Flush read packet buffer.
1557 */
1558 case BIOCFLUSH:
39236c6e
A
1559 while (d->bd_hbuf_read) {
1560 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
1561 }
3e170ce0
A
1562 if ((d->bd_flags & BPF_CLOSING) != 0) {
1563 error = ENXIO;
1564 break;
1565 }
1c79356b 1566 reset_d(d);
1c79356b
A
1567 break;
1568
1569 /*
1570 * Put interface into promiscuous mode.
1571 */
1572 case BIOCPROMISC:
1573 if (d->bd_bif == 0) {
1574 /*
1575 * No interface attached yet.
1576 */
1577 error = EINVAL;
1578 break;
1579 }
1c79356b 1580 if (d->bd_promisc == 0) {
2d21ac55 1581 lck_mtx_unlock(bpf_mlock);
91447636 1582 error = ifnet_set_promiscuous(d->bd_bif->bif_ifp, 1);
2d21ac55 1583 lck_mtx_lock(bpf_mlock);
1c79356b
A
1584 if (error == 0)
1585 d->bd_promisc = 1;
1586 }
1c79356b
A
1587 break;
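/*
 * Illustrative userland sketch (not part of this file): BIOCPROMISC
 * takes no argument and requires the descriptor to be bound with
 * BIOCSETIF first, as the EINVAL check above enforces.
 */
#if 0 /* example only */
	if (ioctl(bpf_fd, BIOCPROMISC, NULL) == -1)
		err(1, "BIOCPROMISC");
#endif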
1588
1589 /*
1590 * Get device parameters.
1591 */
316670eb 1592 case BIOCGDLT: /* u_int */
1c79356b
A
1593 if (d->bd_bif == 0)
1594 error = EINVAL;
1595 else
316670eb 1596 bcopy(&d->bd_bif->bif_dlt, addr, sizeof (u_int));
1c79356b
A
1597 break;
1598
2d21ac55
A
1599 /*
1600 * Get a list of supported data link types.
1601 */
316670eb 1602 case BIOCGDLTLIST: /* struct bpf_dltlist */
b0d623f7
A
1603 if (d->bd_bif == NULL) {
1604 error = EINVAL;
1605 } else {
316670eb 1606 error = bpf_getdltlist(d, addr, p);
b0d623f7
A
1607 }
1608 break;
2d21ac55
A
1609
1610 /*
1611 * Set data link type.
1612 */
316670eb
A
1613 case BIOCSDLT: /* u_int */
1614 if (d->bd_bif == NULL) {
1615 error = EINVAL;
1616 } else {
1617 u_int dlt;
1618
1619 bcopy(addr, &dlt, sizeof (dlt));
3e170ce0 1620 error = bpf_setdlt(d, dlt);
316670eb
A
1621 }
1622 break;
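/*
 * Illustrative userland sketch (not part of this file): switching the
 * bound descriptor to another data link type, e.g. DLT_PKTAP after a
 * successful BIOCSWANTPKTAP.
 */
#if 0 /* example only */
	u_int dlt = DLT_PKTAP;
	if (ioctl(bpf_fd, BIOCSDLT, &dlt) == -1)
		err(1, "BIOCSDLT");
#endif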
2d21ac55 1623
1c79356b 1624 /*
9bccf70c 1625 * Get interface name.
1c79356b 1626 */
316670eb 1627 case BIOCGETIF: /* struct ifreq */
1c79356b
A
1628 if (d->bd_bif == 0)
1629 error = EINVAL;
9bccf70c
A
1630 else {
1631 struct ifnet *const ifp = d->bd_bif->bif_ifp;
9bccf70c 1632
316670eb 1633 snprintf(((struct ifreq *)(void *)addr)->ifr_name,
39236c6e 1634 sizeof (ifr.ifr_name), "%s", if_name(ifp));
9bccf70c 1635 }
1c79356b
A
1636 break;
1637
1638 /*
1639 * Set interface.
1640 */
316670eb 1641 case BIOCSETIF: { /* struct ifreq */
2d21ac55 1642 ifnet_t ifp;
316670eb
A
1643
1644 bcopy(addr, &ifr, sizeof (ifr));
1645 ifr.ifr_name[IFNAMSIZ - 1] = '\0';
1646 ifp = ifunit(ifr.ifr_name);
2d21ac55
A
1647 if (ifp == NULL)
1648 error = ENXIO;
1649 else
3e170ce0 1650 error = bpf_setif(d, ifp, 0);
1c79356b 1651 break;
2d21ac55 1652 }
1c79356b
A
1653
1654 /*
1655 * Set read timeout.
1656 */
39236c6e 1657 case BIOCSRTIMEOUT32: { /* struct user32_timeval */
316670eb
A
1658 struct user32_timeval _tv;
1659 struct timeval tv;
b0d623f7 1660
316670eb
A
1661 bcopy(addr, &_tv, sizeof (_tv));
1662 tv.tv_sec = _tv.tv_sec;
1663 tv.tv_usec = _tv.tv_usec;
1664
1665 /*
1666 * Subtract 1 tick from tvtohz() since this isn't
1667 * a one-shot timer.
1668 */
1669 if ((error = itimerfix(&tv)) == 0)
1670 d->bd_rtout = tvtohz(&tv) - 1;
1671 break;
1672 }
1673
39236c6e 1674 case BIOCSRTIMEOUT64: { /* struct user64_timeval */
316670eb
A
1675 struct user64_timeval _tv;
1676 struct timeval tv;
1677
1678 bcopy(addr, &_tv, sizeof (_tv));
1679 tv.tv_sec = _tv.tv_sec;
1680 tv.tv_usec = _tv.tv_usec;
1681
1682 /*
1683 * Subtract 1 tick from tvtohz() since this isn't
1684 * a one-shot timer.
1685 */
1686 if ((error = itimerfix(&tv)) == 0)
1687 d->bd_rtout = tvtohz(&tv) - 1;
1688 break;
1689 }
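/*
 * Illustrative userland sketch (not part of this file): bounding read(2)
 * latency with a 100 ms read timeout; the struct timeval matches the
 * user32/user64 handling above and must satisfy itimerfix().
 */
#if 0 /* example only */
	struct timeval tv = { .tv_sec = 0, .tv_usec = 100 * 1000 };
	if (ioctl(bpf_fd, BIOCSRTIMEOUT, &tv) == -1)
		err(1, "BIOCSRTIMEOUT");
#endif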
1c79356b 1690
39236c6e 1691 /*
1c79356b
A
1692 * Get read timeout.
1693 */
316670eb
A
1694 case BIOCGRTIMEOUT32: { /* struct user32_timeval */
1695 struct user32_timeval tv;
1c79356b 1696
316670eb
A
1697 bzero(&tv, sizeof (tv));
1698 tv.tv_sec = d->bd_rtout / hz;
1699 tv.tv_usec = (d->bd_rtout % hz) * tick;
1700 bcopy(&tv, addr, sizeof (tv));
1701 break;
1702 }
6d2010ae 1703
316670eb
A
1704 case BIOCGRTIMEOUT64: { /* struct user64_timeval */
1705 struct user64_timeval tv;
6d2010ae 1706
316670eb
A
1707 bzero(&tv, sizeof (tv));
1708 tv.tv_sec = d->bd_rtout / hz;
1709 tv.tv_usec = (d->bd_rtout % hz) * tick;
1710 bcopy(&tv, addr, sizeof (tv));
1711 break;
1712 }
1c79356b
A
1713
1714 /*
1715 * Get packet stats.
1716 */
316670eb
A
1717 case BIOCGSTATS: { /* struct bpf_stat */
1718 struct bpf_stat bs;
1c79356b 1719
316670eb
A
1720 bzero(&bs, sizeof (bs));
1721 bs.bs_recv = d->bd_rcount;
1722 bs.bs_drop = d->bd_dcount;
1723 bcopy(&bs, addr, sizeof (bs));
1724 break;
1725 }
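/*
 * Illustrative userland sketch (not part of this file): bs_recv counts
 * packets seen by the descriptor, bs_drop those lost because both
 * buffers were full.
 */
#if 0 /* example only */
	struct bpf_stat bs;
	if (ioctl(bpf_fd, BIOCGSTATS, &bs) == 0)
		fprintf(stderr, "recv %u drop %u\n", bs.bs_recv, bs.bs_drop);
#endif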
1c79356b
A
1726
1727 /*
1728 * Set immediate mode.
1729 */
316670eb 1730 case BIOCIMMEDIATE: /* u_int */
3e170ce0 1731 d->bd_immediate = *(u_int *)(void *)addr;
1c79356b
A
1732 break;
1733
316670eb
A
1734 case BIOCVERSION: { /* struct bpf_version */
1735 struct bpf_version bv;
1c79356b 1736
316670eb
A
1737 bzero(&bv, sizeof (bv));
1738 bv.bv_major = BPF_MAJOR_VERSION;
1739 bv.bv_minor = BPF_MINOR_VERSION;
1740 bcopy(&bv, addr, sizeof (bv));
1741 break;
1742 }
1c79356b 1743
9bccf70c
A
1744 /*
1745 * Get "header already complete" flag
1746 */
316670eb
A
1747 case BIOCGHDRCMPLT: /* u_int */
1748 bcopy(&d->bd_hdrcmplt, addr, sizeof (u_int));
9bccf70c
A
1749 break;
1750
1751 /*
1752 * Set "header already complete" flag
1753 */
316670eb
A
	case BIOCSHDRCMPLT:		/* u_int */
		bcopy(addr, &int_arg, sizeof (int_arg));
		d->bd_hdrcmplt = int_arg ? 1 : 0;
		break;

	/*
	 * Get "see sent packets" flag
	 */
	case BIOCGSEESENT:		/* u_int */
		bcopy(&d->bd_seesent, addr, sizeof (u_int));
		break;

	/*
	 * Set "see sent packets" flag
	 */
	case BIOCSSEESENT:		/* u_int */
		bcopy(addr, &d->bd_seesent, sizeof (u_int));
		break;

	/*
	 * Set traffic service class
	 */
	case BIOCSETTC: {		/* int */
		int tc;

		bcopy(addr, &tc, sizeof (int));
		error = bpf_set_traffic_class(d, tc);
		break;
	}

	/*
	 * Get traffic service class
	 */
	case BIOCGETTC:			/* int */
		bcopy(&d->bd_traffic_class, addr, sizeof (int));
		break;

	case FIONBIO:			/* Non-blocking I/O; int */
		break;

	case FIOASYNC:			/* Send signal on receive packets; int */
		bcopy(addr, &d->bd_async, sizeof (int));
		break;
#ifndef __APPLE__
	case FIOSETOWN:
		error = fsetown(*(int *)addr, &d->bd_sigio);
		break;

	case FIOGETOWN:
		*(int *)addr = fgetown(d->bd_sigio);
		break;

	/* This is deprecated, FIOSETOWN should be used instead. */
	case TIOCSPGRP:
		error = fsetown(-(*(int *)addr), &d->bd_sigio);
		break;

	/* This is deprecated, FIOGETOWN should be used instead. */
	case TIOCGPGRP:
		*(int *)addr = -fgetown(d->bd_sigio);
		break;
#endif
	case BIOCSRSIG: {		/* Set receive signal; u_int */
		u_int sig;

		bcopy(addr, &sig, sizeof (u_int));

		if (sig >= NSIG)
			error = EINVAL;
		else
			d->bd_sig = sig;
		break;
	}
	case BIOCGRSIG:			/* u_int */
		bcopy(&d->bd_sig, addr, sizeof (u_int));
		break;
#ifdef __APPLE__
	case BIOCSEXTHDR:		/* u_int */
		bcopy(addr, &int_arg, sizeof (int_arg));
		if (int_arg)
			d->bd_flags |= BPF_EXTENDED_HDR;
		else
			d->bd_flags &= ~BPF_EXTENDED_HDR;
		break;

	case BIOCGIFATTACHCOUNT: {	/* struct ifreq */
		ifnet_t ifp;
		struct bpf_if *bp;

		bcopy(addr, &ifr, sizeof (ifr));
		ifr.ifr_name[IFNAMSIZ - 1] = '\0';
		ifp = ifunit(ifr.ifr_name);
		if (ifp == NULL) {
			error = ENXIO;
			break;
		}
		ifr.ifr_intval = 0;
		for (bp = bpf_iflist; bp != 0; bp = bp->bif_next) {
			struct bpf_d *bpf_d;

			if (bp->bif_ifp == NULL || bp->bif_ifp != ifp)
				continue;
			for (bpf_d = bp->bif_dlist; bpf_d; bpf_d = bpf_d->bd_next) {
				ifr.ifr_intval += 1;
			}
		}
		bcopy(&ifr, addr, sizeof (ifr));
		break;
	}
	case BIOCGWANTPKTAP:		/* u_int */
		int_arg = d->bd_flags & BPF_WANT_PKTAP ? 1 : 0;
		bcopy(&int_arg, addr, sizeof (int_arg));
		break;

	case BIOCSWANTPKTAP:		/* u_int */
		bcopy(addr, &int_arg, sizeof (int_arg));
		if (int_arg)
			d->bd_flags |= BPF_WANT_PKTAP;
		else
			d->bd_flags &= ~BPF_WANT_PKTAP;
		break;
#endif

	case BIOCSHEADDROP:
		bcopy(addr, &int_arg, sizeof (int_arg));
		d->bd_headdrop = int_arg ? 1 : 0;
		break;

	case BIOCGHEADDROP:
		bcopy(&d->bd_headdrop, addr, sizeof (int));
		break;
	}

	bpf_release_d(d);
	lck_mtx_unlock(bpf_mlock);

	return (error);
}

/*
 * Set d's packet filter program.  If this file already has a filter,
 * free it and replace it.  Returns EINVAL for bogus requests.
 */
static int
bpf_setf(struct bpf_d *d, u_int bf_len, user_addr_t bf_insns,
    u_long cmd)
{
	struct bpf_insn *fcode, *old;
	u_int flen, size;

	while (d->bd_hbuf_read)
		msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);

	if ((d->bd_flags & BPF_CLOSING) != 0)
		return (ENXIO);

	old = d->bd_filter;
	if (bf_insns == USER_ADDR_NULL) {
		if (bf_len != 0)
			return (EINVAL);
		d->bd_filter = NULL;
		reset_d(d);
		if (old != 0)
			FREE((caddr_t)old, M_DEVBUF);
		return (0);
	}
	flen = bf_len;
	if (flen > BPF_MAXINSNS)
		return (EINVAL);

	size = flen * sizeof(struct bpf_insn);
	fcode = (struct bpf_insn *) _MALLOC(size, M_DEVBUF, M_WAIT);
#ifdef __APPLE__
	if (fcode == NULL)
		return (ENOBUFS);
#endif
	if (copyin(bf_insns, (caddr_t)fcode, size) == 0 &&
	    bpf_validate(fcode, (int)flen)) {
		d->bd_filter = fcode;

		if (cmd == BIOCSETF32 || cmd == BIOCSETF64)
			reset_d(d);

		if (old != 0)
			FREE((caddr_t)old, M_DEVBUF);

		return (0);
	}
	FREE((caddr_t)fcode, M_DEVBUF);
	return (EINVAL);
}
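
/*
 * Usage sketch (illustrative only, not compiled as part of this file):
 * user space reaches bpf_setf() through the BIOCSETF ioctl.  Assuming
 * an already-open /dev/bpfN descriptor "fd", an accept-everything
 * filter would be installed roughly like this:
 *
 *	struct bpf_insn insns[] = {
 *		BPF_STMT(BPF_RET + BPF_K, (u_int)-1),	// accept whole packet
 *	};
 *	struct bpf_program prog = { 1, insns };
 *	if (ioctl(fd, BIOCSETF, &prog) == -1)
 *		err(1, "BIOCSETF");
 */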

/*
 * Detach a file from its current interface (if attached at all) and
 * attach to the interface 'theywant', optionally restricted to packets
 * of data link type 'dlt'.  Return an errno or 0.
 */
static int
bpf_setif(struct bpf_d *d, ifnet_t theywant, u_int32_t dlt)
{
	struct bpf_if *bp;
	int error;

	while (d->bd_hbuf_read)
		msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);

	if ((d->bd_flags & BPF_CLOSING) != 0)
		return (ENXIO);

	/*
	 * Look through attached interfaces for the named one.
	 */
	for (bp = bpf_iflist; bp != 0; bp = bp->bif_next) {
		struct ifnet *ifp = bp->bif_ifp;

		if (ifp == 0 || ifp != theywant ||
		    (dlt != 0 && dlt != bp->bif_dlt))
			continue;
		/*
		 * When no DLT is requested, pick DLT_PKTAP by default
		 * only if the process has declared (BIOCSWANTPKTAP)
		 * that it knows how to handle it
		 */
		if (dlt == 0 && bp->bif_dlt == DLT_PKTAP &&
		    !(d->bd_flags & BPF_WANT_PKTAP))
			continue;
		/*
		 * We found the requested interface.
		 * Allocate the packet buffers.
		 */
		error = bpf_allocbufs(d);
		if (error != 0)
			return (error);
		/*
		 * Detach if attached to something else.
		 */
		if (bp != d->bd_bif) {
			if (d->bd_bif != NULL) {
				if (bpf_detachd(d, 0) != 0)
					return (ENXIO);
			}
			if (bpf_attachd(d, bp) != 0)
				return (ENXIO);
		}
		reset_d(d);
		return (0);
	}
	/* Not found. */
	return (ENXIO);
}
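
/*
 * Usage sketch (illustrative only): user space selects the interface by
 * name with BIOCSETIF; the ioctl layer resolves the name to an ifnet
 * before bpf_setif() is called.  Assuming an open bpf descriptor "fd":
 *
 *	struct ifreq ifr;
 *	memset(&ifr, 0, sizeof (ifr));
 *	strlcpy(ifr.ifr_name, "en0", sizeof (ifr.ifr_name));
 *	if (ioctl(fd, BIOCSETIF, &ifr) == -1)
 *		err(1, "BIOCSETIF en0");
 */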

/*
 * Get a list of the available data link types for the interface.
 */
static int
bpf_getdltlist(struct bpf_d *d, caddr_t addr, struct proc *p)
{
	u_int n;
	int error;
	struct ifnet *ifp;
	struct bpf_if *bp;
	user_addr_t dlist;
	struct bpf_dltlist bfl;

	bcopy(addr, &bfl, sizeof (bfl));
	if (proc_is64bit(p)) {
		dlist = (user_addr_t)bfl.bfl_u.bflu_pad;
	} else {
		dlist = CAST_USER_ADDR_T(bfl.bfl_u.bflu_list);
	}

	ifp = d->bd_bif->bif_ifp;
	n = 0;
	error = 0;

	for (bp = bpf_iflist; bp; bp = bp->bif_next) {
		if (bp->bif_ifp != ifp)
			continue;
		/*
		 * Return DLT_PKTAP only to processes that know how to
		 * handle it
		 */
		if (bp->bif_dlt == DLT_PKTAP && !(d->bd_flags & BPF_WANT_PKTAP))
			continue;
		if (dlist != USER_ADDR_NULL) {
			if (n >= bfl.bfl_len) {
				return (ENOMEM);
			}
			error = copyout(&bp->bif_dlt, dlist,
			    sizeof (bp->bif_dlt));
			if (error != 0)
				break;
			dlist += sizeof (bp->bif_dlt);
		}
		n++;
	}
	bfl.bfl_len = n;
	bcopy(&bfl, addr, sizeof (bfl));

	return (error);
}
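
/*
 * Usage sketch (illustrative only): BIOCGDLTLIST is normally issued
 * twice; with bfl_list left NULL the kernel only reports the count in
 * bfl_len, and a second call fills a buffer of that size:
 *
 *	struct bpf_dltlist bfl;
 *	memset(&bfl, 0, sizeof (bfl));
 *	if (ioctl(fd, BIOCGDLTLIST, &bfl) == -1)	// get the count
 *		err(1, "BIOCGDLTLIST");
 *	bfl.bfl_list = calloc(bfl.bfl_len, sizeof (u_int32_t));
 *	if (bfl.bfl_list == NULL || ioctl(fd, BIOCGDLTLIST, &bfl) == -1)
 *		err(1, "BIOCGDLTLIST");
 */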

/*
 * Set the data link type of a BPF instance.
 */
static int
bpf_setdlt(struct bpf_d *d, uint32_t dlt)
{
	int error, opromisc;
	struct ifnet *ifp;
	struct bpf_if *bp;

	if (d->bd_bif->bif_dlt == dlt)
		return (0);

	while (d->bd_hbuf_read)
		msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);

	if ((d->bd_flags & BPF_CLOSING) != 0)
		return (ENXIO);

	ifp = d->bd_bif->bif_ifp;
	for (bp = bpf_iflist; bp; bp = bp->bif_next) {
		if (bp->bif_ifp == ifp && bp->bif_dlt == dlt)
			break;
	}
	if (bp != NULL) {
		opromisc = d->bd_promisc;
		if (bpf_detachd(d, 0) != 0)
			return (ENXIO);
		error = bpf_attachd(d, bp);
		if (error) {
			printf("bpf_setdlt: bpf_attachd %s%d failed (%d)\n",
			    ifnet_name(bp->bif_ifp), ifnet_unit(bp->bif_ifp),
			    error);
			return error;
		}
		reset_d(d);
		if (opromisc) {
			lck_mtx_unlock(bpf_mlock);
			error = ifnet_set_promiscuous(bp->bif_ifp, 1);
			lck_mtx_lock(bpf_mlock);
			if (error) {
				printf("%s: ifpromisc %s%d failed (%d)\n",
				    __func__, ifnet_name(bp->bif_ifp),
				    ifnet_unit(bp->bif_ifp), error);
			} else {
				d->bd_promisc = 1;
			}
		}
	}
	return (bp == NULL ? EINVAL : 0);
}

static int
bpf_set_traffic_class(struct bpf_d *d, int tc)
{
	int error = 0;

	if (!SO_VALID_TC(tc))
		error = EINVAL;
	else
		d->bd_traffic_class = tc;

	return (error);
}

static void
bpf_set_packet_service_class(struct mbuf *m, int tc)
{
	if (!(m->m_flags & M_PKTHDR))
		return;

	VERIFY(SO_VALID_TC(tc));
	(void) m_set_service_class(m, so_tc2msc(tc));
}
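
/*
 * Usage sketch (illustrative only): a process that writes packets
 * through bpf can choose the service class that
 * bpf_set_packet_service_class() will stamp on them, e.g. background:
 *
 *	int tc = SO_TC_BK;
 *	if (ioctl(fd, BIOCSETTC, &tc) == -1)
 *		err(1, "BIOCSETTC");
 */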

/*
 * Support for select()
 *
 * Return true iff the specific operation will not block indefinitely.
 * Otherwise, return false but make a note that a selwakeup() must be done.
 */
int
bpfselect(dev_t dev, int which, void * wql, struct proc *p)
{
	struct bpf_d *d;
	int ret = 0;

	lck_mtx_lock(bpf_mlock);

	d = bpf_dtab[minor(dev)];
	if (d == 0 || d == (void *)1 || (d->bd_flags & BPF_CLOSING) != 0) {
		lck_mtx_unlock(bpf_mlock);
		return (ENXIO);
	}

	bpf_acquire_d(d);

	if (d->bd_bif == NULL) {
		bpf_release_d(d);
		lck_mtx_unlock(bpf_mlock);
		return (ENXIO);
	}

	while (d->bd_hbuf_read)
		msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);

	if ((d->bd_flags & BPF_CLOSING) != 0) {
		bpf_release_d(d);
		lck_mtx_unlock(bpf_mlock);
		return (ENXIO);
	}

	switch (which) {
	case FREAD:
		if (d->bd_hlen != 0 ||
		    ((d->bd_immediate || d->bd_state == BPF_TIMED_OUT) &&
		    d->bd_slen != 0))
			ret = 1; /* read has data to return */
		else {
			/*
			 * Read has no data to return.
			 * Make the select wait, and start a timer if
			 * necessary.
			 */
			selrecord(p, &d->bd_sel, wql);
			bpf_start_timer(d);
		}
		break;

	case FWRITE:
		ret = 1; /* can't determine whether a write would block */
		break;
	}

	bpf_release_d(d);
	lck_mtx_unlock(bpf_mlock);

	return (ret);
}
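
/*
 * Usage sketch (illustrative only): without BIOCIMMEDIATE a reader
 * typically select()s until a hold buffer is ready or the timer armed
 * by bpf_start_timer() (see BIOCSRTIMEOUT) fires:
 *
 *	fd_set rfds;
 *	FD_ZERO(&rfds);
 *	FD_SET(fd, &rfds);
 *	if (select(fd + 1, &rfds, NULL, NULL, NULL) > 0 &&
 *	    FD_ISSET(fd, &rfds))
 *		n = read(fd, buf, buflen);	// will not block indefinitely
 */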

/*
 * Support for kevent() system call.  Register EVFILT_READ filters and
 * reject all others.
 */
int bpfkqfilter(dev_t dev, struct knote *kn);
static void filt_bpfdetach(struct knote *);
static int filt_bpfread(struct knote *, long);
static int filt_bpftouch(struct knote *kn, struct kevent_internal_s *kev);
static int filt_bpfprocess(struct knote *kn, struct filt_process_s *data,
    struct kevent_internal_s *kev);

struct filterops bpfread_filtops = {
	.f_isfd = 1,
	.f_detach = filt_bpfdetach,
	.f_event = filt_bpfread,
	.f_touch = filt_bpftouch,
	.f_process = filt_bpfprocess,
};

static int
filt_bpfread_common(struct knote *kn, struct bpf_d *d)
{
	int ready = 0;

	if (d->bd_immediate) {
		/*
		 * If there's data in the hold buffer, it's the
		 * amount of data a read will return.
		 *
		 * If there's no data in the hold buffer, but
		 * there's data in the store buffer, a read will
		 * immediately rotate the store buffer to the
		 * hold buffer, the amount of data in the store
		 * buffer is the amount of data a read will
		 * return.
		 *
		 * If there's no data in either buffer, we're not
		 * ready to read.
		 */
		kn->kn_data = ((d->bd_hlen == 0 || d->bd_hbuf_read)
		    ? d->bd_slen : d->bd_hlen);
		int64_t lowwat = 1;
		if (kn->kn_sfflags & NOTE_LOWAT) {
			if (kn->kn_sdata > d->bd_bufsize)
				lowwat = d->bd_bufsize;
			else if (kn->kn_sdata > lowwat)
				lowwat = kn->kn_sdata;
		}
		ready = (kn->kn_data >= lowwat);
	} else {
		/*
		 * If there's data in the hold buffer, it's the
		 * amount of data a read will return.
		 *
		 * If there's no data in the hold buffer, but
		 * there's data in the store buffer, if the
		 * timer has expired a read will immediately
		 * rotate the store buffer to the hold buffer,
		 * so the amount of data in the store buffer is
		 * the amount of data a read will return.
		 *
		 * If there's no data in either buffer, or there's
		 * no data in the hold buffer and the timer hasn't
		 * expired, we're not ready to read.
		 */
		kn->kn_data = ((d->bd_hlen == 0 || d->bd_hbuf_read) &&
		    d->bd_state == BPF_TIMED_OUT ? d->bd_slen : d->bd_hlen);
		ready = (kn->kn_data > 0);
	}
	if (!ready)
		bpf_start_timer(d);

	return (ready);
}

int
bpfkqfilter(dev_t dev, struct knote *kn)
{
	struct bpf_d *d;
	int res;

	/*
	 * Is this device a bpf?
	 */
	if (major(dev) != CDEV_MAJOR ||
	    kn->kn_filter != EVFILT_READ) {
		kn->kn_flags = EV_ERROR;
		kn->kn_data = EINVAL;
		return 0;
	}

	lck_mtx_lock(bpf_mlock);

	d = bpf_dtab[minor(dev)];

	if (d == 0 ||
	    d == (void *)1 ||
	    d->bd_bif == NULL ||
	    (d->bd_flags & BPF_CLOSING) != 0) {
		lck_mtx_unlock(bpf_mlock);
		kn->kn_flags = EV_ERROR;
		kn->kn_data = ENXIO;
		return 0;
	}

	kn->kn_hook = d;
	kn->kn_filtid = EVFILTID_BPFREAD;
	KNOTE_ATTACH(&d->bd_sel.si_note, kn);
	d->bd_flags |= BPF_KNOTE;

	/* capture the current state */
	res = filt_bpfread_common(kn, d);

	lck_mtx_unlock(bpf_mlock);

	return (res);
}
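
/*
 * Usage sketch (illustrative only): the kevent() equivalent of the
 * select() path above; in immediate mode filt_bpfread_common() honors
 * NOTE_LOWAT, here an assumed threshold of 128 bytes:
 *
 *	struct kevent kev;
 *	int kq = kqueue();
 *	EV_SET(&kev, fd, EVFILT_READ, EV_ADD, NOTE_LOWAT, 128, NULL);
 *	if (kevent(kq, &kev, 1, NULL, 0, NULL) == -1)	// register
 *		err(1, "kevent");
 *	if (kevent(kq, NULL, 0, &kev, 1, NULL) == 1)	// wait for data
 *		n = read(fd, buf, buflen);	// kev.data = readable bytes
 */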

static void
filt_bpfdetach(struct knote *kn)
{
	struct bpf_d *d = (struct bpf_d *)kn->kn_hook;

	lck_mtx_lock(bpf_mlock);
	if (d->bd_flags & BPF_KNOTE) {
		KNOTE_DETACH(&d->bd_sel.si_note, kn);
		d->bd_flags &= ~BPF_KNOTE;
	}
	lck_mtx_unlock(bpf_mlock);
}

static int
filt_bpfread(struct knote *kn, long hint)
{
#pragma unused(hint)
	struct bpf_d *d = (struct bpf_d *)kn->kn_hook;

	return filt_bpfread_common(kn, d);
}

static int
filt_bpftouch(struct knote *kn, struct kevent_internal_s *kev)
{
	struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
	int res;

	lck_mtx_lock(bpf_mlock);

	/* save off the lowat threshold and flag */
	kn->kn_sdata = kev->data;
	kn->kn_sfflags = kev->fflags;
	if ((kn->kn_status & KN_UDATA_SPECIFIC) == 0)
		kn->kn_udata = kev->udata;

	/* output data will be re-generated here */
	res = filt_bpfread_common(kn, d);

	lck_mtx_unlock(bpf_mlock);

	return res;
}

static int
filt_bpfprocess(struct knote *kn, struct filt_process_s *data,
    struct kevent_internal_s *kev)
{
#pragma unused(data)
	struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
	int res;

	lck_mtx_lock(bpf_mlock);
	res = filt_bpfread_common(kn, d);
	if (res) {
		*kev = kn->kn_kevent;
	}
	lck_mtx_unlock(bpf_mlock);

	return res;
}

/*
 * Copy data from an mbuf chain into a buffer.  This code is derived
 * from m_copydata in sys/uipc_mbuf.c.
 */
static void
bpf_mcopy(const void *src_arg, void *dst_arg, size_t len)
{
	struct mbuf *m = (struct mbuf *)(uintptr_t)(src_arg);
	u_int count;
	u_char *dst;

	dst = dst_arg;
	while (len > 0) {
		if (m == 0)
			panic("bpf_mcopy");
		count = min(m->m_len, len);
		bcopy(mbuf_data(m), dst, count);
		m = m->m_next;
		dst += count;
		len -= count;
	}
}

static inline void
bpf_tap_imp(
	ifnet_t		ifp,
	u_int32_t	dlt,
	mbuf_t		m,
	void		*hdr,
	size_t		hlen,
	int		outbound)
{
	struct bpf_if *bp;
	struct mbuf *savedm = m;

	/*
	 * It's possible that we get here after the bpf descriptor has been
	 * detached from the interface; in such a case we simply return.
	 * Lock ordering is important since we can be called asynchronously
	 * (from the IOKit) to process an inbound packet; when that happens
	 * we would have been holding its "gateLock" and will be acquiring
	 * "bpf_mlock" upon entering this routine.  Due to that, we release
	 * "bpf_mlock" prior to calling ifnet_set_promiscuous (which will
	 * acquire "gateLock" in the IOKit), in order to avoid a deadlock
	 * when a ifnet_set_promiscuous request simultaneously collides with
	 * an inbound packet being passed into the tap callback.
	 */
	lck_mtx_lock(bpf_mlock);
	if (ifp->if_bpf == NULL) {
		lck_mtx_unlock(bpf_mlock);
		return;
	}
	bp = ifp->if_bpf;
	for (bp = ifp->if_bpf; bp && bp->bif_ifp == ifp &&
	    (dlt != 0 && bp->bif_dlt != dlt); bp = bp->bif_next)
		;
	if (bp && bp->bif_ifp == ifp && bp->bif_dlist != NULL) {
		struct bpf_d *d;
		struct m_hdr hack_hdr;
		u_int pktlen = 0;
		u_int slen = 0;
		struct mbuf *m0;

		if (hdr) {
			/*
			 * This is gross. We mock up an mbuf that points to the
			 * header buffer. This means we don't have to copy the
			 * header. A number of interfaces prepended headers just
			 * for bpf by allocating an mbuf on the stack. We want to
			 * give developers an easy way to prepend a header for bpf.
			 * Since a developer allocating an mbuf on the stack is bad,
			 * we do even worse here, allocating only a header to point
			 * to a buffer the developer supplied. This makes assumptions
			 * that bpf_filter and catchpacket will not look at anything
			 * in the mbuf other than the header. This was true at the
			 * time this code was written.
			 */
			hack_hdr.mh_next = m;
			hack_hdr.mh_nextpkt = NULL;
			hack_hdr.mh_len = hlen;
			hack_hdr.mh_data = hdr;
			hack_hdr.mh_type = m->m_type;
			hack_hdr.mh_flags = 0;

			__IGNORE_WCASTALIGN(m = (mbuf_t)&hack_hdr);
		}

		for (m0 = m; m0 != 0; m0 = m0->m_next)
			pktlen += m0->m_len;

		for (d = bp->bif_dlist; d; d = d->bd_next) {
			if (outbound && !d->bd_seesent)
				continue;
			++d->bd_rcount;
			slen = bpf_filter(d->bd_filter, (u_char *)m, pktlen, 0);
			if (slen != 0) {
#if CONFIG_MACF_NET
				if (mac_bpfdesc_check_receive(d, bp->bif_ifp) != 0)
					continue;
#endif
				catchpacket(d, (u_char *)m, savedm, pktlen,
				    slen, outbound, bpf_mcopy);
			}
		}
	}
	lck_mtx_unlock(bpf_mlock);
}

void
bpf_tap_out(
	ifnet_t		ifp,
	u_int32_t	dlt,
	mbuf_t		m,
	void		*hdr,
	size_t		hlen)
{
	bpf_tap_imp(ifp, dlt, m, hdr, hlen, 1);
}

void
bpf_tap_in(
	ifnet_t		ifp,
	u_int32_t	dlt,
	mbuf_t		m,
	void		*hdr,
	size_t		hlen)
{
	bpf_tap_imp(ifp, dlt, m, hdr, hlen, 0);
}
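
/*
 * Usage sketch (illustrative only): a hypothetical driver taps each
 * received packet up to bpf before handing it to the stack; the "hdr"
 * and "hlen" arguments let it supply a link header that is not part of
 * the mbuf chain (mocked up via hack_hdr above):
 *
 *	// in the driver's input path, once the mbuf chain is complete:
 *	bpf_tap_in(ifp, 0, m, NULL, 0);	// 0 = default (first) attachment
 */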

/* Callback registered with Ethernet driver. */
static int
bpf_tap_callback(struct ifnet *ifp, struct mbuf *m)
{
	bpf_tap_imp(ifp, 0, m, NULL, 0, mbuf_pkthdr_rcvif(m) == NULL);

	return 0;
}

/*
 * Move the packet data from interface memory (pkt) into the
 * store buffer, waking up any sleeping reader once a buffer fills.
 * "cpfn" is the routine called to do the actual data transfer.
 * bcopy is passed in to copy contiguous chunks, while
 * bpf_mcopy is passed in to copy mbuf chains.  In the latter case,
 * pkt is really an mbuf.
 */
static void
catchpacket(struct bpf_d *d, u_char *pkt, struct mbuf *m, u_int pktlen,
    u_int snaplen, int outbound,
    void (*cpfn)(const void *, void *, size_t))
{
	struct bpf_hdr *hp;
	struct bpf_hdr_ext *ehp;
	int totlen, curlen;
	int hdrlen, caplen;
	int do_wakeup = 0;
	u_char *payload;
	struct timeval tv;
	struct m_tag *mt = NULL;
	struct bpf_mtag *bt = NULL;

	hdrlen = (d->bd_flags & BPF_EXTENDED_HDR) ? d->bd_bif->bif_exthdrlen :
	    d->bd_bif->bif_hdrlen;
	/*
	 * Figure out how many bytes to move.  If the packet is
	 * greater or equal to the snapshot length, transfer that
	 * much.  Otherwise, transfer the whole packet (unless
	 * we hit the buffer size limit).
	 */
	totlen = hdrlen + min(snaplen, pktlen);
	if (totlen > d->bd_bufsize)
		totlen = d->bd_bufsize;

	/*
	 * Round up the end of the previous packet to the next longword.
	 */
	curlen = BPF_WORDALIGN(d->bd_slen);
	if (curlen + totlen > d->bd_bufsize) {
		/*
		 * This packet will overflow the storage buffer.
		 * Rotate the buffers if we can, then wakeup any
		 * pending reads.
		 *
		 * We cannot rotate buffers if a read is in progress
		 * so drop the packet
		 */
		if (d->bd_hbuf_read) {
			++d->bd_dcount;
			return;
		}

		if (d->bd_fbuf == NULL) {
			if (d->bd_headdrop == 0) {
				/*
				 * We haven't completed the previous read yet,
				 * so drop the packet.
				 */
				++d->bd_dcount;
				return;
			}
			/*
			 * Drop the hold buffer as it contains older packets
			 */
			d->bd_dcount += d->bd_hcnt;
			d->bd_fbuf = d->bd_hbuf;
			ROTATE_BUFFERS(d);
		} else {
			ROTATE_BUFFERS(d);
		}
		do_wakeup = 1;
		curlen = 0;
	} else if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT)
		/*
		 * Immediate mode is set, or the read timeout has
		 * already expired during a select call.  A packet
		 * arrived, so the reader should be woken up.
		 */
		do_wakeup = 1;

	/*
	 * Append the bpf header.
	 */
	microtime(&tv);
	if (d->bd_flags & BPF_EXTENDED_HDR) {
		ehp = (struct bpf_hdr_ext *)(void *)(d->bd_sbuf + curlen);
		memset(ehp, 0, sizeof(*ehp));
		ehp->bh_tstamp.tv_sec = tv.tv_sec;
		ehp->bh_tstamp.tv_usec = tv.tv_usec;
		ehp->bh_datalen = pktlen;
		ehp->bh_hdrlen = hdrlen;
		ehp->bh_caplen = totlen - hdrlen;
		mt = m_tag_locate(m, bpf_mtag_id, 0, NULL);
		if (mt && mt->m_tag_len >= sizeof(*bt)) {
			bt = (struct bpf_mtag *)(mt + 1);
			ehp->bh_pid = bt->bt_pid;
			strlcpy(ehp->bh_comm, bt->bt_comm,
			    sizeof(ehp->bh_comm));
			ehp->bh_svc = so_svc2tc(bt->bt_svc);
			if (bt->bt_direction == BPF_MTAG_DIR_OUT)
				ehp->bh_flags |= BPF_HDR_EXT_FLAGS_DIR_OUT;
			else
				ehp->bh_flags |= BPF_HDR_EXT_FLAGS_DIR_IN;
			m_tag_delete(m, mt);
		} else if (outbound) {
			/* only do lookups on non-raw INPCB */
			if ((m->m_pkthdr.pkt_flags & (PKTF_FLOW_ID|
			    PKTF_FLOW_LOCALSRC|PKTF_FLOW_RAWSOCK)) ==
			    (PKTF_FLOW_ID|PKTF_FLOW_LOCALSRC) &&
			    m->m_pkthdr.pkt_flowsrc == FLOWSRC_INPCB) {
				ehp->bh_flowid = m->m_pkthdr.pkt_flowid;
				ehp->bh_proto = m->m_pkthdr.pkt_proto;
			}
			ehp->bh_svc = so_svc2tc(m->m_pkthdr.pkt_svc);
			ehp->bh_flags |= BPF_HDR_EXT_FLAGS_DIR_OUT;
			if (m->m_pkthdr.pkt_flags & PKTF_TCP_REXMT)
				ehp->bh_pktflags |= BPF_PKTFLAGS_TCP_REXMT;
			if (m->m_pkthdr.pkt_flags & PKTF_START_SEQ)
				ehp->bh_pktflags |= BPF_PKTFLAGS_START_SEQ;
			if (m->m_pkthdr.pkt_flags & PKTF_LAST_PKT)
				ehp->bh_pktflags |= BPF_PKTFLAGS_LAST_PKT;
			if (m->m_pkthdr.pkt_flags & PKTF_VALID_UNSENT_DATA) {
				ehp->bh_unsent_bytes =
				    m->m_pkthdr.bufstatus_if;
				ehp->bh_unsent_snd =
				    m->m_pkthdr.bufstatus_sndbuf;
			}
		} else
			ehp->bh_flags |= BPF_HDR_EXT_FLAGS_DIR_IN;
		payload = (u_char *)ehp + hdrlen;
		caplen = ehp->bh_caplen;
	} else {
		hp = (struct bpf_hdr *)(void *)(d->bd_sbuf + curlen);
		hp->bh_tstamp.tv_sec = tv.tv_sec;
		hp->bh_tstamp.tv_usec = tv.tv_usec;
		hp->bh_datalen = pktlen;
		hp->bh_hdrlen = hdrlen;
		hp->bh_caplen = totlen - hdrlen;
		payload = (u_char *)hp + hdrlen;
		caplen = hp->bh_caplen;
	}
	/*
	 * Copy the packet data into the store buffer and update its length.
	 */
	(*cpfn)(pkt, payload, caplen);
	d->bd_slen = curlen + totlen;
	d->bd_scnt += 1;

	if (do_wakeup)
		bpf_wakeup(d);
}
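
/*
 * Reader-side sketch (illustrative only): a single read() returns one
 * hold buffer containing the records laid out above.  Each bpf_hdr is
 * followed by bh_caplen bytes of packet data, and the next record
 * starts at the next BPF_WORDALIGN boundary ("handle" is a placeholder
 * for the consumer's own routine):
 *
 *	u_char *p = buf;
 *	while (p < buf + n) {
 *		struct bpf_hdr *bh = (struct bpf_hdr *)(void *)p;
 *		handle(p + bh->bh_hdrlen, bh->bh_caplen);
 *		p += BPF_WORDALIGN(bh->bh_hdrlen + bh->bh_caplen);
 *	}
 */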

/*
 * Initialize all nonzero fields of a descriptor.
 */
static int
bpf_allocbufs(struct bpf_d *d)
{
	if (d->bd_sbuf != NULL) {
		FREE(d->bd_sbuf, M_DEVBUF);
		d->bd_sbuf = NULL;
	}
	if (d->bd_hbuf != NULL) {
		FREE(d->bd_hbuf, M_DEVBUF);
		d->bd_hbuf = NULL;
	}
	if (d->bd_fbuf != NULL) {
		FREE(d->bd_fbuf, M_DEVBUF);
		d->bd_fbuf = NULL;
	}

	d->bd_fbuf = (caddr_t) _MALLOC(d->bd_bufsize, M_DEVBUF, M_WAIT);
	if (d->bd_fbuf == NULL)
		return (ENOBUFS);

	d->bd_sbuf = (caddr_t) _MALLOC(d->bd_bufsize, M_DEVBUF, M_WAIT);
	if (d->bd_sbuf == NULL) {
		FREE(d->bd_fbuf, M_DEVBUF);
		d->bd_fbuf = NULL;
		return (ENOBUFS);
	}
	d->bd_slen = 0;
	d->bd_hlen = 0;
	d->bd_scnt = 0;
	d->bd_hcnt = 0;
	return (0);
}

/*
 * Free buffers currently in use by a descriptor.
 * Called on close.
 */
static void
bpf_freed(struct bpf_d *d)
{
	/*
	 * We don't need to lock out interrupts since this descriptor has
	 * been detached from its interface and it hasn't yet been marked
	 * free.
	 */
	if (d->bd_hbuf_read)
		panic("bpf buffer freed during read");

	if (d->bd_sbuf != 0) {
		FREE(d->bd_sbuf, M_DEVBUF);
		if (d->bd_hbuf != 0)
			FREE(d->bd_hbuf, M_DEVBUF);
		if (d->bd_fbuf != 0)
			FREE(d->bd_fbuf, M_DEVBUF);
	}
	if (d->bd_filter)
		FREE((caddr_t)d->bd_filter, M_DEVBUF);
}

/*
 * Attach an interface to bpf.  ifp is the interface to attach;
 * dlt is the link layer type; hdrlen is the fixed size of the link
 * header (variable length headers are not yet supported).
 */
void
bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen)
{
	bpf_attach(ifp, dlt, hdrlen, NULL, NULL);
}

errno_t
bpf_attach(
	ifnet_t		ifp,
	u_int32_t	dlt,
	u_int32_t	hdrlen,
	bpf_send_func	send,
	bpf_tap_func	tap)
{
	struct bpf_if *bp_new;
	struct bpf_if *bp_temp;
	struct bpf_if *bp_first = NULL;

	bp_new = (struct bpf_if *) _MALLOC(sizeof(*bp_new), M_DEVBUF,
	    M_WAIT | M_ZERO);
	if (bp_new == 0)
		panic("bpfattach");

	lck_mtx_lock(bpf_mlock);

	/*
	 * Check if this interface/dlt is already attached, record first
	 * attachment for this interface.
	 */
	for (bp_temp = bpf_iflist; bp_temp && (bp_temp->bif_ifp != ifp ||
	    bp_temp->bif_dlt != dlt); bp_temp = bp_temp->bif_next) {
		if (bp_temp->bif_ifp == ifp && bp_first == NULL)
			bp_first = bp_temp;
	}

	if (bp_temp != NULL) {
		printf("bpfattach - %s with dlt %d is already attached\n",
		    if_name(ifp), dlt);
		FREE(bp_new, M_DEVBUF);
		lck_mtx_unlock(bpf_mlock);
		return EEXIST;
	}

	bp_new->bif_ifp = ifp;
	bp_new->bif_dlt = dlt;
	bp_new->bif_send = send;
	bp_new->bif_tap = tap;

	if (bp_first == NULL) {
		/* No other entries for this ifp */
		bp_new->bif_next = bpf_iflist;
		bpf_iflist = bp_new;
	} else {
		/* Add this after the first entry for this interface */
		bp_new->bif_next = bp_first->bif_next;
		bp_first->bif_next = bp_new;
	}

	/*
	 * Compute the length of the bpf header.  This is not necessarily
	 * equal to SIZEOF_BPF_HDR because we want to insert spacing such
	 * that the network layer header begins on a longword boundary (for
	 * performance reasons and to alleviate alignment restrictions).
	 */
	bp_new->bif_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen;
	bp_new->bif_exthdrlen = BPF_WORDALIGN(hdrlen +
	    sizeof(struct bpf_hdr_ext)) - hdrlen;

	/* Take a reference on the interface */
	ifnet_reference(ifp);

	lck_mtx_unlock(bpf_mlock);

#ifndef __APPLE__
	if (bootverbose)
		printf("bpf: %s attached\n", if_name(ifp));
#endif

	return 0;
}
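
/*
 * Usage sketch (illustrative only): a hypothetical Ethernet driver
 * registers with bpf once its ifnet is attached; going through
 * bpfattach() leaves the send/tap callbacks NULL so the default
 * paths are used:
 *
 *	// after ifnet_attach() succeeds in the driver:
 *	bpfattach(ifp, DLT_EN10MB, sizeof (struct ether_header));
 */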

/*
 * Detach bpf from an interface.  This involves detaching each descriptor
 * associated with the interface, and leaving bd_bif NULL.  Notify each
 * descriptor as it's detached so that any sleepers wake up and get
 * ENXIO.
 */
void
bpfdetach(struct ifnet *ifp)
{
	struct bpf_if *bp, *bp_prev, *bp_next;
	struct bpf_d *d;

	if (bpf_debug != 0)
		printf("%s: %s\n", __func__, if_name(ifp));

	lck_mtx_lock(bpf_mlock);

	/*
	 * Build the list of devices attached to that interface
	 * that we need to free while keeping the lock to maintain
	 * the integrity of the interface list
	 */
	bp_prev = NULL;
	for (bp = bpf_iflist; bp != NULL; bp = bp_next) {
		bp_next = bp->bif_next;

		if (ifp != bp->bif_ifp) {
			bp_prev = bp;
			continue;
		}
		/* Unlink from the interface list */
		if (bp_prev)
			bp_prev->bif_next = bp->bif_next;
		else
			bpf_iflist = bp->bif_next;

		/* Detach the devices attached to the interface */
		while ((d = bp->bif_dlist) != NULL) {
			/*
			 * Take an extra reference to prevent the device
			 * from being freed when bpf_detachd() releases
			 * the reference for the interface list
			 */
			bpf_acquire_d(d);
			bpf_detachd(d, 0);
			bpf_wakeup(d);
			bpf_release_d(d);
		}
		ifnet_release(ifp);
	}

	lck_mtx_unlock(bpf_mlock);
}

void
bpf_init(__unused void *unused)
{
#ifdef __APPLE__
	int i;
	int maj;

	if (bpf_devsw_installed == 0) {
		bpf_devsw_installed = 1;
		bpf_mlock_grp_attr = lck_grp_attr_alloc_init();
		bpf_mlock_grp = lck_grp_alloc_init("bpf", bpf_mlock_grp_attr);
		bpf_mlock_attr = lck_attr_alloc_init();
		lck_mtx_init(bpf_mlock, bpf_mlock_grp, bpf_mlock_attr);
		maj = cdevsw_add(CDEV_MAJOR, &bpf_cdevsw);
		if (maj == -1) {
			if (bpf_mlock_attr)
				lck_attr_free(bpf_mlock_attr);
			if (bpf_mlock_grp)
				lck_grp_free(bpf_mlock_grp);
			if (bpf_mlock_grp_attr)
				lck_grp_attr_free(bpf_mlock_grp_attr);

			bpf_mlock = NULL;
			bpf_mlock_attr = NULL;
			bpf_mlock_grp = NULL;
			bpf_mlock_grp_attr = NULL;
			bpf_devsw_installed = 0;
			printf("bpf_init: failed to allocate a major number!\n");
			return;
		}

		for (i = 0; i < NBPFILTER; i++)
			bpf_make_dev_t(maj);

		VERIFY(mbuf_tag_id_find(BPF_CONTROL_NAME, &bpf_mtag_id) == 0);
	}
#else
	cdevsw_add(&bpf_cdevsw);
#endif
}

#ifndef __APPLE__
SYSINIT(bpfdev, SI_SUB_DRIVERS, SI_ORDER_MIDDLE + CDEV_MAJOR,
    bpf_drvinit, NULL)
#endif

#if CONFIG_MACF_NET
struct label *
mac_bpfdesc_label_get(struct bpf_d *d)
{

	return (d->bd_label);
}

void
mac_bpfdesc_label_set(struct bpf_d *d, struct label *label)
{

	d->bd_label = label;
}
#endif