]> git.saurik.com Git - apple/xnu.git/blame - bsd/net/bpf.c
xnu-6153.121.1.tar.gz
[apple/xnu.git] / bsd / net / bpf.c
CommitLineData
1c79356b 1/*
cb323159 2 * Copyright (c) 2000-2019 Apple Inc. All rights reserved.
5d5c5d0d 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
d9a64523 5 *
2d21ac55
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
d9a64523 14 *
2d21ac55
A
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
d9a64523 17 *
2d21ac55
A
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5
A
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55
A
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
d9a64523 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
1c79356b
A
27 */
28/*
29 * Copyright (c) 1990, 1991, 1993
30 * The Regents of the University of California. All rights reserved.
31 *
32 * This code is derived from the Stanford/CMU enet packet filter,
33 * (net/enet.c) distributed as part of 4.3BSD, and code contributed
34 * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
35 * Berkeley Laboratory.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 * must display the following acknowledgement:
47 * This product includes software developed by the University of
48 * California, Berkeley and its contributors.
49 * 4. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
d9a64523 65 * @(#)bpf.c 8.2 (Berkeley) 3/28/94
1c79356b 66 *
9bccf70c 67 * $FreeBSD: src/sys/net/bpf.c,v 1.59.2.5 2001/01/05 04:49:09 jdp Exp $
1c79356b 68 */
2d21ac55
A
69/*
70 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
71 * support for mandatory and extensible security protections. This notice
72 * is included in support of clause 2.2 (b) of the Apple Public License,
73 * Version 2.0.
74 */
1c79356b 75
9bccf70c 76#include "bpf.h"
1c79356b
A
77
78#ifndef __GNUC__
0a7de745 79#define inline
1c79356b 80#else
0a7de745 81#define inline __inline
1c79356b
A
82#endif
83
84#include <sys/param.h>
85#include <sys/systm.h>
86#include <sys/conf.h>
87#include <sys/malloc.h>
88#include <sys/mbuf.h>
89#include <sys/time.h>
90#include <sys/proc.h>
1c79356b
A
91#include <sys/signalvar.h>
92#include <sys/filio.h>
93#include <sys/sockio.h>
94#include <sys/ttycom.h>
95#include <sys/filedesc.h>
91447636 96#include <sys/uio_internal.h>
b0d623f7
A
97#include <sys/file_internal.h>
98#include <sys/event.h>
1c79356b 99
9bccf70c
A
100#include <sys/poll.h>
101
1c79356b 102#include <sys/socket.h>
316670eb 103#include <sys/socketvar.h>
1c79356b
A
104#include <sys/vnode.h>
105
106#include <net/if.h>
107#include <net/bpf.h>
108#include <net/bpfdesc.h>
109
110#include <netinet/in.h>
d9a64523
A
111#include <netinet/ip.h>
112#include <netinet/ip6.h>
316670eb
A
113#include <netinet/in_pcb.h>
114#include <netinet/in_var.h>
115#include <netinet/ip_var.h>
116#include <netinet/tcp.h>
117#include <netinet/tcp_var.h>
118#include <netinet/udp.h>
119#include <netinet/udp_var.h>
1c79356b 120#include <netinet/if_ether.h>
d9a64523
A
121#include <netinet/isakmp.h>
122#include <netinet6/esp.h>
1c79356b
A
123#include <sys/kernel.h>
124#include <sys/sysctl.h>
55e303ae 125#include <net/firewire.h>
1c79356b 126
1c79356b
A
127#include <miscfs/devfs/devfs.h>
128#include <net/dlil.h>
fe8ab488 129#include <net/pktap.h>
1c79356b 130
91447636 131#include <kern/locks.h>
6d2010ae 132#include <kern/thread_call.h>
5ba3f43e 133#include <libkern/section_keywords.h>
91447636 134
2d21ac55
A
135#if CONFIG_MACF_NET
136#include <security/mac_framework.h>
137#endif /* MAC_NET */
91447636 138
d9a64523
A
139#include <os/log.h>
140
2d21ac55 141extern int tvtohz(struct timeval *);
9bccf70c 142
0a7de745
A
143#define BPF_BUFSIZE 4096
144#define UIOMOVE(cp, len, code, uio) uiomove(cp, len, uio)
1c79356b 145
0a7de745 146#define PRINET 26 /* interruptible */
55e303ae 147
d9a64523
A
148#define ISAKMP_HDR_SIZE (sizeof(struct isakmp) + sizeof(struct isakmp_gen))
149#define ESP_HDR_SIZE sizeof(struct newesp)
1c79356b 150
5ba3f43e
A
151typedef void (*pktcopyfunc_t)(const void *, void *, size_t);
152
1c79356b
A
153/*
154 * The default read buffer size is patchable.
155 */
91447636 156static unsigned int bpf_bufsize = BPF_BUFSIZE;
6d2010ae 157SYSCTL_INT(_debug, OID_AUTO, bpf_bufsize, CTLFLAG_RW | CTLFLAG_LOCKED,
0a7de745 158 &bpf_bufsize, 0, "");
cb323159
A
159
160static int sysctl_bpf_maxbufsize SYSCTL_HANDLER_ARGS;
161extern const int copysize_limit_panic;
162#define BPF_MAXSIZE_CAP (copysize_limit_panic >> 1)
6d2010ae 163__private_extern__ unsigned int bpf_maxbufsize = BPF_MAXBUFSIZE;
cb323159
A
164SYSCTL_PROC(_debug, OID_AUTO, bpf_maxbufsize, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
165 &bpf_maxbufsize, 0,
166 sysctl_bpf_maxbufsize, "I", "Default BPF max buffer size");
167
91447636 168static unsigned int bpf_maxdevices = 256;
6d2010ae 169SYSCTL_UINT(_debug, OID_AUTO, bpf_maxdevices, CTLFLAG_RW | CTLFLAG_LOCKED,
0a7de745 170 &bpf_maxdevices, 0, "");
fe8ab488
A
171/*
172 * bpf_wantpktap controls the default visibility of DLT_PKTAP.
173 * On OS X it is off by default, so a process needs to use the ioctl
174 * BPF_WANT_PKTAP explicitly to be able to use DLT_PKTAP.
175 */
5ba3f43e
A
176#if CONFIG_EMBEDDED
177static unsigned int bpf_wantpktap = 1;
178#else
fe8ab488 179static unsigned int bpf_wantpktap = 0;
5ba3f43e 180#endif
fe8ab488 181SYSCTL_UINT(_debug, OID_AUTO, bpf_wantpktap, CTLFLAG_RW | CTLFLAG_LOCKED,
0a7de745 182 &bpf_wantpktap, 0, "");
1c79356b 183
3e170ce0
A
184static int bpf_debug = 0;
185SYSCTL_INT(_debug, OID_AUTO, bpf_debug, CTLFLAG_RW | CTLFLAG_LOCKED,
0a7de745 186 &bpf_debug, 0, "");
3e170ce0 187
1c79356b
A
188/*
189 * bpf_iflist is the list of interfaces; each corresponds to an ifnet
55e303ae 190 * bpf_dtab holds pointer to the descriptors, indexed by minor device #
1c79356b 191 */
0a7de745 192static struct bpf_if *bpf_iflist;
9bccf70c
A
193#ifdef __APPLE__
194/*
195 * BSD now stores the bpf_d in the dev_t which is a struct
196 * on their system. Our dev_t is an int, so we still store
197 * the bpf_d in a separate table indexed by minor device #.
91447636
A
198 *
199 * The value stored in bpf_dtab[n] represent three states:
d9a64523
A
200 * NULL: device not opened
201 * BPF_DEV_RESERVED: device opening or closing
91447636 202 * other: device <n> opened with pointer to storage
9bccf70c 203 */
0a7de745
A
204#define BPF_DEV_RESERVED ((struct bpf_d *)(uintptr_t)1)
205static struct bpf_d **bpf_dtab = NULL;
91447636 206static unsigned int bpf_dtab_size = 0;
0a7de745 207static unsigned int nbpfilter = 0;
91447636 208
316670eb 209decl_lck_mtx_data(static, bpf_mlock_data);
0a7de745
A
210static lck_mtx_t *bpf_mlock = &bpf_mlock_data;
211static lck_grp_t *bpf_mlock_grp;
212static lck_grp_attr_t *bpf_mlock_grp_attr;
213static lck_attr_t *bpf_mlock_attr;
55e303ae 214
55e303ae 215#endif /* __APPLE__ */
1c79356b 216
0a7de745
A
217static int bpf_allocbufs(struct bpf_d *);
218static errno_t bpf_attachd(struct bpf_d *d, struct bpf_if *bp);
219static int bpf_detachd(struct bpf_d *d, int);
220static void bpf_freed(struct bpf_d *);
221static int bpf_movein(struct uio *, int,
222 struct mbuf **, struct sockaddr *, int *);
223static int bpf_setif(struct bpf_d *, ifnet_t ifp, bool, bool);
224static void bpf_timed_out(void *, void *);
225static void bpf_wakeup(struct bpf_d *);
226static u_int get_pkt_trunc_len(u_char *, u_int);
227static void catchpacket(struct bpf_d *, struct bpf_packet *, u_int, int);
228static void reset_d(struct bpf_d *);
229static int bpf_setf(struct bpf_d *, u_int, user_addr_t, u_long);
230static int bpf_getdltlist(struct bpf_d *, caddr_t, struct proc *);
231static int bpf_setdlt(struct bpf_d *, u_int);
232static int bpf_set_traffic_class(struct bpf_d *, int);
233static void bpf_set_packet_service_class(struct mbuf *, int);
234
235static void bpf_acquire_d(struct bpf_d *);
236static void bpf_release_d(struct bpf_d *);
237
238static int bpf_devsw_installed;
55e303ae 239
91447636 240void bpf_init(void *unused);
2d21ac55 241static int bpf_tap_callback(struct ifnet *ifp, struct mbuf *m);
55e303ae 242
9bccf70c
A
243/*
244 * Darwin differs from BSD here, the following are static
245 * on BSD and not static on Darwin.
246 */
0a7de745
A
247d_open_t bpfopen;
248d_close_t bpfclose;
249d_read_t bpfread;
250d_write_t bpfwrite;
251ioctl_fcn_t bpfioctl;
252select_fcn_t bpfselect;
1c79356b 253
9bccf70c 254/* Darwin's cdevsw struct differs slightly from BSDs */
0a7de745 255#define CDEV_MAJOR 23
1c79356b 256static struct cdevsw bpf_cdevsw = {
cb323159
A
257 .d_open = bpfopen,
258 .d_close = bpfclose,
259 .d_read = bpfread,
260 .d_write = bpfwrite,
261 .d_ioctl = bpfioctl,
262 .d_stop = eno_stop,
263 .d_reset = eno_reset,
264 .d_ttys = NULL,
265 .d_select = bpfselect,
266 .d_mmap = eno_mmap,
267 .d_strategy = eno_strat,
268 .d_reserved_1 = eno_getc,
269 .d_reserved_2 = eno_putc,
270 .d_type = 0
1c79356b
A
271};
272
0a7de745 273#define SOCKADDR_HDR_LEN offsetof(struct sockaddr, sa_data)
9bccf70c 274
1c79356b 275static int
d9a64523
A
276bpf_movein(struct uio *uio, int linktype, struct mbuf **mp,
277 struct sockaddr *sockp, int *datlen)
1c79356b
A
278{
279 struct mbuf *m;
280 int error;
281 int len;
2d21ac55 282 uint8_t sa_family;
1c79356b
A
283 int hlen;
284
2d21ac55 285 switch (linktype) {
2d21ac55
A
286#if SLIP
287 case DLT_SLIP:
288 sa_family = AF_INET;
289 hlen = 0;
290 break;
291#endif /* SLIP */
d9a64523 292
2d21ac55
A
293 case DLT_EN10MB:
294 sa_family = AF_UNSPEC;
295 /* XXX Would MAXLINKHDR be better? */
296 hlen = sizeof(struct ether_header);
297 break;
d9a64523 298
2d21ac55
A
299#if FDDI
300 case DLT_FDDI:
d9a64523 301#if defined(__FreeBSD__) || defined(__bsdi__)
2d21ac55
A
302 sa_family = AF_IMPLINK;
303 hlen = 0;
d9a64523 304#else
2d21ac55
A
305 sa_family = AF_UNSPEC;
306 /* XXX 4(FORMAC)+6(dst)+6(src)+3(LLC)+5(SNAP) */
307 hlen = 24;
d9a64523 308#endif
2d21ac55
A
309 break;
310#endif /* FDDI */
d9a64523 311
2d21ac55
A
312 case DLT_RAW:
313 case DLT_NULL:
314 sa_family = AF_UNSPEC;
315 hlen = 0;
316 break;
d9a64523
A
317
318#ifdef __FreeBSD__
2d21ac55
A
319 case DLT_ATM_RFC1483:
320 /*
321 * en atm driver requires 4-byte atm pseudo header.
322 * though it isn't standard, vpi:vci needs to be
323 * specified anyway.
324 */
325 sa_family = AF_UNSPEC;
0a7de745 326 hlen = 12; /* XXX 4(ATM_PH) + 3(LLC) + 5(SNAP) */
2d21ac55 327 break;
d9a64523 328#endif
2d21ac55
A
329
330 case DLT_PPP:
331 sa_family = AF_UNSPEC;
0a7de745 332 hlen = 4; /* This should match PPP_HDRLEN */
2d21ac55 333 break;
d9a64523 334
2d21ac55
A
335 case DLT_APPLE_IP_OVER_IEEE1394:
336 sa_family = AF_UNSPEC;
337 hlen = sizeof(struct firewire_header);
338 break;
b0d623f7 339
0a7de745 340 case DLT_IEEE802_11: /* IEEE 802.11 wireless */
b0d623f7
A
341 sa_family = AF_IEEE80211;
342 hlen = 0;
343 break;
316670eb 344
6d2010ae
A
345 case DLT_IEEE802_11_RADIO:
346 sa_family = AF_IEEE80211;
347 hlen = 0;
348 break;
b0d623f7 349
2d21ac55 350 default:
0a7de745 351 return EIO;
55e303ae 352 }
2d21ac55 353
91447636
A
354 // LP64todo - fix this!
355 len = uio_resid(uio);
1c79356b 356 *datlen = len - hlen;
0a7de745
A
357 if ((unsigned)len > MCLBYTES) {
358 return EIO;
359 }
1c79356b 360
2d21ac55
A
361 if (sockp) {
362 /*
363 * Build a sockaddr based on the data link layer type.
364 * We do this at this level because the ethernet header
365 * is copied directly into the data field of the sockaddr.
366 * In the case of SLIP, there is no header and the packet
367 * is forwarded as is.
368 * Also, we are careful to leave room at the front of the mbuf
369 * for the link level header.
370 */
371 if ((hlen + SOCKADDR_HDR_LEN) > sockp->sa_len) {
0a7de745 372 return EIO;
2d21ac55
A
373 }
374 sockp->sa_family = sa_family;
375 } else {
376 /*
377 * We're directly sending the packet data supplied by
378 * the user; we don't need to make room for the link
379 * header, and don't need the header length value any
380 * more, so set it to 0.
381 */
382 hlen = 0;
383 }
d9a64523 384
1c79356b 385 MGETHDR(m, M_WAIT, MT_DATA);
0a7de745
A
386 if (m == 0) {
387 return ENOBUFS;
388 }
91447636 389 if ((unsigned)len > MHLEN) {
1c79356b
A
390 MCLGET(m, M_WAIT);
391 if ((m->m_flags & M_EXT) == 0) {
1c79356b
A
392 error = ENOBUFS;
393 goto bad;
394 }
395 }
396 m->m_pkthdr.len = m->m_len = len;
397 m->m_pkthdr.rcvif = NULL;
398 *mp = m;
d9a64523 399
1c79356b
A
400 /*
401 * Make room for link header.
402 */
403 if (hlen != 0) {
404 m->m_pkthdr.len -= hlen;
405 m->m_len -= hlen;
1c79356b 406 m->m_data += hlen; /* XXX */
1c79356b 407 error = UIOMOVE((caddr_t)sockp->sa_data, hlen, UIO_WRITE, uio);
0a7de745 408 if (error) {
1c79356b 409 goto bad;
0a7de745 410 }
1c79356b
A
411 }
412 error = UIOMOVE(mtod(m, caddr_t), len - hlen, UIO_WRITE, uio);
0a7de745 413 if (error) {
6d2010ae 414 goto bad;
0a7de745 415 }
d9a64523 416
6d2010ae
A
417 /* Check for multicast destination */
418 switch (linktype) {
0a7de745
A
419 case DLT_EN10MB: {
420 struct ether_header *eh;
421
422 eh = mtod(m, struct ether_header *);
423 if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
424 if (_ether_cmp(etherbroadcastaddr,
425 eh->ether_dhost) == 0) {
426 m->m_flags |= M_BCAST;
427 } else {
428 m->m_flags |= M_MCAST;
6d2010ae 429 }
6d2010ae 430 }
0a7de745
A
431 break;
432 }
6d2010ae 433 }
d9a64523 434
0a7de745 435 return 0;
d9a64523 436bad:
1c79356b 437 m_freem(m);
0a7de745 438 return error;
1c79356b
A
439}
440
9bccf70c 441#ifdef __APPLE__
55e303ae
A
442
443/*
39236c6e
A
444 * The dynamic addition of a new device node must block all processes that
445 * are opening the last device so that no process will get an unexpected
d9a64523 446 * ENOENT
55e303ae 447 */
91447636
A
/*
 * Create the devfs node for the next bpf minor device, growing the
 * minor-to-descriptor table (bpf_dtab) by NBPFILTER entries when full.
 */
static void
bpf_make_dev_t(int maj)
{
	/* Serializes concurrent growers via tsleep/wakeup on its address */
	static int bpf_growing = 0;
	unsigned int cur_size = nbpfilter, i;

	if (nbpfilter >= bpf_maxdevices) {
		return;
	}

	while (bpf_growing) {
		/* Wait until new device has been created */
		(void) tsleep((caddr_t)&bpf_growing, PZERO, "bpf_growing", 0);
	}
	/* nbpfilter may have advanced while we slept */
	if (nbpfilter > cur_size) {
		/* other thread grew it already */
		return;
	}
	bpf_growing = 1;

	/* need to grow bpf_dtab first */
	if (nbpfilter == bpf_dtab_size) {
		int new_dtab_size;
		struct bpf_d **new_dtab = NULL;
		struct bpf_d **old_dtab = NULL;

		new_dtab_size = bpf_dtab_size + NBPFILTER;
		new_dtab = (struct bpf_d **)_MALLOC(
			sizeof(struct bpf_d *) * new_dtab_size, M_DEVBUF, M_WAIT);
		if (new_dtab == 0) {
			printf("bpf_make_dev_t: malloc bpf_dtab failed\n");
			goto done;
		}
		/* Copy the old table over and zero the newly added tail */
		if (bpf_dtab) {
			bcopy(bpf_dtab, new_dtab,
			    sizeof(struct bpf_d *) * bpf_dtab_size);
		}
		bzero(new_dtab + bpf_dtab_size,
		    sizeof(struct bpf_d *) * NBPFILTER);
		old_dtab = bpf_dtab;
		bpf_dtab = new_dtab;
		bpf_dtab_size = new_dtab_size;
		if (old_dtab != NULL) {
			_FREE(old_dtab, M_DEVBUF);
		}
	}
	i = nbpfilter++;
	(void) devfs_make_node(makedev(maj, i),
	    DEVFS_CHAR, UID_ROOT, GID_WHEEL, 0600,
	    "bpf%d", i);
done:
	/* Release waiters spinning in the while loop above */
	bpf_growing = 0;
	wakeup((caddr_t)&bpf_growing);
}
502
9bccf70c 503#endif
1c79356b
A
504
505/*
506 * Attach file to the bpf interface, i.e. make d listen on bp.
1c79356b 507 */
static errno_t
bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
{
	/* True when d becomes the first listener on this (ifp, dlt) pair */
	int first = bp->bif_dlist == NULL;
	int error = 0;

	/*
	 * Point d at bp, and add d to the interface's list of listeners.
	 * Finally, point the driver's bpf cookie at the interface so
	 * it will divert packets to bpf.
	 */
	d->bd_bif = bp;
	d->bd_next = bp->bif_dlist;
	bp->bif_dlist = d;

	/*
	 * Take a reference on the device even if an error is returned
	 * because we keep the device in the interface's list of listeners
	 */
	bpf_acquire_d(d);

	if (first) {
		/* Find the default bpf entry for this ifp */
		if (bp->bif_ifp->if_bpf == NULL) {
			struct bpf_if *tmp, *primary = NULL;

			/* The first bpf_if on the global list for this ifp
			 * becomes the primary */
			for (tmp = bpf_iflist; tmp; tmp = tmp->bif_next) {
				if (tmp->bif_ifp == bp->bif_ifp) {
					primary = tmp;
					break;
				}
			}
			bp->bif_ifp->if_bpf = primary;
		}
		/* Only call dlil_set_bpf_tap for primary dlt */
		if (bp->bif_ifp->if_bpf == bp) {
			dlil_set_bpf_tap(bp->bif_ifp, BPF_TAP_INPUT_OUTPUT,
			    bpf_tap_callback);
		}

		/* Give the interface's own tap callback a chance to veto */
		if (bp->bif_tap != NULL) {
			error = bp->bif_tap(bp->bif_ifp, bp->bif_dlt,
			    BPF_TAP_INPUT_OUTPUT);
		}
	}

	/*
	 * Reset the detach flags in case we previously detached an interface
	 */
	d->bd_flags &= ~(BPF_DETACHING | BPF_DETACHED);

	/* DLT_PKTAP descriptors need their headers finalized before
	 * delivery -- see bpf_finalize_pktap() */
	if (bp->bif_dlt == DLT_PKTAP) {
		d->bd_flags |= BPF_FINALIZE_PKTAP;
	} else {
		d->bd_flags &= ~BPF_FINALIZE_PKTAP;
	}
	return error;
}
566
567/*
568 * Detach a file from its interface.
3e170ce0
A
569 *
570 * Return 1 if was closed by some thread, 0 otherwise
1c79356b 571 */
3e170ce0
A
static int
bpf_detachd(struct bpf_d *d, int closing)
{
	struct bpf_d **p;
	struct bpf_if *bp;
	struct ifnet *ifp;

	int bpf_closed = d->bd_flags & BPF_CLOSING;
	/*
	 * Some other thread already detached
	 */
	if ((d->bd_flags & (BPF_DETACHED | BPF_DETACHING)) != 0) {
		goto done;
	}
	/*
	 * This thread is doing the detach
	 */
	d->bd_flags |= BPF_DETACHING;

	ifp = d->bd_bif->bif_ifp;
	bp = d->bd_bif;

	if (bpf_debug != 0) {
		printf("%s: %llx %s%s\n",
		    __func__, (uint64_t)VM_KERNEL_ADDRPERM(d),
		    if_name(ifp), closing ? " closing" : "");
	}

	/* Remove d from the interface's descriptor list. */
	p = &bp->bif_dlist;
	while (*p != d) {
		p = &(*p)->bd_next;
		if (*p == 0) {
			panic("bpf_detachd: descriptor not in list");
		}
	}
	*p = (*p)->bd_next;
	if (bp->bif_dlist == 0) {
		/*
		 * Let the driver know that there are no more listeners.
		 */
		/* Only call dlil_set_bpf_tap for primary dlt */
		if (bp->bif_ifp->if_bpf == bp) {
			dlil_set_bpf_tap(ifp, BPF_TAP_DISABLE, NULL);
		}
		if (bp->bif_tap) {
			bp->bif_tap(ifp, bp->bif_dlt, BPF_TAP_DISABLE);
		}

		/* If no other dlt on this ifp has listeners, clear if_bpf */
		for (bp = bpf_iflist; bp; bp = bp->bif_next) {
			if (bp->bif_ifp == ifp && bp->bif_dlist != 0) {
				break;
			}
		}
		if (bp == NULL) {
			ifp->if_bpf = NULL;
		}
	}
	d->bd_bif = NULL;
	/*
	 * Check if this descriptor had requested promiscuous mode.
	 * If so, turn it off.
	 */
	if (d->bd_promisc) {
		d->bd_promisc = 0;
		/* ifnet_set_promiscuous() may block; drop bpf_mlock around it */
		lck_mtx_unlock(bpf_mlock);
		if (ifnet_set_promiscuous(ifp, 0)) {
			/*
			 * Something is really wrong if we were able to put
			 * the driver into promiscuous mode, but can't
			 * take it out.
			 * Most likely the network interface is gone.
			 */
			printf("%s: ifnet_set_promiscuous failed\n", __func__);
		}
		lck_mtx_lock(bpf_mlock);
	}

	/*
	 * Wake up other threads that are waiting for this thread to finish
	 * detaching
	 */
	d->bd_flags &= ~BPF_DETACHING;
	d->bd_flags |= BPF_DETACHED;

	/* Refresh the local variable as d could have been modified while
	 * the lock was dropped above */
	bpf_closed = d->bd_flags & BPF_CLOSING;
	/*
	 * Note that we've kept the reference because we may have dropped
	 * the lock when turning off promiscuous mode
	 */
	bpf_release_d(d);

done:
	/*
	 * When closing, make sure no other thread refers to the bpf_d
	 */
	if (bpf_debug != 0) {
		printf("%s: %llx done\n",
		    __func__, (uint64_t)VM_KERNEL_ADDRPERM(d));
	}
	/*
	 * Let the caller know the bpf_d is closed
	 */
	if (bpf_closed) {
		return 1;
	} else {
		return 0;
	}
}
682
6d2010ae
A
/*
 * Start asynchronous timer, if necessary.
 * Schedules bpf_timed_out() after the descriptor's read timeout
 * (bd_rtout, in ticks) when no timer is already pending.
 * Must be called with bpf_mlock held.
 */
static void
bpf_start_timer(struct bpf_d *d)
{
	uint64_t deadline;
	struct timeval tv;

	if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
		/* Convert the tick-based timeout into seconds/microseconds */
		tv.tv_sec = d->bd_rtout / hz;
		tv.tv_usec = (d->bd_rtout % hz) * tick;

		clock_interval_to_deadline(
			(uint64_t)tv.tv_sec * USEC_PER_SEC + tv.tv_usec,
			NSEC_PER_USEC, &deadline);
		/*
		 * The state is BPF_IDLE, so the timer hasn't
		 * been started yet, and hasn't gone off yet;
		 * there is no thread call scheduled, so this
		 * won't change the schedule.
		 *
		 * XXX - what if, by the time it gets entered,
		 * the deadline has already passed?
		 */
		thread_call_enter_delayed(d->bd_thread_call, deadline);
		d->bd_state = BPF_WAITING;
	}
}
713
/*
 * Cancel asynchronous timer.
 * Must be called with bpf_mlock held.
 *
 * Returns TRUE when a pending call was cancelled; FALSE means the call
 * was not pending (e.g. already in progress) -- see the caller in
 * bpfclose() which then waits for it to drain.
 */
static boolean_t
bpf_stop_timer(struct bpf_d *d)
{
	/*
	 * If the timer has already gone off, this does nothing.
	 * Our caller is expected to set d->bd_state to BPF_IDLE,
	 * with the bpf_mlock, after we are called. bpf_timed_out()
	 * also grabs bpf_mlock, so, if the timer has gone off and
	 * bpf_timed_out() hasn't finished, it's waiting for the
	 * lock; when this thread releases the lock, it will
	 * find the state is BPF_IDLE, and just release the
	 * lock and return.
	 */
	return thread_call_cancel(d->bd_thread_call);
}
733
3e170ce0
A
/*
 * Take a reference on a bpf descriptor, recording the caller's return
 * address in a small ring buffer to help debug reference leaks.
 * Must be called with bpf_mlock held.
 */
void
bpf_acquire_d(struct bpf_d *d)
{
	void *lr_saved = __builtin_return_address(0);

	LCK_MTX_ASSERT(bpf_mlock, LCK_MTX_ASSERT_OWNED);

	d->bd_refcnt += 1;

	/* Remember who took this reference (BPF_REF_HIST-deep history) */
	d->bd_ref_lr[d->bd_next_ref_lr] = lr_saved;
	d->bd_next_ref_lr = (d->bd_next_ref_lr + 1) % BPF_REF_HIST;
}
746
/*
 * Drop a reference on a bpf descriptor, freeing it when the count hits
 * zero.  Panics on over-release, or if the descriptor reaches zero
 * references while still attached (BPF_DETACHED not set).
 * Must be called with bpf_mlock held.
 */
void
bpf_release_d(struct bpf_d *d)
{
	void *lr_saved = __builtin_return_address(0);

	LCK_MTX_ASSERT(bpf_mlock, LCK_MTX_ASSERT_OWNED);

	if (d->bd_refcnt <= 0) {
		panic("%s: %p refcnt <= 0", __func__, d);
	}

	d->bd_refcnt -= 1;

	/* Remember who dropped this reference (BPF_REF_HIST-deep history) */
	d->bd_unref_lr[d->bd_next_unref_lr] = lr_saved;
	d->bd_next_unref_lr = (d->bd_next_unref_lr + 1) % BPF_REF_HIST;

	if (d->bd_refcnt == 0) {
		/* Assert the device is detached */
		if ((d->bd_flags & BPF_DETACHED) == 0) {
			panic("%s: %p BPF_DETACHED not set", __func__, d);
		}

		_FREE(d, M_DEVBUF);
	}
}
6d2010ae 772
1c79356b
A
/*
 * Open a bpf device.  Returns ENXIO for an illegal minor device number,
 * EBUSY if the minor is already open by another process.
 */
/* ARGSUSED */
int
bpfopen(dev_t dev, int flags, __unused int fmt,
    struct proc *p)
{
	struct bpf_d *d;

	lck_mtx_lock(bpf_mlock);
	if ((unsigned int) minor(dev) >= nbpfilter) {
		lck_mtx_unlock(bpf_mlock);
		return ENXIO;
	}
	/*
	 * New device nodes are created on demand when opening the last one.
	 * The programming model is for processes to loop on the minor starting
	 * at 0 as long as EBUSY is returned. The loop stops when either the
	 * open succeeds or an error other than EBUSY is returned. That means
	 * that bpf_make_dev_t() must block all processes that are opening the
	 * last node. If not all processes are blocked, they could unexpectedly
	 * get ENOENT and abort their opening loop.
	 */
	if ((unsigned int) minor(dev) == (nbpfilter - 1)) {
		bpf_make_dev_t(major(dev));
	}

	/*
	 * Each minor can be opened by only one process. If the requested
	 * minor is in use, return EBUSY.
	 *
	 * Important: bpfopen() and bpfclose() have to check and set the status
	 * of a device in the same locking context otherwise the device may be
	 * leaked because the vnode use count will be unexpectedly greater
	 * than 1 when close() is called.
	 */
	if (bpf_dtab[minor(dev)] == NULL) {
		/* Reserve while opening */
		bpf_dtab[minor(dev)] = BPF_DEV_RESERVED;
	} else {
		lck_mtx_unlock(bpf_mlock);
		return EBUSY;
	}
	d = (struct bpf_d *)_MALLOC(sizeof(struct bpf_d), M_DEVBUF,
	    M_WAIT | M_ZERO);
	if (d == NULL) {
		/* this really is a catastrophic failure */
		printf("bpfopen: malloc bpf_d failed\n");
		bpf_dtab[minor(dev)] = NULL;
		lck_mtx_unlock(bpf_mlock);
		return ENOMEM;
	}

	/* Mark "in use" and do most initialization. */
	bpf_acquire_d(d);
	d->bd_bufsize = bpf_bufsize;
	d->bd_sig = SIGIO;
	d->bd_seesent = 1;
	d->bd_oflags = flags;
	d->bd_state = BPF_IDLE;
	d->bd_traffic_class = SO_TC_BE;
	/* Not attached to any interface yet */
	d->bd_flags |= BPF_DETACHED;
	if (bpf_wantpktap) {
		d->bd_flags |= BPF_WANT_PKTAP;
	} else {
		d->bd_flags &= ~BPF_WANT_PKTAP;
	}
	d->bd_thread_call = thread_call_allocate(bpf_timed_out, d);
	if (d->bd_thread_call == NULL) {
		printf("bpfopen: malloc thread call failed\n");
		bpf_dtab[minor(dev)] = NULL;
		bpf_release_d(d);
		lck_mtx_unlock(bpf_mlock);

		return ENOMEM;
	}
	d->bd_opened_by = p;
	uuid_generate(d->bd_uuid);

#if CONFIG_MACF_NET
	mac_bpfdesc_label_init(d);
	mac_bpfdesc_label_associate(kauth_cred_get(), d);
#endif
	bpf_dtab[minor(dev)] = d; /* Mark opened */
	lck_mtx_unlock(bpf_mlock);

	return 0;
}
863
/*
 * Close the descriptor by detaching it from its interface,
 * deallocating its buffers, and marking it free.
 */
/* ARGSUSED */
int
bpfclose(dev_t dev, __unused int flags, __unused int fmt,
    __unused struct proc *p)
{
	struct bpf_d *d;

	/* Take BPF lock to ensure no other thread is using the device */
	lck_mtx_lock(bpf_mlock);

	d = bpf_dtab[minor(dev)];
	if (d == NULL || d == BPF_DEV_RESERVED) {
		lck_mtx_unlock(bpf_mlock);
		return ENXIO;
	}

	/*
	 * Other threads may call bpf_detachd() if we drop the bpf_mlock
	 */
	d->bd_flags |= BPF_CLOSING;

	if (bpf_debug != 0) {
		printf("%s: %llx\n",
		    __func__, (uint64_t)VM_KERNEL_ADDRPERM(d));
	}

	bpf_dtab[minor(dev)] = BPF_DEV_RESERVED; /* Reserve while closing */

	/*
	 * Deal with any in-progress timeouts.
	 */
	switch (d->bd_state) {
	case BPF_IDLE:
		/*
		 * Not waiting for a timeout, and no timeout happened.
		 */
		break;

	case BPF_WAITING:
		/*
		 * Waiting for a timeout.
		 * Cancel any timer that has yet to go off,
		 * and mark the state as "closing".
		 * Then drop the lock to allow any timers that
		 * *have* gone off to run to completion, and wait
		 * for them to finish.
		 */
		if (!bpf_stop_timer(d)) {
			/*
			 * There was no pending call, so the call must
			 * have been in progress. Wait for the call to
			 * complete; we have to drop the lock while
			 * waiting to let the in-progress call complete.
			 */
			d->bd_state = BPF_DRAINING;
			while (d->bd_state == BPF_DRAINING) {
				msleep((caddr_t)d, bpf_mlock, PRINET,
				    "bpfdraining", NULL);
			}
		}
		d->bd_state = BPF_IDLE;
		break;

	case BPF_TIMED_OUT:
		/*
		 * Timer went off, and the timeout routine finished.
		 */
		d->bd_state = BPF_IDLE;
		break;

	case BPF_DRAINING:
		/*
		 * Another thread is blocked on a close waiting for
		 * a timeout to finish.
		 * This "shouldn't happen", as the first thread to enter
		 * bpfclose() will set bpf_dtab[minor(dev)] to 1, and
		 * all subsequent threads should see that and fail with
		 * ENXIO.
		 */
		panic("Two threads blocked in a BPF close");
		break;
	}

	if (d->bd_bif) {
		bpf_detachd(d, 1);
	}
	selthreadclear(&d->bd_sel);
#if CONFIG_MACF_NET
	mac_bpfdesc_label_destroy(d);
#endif
	thread_call_free(d->bd_thread_call);

	/* Wait for any in-flight read to finish before freeing the buffers */
	while (d->bd_hbuf_read != 0) {
		msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
	}

	bpf_freed(d);

	/* Mark free in same context as bpfopen comes to check */
	bpf_dtab[minor(dev)] = NULL; /* Mark closed */

	/* Drop the open reference; d is freed once all references are gone */
	bpf_release_d(d);

	lck_mtx_unlock(bpf_mlock);

	return 0;
}
975
0a7de745 976#define BPF_SLEEP bpf_sleep
1c79356b 977
91447636
A
978static int
979bpf_sleep(struct bpf_d *d, int pri, const char *wmesg, int timo)
1c79356b 980{
6d2010ae 981 u_int64_t abstime = 0;
1c79356b 982
0a7de745 983 if (timo != 0) {
6d2010ae 984 clock_interval_to_deadline(timo, NSEC_PER_SEC / hz, &abstime);
0a7de745 985 }
d9a64523 986
0a7de745 987 return msleep1((caddr_t)d, bpf_mlock, pri, wmesg, abstime);
d9a64523
A
988}
989
990static void
991bpf_finalize_pktap(struct bpf_hdr *hp, struct pktap_header *pktaphdr)
992{
993 if (pktaphdr->pth_flags & PTH_FLAG_V2_HDR) {
994 struct pktap_v2_hdr *pktap_v2_hdr;
995
996 pktap_v2_hdr = (struct pktap_v2_hdr *)pktaphdr;
997
0a7de745 998 if (pktap_v2_hdr->pth_flags & PTH_FLAG_DELAY_PKTAP) {
d9a64523 999 pktap_v2_finalize_proc_info(pktap_v2_hdr);
0a7de745 1000 }
d9a64523 1001 } else {
0a7de745 1002 if (pktaphdr->pth_flags & PTH_FLAG_DELAY_PKTAP) {
d9a64523 1003 pktap_finalize_proc_info(pktaphdr);
0a7de745 1004 }
d9a64523
A
1005
1006 if (pktaphdr->pth_flags & PTH_FLAG_TSTAMP) {
1007 hp->bh_tstamp.tv_sec = pktaphdr->pth_tstamp.tv_sec;
1008 hp->bh_tstamp.tv_usec = pktaphdr->pth_tstamp.tv_usec;
1009 }
1010 }
1c79356b 1011}
1c79356b
A
1012
/*
 * Rotate the packet buffers in descriptor d.  Move the store buffer
 * into the hold slot, and the free buffer into the store slot.
 * Zero the length of the new store buffer.
 *
 * Wrapped in do { } while (0) so the macro expands to exactly one
 * statement: the previous expansion began with an else-less `if`,
 * which is unsafe inside an unbraced if/else at a call site and left
 * the trailing assignments unconditional-looking but syntactically
 * detached (CERT PRE10-C).  The argument is now also parenthesized
 * in the bd_hbuf_read test.
 */
#define ROTATE_BUFFERS(d) do { \
	if ((d)->bd_hbuf_read != 0) \
	        panic("rotating bpf buffers during read"); \
	(d)->bd_hbuf = (d)->bd_sbuf; \
	(d)->bd_hlen = (d)->bd_slen; \
	(d)->bd_hcnt = (d)->bd_scnt; \
	(d)->bd_sbuf = (d)->bd_fbuf; \
	(d)->bd_slen = 0; \
	(d)->bd_scnt = 0; \
	(d)->bd_fbuf = NULL; \
} while (0)
1c79356b
A
/*
 * bpfread - read next chunk of packets from buffers
 *
 * Copies the contents of the hold buffer to userland in one shot.
 * The caller's buffer must be exactly bd_bufsize bytes.  While the
 * copyout is in progress the descriptor is marked bd_hbuf_read so
 * that no other thread rotates or resets the buffers underneath us.
 *
 * Returns 0 on success (possibly having copied nothing on a timeout),
 * ENXIO if the device is closed/detached, EINVAL on a wrong-sized
 * buffer, EWOULDBLOCK for an empty non-blocking read, or the error
 * from an interrupted sleep.
 */
int
bpfread(dev_t dev, struct uio *uio, int ioflag)
{
	struct bpf_d *d;
	caddr_t hbuf;
	int timed_out, hbuf_len;
	int error;
	int flags;

	lck_mtx_lock(bpf_mlock);

	d = bpf_dtab[minor(dev)];
	if (d == NULL || d == BPF_DEV_RESERVED ||
	    (d->bd_flags & BPF_CLOSING) != 0) {
		lck_mtx_unlock(bpf_mlock);
		return ENXIO;
	}

	/*
	 * Take a reference for the duration of the read; every exit path
	 * below balances it with bpf_release_d().  Presumably this pins
	 * the descriptor across the sleeps/unlocks in this function.
	 */
	bpf_acquire_d(d);

	/*
	 * Restrict application to use a buffer the same size as
	 * the kernel buffers.
	 */
	if (uio_resid(uio) != d->bd_bufsize) {
		bpf_release_d(d);
		lck_mtx_unlock(bpf_mlock);
		return EINVAL;
	}

	/* Cancel a select/poll timer; remember whether it already fired. */
	if (d->bd_state == BPF_WAITING) {
		bpf_stop_timer(d);
	}

	timed_out = (d->bd_state == BPF_TIMED_OUT);
	d->bd_state = BPF_IDLE;

	/* Wait for any in-flight read that has claimed the hold buffer. */
	while (d->bd_hbuf_read != 0) {
		msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
	}

	/* The device may have been closed while we slept above. */
	if ((d->bd_flags & BPF_CLOSING) != 0) {
		bpf_release_d(d);
		lck_mtx_unlock(bpf_mlock);
		return ENXIO;
	}
	/*
	 * If the hold buffer is empty, then do a timed sleep, which
	 * ends when the timeout expires or when enough packets
	 * have arrived to fill the store buffer.
	 */
	while (d->bd_hbuf == 0) {
		if ((d->bd_immediate || timed_out || (ioflag & IO_NDELAY)) &&
		    d->bd_slen != 0) {
			/*
			 * We're in immediate mode, or are reading
			 * in non-blocking mode, or a timer was
			 * started before the read (e.g., by select()
			 * or poll()) and has expired and a packet(s)
			 * either arrived since the previous
			 * read or arrived while we were asleep.
			 * Rotate the buffers and return what's here.
			 */
			ROTATE_BUFFERS(d);
			break;
		}

		/*
		 * No data is available, check to see if the bpf device
		 * is still pointed at a real interface.  If not, return
		 * ENXIO so that the userland process knows to rebind
		 * it before using it again.
		 */
		if (d->bd_bif == NULL) {
			bpf_release_d(d);
			lck_mtx_unlock(bpf_mlock);
			return ENXIO;
		}
		if (ioflag & IO_NDELAY) {
			bpf_release_d(d);
			lck_mtx_unlock(bpf_mlock);
			return EWOULDBLOCK;
		}
		/* Block (with the read timeout, if any) for more packets. */
		error = BPF_SLEEP(d, PRINET | PCATCH, "bpf", d->bd_rtout);
		/*
		 * Make sure device is still opened
		 */
		if ((d->bd_flags & BPF_CLOSING) != 0) {
			bpf_release_d(d);
			lck_mtx_unlock(bpf_mlock);
			return ENXIO;
		}

		/* Another reader may have claimed the hold buffer meanwhile. */
		while (d->bd_hbuf_read != 0) {
			msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading",
			    NULL);
		}

		if ((d->bd_flags & BPF_CLOSING) != 0) {
			bpf_release_d(d);
			lck_mtx_unlock(bpf_mlock);
			return ENXIO;
		}

		if (error == EINTR || error == ERESTART) {
			if (d->bd_hbuf != NULL) {
				/*
				 * Because we msleep, the hold buffer might
				 * be filled when we wake up.  Avoid rotating
				 * in this case.
				 */
				break;
			}
			if (d->bd_slen != 0) {
				/*
				 * Sometimes we may be interrupted often and
				 * the sleep above will not timeout.
				 * Regardless, we should rotate the buffers
				 * if there's any new data pending and
				 * return it.
				 */
				ROTATE_BUFFERS(d);
				break;
			}
			bpf_release_d(d);
			lck_mtx_unlock(bpf_mlock);
			/* Map ERESTART to EINTR so userland sees one code. */
			if (error == ERESTART) {
				printf("%s: %llx ERESTART to EINTR\n",
				    __func__, (uint64_t)VM_KERNEL_ADDRPERM(d));
				error = EINTR;
			}
			return error;
		}
		if (error == EWOULDBLOCK) {
			/*
			 * On a timeout, return what's in the buffer,
			 * which may be nothing.  If there is something
			 * in the store buffer, we can rotate the buffers.
			 */
			if (d->bd_hbuf) {
				/*
				 * We filled up the buffer in between
				 * getting the timeout and arriving
				 * here, so we don't need to rotate.
				 */
				break;
			}

			if (d->bd_slen == 0) {
				bpf_release_d(d);
				lck_mtx_unlock(bpf_mlock);
				return 0;
			}
			ROTATE_BUFFERS(d);
			break;
		}
	}
	/*
	 * At this point, we know we have something in the hold slot.
	 */

	/*
	 * Set the hold buffer read. So we do not
	 * rotate the buffers until the hold buffer
	 * read is complete. Also to avoid issues resulting
	 * from page faults during disk sleep (<rdar://problem/13436396>).
	 */
	d->bd_hbuf_read = 1;
	hbuf = d->bd_hbuf;
	hbuf_len = d->bd_hlen;
	flags = d->bd_flags;
	/* Drop the lock for the (possibly faulting) copyout below. */
	lck_mtx_unlock(bpf_mlock);

#ifdef __APPLE__
	/*
	 * Before we move data to userland, we fill out the extended
	 * header fields.
	 */
	if (flags & BPF_EXTENDED_HDR) {
		char *p;

		p = hbuf;
		/* Walk each record: header + captured bytes, word-aligned. */
		while (p < hbuf + hbuf_len) {
			struct bpf_hdr_ext *ehp;
			uint32_t flowid;
			struct so_procinfo soprocinfo;
			int found = 0;

			ehp = (struct bpf_hdr_ext *)(void *)p;
			/* Resolve the flow id to pid/comm, then clear it. */
			if ((flowid = ehp->bh_flowid) != 0) {
				if (ehp->bh_proto == IPPROTO_TCP) {
					found = inp_findinpcb_procinfo(&tcbinfo,
					    flowid, &soprocinfo);
				} else if (ehp->bh_proto == IPPROTO_UDP) {
					found = inp_findinpcb_procinfo(&udbinfo,
					    flowid, &soprocinfo);
				}
				if (found == 1) {
					ehp->bh_pid = soprocinfo.spi_pid;
					strlcpy(&ehp->bh_comm[0], &soprocinfo.spi_proc_name[0], sizeof(ehp->bh_comm));
				}
				ehp->bh_flowid = 0;
			}

			if (flags & BPF_FINALIZE_PKTAP) {
				struct pktap_header *pktaphdr;

				pktaphdr = (struct pktap_header *)(void *)
				    (p + BPF_WORDALIGN(ehp->bh_hdrlen));

				bpf_finalize_pktap((struct bpf_hdr *) ehp,
				    pktaphdr);
			}
			p += BPF_WORDALIGN(ehp->bh_hdrlen + ehp->bh_caplen);
		}
	} else if (flags & BPF_FINALIZE_PKTAP) {
		char *p;

		p = hbuf;
		while (p < hbuf + hbuf_len) {
			struct bpf_hdr *hp;
			struct pktap_header *pktaphdr;

			hp = (struct bpf_hdr *)(void *)p;
			pktaphdr = (struct pktap_header *)(void *)
			    (p + BPF_WORDALIGN(hp->bh_hdrlen));

			bpf_finalize_pktap(hp, pktaphdr);

			p += BPF_WORDALIGN(hp->bh_hdrlen + hp->bh_caplen);
		}
	}
#endif

	/*
	 * Move data from hold buffer into user space.
	 * We know the entire buffer is transferred since
	 * we checked above that the read buffer is bpf_bufsize bytes.
	 */
	error = UIOMOVE(hbuf, hbuf_len, UIO_READ, uio);

	lck_mtx_lock(bpf_mlock);
	/*
	 * Make sure device is still opened
	 */
	if ((d->bd_flags & BPF_CLOSING) != 0) {
		bpf_release_d(d);
		lck_mtx_unlock(bpf_mlock);
		return ENXIO;
	}

	/* Recycle the consumed hold buffer and wake waiters on bd_hbuf_read. */
	d->bd_hbuf_read = 0;
	d->bd_fbuf = d->bd_hbuf;
	d->bd_hbuf = NULL;
	d->bd_hlen = 0;
	d->bd_hcnt = 0;
	wakeup((caddr_t)d);

	bpf_release_d(d);
	lck_mtx_unlock(bpf_mlock);
	return error;
}
1293
1c79356b
A
/*
 * If there are processes sleeping on this descriptor, wake them up.
 * Delivers, in order: a wakeup() to msleep'ers, an async signal
 * (if FIOASYNC + BIOCSRSIG were configured), a select()/poll()
 * wakeup, and a knote for kevent listeners.
 */
static void
bpf_wakeup(struct bpf_d *d)
{
	/* A select/poll timer is obsolete once we deliver a wakeup. */
	if (d->bd_state == BPF_WAITING) {
		bpf_stop_timer(d);
		d->bd_state = BPF_IDLE;
	}
	/* Threads sleeping in bpfread()/BPF_SLEEP on this descriptor. */
	wakeup((caddr_t)d);
	/* Async notification: bd_async set by FIOASYNC, bd_sig by BIOCSRSIG. */
	if (d->bd_async && d->bd_sig && d->bd_sigio) {
		pgsigio(d->bd_sigio, d->bd_sig);
	}

	selwakeup(&d->bd_sel);
	/* Kick kevent listeners attached to this descriptor, if any. */
	if ((d->bd_flags & BPF_KNOTE)) {
		KNOTE(&d->bd_sel.si_note, 1);
	}
}
1314
6d2010ae
A
1315static void
1316bpf_timed_out(void *arg, __unused void *dummy)
1317{
1318 struct bpf_d *d = (struct bpf_d *)arg;
1319
1320 lck_mtx_lock(bpf_mlock);
1321 if (d->bd_state == BPF_WAITING) {
1322 /*
d9a64523 1323 * There's a select or kqueue waiting for this; if there's
6d2010ae
A
1324 * now stuff to read, wake it up.
1325 */
1326 d->bd_state = BPF_TIMED_OUT;
0a7de745 1327 if (d->bd_slen != 0) {
6d2010ae 1328 bpf_wakeup(d);
0a7de745 1329 }
6d2010ae
A
1330 } else if (d->bd_state == BPF_DRAINING) {
1331 /*
1332 * A close is waiting for this to finish.
1333 * Mark it as finished, and wake the close up.
1334 */
1335 d->bd_state = BPF_IDLE;
1336 bpf_wakeup(d);
1337 }
1338 lck_mtx_unlock(bpf_mlock);
1339}
6d2010ae 1340
55e303ae 1341/* keep in sync with bpf_movein above: */
0a7de745 1342#define MAX_DATALINK_HDR_LEN (sizeof(struct firewire_header))
55e303ae 1343
2d21ac55 1344int
91447636 1345bpfwrite(dev_t dev, struct uio *uio, __unused int ioflag)
1c79356b 1346{
2d21ac55 1347 struct bpf_d *d;
1c79356b 1348 struct ifnet *ifp;
2d21ac55 1349 struct mbuf *m = NULL;
91447636 1350 int error;
0a7de745 1351 char dst_buf[SOCKADDR_HDR_LEN + MAX_DATALINK_HDR_LEN];
b0d623f7 1352 int datlen = 0;
39236c6e
A
1353 int bif_dlt;
1354 int bd_hdrcmplt;
1c79356b 1355
2d21ac55
A
1356 lck_mtx_lock(bpf_mlock);
1357
55e303ae 1358 d = bpf_dtab[minor(dev)];
d9a64523
A
1359 if (d == NULL || d == BPF_DEV_RESERVED ||
1360 (d->bd_flags & BPF_CLOSING) != 0) {
2d21ac55 1361 lck_mtx_unlock(bpf_mlock);
0a7de745 1362 return ENXIO;
2d21ac55 1363 }
3e170ce0
A
1364
1365 bpf_acquire_d(d);
1366
1c79356b 1367 if (d->bd_bif == 0) {
3e170ce0 1368 bpf_release_d(d);
91447636 1369 lck_mtx_unlock(bpf_mlock);
0a7de745 1370 return ENXIO;
1c79356b
A
1371 }
1372
1373 ifp = d->bd_bif->bif_ifp;
1374
6d2010ae 1375 if ((ifp->if_flags & IFF_UP) == 0) {
3e170ce0 1376 bpf_release_d(d);
6d2010ae 1377 lck_mtx_unlock(bpf_mlock);
0a7de745 1378 return ENETDOWN;
6d2010ae 1379 }
b0d623f7 1380 if (uio_resid(uio) == 0) {
3e170ce0 1381 bpf_release_d(d);
91447636 1382 lck_mtx_unlock(bpf_mlock);
0a7de745 1383 return 0;
1c79356b 1384 }
55e303ae 1385 ((struct sockaddr *)dst_buf)->sa_len = sizeof(dst_buf);
6d2010ae 1386
316670eb
A
1387 /*
1388 * fix for PR-6849527
1389 * geting variables onto stack before dropping lock for bpf_movein()
1390 */
1391 bif_dlt = (int)d->bd_bif->bif_dlt;
1392 bd_hdrcmplt = d->bd_hdrcmplt;
1393
6d2010ae 1394 /* bpf_movein allocating mbufs; drop lock */
316670eb 1395 lck_mtx_unlock(bpf_mlock);
6d2010ae 1396
d9a64523 1397 error = bpf_movein(uio, bif_dlt, &m,
0a7de745
A
1398 bd_hdrcmplt ? NULL : (struct sockaddr *)dst_buf,
1399 &datlen);
316670eb 1400
3e170ce0
A
1401 /* take the lock again */
1402 lck_mtx_lock(bpf_mlock);
316670eb 1403 if (error) {
3e170ce0
A
1404 bpf_release_d(d);
1405 lck_mtx_unlock(bpf_mlock);
0a7de745 1406 return error;
1c79356b
A
1407 }
1408
3e170ce0
A
1409 /* verify the device is still open */
1410 if ((d->bd_flags & BPF_CLOSING) != 0) {
1411 bpf_release_d(d);
91447636 1412 lck_mtx_unlock(bpf_mlock);
2d21ac55 1413 m_freem(m);
0a7de745 1414 return ENXIO;
2d21ac55 1415 }
6d2010ae
A
1416
1417 if (d->bd_bif == NULL) {
3e170ce0 1418 bpf_release_d(d);
6d2010ae
A
1419 lck_mtx_unlock(bpf_mlock);
1420 m_free(m);
0a7de745 1421 return ENXIO;
6d2010ae
A
1422 }
1423
1424 if ((unsigned)datlen > ifp->if_mtu) {
3e170ce0 1425 bpf_release_d(d);
2d21ac55
A
1426 lck_mtx_unlock(bpf_mlock);
1427 m_freem(m);
0a7de745 1428 return EMSGSIZE;
1c79356b
A
1429 }
1430
2d21ac55
A
1431#if CONFIG_MACF_NET
1432 mac_mbuf_label_associate_bpfdesc(d, m);
1433#endif
316670eb
A
1434
1435 bpf_set_packet_service_class(m, d->bd_traffic_class);
1436
91447636
A
1437 lck_mtx_unlock(bpf_mlock);
1438
3e170ce0
A
1439 /*
1440 * The driver frees the mbuf.
1441 */
55e303ae 1442 if (d->bd_hdrcmplt) {
0a7de745 1443 if (d->bd_bif->bif_send) {
2d21ac55 1444 error = d->bd_bif->bif_send(ifp, d->bd_bif->bif_dlt, m);
0a7de745 1445 } else {
316670eb 1446 error = dlil_output(ifp, 0, m, NULL, NULL, 1, NULL);
0a7de745 1447 }
316670eb
A
1448 } else {
1449 error = dlil_output(ifp, PF_INET, m, NULL,
1450 (struct sockaddr *)dst_buf, 0, NULL);
91447636 1451 }
6d2010ae 1452
3e170ce0
A
1453 lck_mtx_lock(bpf_mlock);
1454 bpf_release_d(d);
1455 lck_mtx_unlock(bpf_mlock);
1456
0a7de745 1457 return error;
1c79356b
A
1458}
1459
1460/*
1461 * Reset a descriptor by flushing its packet buffer and clearing the
2d21ac55 1462 * receive and drop counts.
1c79356b
A
1463 */
1464static void
91447636 1465reset_d(struct bpf_d *d)
1c79356b 1466{
0a7de745 1467 if (d->bd_hbuf_read != 0) {
39236c6e 1468 panic("resetting buffers during read");
0a7de745 1469 }
39236c6e 1470
1c79356b
A
1471 if (d->bd_hbuf) {
1472 /* Free the hold buffer. */
1473 d->bd_fbuf = d->bd_hbuf;
2d21ac55 1474 d->bd_hbuf = NULL;
1c79356b
A
1475 }
1476 d->bd_slen = 0;
1477 d->bd_hlen = 0;
3e170ce0
A
1478 d->bd_scnt = 0;
1479 d->bd_hcnt = 0;
1c79356b
A
1480 d->bd_rcount = 0;
1481 d->bd_dcount = 0;
1482}
1483
d9a64523
A
1484static struct bpf_d *
1485bpf_get_device_from_uuid(uuid_t uuid)
1486{
1487 unsigned int i;
1488
1489 for (i = 0; i < nbpfilter; i++) {
1490 struct bpf_d *d = bpf_dtab[i];
1491
1492 if (d == NULL || d == BPF_DEV_RESERVED ||
0a7de745 1493 (d->bd_flags & BPF_CLOSING) != 0) {
d9a64523 1494 continue;
0a7de745
A
1495 }
1496 if (uuid_compare(uuid, d->bd_uuid) == 0) {
1497 return d;
1498 }
d9a64523
A
1499 }
1500
0a7de745 1501 return NULL;
d9a64523
A
1502}
1503
/*
 * The BIOCSETUP command "atomically" attach to the interface and
 * copy the buffer from another interface. This minimizes the risk
 * of missing packet because this is done while holding
 * the BPF global lock
 *
 * d_to:      destination descriptor, attached to `ifp` and seeded with
 *            a copy of the source descriptor's buffers.
 * uuid_from: UUID identifying the source descriptor (must have been
 *            opened by the same process).
 * ifp:       interface the destination descriptor is attached to.
 *
 * Returns 0 on success, or ENOENT/EACCES/ENXIO/EINVAL or the
 * bpf_setif() error on failure.
 */
static int
bpf_setup(struct bpf_d *d_to, uuid_t uuid_from, ifnet_t ifp)
{
	struct bpf_d *d_from;
	int error = 0;

	LCK_MTX_ASSERT(bpf_mlock, LCK_MTX_ASSERT_OWNED);

	/*
	 * Sanity checks
	 */
	d_from = bpf_get_device_from_uuid(uuid_from);
	if (d_from == NULL) {
		error = ENOENT;
		os_log_info(OS_LOG_DEFAULT,
		    "%s: uuids not found error %d",
		    __func__, error);
		return error;
	}
	if (d_from->bd_opened_by != d_to->bd_opened_by) {
		error = EACCES;
		os_log_info(OS_LOG_DEFAULT,
		    "%s: processes not matching error %d",
		    __func__, error);
		return error;
	}

	/*
	 * Prevent any read while copying
	 *
	 * bd_hbuf_read doubles as a busy flag: readers sleep on the
	 * descriptor address while it is set (see bpfread), so marking
	 * both descriptors keeps their buffers stable for the copy.
	 * Note the msleep calls drop and retake bpf_mlock, hence the
	 * re-checks of BPF_CLOSING below.
	 */
	while (d_to->bd_hbuf_read != 0) {
		msleep((caddr_t)d_to, bpf_mlock, PRINET, __func__, NULL);
	}
	d_to->bd_hbuf_read = 1;

	while (d_from->bd_hbuf_read != 0) {
		msleep((caddr_t)d_from, bpf_mlock, PRINET, __func__, NULL);
	}
	d_from->bd_hbuf_read = 1;

	/*
	 * Verify the devices have not been closed
	 */
	if (d_to->bd_flags & BPF_CLOSING) {
		error = ENXIO;
		os_log_info(OS_LOG_DEFAULT,
		    "%s: d_to is closing error %d",
		    __func__, error);
		goto done;
	}
	if (d_from->bd_flags & BPF_CLOSING) {
		error = ENXIO;
		os_log_info(OS_LOG_DEFAULT,
		    "%s: d_from is closing error %d",
		    __func__, error);
		goto done;
	}

	/*
	 * For now require the same buffer size
	 */
	if (d_from->bd_bufsize != d_to->bd_bufsize) {
		error = EINVAL;
		os_log_info(OS_LOG_DEFAULT,
		    "%s: bufsizes not matching error %d",
		    __func__, error);
		goto done;
	}

	/*
	 * Attach to the interface
	 */
	error = bpf_setif(d_to, ifp, false, true);
	if (error != 0) {
		os_log_info(OS_LOG_DEFAULT,
		    "%s: bpf_setif() failed error %d",
		    __func__, error);
		goto done;
	}

	/*
	 * Make sure the buffers are setup as expected by bpf_setif()
	 */
	ASSERT(d_to->bd_hbuf == NULL);
	ASSERT(d_to->bd_sbuf != NULL);
	ASSERT(d_to->bd_fbuf != NULL);

	/*
	 * Copy the buffers and update the pointers and counts
	 */
	memcpy(d_to->bd_sbuf, d_from->bd_sbuf, d_from->bd_slen);
	d_to->bd_slen = d_from->bd_slen;
	d_to->bd_scnt = d_from->bd_scnt;

	/* A populated hold buffer is copied into the (free) hold slot. */
	if (d_from->bd_hbuf != NULL) {
		d_to->bd_hbuf = d_to->bd_fbuf;
		d_to->bd_fbuf = NULL;
		memcpy(d_to->bd_hbuf, d_from->bd_hbuf, d_from->bd_hlen);
	}
	d_to->bd_hlen = d_from->bd_hlen;
	d_to->bd_hcnt = d_from->bd_hcnt;

	if (bpf_debug > 0) {
		os_log_info(OS_LOG_DEFAULT,
		    "%s: done slen %u scnt %u hlen %u hcnt %u",
		    __func__, d_to->bd_slen, d_to->bd_scnt,
		    d_to->bd_hlen, d_to->bd_hcnt);
	}
done:
	/* Release both busy flags and wake any readers parked on them. */
	d_from->bd_hbuf_read = 0;
	wakeup((caddr_t)d_from);

	d_to->bd_hbuf_read = 0;
	wakeup((caddr_t)d_to);

	return error;
}
1627
1c79356b
A
1628/*
1629 * FIONREAD Check for read packet available.
1630 * SIOCGIFADDR Get interface address - convenient hook to driver.
1631 * BIOCGBLEN Get buffer len [for read()].
1632 * BIOCSETF Set ethernet read filter.
1633 * BIOCFLUSH Flush read packet buffer.
1634 * BIOCPROMISC Put interface into promiscuous mode.
1635 * BIOCGDLT Get link layer type.
1636 * BIOCGETIF Get interface name.
1637 * BIOCSETIF Set interface.
1638 * BIOCSRTIMEOUT Set read timeout.
1639 * BIOCGRTIMEOUT Get read timeout.
1640 * BIOCGSTATS Get packet stats.
1641 * BIOCIMMEDIATE Set immediate mode.
1642 * BIOCVERSION Get filter language version.
9bccf70c
A
1643 * BIOCGHDRCMPLT Get "header already complete" flag
1644 * BIOCSHDRCMPLT Set "header already complete" flag
1645 * BIOCGSEESENT Get "see packets sent" flag
1646 * BIOCSSEESENT Set "see packets sent" flag
316670eb
A
1647 * BIOCSETTC Set traffic class.
1648 * BIOCGETTC Get traffic class.
1649 * BIOCSEXTHDR Set "extended header" flag
3e170ce0
A
1650 * BIOCSHEADDROP Drop head of the buffer if user is not reading
1651 * BIOCGHEADDROP Get "head-drop" flag
1c79356b
A
1652 */
1653/* ARGSUSED */
9bccf70c 1654int
2d21ac55 1655bpfioctl(dev_t dev, u_long cmd, caddr_t addr, __unused int flags,
b0d623f7 1656 struct proc *p)
1c79356b 1657{
2d21ac55 1658 struct bpf_d *d;
fe8ab488
A
1659 int error = 0;
1660 u_int int_arg;
316670eb 1661 struct ifreq ifr;
2d21ac55
A
1662
1663 lck_mtx_lock(bpf_mlock);
1c79356b 1664
55e303ae 1665 d = bpf_dtab[minor(dev)];
d9a64523
A
1666 if (d == NULL || d == BPF_DEV_RESERVED ||
1667 (d->bd_flags & BPF_CLOSING) != 0) {
2d21ac55 1668 lck_mtx_unlock(bpf_mlock);
0a7de745 1669 return ENXIO;
2d21ac55 1670 }
1c79356b 1671
3e170ce0
A
1672 bpf_acquire_d(d);
1673
0a7de745 1674 if (d->bd_state == BPF_WAITING) {
6d2010ae 1675 bpf_stop_timer(d);
0a7de745 1676 }
6d2010ae
A
1677 d->bd_state = BPF_IDLE;
1678
1c79356b 1679 switch (cmd) {
1c79356b
A
1680 default:
1681 error = EINVAL;
1682 break;
1683
1684 /*
1685 * Check for read packet available.
1686 */
0a7de745
A
1687 case FIONREAD: /* int */
1688 {
1689 int n;
1c79356b 1690
0a7de745
A
1691 n = d->bd_slen;
1692 if (d->bd_hbuf && d->bd_hbuf_read == 0) {
1693 n += d->bd_hlen;
1c79356b
A
1694 }
1695
0a7de745
A
1696 bcopy(&n, addr, sizeof(n));
1697 break;
1698 }
1c79356b 1699
0a7de745
A
1700 case SIOCGIFADDR: /* struct ifreq */
1701 {
1702 struct ifnet *ifp;
1703
1704 if (d->bd_bif == 0) {
1705 error = EINVAL;
1706 } else {
1707 ifp = d->bd_bif->bif_ifp;
1708 error = ifnet_ioctl(ifp, 0, cmd, addr);
1c79356b 1709 }
0a7de745
A
1710 break;
1711 }
1c79356b
A
1712
1713 /*
1714 * Get buffer len [for read()].
1715 */
0a7de745
A
1716 case BIOCGBLEN: /* u_int */
1717 bcopy(&d->bd_bufsize, addr, sizeof(u_int));
1c79356b
A
1718 break;
1719
1720 /*
1721 * Set buffer length.
1722 */
0a7de745 1723 case BIOCSBLEN: { /* u_int */
d9a64523
A
1724 u_int size;
1725 unsigned int maxbufsize = bpf_maxbufsize;
316670eb 1726
d9a64523
A
1727 /*
1728 * Allow larger buffer in head drop mode to with the
1729 * assumption the reading process may be low priority but
1730 * is interested in the most recent traffic
1731 */
1732 if (d->bd_headdrop != 0) {
1733 maxbufsize = 2 * bpf_maxbufsize;
1734 }
1c79356b 1735
d9a64523 1736 if (d->bd_bif != 0 || (d->bd_flags & BPF_DETACHING)) {
813fb2f6 1737 /*
d9a64523 1738 * Interface already attached, unable to change buffers
813fb2f6 1739 */
d9a64523
A
1740 error = EINVAL;
1741 break;
1742 }
0a7de745 1743 bcopy(addr, &size, sizeof(size));
d9a64523
A
1744
1745 if (size > maxbufsize) {
1746 d->bd_bufsize = maxbufsize;
1747
1748 os_log_info(OS_LOG_DEFAULT,
1749 "%s bufsize capped to %u from %u",
1750 __func__, d->bd_bufsize, size);
1751 } else if (size < BPF_MINBUFSIZE) {
1752 d->bd_bufsize = BPF_MINBUFSIZE;
1753
1754 os_log_info(OS_LOG_DEFAULT,
1755 "%s bufsize bumped to %u from %u",
1756 __func__, d->bd_bufsize, size);
1757 } else {
1c79356b
A
1758 d->bd_bufsize = size;
1759 }
1c79356b 1760
d9a64523 1761 /* It's a read/write ioctl */
0a7de745 1762 bcopy(&d->bd_bufsize, addr, sizeof(u_int));
d9a64523
A
1763 break;
1764 }
1c79356b
A
1765 /*
1766 * Set link layer read filter.
1767 */
39236c6e 1768 case BIOCSETF32:
0a7de745 1769 case BIOCSETFNR32: { /* struct bpf_program32 */
316670eb
A
1770 struct bpf_program32 prg32;
1771
0a7de745 1772 bcopy(addr, &prg32, sizeof(prg32));
316670eb 1773 error = bpf_setf(d, prg32.bf_len,
3e170ce0 1774 CAST_USER_ADDR_T(prg32.bf_insns), cmd);
1c79356b 1775 break;
2d21ac55 1776 }
b0d623f7 1777
39236c6e 1778 case BIOCSETF64:
0a7de745 1779 case BIOCSETFNR64: { /* struct bpf_program64 */
316670eb
A
1780 struct bpf_program64 prg64;
1781
0a7de745 1782 bcopy(addr, &prg64, sizeof(prg64));
3e170ce0 1783 error = bpf_setf(d, prg64.bf_len, prg64.bf_insns, cmd);
b0d623f7
A
1784 break;
1785 }
1786
1c79356b
A
1787 /*
1788 * Flush read packet buffer.
1789 */
1790 case BIOCFLUSH:
d9a64523
A
1791 while (d->bd_hbuf_read != 0) {
1792 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading",
1793 NULL);
39236c6e 1794 }
3e170ce0
A
1795 if ((d->bd_flags & BPF_CLOSING) != 0) {
1796 error = ENXIO;
1797 break;
1798 }
1c79356b 1799 reset_d(d);
1c79356b
A
1800 break;
1801
1802 /*
1803 * Put interface into promiscuous mode.
1804 */
1805 case BIOCPROMISC:
1806 if (d->bd_bif == 0) {
1807 /*
1808 * No interface attached yet.
1809 */
1810 error = EINVAL;
1811 break;
1812 }
1c79356b 1813 if (d->bd_promisc == 0) {
2d21ac55 1814 lck_mtx_unlock(bpf_mlock);
91447636 1815 error = ifnet_set_promiscuous(d->bd_bif->bif_ifp, 1);
2d21ac55 1816 lck_mtx_lock(bpf_mlock);
0a7de745 1817 if (error == 0) {
1c79356b 1818 d->bd_promisc = 1;
0a7de745 1819 }
1c79356b 1820 }
1c79356b
A
1821 break;
1822
1823 /*
1824 * Get device parameters.
1825 */
0a7de745
A
1826 case BIOCGDLT: /* u_int */
1827 if (d->bd_bif == 0) {
1c79356b 1828 error = EINVAL;
0a7de745
A
1829 } else {
1830 bcopy(&d->bd_bif->bif_dlt, addr, sizeof(u_int));
1831 }
1c79356b
A
1832 break;
1833
2d21ac55
A
1834 /*
1835 * Get a list of supported data link types.
1836 */
0a7de745 1837 case BIOCGDLTLIST: /* struct bpf_dltlist */
b0d623f7
A
1838 if (d->bd_bif == NULL) {
1839 error = EINVAL;
1840 } else {
316670eb 1841 error = bpf_getdltlist(d, addr, p);
b0d623f7
A
1842 }
1843 break;
2d21ac55
A
1844
1845 /*
1846 * Set data link type.
1847 */
0a7de745 1848 case BIOCSDLT: /* u_int */
316670eb
A
1849 if (d->bd_bif == NULL) {
1850 error = EINVAL;
1851 } else {
1852 u_int dlt;
1853
0a7de745 1854 bcopy(addr, &dlt, sizeof(dlt));
d9a64523
A
1855
1856 if (dlt == DLT_PKTAP &&
1857 !(d->bd_flags & BPF_WANT_PKTAP)) {
5ba3f43e
A
1858 dlt = DLT_RAW;
1859 }
3e170ce0 1860 error = bpf_setdlt(d, dlt);
316670eb
A
1861 }
1862 break;
2d21ac55 1863
1c79356b 1864 /*
9bccf70c 1865 * Get interface name.
1c79356b 1866 */
0a7de745
A
1867 case BIOCGETIF: /* struct ifreq */
1868 if (d->bd_bif == 0) {
1c79356b 1869 error = EINVAL;
0a7de745 1870 } else {
9bccf70c 1871 struct ifnet *const ifp = d->bd_bif->bif_ifp;
9bccf70c 1872
316670eb 1873 snprintf(((struct ifreq *)(void *)addr)->ifr_name,
0a7de745 1874 sizeof(ifr.ifr_name), "%s", if_name(ifp));
9bccf70c 1875 }
1c79356b
A
1876 break;
1877
1878 /*
1879 * Set interface.
1880 */
0a7de745
A
1881 case BIOCSETIF: { /* struct ifreq */
1882 ifnet_t ifp;
316670eb 1883
0a7de745 1884 bcopy(addr, &ifr, sizeof(ifr));
316670eb
A
1885 ifr.ifr_name[IFNAMSIZ - 1] = '\0';
1886 ifp = ifunit(ifr.ifr_name);
0a7de745 1887 if (ifp == NULL) {
2d21ac55 1888 error = ENXIO;
0a7de745 1889 } else {
d9a64523 1890 error = bpf_setif(d, ifp, true, false);
0a7de745 1891 }
1c79356b 1892 break;
2d21ac55 1893 }
1c79356b
A
1894
1895 /*
1896 * Set read timeout.
1897 */
0a7de745 1898 case BIOCSRTIMEOUT32: { /* struct user32_timeval */
316670eb
A
1899 struct user32_timeval _tv;
1900 struct timeval tv;
b0d623f7 1901
0a7de745 1902 bcopy(addr, &_tv, sizeof(_tv));
316670eb
A
1903 tv.tv_sec = _tv.tv_sec;
1904 tv.tv_usec = _tv.tv_usec;
1905
1906 /*
1907 * Subtract 1 tick from tvtohz() since this isn't
1908 * a one-shot timer.
1909 */
0a7de745 1910 if ((error = itimerfix(&tv)) == 0) {
316670eb 1911 d->bd_rtout = tvtohz(&tv) - 1;
0a7de745 1912 }
316670eb
A
1913 break;
1914 }
1915
0a7de745 1916 case BIOCSRTIMEOUT64: { /* struct user64_timeval */
316670eb
A
1917 struct user64_timeval _tv;
1918 struct timeval tv;
1919
0a7de745 1920 bcopy(addr, &_tv, sizeof(_tv));
316670eb
A
1921 tv.tv_sec = _tv.tv_sec;
1922 tv.tv_usec = _tv.tv_usec;
1923
1924 /*
1925 * Subtract 1 tick from tvtohz() since this isn't
1926 * a one-shot timer.
1927 */
0a7de745 1928 if ((error = itimerfix(&tv)) == 0) {
316670eb 1929 d->bd_rtout = tvtohz(&tv) - 1;
0a7de745 1930 }
316670eb
A
1931 break;
1932 }
1c79356b 1933
39236c6e 1934 /*
1c79356b
A
1935 * Get read timeout.
1936 */
0a7de745 1937 case BIOCGRTIMEOUT32: { /* struct user32_timeval */
316670eb 1938 struct user32_timeval tv;
1c79356b 1939
0a7de745 1940 bzero(&tv, sizeof(tv));
316670eb
A
1941 tv.tv_sec = d->bd_rtout / hz;
1942 tv.tv_usec = (d->bd_rtout % hz) * tick;
0a7de745 1943 bcopy(&tv, addr, sizeof(tv));
316670eb
A
1944 break;
1945 }
6d2010ae 1946
0a7de745 1947 case BIOCGRTIMEOUT64: { /* struct user64_timeval */
316670eb 1948 struct user64_timeval tv;
6d2010ae 1949
0a7de745 1950 bzero(&tv, sizeof(tv));
316670eb
A
1951 tv.tv_sec = d->bd_rtout / hz;
1952 tv.tv_usec = (d->bd_rtout % hz) * tick;
0a7de745 1953 bcopy(&tv, addr, sizeof(tv));
316670eb
A
1954 break;
1955 }
1c79356b
A
1956
1957 /*
1958 * Get packet stats.
1959 */
0a7de745 1960 case BIOCGSTATS: { /* struct bpf_stat */
316670eb 1961 struct bpf_stat bs;
1c79356b 1962
0a7de745 1963 bzero(&bs, sizeof(bs));
316670eb
A
1964 bs.bs_recv = d->bd_rcount;
1965 bs.bs_drop = d->bd_dcount;
0a7de745 1966 bcopy(&bs, addr, sizeof(bs));
316670eb
A
1967 break;
1968 }
1c79356b
A
1969
1970 /*
1971 * Set immediate mode.
1972 */
0a7de745 1973 case BIOCIMMEDIATE: /* u_int */
3e170ce0 1974 d->bd_immediate = *(u_int *)(void *)addr;
1c79356b
A
1975 break;
1976
0a7de745 1977 case BIOCVERSION: { /* struct bpf_version */
316670eb 1978 struct bpf_version bv;
1c79356b 1979
0a7de745 1980 bzero(&bv, sizeof(bv));
316670eb
A
1981 bv.bv_major = BPF_MAJOR_VERSION;
1982 bv.bv_minor = BPF_MINOR_VERSION;
0a7de745 1983 bcopy(&bv, addr, sizeof(bv));
316670eb
A
1984 break;
1985 }
1c79356b 1986
9bccf70c
A
1987 /*
1988 * Get "header already complete" flag
1989 */
0a7de745
A
1990 case BIOCGHDRCMPLT: /* u_int */
1991 bcopy(&d->bd_hdrcmplt, addr, sizeof(u_int));
9bccf70c
A
1992 break;
1993
1994 /*
1995 * Set "header already complete" flag
1996 */
0a7de745
A
1997 case BIOCSHDRCMPLT: /* u_int */
1998 bcopy(addr, &int_arg, sizeof(int_arg));
316670eb 1999 d->bd_hdrcmplt = int_arg ? 1 : 0;
9bccf70c
A
2000 break;
2001
2002 /*
2003 * Get "see sent packets" flag
2004 */
0a7de745
A
2005 case BIOCGSEESENT: /* u_int */
2006 bcopy(&d->bd_seesent, addr, sizeof(u_int));
9bccf70c
A
2007 break;
2008
2009 /*
2010 * Set "see sent packets" flag
2011 */
0a7de745
A
2012 case BIOCSSEESENT: /* u_int */
2013 bcopy(addr, &d->bd_seesent, sizeof(u_int));
316670eb
A
2014 break;
2015
2016 /*
2017 * Set traffic service class
2018 */
0a7de745 2019 case BIOCSETTC: { /* int */
316670eb
A
2020 int tc;
2021
0a7de745 2022 bcopy(addr, &tc, sizeof(int));
316670eb 2023 error = bpf_set_traffic_class(d, tc);
9bccf70c 2024 break;
316670eb 2025 }
9bccf70c 2026
316670eb
A
2027 /*
2028 * Get traffic service class
2029 */
0a7de745
A
2030 case BIOCGETTC: /* int */
2031 bcopy(&d->bd_traffic_class, addr, sizeof(int));
1c79356b
A
2032 break;
2033
0a7de745 2034 case FIONBIO: /* Non-blocking I/O; int */
316670eb
A
2035 break;
2036
0a7de745
A
2037 case FIOASYNC: /* Send signal on receive packets; int */
2038 bcopy(addr, &d->bd_async, sizeof(int));
1c79356b 2039 break;
9bccf70c 2040#ifndef __APPLE__
1c79356b
A
2041 case FIOSETOWN:
2042 error = fsetown(*(int *)addr, &d->bd_sigio);
2043 break;
2044
2045 case FIOGETOWN:
2046 *(int *)addr = fgetown(d->bd_sigio);
2047 break;
2048
2049 /* This is deprecated, FIOSETOWN should be used instead. */
2050 case TIOCSPGRP:
2051 error = fsetown(-(*(int *)addr), &d->bd_sigio);
2052 break;
2053
2054 /* This is deprecated, FIOGETOWN should be used instead. */
2055 case TIOCGPGRP:
2056 *(int *)addr = -fgetown(d->bd_sigio);
2057 break;
2058#endif
0a7de745 2059 case BIOCSRSIG: { /* Set receive signal; u_int */
316670eb 2060 u_int sig;
1c79356b 2061
0a7de745 2062 bcopy(addr, &sig, sizeof(u_int));
1c79356b 2063
0a7de745 2064 if (sig >= NSIG) {
316670eb 2065 error = EINVAL;
0a7de745 2066 } else {
316670eb 2067 d->bd_sig = sig;
0a7de745 2068 }
1c79356b
A
2069 break;
2070 }
0a7de745
A
2071 case BIOCGRSIG: /* u_int */
2072 bcopy(&d->bd_sig, addr, sizeof(u_int));
316670eb 2073 break;
39236c6e 2074#ifdef __APPLE__
0a7de745
A
2075 case BIOCSEXTHDR: /* u_int */
2076 bcopy(addr, &int_arg, sizeof(int_arg));
2077 if (int_arg) {
fe8ab488 2078 d->bd_flags |= BPF_EXTENDED_HDR;
0a7de745 2079 } else {
fe8ab488 2080 d->bd_flags &= ~BPF_EXTENDED_HDR;
0a7de745 2081 }
316670eb 2082 break;
39236c6e 2083
0a7de745
A
2084 case BIOCGIFATTACHCOUNT: { /* struct ifreq */
2085 ifnet_t ifp;
39236c6e
A
2086 struct bpf_if *bp;
2087
0a7de745 2088 bcopy(addr, &ifr, sizeof(ifr));
39236c6e
A
2089 ifr.ifr_name[IFNAMSIZ - 1] = '\0';
2090 ifp = ifunit(ifr.ifr_name);
2091 if (ifp == NULL) {
2092 error = ENXIO;
2093 break;
2094 }
2095 ifr.ifr_intval = 0;
2096 for (bp = bpf_iflist; bp != 0; bp = bp->bif_next) {
2097 struct bpf_d *bpf_d;
d9a64523 2098
0a7de745 2099 if (bp->bif_ifp == NULL || bp->bif_ifp != ifp) {
39236c6e 2100 continue;
0a7de745 2101 }
d9a64523
A
2102 for (bpf_d = bp->bif_dlist; bpf_d;
2103 bpf_d = bpf_d->bd_next) {
39236c6e
A
2104 ifr.ifr_intval += 1;
2105 }
2106 }
0a7de745 2107 bcopy(&ifr, addr, sizeof(ifr));
39236c6e
A
2108 break;
2109 }
0a7de745 2110 case BIOCGWANTPKTAP: /* u_int */
fe8ab488 2111 int_arg = d->bd_flags & BPF_WANT_PKTAP ? 1 : 0;
0a7de745 2112 bcopy(&int_arg, addr, sizeof(int_arg));
fe8ab488
A
2113 break;
2114
0a7de745
A
2115 case BIOCSWANTPKTAP: /* u_int */
2116 bcopy(addr, &int_arg, sizeof(int_arg));
2117 if (int_arg) {
d9a64523 2118 d->bd_flags |= BPF_WANT_PKTAP;
0a7de745 2119 } else {
d9a64523 2120 d->bd_flags &= ~BPF_WANT_PKTAP;
0a7de745 2121 }
fe8ab488 2122 break;
39236c6e 2123#endif
3e170ce0
A
2124
2125 case BIOCSHEADDROP:
0a7de745 2126 bcopy(addr, &int_arg, sizeof(int_arg));
3e170ce0
A
2127 d->bd_headdrop = int_arg ? 1 : 0;
2128 break;
2129
2130 case BIOCGHEADDROP:
0a7de745 2131 bcopy(&d->bd_headdrop, addr, sizeof(int));
3e170ce0 2132 break;
d9a64523
A
2133
2134 case BIOCSTRUNCATE:
2135 bcopy(addr, &int_arg, sizeof(int_arg));
0a7de745
A
2136 if (int_arg) {
2137 d->bd_flags |= BPF_TRUNCATE;
2138 } else {
d9a64523 2139 d->bd_flags &= ~BPF_TRUNCATE;
0a7de745 2140 }
d9a64523
A
2141 break;
2142
2143 case BIOCGETUUID:
0a7de745 2144 bcopy(&d->bd_uuid, addr, sizeof(uuid_t));
d9a64523
A
2145 break;
2146
2147 case BIOCSETUP: {
2148 struct bpf_setup_args bsa;
0a7de745 2149 ifnet_t ifp;
d9a64523 2150
0a7de745 2151 bcopy(addr, &bsa, sizeof(struct bpf_setup_args));
d9a64523
A
2152 bsa.bsa_ifname[IFNAMSIZ - 1] = 0;
2153 ifp = ifunit(bsa.bsa_ifname);
2154 if (ifp == NULL) {
2155 error = ENXIO;
2156 os_log_info(OS_LOG_DEFAULT,
2157 "%s: ifnet not found for %s error %d",
2158 __func__, bsa.bsa_ifname, error);
2159 break;
0a7de745 2160 }
d9a64523
A
2161
2162 error = bpf_setup(d, bsa.bsa_uuid, ifp);
2163 break;
2164 }
2165 case BIOCSPKTHDRV2:
2166 bcopy(addr, &int_arg, sizeof(int_arg));
0a7de745 2167 if (int_arg != 0) {
d9a64523 2168 d->bd_flags |= BPF_PKTHDRV2;
0a7de745 2169 } else {
d9a64523 2170 d->bd_flags &= ~BPF_PKTHDRV2;
0a7de745 2171 }
d9a64523
A
2172 break;
2173
2174 case BIOCGPKTHDRV2:
2175 int_arg = d->bd_flags & BPF_PKTHDRV2 ? 1 : 0;
0a7de745 2176 bcopy(&int_arg, addr, sizeof(int));
d9a64523 2177 break;
316670eb
A
2178 }
2179
3e170ce0 2180 bpf_release_d(d);
91447636 2181 lck_mtx_unlock(bpf_mlock);
b0d623f7 2182
0a7de745 2183 return error;
1c79356b
A
2184}
2185
2186/*
2187 * Set d's packet filter program to fp. If this file already has a filter,
2188 * free it and replace it. Returns EINVAL for bogus requests.
2189 */
2190static int
3e170ce0
A
2191bpf_setf(struct bpf_d *d, u_int bf_len, user_addr_t bf_insns,
2192 u_long cmd)
1c79356b
A
2193{
2194 struct bpf_insn *fcode, *old;
2195 u_int flen, size;
1c79356b 2196
0a7de745 2197 while (d->bd_hbuf_read != 0) {
39236c6e 2198 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
0a7de745 2199 }
39236c6e 2200
0a7de745
A
2201 if ((d->bd_flags & BPF_CLOSING) != 0) {
2202 return ENXIO;
2203 }
d9a64523 2204
1c79356b 2205 old = d->bd_filter;
2d21ac55 2206 if (bf_insns == USER_ADDR_NULL) {
0a7de745
A
2207 if (bf_len != 0) {
2208 return EINVAL;
2209 }
2d21ac55 2210 d->bd_filter = NULL;
1c79356b 2211 reset_d(d);
0a7de745
A
2212 if (old != 0) {
2213 FREE(old, M_DEVBUF);
2214 }
2215 return 0;
1c79356b 2216 }
2d21ac55 2217 flen = bf_len;
0a7de745
A
2218 if (flen > BPF_MAXINSNS) {
2219 return EINVAL;
2220 }
1c79356b 2221
91447636 2222 size = flen * sizeof(struct bpf_insn);
1c79356b 2223 fcode = (struct bpf_insn *) _MALLOC(size, M_DEVBUF, M_WAIT);
9bccf70c 2224#ifdef __APPLE__
0a7de745
A
2225 if (fcode == NULL) {
2226 return ENOBUFS;
2227 }
9bccf70c 2228#endif
2d21ac55 2229 if (copyin(bf_insns, (caddr_t)fcode, size) == 0 &&
1c79356b 2230 bpf_validate(fcode, (int)flen)) {
1c79356b 2231 d->bd_filter = fcode;
d9a64523 2232
0a7de745 2233 if (cmd == BIOCSETF32 || cmd == BIOCSETF64) {
39236c6e 2234 reset_d(d);
0a7de745 2235 }
d9a64523 2236
0a7de745
A
2237 if (old != 0) {
2238 FREE(old, M_DEVBUF);
2239 }
1c79356b 2240
0a7de745 2241 return 0;
1c79356b 2242 }
0a7de745
A
2243 FREE(fcode, M_DEVBUF);
2244 return EINVAL;
1c79356b
A
2245}
2246
2247/*
2248 * Detach a file from its current interface (if attached at all) and attach
2249 * to the interface indicated by the name stored in ifr.
2250 * Return an errno or 0.
2251 */
2252static int
d9a64523 2253bpf_setif(struct bpf_d *d, ifnet_t theywant, bool do_reset, bool has_hbuf_read)
1c79356b
A
2254{
2255 struct bpf_if *bp;
2d21ac55 2256 int error;
39236c6e 2257
0a7de745 2258 while (d->bd_hbuf_read != 0 && !has_hbuf_read) {
39236c6e 2259 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
0a7de745 2260 }
39236c6e 2261
0a7de745
A
2262 if ((d->bd_flags & BPF_CLOSING) != 0) {
2263 return ENXIO;
2264 }
39236c6e 2265
1c79356b
A
2266 /*
2267 * Look through attached interfaces for the named one.
2268 */
2269 for (bp = bpf_iflist; bp != 0; bp = bp->bif_next) {
2270 struct ifnet *ifp = bp->bif_ifp;
2271
0a7de745 2272 if (ifp == 0 || ifp != theywant) {
1c79356b 2273 continue;
0a7de745 2274 }
fe8ab488 2275 /*
5ba3f43e 2276 * Do not use DLT_PKTAP, unless requested explicitly
fe8ab488 2277 */
0a7de745 2278 if (bp->bif_dlt == DLT_PKTAP && !(d->bd_flags & BPF_WANT_PKTAP)) {
fe8ab488 2279 continue;
0a7de745 2280 }
5c9f4661
A
2281 /*
2282 * Skip the coprocessor interface
2283 */
0a7de745 2284 if (!intcoproc_unrestricted && IFNET_IS_INTCOPROC(ifp)) {
5c9f4661 2285 continue;
0a7de745 2286 }
1c79356b
A
2287 /*
2288 * We found the requested interface.
813fb2f6
A
2289 * Allocate the packet buffers.
2290 */
2291 error = bpf_allocbufs(d);
0a7de745
A
2292 if (error != 0) {
2293 return error;
2294 }
813fb2f6
A
2295 /*
2296 * Detach if attached to something else.
1c79356b 2297 */
1c79356b 2298 if (bp != d->bd_bif) {
813fb2f6 2299 if (d->bd_bif != NULL) {
0a7de745
A
2300 if (bpf_detachd(d, 0) != 0) {
2301 return ENXIO;
2302 }
2303 }
2304 if (bpf_attachd(d, bp) != 0) {
2305 return ENXIO;
2d21ac55 2306 }
1c79356b 2307 }
d9a64523 2308 if (do_reset) {
0a7de745 2309 reset_d(d);
d9a64523 2310 }
0a7de745 2311 return 0;
1c79356b
A
2312 }
2313 /* Not found. */
0a7de745 2314 return ENXIO;
1c79356b
A
2315}
2316
2d21ac55
A
2317/*
2318 * Get a list of available data link type of the interface.
2319 */
2320static int
316670eb 2321bpf_getdltlist(struct bpf_d *d, caddr_t addr, struct proc *p)
2d21ac55 2322{
0a7de745
A
2323 u_int n;
2324 int error;
2325 struct ifnet *ifp;
2326 struct bpf_if *bp;
2327 user_addr_t dlist;
316670eb 2328 struct bpf_dltlist bfl;
b0d623f7 2329
0a7de745 2330 bcopy(addr, &bfl, sizeof(bfl));
b0d623f7 2331 if (proc_is64bit(p)) {
316670eb 2332 dlist = (user_addr_t)bfl.bfl_u.bflu_pad;
b0d623f7 2333 } else {
316670eb 2334 dlist = CAST_USER_ADDR_T(bfl.bfl_u.bflu_list);
2d21ac55 2335 }
b0d623f7 2336
2d21ac55
A
2337 ifp = d->bd_bif->bif_ifp;
2338 n = 0;
2339 error = 0;
fe8ab488 2340
2d21ac55 2341 for (bp = bpf_iflist; bp; bp = bp->bif_next) {
0a7de745 2342 if (bp->bif_ifp != ifp) {
2d21ac55 2343 continue;
0a7de745 2344 }
d9a64523 2345 /*
5ba3f43e 2346 * Do not use DLT_PKTAP, unless requested explicitly
fe8ab488 2347 */
0a7de745 2348 if (bp->bif_dlt == DLT_PKTAP && !(d->bd_flags & BPF_WANT_PKTAP)) {
fe8ab488 2349 continue;
0a7de745 2350 }
b0d623f7 2351 if (dlist != USER_ADDR_NULL) {
316670eb 2352 if (n >= bfl.bfl_len) {
0a7de745 2353 return ENOMEM;
2d21ac55 2354 }
b0d623f7 2355 error = copyout(&bp->bif_dlt, dlist,
0a7de745
A
2356 sizeof(bp->bif_dlt));
2357 if (error != 0) {
316670eb 2358 break;
0a7de745
A
2359 }
2360 dlist += sizeof(bp->bif_dlt);
2d21ac55
A
2361 }
2362 n++;
2363 }
316670eb 2364 bfl.bfl_len = n;
0a7de745 2365 bcopy(&bfl, addr, sizeof(bfl));
316670eb 2366
0a7de745 2367 return error;
2d21ac55
A
2368}
2369
2370/*
2371 * Set the data link type of a BPF instance.
2372 */
2373static int
3e170ce0 2374bpf_setdlt(struct bpf_d *d, uint32_t dlt)
2d21ac55
A
2375{
2376 int error, opromisc;
2377 struct ifnet *ifp;
2378 struct bpf_if *bp;
d9a64523 2379
0a7de745
A
2380 if (d->bd_bif->bif_dlt == dlt) {
2381 return 0;
2382 }
d9a64523 2383
0a7de745 2384 while (d->bd_hbuf_read != 0) {
39236c6e 2385 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
0a7de745 2386 }
39236c6e 2387
0a7de745
A
2388 if ((d->bd_flags & BPF_CLOSING) != 0) {
2389 return ENXIO;
2390 }
fe8ab488 2391
2d21ac55
A
2392 ifp = d->bd_bif->bif_ifp;
2393 for (bp = bpf_iflist; bp; bp = bp->bif_next) {
5ba3f43e
A
2394 if (bp->bif_ifp == ifp && bp->bif_dlt == dlt) {
2395 /*
2396 * Do not use DLT_PKTAP, unless requested explicitly
2397 */
d9a64523
A
2398 if (bp->bif_dlt == DLT_PKTAP &&
2399 !(d->bd_flags & BPF_WANT_PKTAP)) {
5ba3f43e
A
2400 continue;
2401 }
2d21ac55 2402 break;
5ba3f43e 2403 }
2d21ac55
A
2404 }
2405 if (bp != NULL) {
2406 opromisc = d->bd_promisc;
0a7de745
A
2407 if (bpf_detachd(d, 0) != 0) {
2408 return ENXIO;
2409 }
2d21ac55
A
2410 error = bpf_attachd(d, bp);
2411 if (error) {
2412 printf("bpf_setdlt: bpf_attachd %s%d failed (%d)\n",
d9a64523
A
2413 ifnet_name(bp->bif_ifp), ifnet_unit(bp->bif_ifp),
2414 error);
0a7de745 2415 return error;
2d21ac55
A
2416 }
2417 reset_d(d);
2418 if (opromisc) {
2419 lck_mtx_unlock(bpf_mlock);
2420 error = ifnet_set_promiscuous(bp->bif_ifp, 1);
2421 lck_mtx_lock(bpf_mlock);
3e170ce0
A
2422 if (error) {
2423 printf("%s: ifpromisc %s%d failed (%d)\n",
2424 __func__, ifnet_name(bp->bif_ifp),
2425 ifnet_unit(bp->bif_ifp), error);
2426 } else {
2d21ac55 2427 d->bd_promisc = 1;
3e170ce0 2428 }
2d21ac55
A
2429 }
2430 }
0a7de745 2431 return bp == NULL ? EINVAL : 0;
2d21ac55
A
2432}
2433
316670eb
A
2434static int
2435bpf_set_traffic_class(struct bpf_d *d, int tc)
2436{
2437 int error = 0;
2438
0a7de745 2439 if (!SO_VALID_TC(tc)) {
316670eb 2440 error = EINVAL;
0a7de745 2441 } else {
316670eb 2442 d->bd_traffic_class = tc;
0a7de745 2443 }
316670eb 2444
0a7de745 2445 return error;
316670eb
A
2446}
2447
2448static void
2449bpf_set_packet_service_class(struct mbuf *m, int tc)
2450{
0a7de745 2451 if (!(m->m_flags & M_PKTHDR)) {
316670eb 2452 return;
0a7de745 2453 }
316670eb
A
2454
2455 VERIFY(SO_VALID_TC(tc));
2456 (void) m_set_service_class(m, so_tc2msc(tc));
2457}
2458
1c79356b 2459/*
b0d623f7 2460 * Support for select()
1c79356b
A
2461 *
2462 * Return true iff the specific operation will not block indefinitely.
2463 * Otherwise, return false but make a note that a selwakeup() must be done.
2464 */
2465int
6d2010ae 2466bpfselect(dev_t dev, int which, void * wql, struct proc *p)
1c79356b 2467{
2d21ac55 2468 struct bpf_d *d;
6d2010ae 2469 int ret = 0;
1c79356b 2470
2d21ac55
A
2471 lck_mtx_lock(bpf_mlock);
2472
55e303ae 2473 d = bpf_dtab[minor(dev)];
d9a64523
A
2474 if (d == NULL || d == BPF_DEV_RESERVED ||
2475 (d->bd_flags & BPF_CLOSING) != 0) {
2d21ac55 2476 lck_mtx_unlock(bpf_mlock);
0a7de745 2477 return ENXIO;
2d21ac55 2478 }
55e303ae 2479
3e170ce0
A
2480 bpf_acquire_d(d);
2481
9bccf70c 2482 if (d->bd_bif == NULL) {
3e170ce0 2483 bpf_release_d(d);
91447636 2484 lck_mtx_unlock(bpf_mlock);
0a7de745 2485 return ENXIO;
9bccf70c
A
2486 }
2487
0a7de745 2488 while (d->bd_hbuf_read != 0) {
39236c6e 2489 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
0a7de745 2490 }
3e170ce0
A
2491
2492 if ((d->bd_flags & BPF_CLOSING) != 0) {
2493 bpf_release_d(d);
39236c6e 2494 lck_mtx_unlock(bpf_mlock);
0a7de745 2495 return ENXIO;
39236c6e
A
2496 }
2497
6d2010ae 2498 switch (which) {
0a7de745
A
2499 case FREAD:
2500 if (d->bd_hlen != 0 ||
2501 ((d->bd_immediate ||
2502 d->bd_state == BPF_TIMED_OUT) && d->bd_slen != 0)) {
2503 ret = 1; /* read has data to return */
2504 } else {
2505 /*
2506 * Read has no data to return.
2507 * Make the select wait, and start a timer if
2508 * necessary.
2509 */
2510 selrecord(p, &d->bd_sel, wql);
2511 bpf_start_timer(d);
2512 }
2513 break;
6d2010ae 2514
0a7de745
A
2515 case FWRITE:
2516 /* can't determine whether a write would block */
2517 ret = 1;
2518 break;
9bccf70c 2519 }
91447636 2520
3e170ce0 2521 bpf_release_d(d);
91447636 2522 lck_mtx_unlock(bpf_mlock);
3e170ce0 2523
0a7de745 2524 return ret;
1c79356b
A
2525}
2526
b0d623f7
A
2527/*
2528 * Support for kevent() system call. Register EVFILT_READ filters and
2529 * reject all others.
2530 */
2531int bpfkqfilter(dev_t dev, struct knote *kn);
2532static void filt_bpfdetach(struct knote *);
2533static int filt_bpfread(struct knote *, long);
cb323159
A
2534static int filt_bpftouch(struct knote *kn, struct kevent_qos_s *kev);
2535static int filt_bpfprocess(struct knote *kn, struct kevent_qos_s *kev);
b0d623f7 2536
5ba3f43e 2537SECURITY_READ_ONLY_EARLY(struct filterops) bpfread_filtops = {
d9a64523 2538 .f_isfd = 1,
b0d623f7
A
2539 .f_detach = filt_bpfdetach,
2540 .f_event = filt_bpfread,
39037602
A
2541 .f_touch = filt_bpftouch,
2542 .f_process = filt_bpfprocess,
b0d623f7
A
2543};
2544
b0d623f7 2545static int
cb323159 2546filt_bpfread_common(struct knote *kn, struct kevent_qos_s *kev, struct bpf_d *d)
b0d623f7 2547{
b0d623f7 2548 int ready = 0;
cb323159 2549 int64_t data = 0;
b0d623f7 2550
b0d623f7 2551 if (d->bd_immediate) {
6d2010ae 2552 /*
d9a64523 2553 * If there's data in the hold buffer, it's the
6d2010ae
A
2554 * amount of data a read will return.
2555 *
2556 * If there's no data in the hold buffer, but
2557 * there's data in the store buffer, a read will
d9a64523 2558 * immediately rotate the store buffer to the
6d2010ae 2559 * hold buffer, the amount of data in the store
d9a64523 2560 * buffer is the amount of data a read will
6d2010ae
A
2561 * return.
2562 *
d9a64523 2563 * If there's no data in either buffer, we're not
6d2010ae
A
2564 * ready to read.
2565 */
cb323159 2566 data = (d->bd_hlen == 0 || d->bd_hbuf_read != 0 ?
d9a64523 2567 d->bd_slen : d->bd_hlen);
cb323159
A
2568 int64_t lowwat = knote_low_watermark(kn);
2569 if (lowwat > d->bd_bufsize) {
2570 lowwat = d->bd_bufsize;
6d2010ae 2571 }
cb323159 2572 ready = (data >= lowwat);
b0d623f7 2573 } else {
6d2010ae 2574 /*
d9a64523 2575 * If there's data in the hold buffer, it's the
6d2010ae
A
2576 * amount of data a read will return.
2577 *
d9a64523
A
2578 * If there's no data in the hold buffer, but
2579 * there's data in the store buffer, if the
6d2010ae
A
2580 * timer has expired a read will immediately
2581 * rotate the store buffer to the hold buffer,
d9a64523 2582 * so the amount of data in the store buffer is
6d2010ae
A
2583 * the amount of data a read will return.
2584 *
d9a64523
A
2585 * If there's no data in either buffer, or there's
2586 * no data in the hold buffer and the timer hasn't
6d2010ae
A
2587 * expired, we're not ready to read.
2588 */
cb323159 2589 data = ((d->bd_hlen == 0 || d->bd_hbuf_read != 0) &&
d9a64523 2590 d->bd_state == BPF_TIMED_OUT ? d->bd_slen : d->bd_hlen);
cb323159 2591 ready = (data > 0);
b0d623f7 2592 }
0a7de745 2593 if (!ready) {
6d2010ae 2594 bpf_start_timer(d);
cb323159
A
2595 } else if (kev) {
2596 knote_fill_kevent(kn, kev, data);
0a7de745 2597 }
b0d623f7 2598
0a7de745 2599 return ready;
b0d623f7
A
2600}
2601
39037602
A
2602int
2603bpfkqfilter(dev_t dev, struct knote *kn)
2604{
2605 struct bpf_d *d;
2606 int res;
2607
2608 /*
2609 * Is this device a bpf?
2610 */
cb323159
A
2611 if (major(dev) != CDEV_MAJOR || kn->kn_filter != EVFILT_READ) {
2612 knote_set_error(kn, EINVAL);
0a7de745 2613 return 0;
39037602
A
2614 }
2615
2616 lck_mtx_lock(bpf_mlock);
2617
2618 d = bpf_dtab[minor(dev)];
2619
d9a64523
A
2620 if (d == NULL || d == BPF_DEV_RESERVED ||
2621 (d->bd_flags & BPF_CLOSING) != 0 ||
2622 d->bd_bif == NULL) {
39037602 2623 lck_mtx_unlock(bpf_mlock);
cb323159 2624 knote_set_error(kn, ENXIO);
0a7de745 2625 return 0;
39037602
A
2626 }
2627
2628 kn->kn_hook = d;
2629 kn->kn_filtid = EVFILTID_BPFREAD;
2630 KNOTE_ATTACH(&d->bd_sel.si_note, kn);
2631 d->bd_flags |= BPF_KNOTE;
2632
2633 /* capture the current state */
cb323159 2634 res = filt_bpfread_common(kn, NULL, d);
39037602
A
2635
2636 lck_mtx_unlock(bpf_mlock);
2637
0a7de745 2638 return res;
39037602
A
2639}
2640
2641static void
2642filt_bpfdetach(struct knote *kn)
2643{
2644 struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
2645
2646 lck_mtx_lock(bpf_mlock);
2647 if (d->bd_flags & BPF_KNOTE) {
2648 KNOTE_DETACH(&d->bd_sel.si_note, kn);
2649 d->bd_flags &= ~BPF_KNOTE;
2650 }
2651 lck_mtx_unlock(bpf_mlock);
2652}
2653
2654static int
2655filt_bpfread(struct knote *kn, long hint)
2656{
2657#pragma unused(hint)
2658 struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
2659
cb323159 2660 return filt_bpfread_common(kn, NULL, d);
39037602
A
2661}
2662
2663static int
cb323159 2664filt_bpftouch(struct knote *kn, struct kevent_qos_s *kev)
39037602
A
2665{
2666 struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
2667 int res;
2668
2669 lck_mtx_lock(bpf_mlock);
2670
2671 /* save off the lowat threshold and flag */
2672 kn->kn_sdata = kev->data;
2673 kn->kn_sfflags = kev->fflags;
39037602
A
2674
2675 /* output data will be re-generated here */
cb323159 2676 res = filt_bpfread_common(kn, NULL, d);
39037602
A
2677
2678 lck_mtx_unlock(bpf_mlock);
2679
0a7de745 2680 return res;
39037602
A
2681}
2682
2683static int
cb323159 2684filt_bpfprocess(struct knote *kn, struct kevent_qos_s *kev)
39037602 2685{
39037602
A
2686 struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
2687 int res;
2688
2689 lck_mtx_lock(bpf_mlock);
cb323159 2690 res = filt_bpfread_common(kn, kev, d);
39037602
A
2691 lck_mtx_unlock(bpf_mlock);
2692
0a7de745 2693 return res;
39037602
A
2694}
2695
1c79356b 2696/*
d9a64523 2697 * Copy data from an mbuf chain into a buffer. This code is derived
5ba3f43e 2698 * from m_copydata in kern/uipc_mbuf.c.
1c79356b
A
2699 */
2700static void
5ba3f43e 2701bpf_mcopy(struct mbuf * m, void *dst_arg, size_t len)
1c79356b 2702{
91447636 2703 u_int count;
1c79356b
A
2704 u_char *dst;
2705
1c79356b
A
2706 dst = dst_arg;
2707 while (len > 0) {
0a7de745 2708 if (m == 0) {
1c79356b 2709 panic("bpf_mcopy");
0a7de745 2710 }
1c79356b 2711 count = min(m->m_len, len);
2d21ac55 2712 bcopy(mbuf_data(m), dst, count);
1c79356b
A
2713 m = m->m_next;
2714 dst += count;
2715 len -= count;
2716 }
2717}
2718
2d21ac55
A
2719static inline void
2720bpf_tap_imp(
0a7de745
A
2721 ifnet_t ifp,
2722 u_int32_t dlt,
5ba3f43e 2723 struct bpf_packet *bpf_pkt,
0a7de745 2724 int outbound)
1c79356b 2725{
0a7de745 2726 struct bpf_d *d;
5ba3f43e 2727 u_int slen;
91447636 2728 struct bpf_if *bp;
1c79356b 2729
2d21ac55
A
2730 /*
2731 * It's possible that we get here after the bpf descriptor has been
2732 * detached from the interface; in such a case we simply return.
2733 * Lock ordering is important since we can be called asynchronously
5ba3f43e 2734 * (from IOKit) to process an inbound packet; when that happens
2d21ac55
A
2735 * we would have been holding its "gateLock" and will be acquiring
2736 * "bpf_mlock" upon entering this routine. Due to that, we release
2737 * "bpf_mlock" prior to calling ifnet_set_promiscuous (which will
2738 * acquire "gateLock" in the IOKit), in order to avoid a deadlock
2739 * when a ifnet_set_promiscuous request simultaneously collides with
2740 * an inbound packet being passed into the tap callback.
2741 */
91447636 2742 lck_mtx_lock(bpf_mlock);
2d21ac55
A
2743 if (ifp->if_bpf == NULL) {
2744 lck_mtx_unlock(bpf_mlock);
2745 return;
2746 }
5ba3f43e
A
2747 for (bp = ifp->if_bpf; bp != NULL; bp = bp->bif_next) {
2748 if (bp->bif_ifp != ifp) {
2749 /* wrong interface */
2750 bp = NULL;
2751 break;
2d21ac55 2752 }
5ba3f43e
A
2753 if (dlt == 0 || bp->bif_dlt == dlt) {
2754 /* tapping default DLT or DLT matches */
2755 break;
2756 }
2757 }
2758 if (bp == NULL) {
2759 goto done;
2760 }
2761 for (d = bp->bif_dlist; d; d = d->bd_next) {
d9a64523
A
2762 struct bpf_packet *bpf_pkt_saved = bpf_pkt;
2763 struct bpf_packet bpf_pkt_tmp;
2764 struct pktap_header_buffer bpfp_header_tmp;
2765
0a7de745 2766 if (outbound && !d->bd_seesent) {
5ba3f43e 2767 continue;
0a7de745 2768 }
d9a64523 2769
5ba3f43e
A
2770 ++d->bd_rcount;
2771 slen = bpf_filter(d->bd_filter, (u_char *)bpf_pkt,
d9a64523
A
2772 bpf_pkt->bpfp_total_length, 0);
2773 if (bp->bif_ifp->if_type == IFT_PKTAP &&
2774 bp->bif_dlt == DLT_PKTAP) {
2775 /*
2776 * Need to copy the bpf_pkt because the conversion
2777 * to v2 pktap header modifies the content of the
2778 * bpfp_header
2779 */
2780 if ((d->bd_flags & BPF_PKTHDRV2) &&
2781 bpf_pkt->bpfp_header_length <= sizeof(bpfp_header_tmp)) {
2782 bpf_pkt_tmp = *bpf_pkt;
2783
2784 bpf_pkt = &bpf_pkt_tmp;
2785
2786 memcpy(&bpfp_header_tmp, bpf_pkt->bpfp_header,
2787 bpf_pkt->bpfp_header_length);
2788
2789 bpf_pkt->bpfp_header = &bpfp_header_tmp;
2790
2791 convert_to_pktap_header_to_v2(bpf_pkt,
2792 !!(d->bd_flags & BPF_TRUNCATE));
2793 }
2794
0a7de745 2795 if (d->bd_flags & BPF_TRUNCATE) {
d9a64523
A
2796 slen = min(slen,
2797 get_pkt_trunc_len((u_char *)bpf_pkt,
0a7de745
A
2798 bpf_pkt->bpfp_total_length));
2799 }
d9a64523 2800 }
5ba3f43e 2801 if (slen != 0) {
2d21ac55 2802#if CONFIG_MACF_NET
0a7de745 2803 if (mac_bpfdesc_check_receive(d, bp->bif_ifp) != 0) {
5ba3f43e 2804 continue;
0a7de745 2805 }
2d21ac55 2806#endif
5ba3f43e 2807 catchpacket(d, bpf_pkt, slen, outbound);
91447636 2808 }
d9a64523 2809 bpf_pkt = bpf_pkt_saved;
1c79356b 2810 }
5ba3f43e 2811
d9a64523 2812done:
91447636 2813 lck_mtx_unlock(bpf_mlock);
1c79356b
A
2814}
2815
5ba3f43e
A
2816static inline void
2817bpf_tap_mbuf(
0a7de745
A
2818 ifnet_t ifp,
2819 u_int32_t dlt,
2820 mbuf_t m,
2821 void* hdr,
2822 size_t hlen,
2823 int outbound)
5ba3f43e
A
2824{
2825 struct bpf_packet bpf_pkt;
2826 struct mbuf *m0;
2827
2828 if (ifp->if_bpf == NULL) {
2829 /* quickly check without taking lock */
2830 return;
2831 }
2832 bpf_pkt.bpfp_type = BPF_PACKET_TYPE_MBUF;
2833 bpf_pkt.bpfp_mbuf = m;
2834 bpf_pkt.bpfp_total_length = 0;
0a7de745 2835 for (m0 = m; m0 != NULL; m0 = m0->m_next) {
5ba3f43e 2836 bpf_pkt.bpfp_total_length += m0->m_len;
0a7de745 2837 }
5ba3f43e
A
2838 bpf_pkt.bpfp_header = hdr;
2839 if (hdr != NULL) {
2840 bpf_pkt.bpfp_total_length += hlen;
2841 bpf_pkt.bpfp_header_length = hlen;
2842 } else {
2843 bpf_pkt.bpfp_header_length = 0;
2844 }
2845 bpf_tap_imp(ifp, dlt, &bpf_pkt, outbound);
2846}
2847
2d21ac55
A
2848void
2849bpf_tap_out(
0a7de745
A
2850 ifnet_t ifp,
2851 u_int32_t dlt,
2852 mbuf_t m,
2853 void* hdr,
2854 size_t hlen)
2d21ac55 2855{
5ba3f43e 2856 bpf_tap_mbuf(ifp, dlt, m, hdr, hlen, 1);
2d21ac55
A
2857}
2858
2859void
2860bpf_tap_in(
0a7de745
A
2861 ifnet_t ifp,
2862 u_int32_t dlt,
2863 mbuf_t m,
2864 void* hdr,
2865 size_t hlen)
2d21ac55 2866{
5ba3f43e 2867 bpf_tap_mbuf(ifp, dlt, m, hdr, hlen, 0);
2d21ac55
A
2868}
2869
2870/* Callback registered with Ethernet driver. */
0a7de745
A
2871static int
2872bpf_tap_callback(struct ifnet *ifp, struct mbuf *m)
2d21ac55 2873{
5ba3f43e 2874 bpf_tap_mbuf(ifp, 0, m, NULL, 0, mbuf_pkthdr_rcvif(m) == NULL);
d9a64523 2875
0a7de745 2876 return 0;
2d21ac55
A
2877}
2878
5ba3f43e 2879
d9a64523
A
2880static errno_t
2881bpf_copydata(struct bpf_packet *pkt, size_t off, size_t len, void* out_data)
2882{
2883 errno_t err = 0;
2884 if (pkt->bpfp_type == BPF_PACKET_TYPE_MBUF) {
2885 err = mbuf_copydata(pkt->bpfp_mbuf, off, len, out_data);
2886 } else {
2887 err = EINVAL;
2888 }
2889
0a7de745 2890 return err;
d9a64523
A
2891}
2892
5ba3f43e
A
2893static void
2894copy_bpf_packet(struct bpf_packet * pkt, void * dst, size_t len)
2895{
2896 /* copy the optional header */
2897 if (pkt->bpfp_header_length != 0) {
0a7de745 2898 size_t count = min(len, pkt->bpfp_header_length);
5ba3f43e
A
2899 bcopy(pkt->bpfp_header, dst, count);
2900 len -= count;
2901 dst += count;
2902 }
2903 if (len == 0) {
2904 /* nothing past the header */
2905 return;
2906 }
2907 /* copy the packet */
2908 switch (pkt->bpfp_type) {
2909 case BPF_PACKET_TYPE_MBUF:
2910 bpf_mcopy(pkt->bpfp_mbuf, dst, len);
2911 break;
2912 default:
2913 break;
2914 }
2915}
2916
d9a64523
A
2917static uint16_t
2918get_esp_trunc_len(__unused struct bpf_packet *pkt, __unused uint16_t off,
2919 const uint16_t remaining_caplen)
2920{
2921 /*
2922 * For some reason tcpdump expects to have one byte beyond the ESP header
2923 */
2924 uint16_t trunc_len = ESP_HDR_SIZE + 1;
2925
0a7de745
A
2926 if (trunc_len > remaining_caplen) {
2927 return remaining_caplen;
2928 }
d9a64523 2929
0a7de745 2930 return trunc_len;
d9a64523
A
2931}
2932
2933static uint16_t
2934get_isakmp_trunc_len(__unused struct bpf_packet *pkt, __unused uint16_t off,
2935 const uint16_t remaining_caplen)
2936{
2937 /*
2938 * Include the payload generic header
2939 */
2940 uint16_t trunc_len = ISAKMP_HDR_SIZE;
2941
0a7de745
A
2942 if (trunc_len > remaining_caplen) {
2943 return remaining_caplen;
2944 }
d9a64523 2945
0a7de745 2946 return trunc_len;
d9a64523
A
2947}
2948
2949static uint16_t
2950get_isakmp_natt_trunc_len(struct bpf_packet *pkt, uint16_t off,
2951 const uint16_t remaining_caplen)
2952{
2953 int err = 0;
2954 uint16_t trunc_len = 0;
2955 char payload[remaining_caplen];
2956
2957 err = bpf_copydata(pkt, off, remaining_caplen, payload);
0a7de745
A
2958 if (err != 0) {
2959 return remaining_caplen;
2960 }
d9a64523
A
2961 /*
2962 * They are three cases:
2963 * - IKE: payload start with 4 bytes header set to zero before ISAKMP header
2964 * - keep alive: 1 byte payload
2965 * - otherwise it's ESP
2966 */
2967 if (remaining_caplen >= 4 &&
0a7de745
A
2968 payload[0] == 0 && payload[1] == 0 &&
2969 payload[2] == 0 && payload[3] == 0) {
d9a64523
A
2970 trunc_len = 4 + get_isakmp_trunc_len(pkt, off + 4, remaining_caplen - 4);
2971 } else if (remaining_caplen == 1) {
2972 trunc_len = 1;
2973 } else {
2974 trunc_len = get_esp_trunc_len(pkt, off, remaining_caplen);
2975 }
2976
0a7de745
A
2977 if (trunc_len > remaining_caplen) {
2978 return remaining_caplen;
2979 }
d9a64523 2980
0a7de745 2981 return trunc_len;
d9a64523
A
2982}
2983
2984static uint16_t
2985get_udp_trunc_len(struct bpf_packet *pkt, uint16_t off, const uint16_t remaining_caplen)
2986{
2987 int err = 0;
2988 uint16_t trunc_len = sizeof(struct udphdr); /* By default no UDP payload */
2989
0a7de745
A
2990 if (trunc_len >= remaining_caplen) {
2991 return remaining_caplen;
2992 }
d9a64523
A
2993
2994 struct udphdr udphdr;
2995 err = bpf_copydata(pkt, off, sizeof(struct udphdr), &udphdr);
0a7de745
A
2996 if (err != 0) {
2997 return remaining_caplen;
2998 }
d9a64523
A
2999
3000 u_short sport, dport;
3001
3002 sport = EXTRACT_SHORT(&udphdr.uh_sport);
3003 dport = EXTRACT_SHORT(&udphdr.uh_dport);
3004
3005 if (dport == PORT_DNS || sport == PORT_DNS) {
3006 /*
3007 * Full UDP payload for DNS
3008 */
3009 trunc_len = remaining_caplen;
3010 } else if ((sport == PORT_BOOTPS && dport == PORT_BOOTPC) ||
0a7de745 3011 (sport == PORT_BOOTPC && dport == PORT_BOOTPS)) {
d9a64523
A
3012 /*
3013 * Full UDP payload for BOOTP and DHCP
3014 */
3015 trunc_len = remaining_caplen;
3016 } else if (dport == PORT_ISAKMP && sport == PORT_ISAKMP) {
3017 /*
3018 * Return the ISAKMP header
3019 */
3020 trunc_len += get_isakmp_trunc_len(pkt, off + sizeof(struct udphdr),
3021 remaining_caplen - sizeof(struct udphdr));
3022 } else if (dport == PORT_ISAKMP_NATT && sport == PORT_ISAKMP_NATT) {
3023 trunc_len += get_isakmp_natt_trunc_len(pkt, off + sizeof(struct udphdr),
3024 remaining_caplen - sizeof(struct udphdr));
3025 }
0a7de745
A
3026 if (trunc_len >= remaining_caplen) {
3027 return remaining_caplen;
3028 }
d9a64523 3029
0a7de745 3030 return trunc_len;
d9a64523
A
3031}
3032
3033static uint16_t
3034get_tcp_trunc_len(struct bpf_packet *pkt, uint16_t off, const uint16_t remaining_caplen)
3035{
3036 int err = 0;
3037 uint16_t trunc_len = sizeof(struct tcphdr); /* By default no TCP payload */
0a7de745
A
3038 if (trunc_len >= remaining_caplen) {
3039 return remaining_caplen;
3040 }
d9a64523
A
3041
3042 struct tcphdr tcphdr;
3043 err = bpf_copydata(pkt, off, sizeof(struct tcphdr), &tcphdr);
0a7de745
A
3044 if (err != 0) {
3045 return remaining_caplen;
3046 }
d9a64523
A
3047
3048 u_short sport, dport;
3049 sport = EXTRACT_SHORT(&tcphdr.th_sport);
3050 dport = EXTRACT_SHORT(&tcphdr.th_dport);
3051
3052 if (dport == PORT_DNS || sport == PORT_DNS) {
3053 /*
3054 * Full TCP payload for DNS
3055 */
3056 trunc_len = remaining_caplen;
3057 } else {
3058 trunc_len = tcphdr.th_off << 2;
3059 }
0a7de745
A
3060 if (trunc_len >= remaining_caplen) {
3061 return remaining_caplen;
3062 }
d9a64523 3063
0a7de745 3064 return trunc_len;
d9a64523
A
3065}
3066
3067static uint16_t
3068get_proto_trunc_len(uint8_t proto, struct bpf_packet *pkt, uint16_t off, const uint16_t remaining_caplen)
3069{
3070 uint16_t trunc_len;
3071
3072 switch (proto) {
3073 case IPPROTO_ICMP: {
3074 /*
3075 * Full IMCP payload
3076 */
3077 trunc_len = remaining_caplen;
3078 break;
3079 }
3080 case IPPROTO_ICMPV6: {
3081 /*
3082 * Full IMCPV6 payload
3083 */
3084 trunc_len = remaining_caplen;
3085 break;
3086 }
3087 case IPPROTO_IGMP: {
3088 /*
3089 * Full IGMP payload
3090 */
3091 trunc_len = remaining_caplen;
3092 break;
3093 }
3094 case IPPROTO_UDP: {
3095 trunc_len = get_udp_trunc_len(pkt, off, remaining_caplen);
3096 break;
3097 }
3098 case IPPROTO_TCP: {
3099 trunc_len = get_tcp_trunc_len(pkt, off, remaining_caplen);
3100 break;
3101 }
3102 case IPPROTO_ESP: {
3103 trunc_len = get_esp_trunc_len(pkt, off, remaining_caplen);
3104 break;
3105 }
3106 default: {
3107 /*
3108 * By default we only include the IP header
3109 */
3110 trunc_len = 0;
3111 break;
3112 }
3113 }
0a7de745
A
3114 if (trunc_len >= remaining_caplen) {
3115 return remaining_caplen;
3116 }
d9a64523 3117
0a7de745 3118 return trunc_len;
d9a64523
A
3119}
3120
3121static uint16_t
3122get_ip_trunc_len(struct bpf_packet *pkt, uint16_t off, const uint16_t remaining_caplen)
3123{
3124 int err = 0;
3125 uint16_t iplen = sizeof(struct ip);
0a7de745
A
3126 if (iplen >= remaining_caplen) {
3127 return remaining_caplen;
3128 }
d9a64523
A
3129
3130 struct ip iphdr;
3131 err = bpf_copydata(pkt, off, sizeof(struct ip), &iphdr);
0a7de745
A
3132 if (err != 0) {
3133 return remaining_caplen;
3134 }
d9a64523
A
3135
3136 uint8_t proto = 0;
3137
3138 iplen = iphdr.ip_hl << 2;
0a7de745
A
3139 if (iplen >= remaining_caplen) {
3140 return remaining_caplen;
3141 }
d9a64523
A
3142
3143 proto = iphdr.ip_p;
3144 iplen += get_proto_trunc_len(proto, pkt, off + iplen, remaining_caplen - iplen);
3145
0a7de745
A
3146 if (iplen >= remaining_caplen) {
3147 return remaining_caplen;
3148 }
d9a64523 3149
0a7de745 3150 return iplen;
d9a64523
A
3151}
3152
3153static uint16_t
3154get_ip6_trunc_len(struct bpf_packet *pkt, uint16_t off, const uint16_t remaining_caplen)
3155{
3156 int err = 0;
3157 uint16_t iplen = sizeof(struct ip6_hdr);
0a7de745
A
3158 if (iplen >= remaining_caplen) {
3159 return remaining_caplen;
3160 }
d9a64523
A
3161
3162 struct ip6_hdr ip6hdr;
3163 err = bpf_copydata(pkt, off, sizeof(struct ip6_hdr), &ip6hdr);
0a7de745
A
3164 if (err != 0) {
3165 return remaining_caplen;
3166 }
d9a64523
A
3167
3168 uint8_t proto = 0;
3169
3170 /*
3171 * TBD: process the extension headers
3172 */
3173 proto = ip6hdr.ip6_nxt;
3174 iplen += get_proto_trunc_len(proto, pkt, off + iplen, remaining_caplen - iplen);
3175
0a7de745
A
3176 if (iplen >= remaining_caplen) {
3177 return remaining_caplen;
3178 }
d9a64523 3179
0a7de745 3180 return iplen;
d9a64523
A
3181}
3182
3183static uint16_t
3184get_ether_trunc_len(struct bpf_packet *pkt, int off, const uint16_t remaining_caplen)
3185{
3186 int err = 0;
3187 uint16_t ethlen = sizeof(struct ether_header);
0a7de745
A
3188 if (ethlen >= remaining_caplen) {
3189 return remaining_caplen;
3190 }
d9a64523
A
3191
3192 struct ether_header eh;
3193 u_short type;
3194 err = bpf_copydata(pkt, off, sizeof(struct ether_header), &eh);
0a7de745
A
3195 if (err != 0) {
3196 return remaining_caplen;
3197 }
d9a64523
A
3198
3199 type = EXTRACT_SHORT(&eh.ether_type);
3200 /* Include full ARP */
3201 if (type == ETHERTYPE_ARP) {
3202 ethlen = remaining_caplen;
3203 } else if (type != ETHERTYPE_IP && type != ETHERTYPE_IPV6) {
3204 ethlen = min(BPF_MIN_PKT_SIZE, remaining_caplen);
3205 } else {
3206 if (type == ETHERTYPE_IP) {
3207 ethlen += get_ip_trunc_len(pkt, sizeof(struct ether_header),
3208 remaining_caplen);
3209 } else if (type == ETHERTYPE_IPV6) {
3210 ethlen += get_ip6_trunc_len(pkt, sizeof(struct ether_header),
0a7de745 3211 remaining_caplen);
d9a64523
A
3212 }
3213 }
0a7de745 3214 return ethlen;
d9a64523
A
3215}
3216
/*
 * Compute the total number of bytes (pktap/bpf header + optional L2
 * header + truncated L3 payload) to retain for a packet when header
 * truncation is enabled.  'p' is really a struct bpf_packet whose
 * bpfp_header is a pktap_header; 'len' is the full captured length.
 */
static uint32_t
get_pkt_trunc_len(u_char *p, u_int len)
{
	struct bpf_packet *pkt = (struct bpf_packet *)(void *) p;
	struct pktap_header *pktap = (struct pktap_header *) (pkt->bpfp_header);
	uint32_t out_pkt_len = 0, tlen = 0;
	/*
	 * pktap->pth_frame_pre_length is L2 header length and accounts
	 * for both pre and pre_adjust.
	 * pktap->pth_length is sizeof(pktap_header) (excl the pre/pre_adjust)
	 * pkt->bpfp_header_length is (pktap->pth_length + pre_adjust)
	 * pre is the offset to the L3 header after the bpfp_header, or length
	 * of L2 header after bpfp_header, if present.
	 */
	int32_t pre = pktap->pth_frame_pre_length -
	    (pkt->bpfp_header_length - pktap->pth_length);

	/*
	 * Length of the input packet starting from L3 header.
	 * NOTE(review): when pre is negative this subtraction grows
	 * in_pkt_len past 'len'; presumably the pre < 0 branch below
	 * (which caps at BPF_MIN_PKT_SIZE) is what keeps this safe — confirm.
	 */
	uint32_t in_pkt_len = len - pkt->bpfp_header_length - pre;
	if (pktap->pth_protocol_family == AF_INET ||
	    pktap->pth_protocol_family == AF_INET6) {
		/* Contains L2 header */
		if (pre > 0) {
			/* Too small to even hold an Ethernet header */
			if (pre < (int32_t)sizeof(struct ether_header)) {
				goto too_short;
			}

			out_pkt_len = get_ether_trunc_len(pkt, 0, in_pkt_len);
		} else if (pre == 0) {
			/* No L2 header: parse straight from the L3 header */
			if (pktap->pth_protocol_family == AF_INET) {
				out_pkt_len = get_ip_trunc_len(pkt, pre, in_pkt_len);
			} else if (pktap->pth_protocol_family == AF_INET6) {
				out_pkt_len = get_ip6_trunc_len(pkt, pre, in_pkt_len);
			}
		} else {
			/* Ideally pre should be >= 0. This is an exception */
			out_pkt_len = min(BPF_MIN_PKT_SIZE, in_pkt_len);
		}
	} else {
		if (pktap->pth_iftype == IFT_ETHER) {
			if (in_pkt_len < sizeof(struct ether_header)) {
				goto too_short;
			}
			/* At most include the Ethernet header and 16 bytes */
			out_pkt_len = MIN(sizeof(struct ether_header) + 16,
			    in_pkt_len);
		} else {
			/*
			 * For unknown protocols include at most 16 bytes
			 */
			out_pkt_len = MIN(16, in_pkt_len);
		}
	}
done:
	/* Total = metadata header + L2 prefix + retained L3 bytes */
	tlen = pkt->bpfp_header_length + out_pkt_len + pre;
	return tlen;
too_short:
	/* Frame smaller than its nominal headers: keep what we have */
	out_pkt_len = in_pkt_len;
	goto done;
}
3277
1c79356b
A
/*
 * Move the packet data from interface memory (pkt) into the
 * store buffer, prefixing it with a bpf_hdr or bpf_hdr_ext.
 * Rotates the store/hold buffers when the store buffer fills and
 * wakes up a pending reader when appropriate.  (Despite the old
 * "return 1" wording, this function returns nothing.)
 *
 * d        - descriptor, assumed locked by the caller
 * pkt      - captured packet (mbuf-backed or contiguous)
 * snaplen  - maximum number of payload bytes to keep
 * outbound - nonzero for transmitted packets (direction flag)
 */
static void
catchpacket(struct bpf_d *d, struct bpf_packet * pkt,
    u_int snaplen, int outbound)
{
	struct bpf_hdr *hp;
	struct bpf_hdr_ext *ehp;
	int totlen, curlen;
	int hdrlen, caplen;
	int do_wakeup = 0;
	u_char *payload;
	struct timeval tv;

	/* Extended headers are larger; pick the per-attachment size */
	hdrlen = (d->bd_flags & BPF_EXTENDED_HDR) ? d->bd_bif->bif_exthdrlen :
	    d->bd_bif->bif_hdrlen;
	/*
	 * Figure out how many bytes to move.  If the packet is
	 * greater or equal to the snapshot length, transfer that
	 * much.  Otherwise, transfer the whole packet (unless
	 * we hit the buffer size limit).
	 */
	totlen = hdrlen + min(snaplen, pkt->bpfp_total_length);
	if (totlen > d->bd_bufsize) {
		totlen = d->bd_bufsize;
	}

	/* Cannot even fit the header: drop silently */
	if (hdrlen > totlen) {
		return;
	}

	/*
	 * Round up the end of the previous packet to the next longword.
	 */
	curlen = BPF_WORDALIGN(d->bd_slen);
	if (curlen + totlen > d->bd_bufsize) {
		/*
		 * This packet will overflow the storage buffer.
		 * Rotate the buffers if we can, then wakeup any
		 * pending reads.
		 *
		 * We cannot rotate buffers if a read is in progress
		 * so drop the packet
		 */
		if (d->bd_hbuf_read != 0) {
			++d->bd_dcount;
			return;
		}

		if (d->bd_fbuf == NULL) {
			if (d->bd_headdrop == 0) {
				/*
				 * We haven't completed the previous read yet,
				 * so drop the packet.
				 */
				++d->bd_dcount;
				return;
			}
			/*
			 * Head-drop mode: discard the hold buffer (older
			 * packets) so the new packet can be stored.
			 */
			d->bd_dcount += d->bd_hcnt;
			d->bd_fbuf = d->bd_hbuf;
			ROTATE_BUFFERS(d);
		} else {
			ROTATE_BUFFERS(d);
		}
		do_wakeup = 1;
		curlen = 0;
	} else if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT) {
		/*
		 * Immediate mode is set, or the read timeout has
		 * already expired during a select call.  A packet
		 * arrived, so the reader should be woken up.
		 */
		do_wakeup = 1;
	}

	/*
	 * Append the bpf header.
	 */
	microtime(&tv);
	if (d->bd_flags & BPF_EXTENDED_HDR) {
		struct mbuf *m;

		/* Flow/service metadata is only available for mbuf packets */
		m = (pkt->bpfp_type == BPF_PACKET_TYPE_MBUF)
		    ? pkt->bpfp_mbuf : NULL;
		ehp = (struct bpf_hdr_ext *)(void *)(d->bd_sbuf + curlen);
		memset(ehp, 0, sizeof(*ehp));
		ehp->bh_tstamp.tv_sec = tv.tv_sec;
		ehp->bh_tstamp.tv_usec = tv.tv_usec;

		ehp->bh_datalen = pkt->bpfp_total_length;
		ehp->bh_hdrlen = hdrlen;
		caplen = ehp->bh_caplen = totlen - hdrlen;
		if (m == NULL) {
			if (outbound) {
				ehp->bh_flags |= BPF_HDR_EXT_FLAGS_DIR_OUT;
			} else {
				ehp->bh_flags |= BPF_HDR_EXT_FLAGS_DIR_IN;
			}
		} else if (outbound) {
			ehp->bh_flags |= BPF_HDR_EXT_FLAGS_DIR_OUT;

			/* only do lookups on non-raw INPCB */
			if ((m->m_pkthdr.pkt_flags & (PKTF_FLOW_ID |
			    PKTF_FLOW_LOCALSRC | PKTF_FLOW_RAWSOCK)) ==
			    (PKTF_FLOW_ID | PKTF_FLOW_LOCALSRC) &&
			    m->m_pkthdr.pkt_flowsrc == FLOWSRC_INPCB) {
				ehp->bh_flowid = m->m_pkthdr.pkt_flowid;
				ehp->bh_proto = m->m_pkthdr.pkt_proto;
			}
			ehp->bh_svc = so_svc2tc(m->m_pkthdr.pkt_svc);
			if (m->m_pkthdr.pkt_flags & PKTF_TCP_REXMT) {
				ehp->bh_pktflags |= BPF_PKTFLAGS_TCP_REXMT;
			}
			if (m->m_pkthdr.pkt_flags & PKTF_START_SEQ) {
				ehp->bh_pktflags |= BPF_PKTFLAGS_START_SEQ;
			}
			if (m->m_pkthdr.pkt_flags & PKTF_LAST_PKT) {
				ehp->bh_pktflags |= BPF_PKTFLAGS_LAST_PKT;
			}
			if (m->m_pkthdr.pkt_flags & PKTF_VALID_UNSENT_DATA) {
				ehp->bh_unsent_bytes =
				    m->m_pkthdr.bufstatus_if;
				ehp->bh_unsent_snd =
				    m->m_pkthdr.bufstatus_sndbuf;
			}
		} else {
			ehp->bh_flags |= BPF_HDR_EXT_FLAGS_DIR_IN;
		}
		payload = (u_char *)ehp + hdrlen;
	} else {
		hp = (struct bpf_hdr *)(void *)(d->bd_sbuf + curlen);
		hp->bh_tstamp.tv_sec = tv.tv_sec;
		hp->bh_tstamp.tv_usec = tv.tv_usec;
		hp->bh_datalen = pkt->bpfp_total_length;
		hp->bh_hdrlen = hdrlen;
		caplen = hp->bh_caplen = totlen - hdrlen;
		payload = (u_char *)hp + hdrlen;
	}
	/*
	 * Copy the packet data into the store buffer and update its length.
	 */
	copy_bpf_packet(pkt, payload, caplen);
	d->bd_slen = curlen + totlen;
	d->bd_scnt += 1;

	if (do_wakeup) {
		bpf_wakeup(d);
	}
}
3433
3434/*
3435 * Initialize all nonzero fields of a descriptor.
3436 */
3437static int
91447636 3438bpf_allocbufs(struct bpf_d *d)
1c79356b 3439{
813fb2f6
A
3440 if (d->bd_sbuf != NULL) {
3441 FREE(d->bd_sbuf, M_DEVBUF);
3442 d->bd_sbuf = NULL;
3443 }
3444 if (d->bd_hbuf != NULL) {
3445 FREE(d->bd_hbuf, M_DEVBUF);
3446 d->bd_hbuf = NULL;
3447 }
3448 if (d->bd_fbuf != NULL) {
3449 FREE(d->bd_fbuf, M_DEVBUF);
3450 d->bd_fbuf = NULL;
3451 }
3452
1c79356b 3453 d->bd_fbuf = (caddr_t) _MALLOC(d->bd_bufsize, M_DEVBUF, M_WAIT);
0a7de745
A
3454 if (d->bd_fbuf == NULL) {
3455 return ENOBUFS;
3456 }
1c79356b
A
3457
3458 d->bd_sbuf = (caddr_t) _MALLOC(d->bd_bufsize, M_DEVBUF, M_WAIT);
813fb2f6 3459 if (d->bd_sbuf == NULL) {
1c79356b 3460 FREE(d->bd_fbuf, M_DEVBUF);
813fb2f6 3461 d->bd_fbuf = NULL;
0a7de745 3462 return ENOBUFS;
1c79356b
A
3463 }
3464 d->bd_slen = 0;
3465 d->bd_hlen = 0;
3e170ce0
A
3466 d->bd_scnt = 0;
3467 d->bd_hcnt = 0;
0a7de745 3468 return 0;
1c79356b
A
3469}
3470
/*
 * Free buffers currently in use by a descriptor.
 * Called on close.
 */
static void
bpf_freed(struct bpf_d *d)
{
	/*
	 * We don't need to lock out interrupts since this descriptor has
	 * been detached from its interface and it yet hasn't been marked
	 * free.
	 */
	if (d->bd_hbuf_read != 0) {
		panic("bpf buffer freed during read");
	}

	/*
	 * bd_hbuf/bd_fbuf are only released when bd_sbuf is set; this
	 * mirrors bpf_allocbufs(), which never leaves hold/free buffers
	 * allocated without a store buffer.
	 */
	if (d->bd_sbuf != 0) {
		FREE(d->bd_sbuf, M_DEVBUF);
		if (d->bd_hbuf != 0) {
			FREE(d->bd_hbuf, M_DEVBUF);
		}
		if (d->bd_fbuf != 0) {
			FREE(d->bd_fbuf, M_DEVBUF);
		}
	}
	/* Release the compiled filter program, if one was installed */
	if (d->bd_filter) {
		FREE(d->bd_filter, M_DEVBUF);
	}
}
3500
/*
 * Attach an interface to bpf.  driverp is a pointer to a (struct bpf_if *)
 * in the driver's softc; dlt is the link layer type; hdrlen is the fixed
 * size of the link header (variable length headers not yet supported).
 */
void
bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen)
{
	/* Legacy entry point: attach with no send/tap callbacks */
	bpf_attach(ifp, dlt, hdrlen, NULL, NULL);
}
3511
/*
 * Register a (interface, dlt) pair with bpf, with optional driver
 * callbacks for sending (send) and tapping (tap) packets.
 *
 * The global bpf_iflist keeps all attachments for one interface
 * adjacent; an Ethernet/DLT_EN10MB attachment is placed first for its
 * interface so it is the default DLT.  Returns EEXIST if the pair is
 * already attached, 0 on success.  Takes a reference on ifp.
 */
errno_t
bpf_attach(
	ifnet_t ifp,
	u_int32_t dlt,
	u_int32_t hdrlen,
	bpf_send_func send,
	bpf_tap_func tap)
{
	struct bpf_if *bp;
	struct bpf_if *bp_new;
	struct bpf_if *bp_before_first = NULL;
	struct bpf_if *bp_first = NULL;
	struct bpf_if *bp_last = NULL;
	boolean_t found;

	bp_new = (struct bpf_if *) _MALLOC(sizeof(*bp_new), M_DEVBUF,
	    M_WAIT | M_ZERO);
	if (bp_new == 0) {
		/* M_WAIT should not fail; treat it as fatal like 4.4BSD did */
		panic("bpfattach");
	}

	lck_mtx_lock(bpf_mlock);

	/*
	 * Check if this interface/dlt is already attached. Remember the
	 * first and last attachment for this interface, as well as the
	 * element before the first attachment.
	 */
	found = FALSE;
	for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
		if (bp->bif_ifp != ifp) {
			if (bp_first != NULL) {
				/* no more elements for this interface */
				break;
			}
			bp_before_first = bp;
		} else {
			if (bp->bif_dlt == dlt) {
				found = TRUE;
				break;
			}
			if (bp_first == NULL) {
				bp_first = bp;
			}
			bp_last = bp;
		}
	}
	if (found) {
		lck_mtx_unlock(bpf_mlock);
		printf("bpfattach - %s with dlt %d is already attached\n",
		    if_name(ifp), dlt);
		FREE(bp_new, M_DEVBUF);
		return EEXIST;
	}

	bp_new->bif_ifp = ifp;
	bp_new->bif_dlt = dlt;
	bp_new->bif_send = send;
	bp_new->bif_tap = tap;

	if (bp_first == NULL) {
		/* No other entries for this ifp */
		bp_new->bif_next = bpf_iflist;
		bpf_iflist = bp_new;
	} else {
		if (ifnet_type(ifp) == IFT_ETHER && dlt == DLT_EN10MB) {
			/* Make this the first entry for this interface */
			if (bp_before_first != NULL) {
				/* point the previous to us */
				bp_before_first->bif_next = bp_new;
			} else {
				/* we're the new head */
				bpf_iflist = bp_new;
			}
			bp_new->bif_next = bp_first;
		} else {
			/* Add this after the last entry for this interface */
			bp_new->bif_next = bp_last->bif_next;
			bp_last->bif_next = bp_new;
		}
	}

	/*
	 * Compute the length of the bpf header.  This is not necessarily
	 * equal to SIZEOF_BPF_HDR because we want to insert spacing such
	 * that the network layer header begins on a longword boundary (for
	 * performance reasons and to alleviate alignment restrictions).
	 */
	bp_new->bif_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen;
	bp_new->bif_exthdrlen = BPF_WORDALIGN(hdrlen +
	    sizeof(struct bpf_hdr_ext)) - hdrlen;

	/* Take a reference on the interface */
	ifnet_reference(ifp);

	lck_mtx_unlock(bpf_mlock);

#ifndef __APPLE__
	if (bootverbose) {
		printf("bpf: %s attached\n", if_name(ifp));
	}
#endif

	return 0;
}
3617
9bccf70c
A
/*
 * Detach bpf from an interface. This involves detaching each descriptor
 * associated with the interface, and leaving bd_bif NULL. Notify each
 * descriptor as it's detached so that any sleepers wake up and get
 * ENXIO.
 */
void
bpfdetach(struct ifnet *ifp)
{
	struct bpf_if *bp, *bp_prev, *bp_next;
	struct bpf_d *d;

	if (bpf_debug != 0) {
		printf("%s: %s\n", __func__, if_name(ifp));
	}

	lck_mtx_lock(bpf_mlock);

	/*
	 * Build the list of devices attached to that interface
	 * that we need to free while keeping the lock to maintain
	 * the integrity of the interface list
	 */
	bp_prev = NULL;
	for (bp = bpf_iflist; bp != NULL; bp = bp_next) {
		/* Save the successor: bp is unlinked inside the loop */
		bp_next = bp->bif_next;

		if (ifp != bp->bif_ifp) {
			bp_prev = bp;
			continue;
		}
		/* Unlink from the interface list */
		if (bp_prev) {
			bp_prev->bif_next = bp->bif_next;
		} else {
			bpf_iflist = bp->bif_next;
		}

		/*
		 * Detach the devices attached to the interface.
		 * NOTE(review): the unlinked bp itself is not FREEd in
		 * this function — presumably its memory is reclaimed via
		 * the bpf_detachd() path; confirm against the rest of
		 * the file.
		 */
		while ((d = bp->bif_dlist) != NULL) {
			/*
			 * Take an extra reference to prevent the device
			 * from being freed when bpf_detachd() releases
			 * the reference for the interface list
			 */
			bpf_acquire_d(d);
			bpf_detachd(d, 0);
			bpf_wakeup(d);
			bpf_release_d(d);
		}
		/* Drop the reference taken in bpf_attach() */
		ifnet_release(ifp);
	}

	lck_mtx_unlock(bpf_mlock);
}
3673
/*
 * One-time driver initialization: set up the global bpf mutex and its
 * lock group/attributes, register the character device switch, and
 * create the initial /dev/bpfN nodes.  Idempotent via
 * bpf_devsw_installed; rolls everything back if cdevsw_add() fails.
 */
void
bpf_init(__unused void *unused)
{
#ifdef __APPLE__
	int i;
	int maj;

	if (bpf_devsw_installed == 0) {
		bpf_devsw_installed = 1;
		bpf_mlock_grp_attr = lck_grp_attr_alloc_init();
		bpf_mlock_grp = lck_grp_alloc_init("bpf", bpf_mlock_grp_attr);
		bpf_mlock_attr = lck_attr_alloc_init();
		/*
		 * NOTE(review): bpf_mlock is passed to lck_mtx_init() and
		 * later set to NULL — presumably it points at static mutex
		 * storage declared earlier in the file; confirm.
		 */
		lck_mtx_init(bpf_mlock, bpf_mlock_grp, bpf_mlock_attr);
		maj = cdevsw_add(CDEV_MAJOR, &bpf_cdevsw);
		if (maj == -1) {
			/* Roll back lock setup so a retry starts clean */
			if (bpf_mlock_attr) {
				lck_attr_free(bpf_mlock_attr);
			}
			if (bpf_mlock_grp) {
				lck_grp_free(bpf_mlock_grp);
			}
			if (bpf_mlock_grp_attr) {
				lck_grp_attr_free(bpf_mlock_grp_attr);
			}

			bpf_mlock = NULL;
			bpf_mlock_attr = NULL;
			bpf_mlock_grp = NULL;
			bpf_mlock_grp_attr = NULL;
			bpf_devsw_installed = 0;
			printf("bpf_init: failed to allocate a major number\n");
			return;
		}

		/* Pre-create the minor device nodes */
		for (i = 0; i < NBPFILTER; i++) {
			bpf_make_dev_t(maj);
		}
	}
#else
	cdevsw_add(&bpf_cdevsw);
#endif
}
3716
#ifndef __APPLE__
/* Non-Apple BSDs: register driver init at device-driver SYSINIT time */
SYSINIT(bpfdev, SI_SUB_DRIVERS, SI_ORDER_MIDDLE + CDEV_MAJOR, bpf_drvinit, NULL);
#endif
9bccf70c 3720
2d21ac55
A
#if CONFIG_MACF_NET
/*
 * MAC framework accessor: return the label attached to a BPF descriptor.
 */
struct label *
mac_bpfdesc_label_get(struct bpf_d *d)
{
	return d->bd_label;
}

/*
 * MAC framework accessor: attach a label to a BPF descriptor.
 * NOTE(review): label lifetime is presumably managed by the MAC
 * framework, not by bpf — confirm against the MACF callers.
 */
void
mac_bpfdesc_label_set(struct bpf_d *d, struct label *label)
{
	d->bd_label = label;
}
#endif
cb323159
A
3734
3735static int
3736sysctl_bpf_maxbufsize SYSCTL_HANDLER_ARGS
3737{
3738#pragma unused(arg1, arg2)
3739 int i, err;
3740
3741 i = bpf_maxbufsize;
3742
3743 err = sysctl_handle_int(oidp, &i, 0, req);
3744 if (err != 0 || req->newptr == USER_ADDR_NULL) {
3745 return err;
3746 }
3747
3748 if (i < 0 || i > BPF_MAXSIZE_CAP) {
3749 i = BPF_MAXSIZE_CAP;
3750 }
3751
3752 bpf_maxbufsize = i;
3753 return err;
3754}