git.saurik.com Git - apple/xnu.git/blame_incremental

... / ...

Commit	Line	Data
	1	/*
	2	* Copyright (c) 2000-2019 Apple Inc. All rights reserved.
	3	*
	4	* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
	5	*
	6	* This file contains Original Code and/or Modifications of Original Code
	7	* as defined in and that are subject to the Apple Public Source License
	8	* Version 2.0 (the 'License'). You may not use this file except in
	9	* compliance with the License. The rights granted to you under the License
	10	* may not be used to create, or enable the creation or redistribution of,
	11	* unlawful or unlicensed copies of an Apple operating system, or to
	12	* circumvent, violate, or enable the circumvention or violation of, any
	13	* terms of an Apple operating system software license agreement.
	14	*
	15	* Please obtain a copy of the License at
	16	* http://www.opensource.apple.com/apsl/ and read it before using this file.
	17	*
	18	* The Original Code and all software distributed under the License are
	19	* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
	20	* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
	21	* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
	22	* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
	23	* Please see the License for the specific language governing rights and
	24	* limitations under the License.
	25	*
	26	* @APPLE_OSREFERENCE_LICENSE_HEADER_END@
	27	*/
	28	/*
	29	* Copyright (c) 1990, 1991, 1993
	30	* The Regents of the University of California. All rights reserved.
	31	*
	32	* This code is derived from the Stanford/CMU enet packet filter,
	33	* (net/enet.c) distributed as part of 4.3BSD, and code contributed
	34	* to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
	35	* Berkeley Laboratory.
	36	*
	37	* Redistribution and use in source and binary forms, with or without
	38	* modification, are permitted provided that the following conditions
	39	* are met:
	40	* 1. Redistributions of source code must retain the above copyright
	41	* notice, this list of conditions and the following disclaimer.
	42	* 2. Redistributions in binary form must reproduce the above copyright
	43	* notice, this list of conditions and the following disclaimer in the
	44	* documentation and/or other materials provided with the distribution.
	45	* 3. All advertising materials mentioning features or use of this software
	46	* must display the following acknowledgement:
	47	* This product includes software developed by the University of
	48	* California, Berkeley and its contributors.
	49	* 4. Neither the name of the University nor the names of its contributors
	50	* may be used to endorse or promote products derived from this software
	51	* without specific prior written permission.
	52	*
	53	* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
	54	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	55	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
	56	* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
	57	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
	58	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
	59	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
	60	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
	61	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
	62	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
	63	* SUCH DAMAGE.
	64	*
	65	* @(#)bpf.c 8.2 (Berkeley) 3/28/94
	66	*
	67	* $FreeBSD: src/sys/net/bpf.c,v 1.59.2.5 2001/01/05 04:49:09 jdp Exp $
	68	*/
	69	/*
	70	* NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
	71	* support for mandatory and extensible security protections. This notice
	72	* is included in support of clause 2.2 (b) of the Apple Public License,
	73	* Version 2.0.
	74	*/
	75
	76	#include "bpf.h"
	77
	78	#ifndef __GNUC__
	79	#define inline
	80	#else
	81	#define inline __inline
	82	#endif
	83
	84	#include <sys/param.h>
	85	#include <sys/systm.h>
	86	#include <sys/conf.h>
	87	#include <sys/malloc.h>
	88	#include <sys/mbuf.h>
	89	#include <sys/time.h>
	90	#include <sys/proc.h>
	91	#include <sys/signalvar.h>
	92	#include <sys/filio.h>
	93	#include <sys/sockio.h>
	94	#include <sys/ttycom.h>
	95	#include <sys/filedesc.h>
	96	#include <sys/uio_internal.h>
	97	#include <sys/file_internal.h>
	98	#include <sys/event.h>
	99
	100	#include <sys/poll.h>
	101
	102	#include <sys/socket.h>
	103	#include <sys/socketvar.h>
	104	#include <sys/vnode.h>
	105
	106	#include <net/if.h>
	107	#include <net/bpf.h>
	108	#include <net/bpfdesc.h>
	109
	110	#include <netinet/in.h>
	111	#include <netinet/ip.h>
	112	#include <netinet/ip6.h>
	113	#include <netinet/in_pcb.h>
	114	#include <netinet/in_var.h>
	115	#include <netinet/ip_var.h>
	116	#include <netinet/tcp.h>
	117	#include <netinet/tcp_var.h>
	118	#include <netinet/udp.h>
	119	#include <netinet/udp_var.h>
	120	#include <netinet/if_ether.h>
	121	#include <netinet/isakmp.h>
	122	#include <netinet6/esp.h>
	123	#include <sys/kernel.h>
	124	#include <sys/sysctl.h>
	125	#include <net/firewire.h>
	126
	127	#include <miscfs/devfs/devfs.h>
	128	#include <net/dlil.h>
	129	#include <net/pktap.h>
	130
	131	#include <kern/locks.h>
	132	#include <kern/thread_call.h>
	133	#include <libkern/section_keywords.h>
	134
	135	#if CONFIG_MACF_NET
	136	#include <security/mac_framework.h>
	137	#endif /* CONFIG_MACF_NET */
	138
	139	#include <os/log.h>
	140
	extern int tvtohz(struct timeval *);

	/* Initial value of the patchable default read buffer size (bpf_bufsize). */
	#define BPF_BUFSIZE 4096
	/* The 'code' (direction) argument is accepted for source compatibility and ignored. */
	#define UIOMOVE(cp, len, code, uio) uiomove(cp, len, uio)

	#define PRINET 26 /* interruptible */

	#define ISAKMP_HDR_SIZE (sizeof(struct isakmp) + sizeof(struct isakmp_gen))
	#define ESP_HDR_SIZE sizeof(struct newesp)

	/*
	 * Pointer to a routine that copies a given number of bytes from a source
	 * to a destination buffer.
	 * NOTE(review): argument order presumably (src, dst, len), judging by the
	 * const qualifier on the first parameter -- confirm against the copy
	 * routines assigned to it.
	 */
	typedef void (*pktcopyfunc_t)(const void *, void *, size_t);
	152
	153	/*
	154	* The default read buffer size is patchable.
	155	*/
	156	static unsigned int bpf_bufsize = BPF_BUFSIZE;
	157	SYSCTL_INT(_debug, OID_AUTO, bpf_bufsize, CTLFLAG_RW \| CTLFLAG_LOCKED,
	158	&bpf_bufsize, 0, "");
	159
	160	static int sysctl_bpf_maxbufsize SYSCTL_HANDLER_ARGS;
	161	extern const int copysize_limit_panic;
	162	#define BPF_MAXSIZE_CAP (copysize_limit_panic >> 1)
	163	__private_extern__ unsigned int bpf_maxbufsize = BPF_MAXBUFSIZE;
	164	SYSCTL_PROC(_debug, OID_AUTO, bpf_maxbufsize, CTLTYPE_INT \| CTLFLAG_RW \| CTLFLAG_LOCKED,
	165	&bpf_maxbufsize, 0,
	166	sysctl_bpf_maxbufsize, "I", "Default BPF max buffer size");
	167
	168	static unsigned int bpf_maxdevices = 256;
	169	SYSCTL_UINT(_debug, OID_AUTO, bpf_maxdevices, CTLFLAG_RW \| CTLFLAG_LOCKED,
	170	&bpf_maxdevices, 0, "");
	171	/*
	172	* bpf_wantpktap controls the defaul visibility of DLT_PKTAP
	173	* For OS X is off by default so process need to use the ioctl BPF_WANT_PKTAP
	174	* explicitly to be able to use DLT_PKTAP.
	175	*/
	176	#if CONFIG_EMBEDDED
	177	static unsigned int bpf_wantpktap = 1;
	178	#else
	179	static unsigned int bpf_wantpktap = 0;
	180	#endif
	181	SYSCTL_UINT(_debug, OID_AUTO, bpf_wantpktap, CTLFLAG_RW \| CTLFLAG_LOCKED,
	182	&bpf_wantpktap, 0, "");
	183
	184	static int bpf_debug = 0;
	185	SYSCTL_INT(_debug, OID_AUTO, bpf_debug, CTLFLAG_RW \| CTLFLAG_LOCKED,
	186	&bpf_debug, 0, "");
	187
	188	/*
	189	* bpf_iflist is the list of interfaces; each corresponds to an ifnet
	190	* bpf_dtab holds pointers to the descriptors, indexed by minor device #
	191	*/
	192	static struct bpf_if *bpf_iflist; /* head of the list, linked through bif_next */
	193	#ifdef __APPLE__
	194	/*
	195	* BSD now stores the bpf_d in the dev_t which is a struct
	196	* on their system. Our dev_t is an int, so we still store
	197	* the bpf_d in a separate table indexed by minor device #.
	198	*
	199	* The value stored in bpf_dtab[n] represents one of three states:
	200	* NULL: device not opened
	201	* BPF_DEV_RESERVED: device opening or closing
	202	* other: device <n> opened with pointer to storage
	203	*/
	204	#define BPF_DEV_RESERVED ((struct bpf_d *)(uintptr_t)1) /* sentinel, never dereferenced */
	205	static struct bpf_d **bpf_dtab = NULL; /* descriptor table, replaced when grown by bpf_make_dev_t() */
	206	static unsigned int bpf_dtab_size = 0; /* number of slots allocated in bpf_dtab */
	207	static unsigned int nbpfilter = 0; /* number of device nodes created so far */
	208
	209	decl_lck_mtx_data(static, bpf_mlock_data);
	210	static lck_mtx_t *bpf_mlock = &bpf_mlock_data; /* taken by the open/close/read entry points */
	211	static lck_grp_t *bpf_mlock_grp;
	212	static lck_grp_attr_t *bpf_mlock_grp_attr;
	213	static lck_attr_t *bpf_mlock_attr;
	214
	215	#endif /* __APPLE__ */
	216
	217	static int bpf_allocbufs(struct bpf_d *);
	218	static errno_t bpf_attachd(struct bpf_d d, struct bpf_if bp);
	219	static int bpf_detachd(struct bpf_d *d, int);
	220	static void bpf_freed(struct bpf_d *);
	221	static int bpf_movein(struct uio *, int,
	222	struct mbuf *, struct sockaddr , int *);
	223	static int bpf_setif(struct bpf_d *, ifnet_t ifp, bool, bool);
	224	static void bpf_timed_out(void , void );
	225	static void bpf_wakeup(struct bpf_d *);
	226	static u_int get_pkt_trunc_len(u_char *, u_int);
	227	static void catchpacket(struct bpf_d , struct bpf_packet , u_int, int);
	228	static void reset_d(struct bpf_d *);
	229	static int bpf_setf(struct bpf_d *, u_int, user_addr_t, u_long);
	230	static int bpf_getdltlist(struct bpf_d , caddr_t, struct proc );
	231	static int bpf_setdlt(struct bpf_d *, u_int);
	232	static int bpf_set_traffic_class(struct bpf_d *, int);
	233	static void bpf_set_packet_service_class(struct mbuf *, int);
	234
	235	static void bpf_acquire_d(struct bpf_d *);
	236	static void bpf_release_d(struct bpf_d *);
	237
	238	static int bpf_devsw_installed;
	239
	240	void bpf_init(void *unused);
	241	static int bpf_tap_callback(struct ifnet ifp, struct mbuf m);
	242
	243	/*
	244	* Darwin differs from BSD here, the following are static
	245	* on BSD and not static on Darwin.
	246	*/
	247	d_open_t bpfopen;
	248	d_close_t bpfclose;
	249	d_read_t bpfread;
	250	d_write_t bpfwrite;
	251	ioctl_fcn_t bpfioctl;
	252	select_fcn_t bpfselect;
	253
	254	/* Darwin's cdevsw struct differs slightly from BSDs */
	255	#define CDEV_MAJOR 23
	256	/*
	* Character device switch entry for the bpf devices: the bpf* entry points
	* declared above handle open, close, read, write, ioctl and select.
	* NOTE(review): the eno_* entries are presumably the kernel's
	* "operation not supported" stubs -- confirm.
	*/ static struct cdevsw bpf_cdevsw = {
	257	.d_open = bpfopen,
	258	.d_close = bpfclose,
	259	.d_read = bpfread,
	260	.d_write = bpfwrite,
	261	.d_ioctl = bpfioctl,
	262	.d_stop = eno_stop,
	263	.d_reset = eno_reset,
	264	.d_ttys = NULL,
	265	.d_select = bpfselect,
	266	.d_mmap = eno_mmap,
	267	.d_strategy = eno_strat,
	268	.d_reserved_1 = eno_getc,
	269	.d_reserved_2 = eno_putc,
	270	.d_type = 0
	271	};
	272
	273	#define SOCKADDR_HDR_LEN offsetof(struct sockaddr, sa_data)
	274
	275	static int
	276	bpf_movein(struct uio uio, int linktype, struct mbuf *mp,
	277	struct sockaddr sockp, int datlen)
	278	{
	279	struct mbuf *m;
	280	int error;
	281	int len;
	282	uint8_t sa_family;
	283	int hlen;
	284
	285	switch (linktype) {
	286	#if SLIP
	287	case DLT_SLIP:
	288	sa_family = AF_INET;
	289	hlen = 0;
	290	break;
	291	#endif /* SLIP */
	292
	293	case DLT_EN10MB:
	294	sa_family = AF_UNSPEC;
	295	/* XXX Would MAXLINKHDR be better? */
	296	hlen = sizeof(struct ether_header);
	297	break;
	298
	299	#if FDDI
	300	case DLT_FDDI:
	301	#if defined(__FreeBSD__) \|\| defined(__bsdi__)
	302	sa_family = AF_IMPLINK;
	303	hlen = 0;
	304	#else
	305	sa_family = AF_UNSPEC;
	306	/* XXX 4(FORMAC)+6(dst)+6(src)+3(LLC)+5(SNAP) */
	307	hlen = 24;
	308	#endif
	309	break;
	310	#endif /* FDDI */
	311
	312	case DLT_RAW:
	313	case DLT_NULL:
	314	sa_family = AF_UNSPEC;
	315	hlen = 0;
	316	break;
	317
	318	#ifdef __FreeBSD__
	319	case DLT_ATM_RFC1483:
	320	/*
	321	* en atm driver requires 4-byte atm pseudo header.
	322	* though it isn't standard, vpi:vci needs to be
	323	* specified anyway.
	324	*/
	325	sa_family = AF_UNSPEC;
	326	hlen = 12; /* XXX 4(ATM_PH) + 3(LLC) + 5(SNAP) */
	327	break;
	328	#endif
	329
	330	case DLT_PPP:
	331	sa_family = AF_UNSPEC;
	332	hlen = 4; /* This should match PPP_HDRLEN */
	333	break;
	334
	335	case DLT_APPLE_IP_OVER_IEEE1394:
	336	sa_family = AF_UNSPEC;
	337	hlen = sizeof(struct firewire_header);
	338	break;
	339
	340	case DLT_IEEE802_11: /* IEEE 802.11 wireless */
	341	sa_family = AF_IEEE80211;
	342	hlen = 0;
	343	break;
	344
	345	case DLT_IEEE802_11_RADIO:
	346	sa_family = AF_IEEE80211;
	347	hlen = 0;
	348	break;
	349
	350	default:
	351	return EIO;
	352	}
	353
	354	// LP64todo - fix this!
	355	len = uio_resid(uio);
	356	*datlen = len - hlen;
	357	if ((unsigned)len > MCLBYTES) {
	358	return EIO;
	359	}
	360
	361	if (sockp) {
	362	/*
	363	* Build a sockaddr based on the data link layer type.
	364	* We do this at this level because the ethernet header
	365	* is copied directly into the data field of the sockaddr.
	366	* In the case of SLIP, there is no header and the packet
	367	* is forwarded as is.
	368	* Also, we are careful to leave room at the front of the mbuf
	369	* for the link level header.
	370	*/
	371	if ((hlen + SOCKADDR_HDR_LEN) > sockp->sa_len) {
	372	return EIO;
	373	}
	374	sockp->sa_family = sa_family;
	375	} else {
	376	/*
	377	* We're directly sending the packet data supplied by
	378	* the user; we don't need to make room for the link
	379	* header, and don't need the header length value any
	380	* more, so set it to 0.
	381	*/
	382	hlen = 0;
	383	}
	384
	385	MGETHDR(m, M_WAIT, MT_DATA);
	386	if (m == 0) {
	387	return ENOBUFS;
	388	}
	389	if ((unsigned)len > MHLEN) {
	390	MCLGET(m, M_WAIT);
	391	if ((m->m_flags & M_EXT) == 0) {
	392	error = ENOBUFS;
	393	goto bad;
	394	}
	395	}
	396	m->m_pkthdr.len = m->m_len = len;
	397	m->m_pkthdr.rcvif = NULL;
	398	*mp = m;
	399
	400	/*
	401	* Make room for link header.
	402	*/
	403	if (hlen != 0) {
	404	m->m_pkthdr.len -= hlen;
	405	m->m_len -= hlen;
	406	m->m_data += hlen; /* XXX */
	407	error = UIOMOVE((caddr_t)sockp->sa_data, hlen, UIO_WRITE, uio);
	408	if (error) {
	409	goto bad;
	410	}
	411	}
	412	error = UIOMOVE(mtod(m, caddr_t), len - hlen, UIO_WRITE, uio);
	413	if (error) {
	414	goto bad;
	415	}
	416
	417	/* Check for multicast destination */
	418	switch (linktype) {
	419	case DLT_EN10MB: {
	420	struct ether_header *eh;
	421
	422	eh = mtod(m, struct ether_header *);
	423	if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
	424	if (_ether_cmp(etherbroadcastaddr,
	425	eh->ether_dhost) == 0) {
	426	m->m_flags \|= M_BCAST;
	427	} else {
	428	m->m_flags \|= M_MCAST;
	429	}
	430	}
	431	break;
	432	}
	433	}
	434
	435	return 0;
	436	bad:
	437	m_freem(m);
	438	return error;
	439	}
	440
	441	#ifdef __APPLE__
	442
	443	/*
	444	* The dynamic addition of a new device node must block all processes that
	445	* are opening the last device so that no process will get an unexpected
	446	* ENOENT
	447	*/
	448	static void
	449	bpf_make_dev_t(int maj)
	450	{
	451	static int bpf_growing = 0;
	452	unsigned int cur_size = nbpfilter, i;
	453
	454	if (nbpfilter >= bpf_maxdevices) {
	455	return;
	456	}
	457
	458	while (bpf_growing) {
	459	/* Wait until new device has been created */
	460	(void) tsleep((caddr_t)&bpf_growing, PZERO, "bpf_growing", 0);
	461	}
	462	if (nbpfilter > cur_size) {
	463	/* other thread grew it already */
	464	return;
	465	}
	466	bpf_growing = 1;
	467
	468	/* need to grow bpf_dtab first */
	469	if (nbpfilter == bpf_dtab_size) {
	470	int new_dtab_size;
	471	struct bpf_d **new_dtab = NULL;
	472	struct bpf_d **old_dtab = NULL;
	473
	474	new_dtab_size = bpf_dtab_size + NBPFILTER;
	475	new_dtab = (struct bpf_d **)_MALLOC(
	476	sizeof(struct bpf_d ) new_dtab_size, M_DEVBUF, M_WAIT);
	477	if (new_dtab == 0) {
	478	printf("bpf_make_dev_t: malloc bpf_dtab failed\n");
	479	goto done;
	480	}
	481	if (bpf_dtab) {
	482	bcopy(bpf_dtab, new_dtab,
	483	sizeof(struct bpf_d ) bpf_dtab_size);
	484	}
	485	bzero(new_dtab + bpf_dtab_size,
	486	sizeof(struct bpf_d ) NBPFILTER);
	487	old_dtab = bpf_dtab;
	488	bpf_dtab = new_dtab;
	489	bpf_dtab_size = new_dtab_size;
	490	if (old_dtab != NULL) {
	491	_FREE(old_dtab, M_DEVBUF);
	492	}
	493	}
	494	i = nbpfilter++;
	495	(void) devfs_make_node(makedev(maj, i),
	496	DEVFS_CHAR, UID_ROOT, GID_WHEEL, 0600,
	497	"bpf%d", i);
	498	done:
	499	bpf_growing = 0;
	500	wakeup((caddr_t)&bpf_growing);
	501	}
	502
	503	#endif
	504
	505	/*
	506	* Attach file to the bpf interface, i.e. make d listen on bp.
	507	*/
	508	static errno_t
	509	bpf_attachd(struct bpf_d d, struct bpf_if bp)
	510	{
	511	int first = bp->bif_dlist == NULL;
	512	int error = 0;
	513
	514	/*
	515	* Point d at bp, and add d to the interface's list of listeners.
	516	* Finally, point the driver's bpf cookie at the interface so
	517	* it will divert packets to bpf.
	518	*/
	519	d->bd_bif = bp;
	520	d->bd_next = bp->bif_dlist;
	521	bp->bif_dlist = d;
	522
	523	/*
	524	* Take a reference on the device even if an error is returned
	525	* because we keep the device in the interface's list of listeners
	526	*/
	527	bpf_acquire_d(d);
	528
	529	if (first) {
	530	/* Find the default bpf entry for this ifp */
	531	if (bp->bif_ifp->if_bpf == NULL) {
	532	struct bpf_if tmp, primary = NULL;
	533
	534	for (tmp = bpf_iflist; tmp; tmp = tmp->bif_next) {
	535	if (tmp->bif_ifp == bp->bif_ifp) {
	536	primary = tmp;
	537	break;
	538	}
	539	}
	540	bp->bif_ifp->if_bpf = primary;
	541	}
	542	/* Only call dlil_set_bpf_tap for primary dlt */
	543	if (bp->bif_ifp->if_bpf == bp) {
	544	dlil_set_bpf_tap(bp->bif_ifp, BPF_TAP_INPUT_OUTPUT,
	545	bpf_tap_callback);
	546	}
	547
	548	if (bp->bif_tap != NULL) {
	549	error = bp->bif_tap(bp->bif_ifp, bp->bif_dlt,
	550	BPF_TAP_INPUT_OUTPUT);
	551	}
	552	}
	553
	554	/*
	555	* Reset the detach flags in case we previously detached an interface
	556	*/
	557	d->bd_flags &= ~(BPF_DETACHING \| BPF_DETACHED);
	558
	559	if (bp->bif_dlt == DLT_PKTAP) {
	560	d->bd_flags \|= BPF_FINALIZE_PKTAP;
	561	} else {
	562	d->bd_flags &= ~BPF_FINALIZE_PKTAP;
	563	}
	564	return error;
	565	}
	566
	567	/*
	568	* Detach a file from its interface.
	569	*
	570	* Return 1 if was closed by some thread, 0 otherwise
	571	*/
	572	static int
	573	bpf_detachd(struct bpf_d *d, int closing)
	574	{
	575	struct bpf_d **p;
	576	struct bpf_if *bp;
	577	struct ifnet *ifp;
	578
	579	int bpf_closed = d->bd_flags & BPF_CLOSING;
	580	/*
	581	* Some other thread already detached
	582	*/
	583	if ((d->bd_flags & (BPF_DETACHED \| BPF_DETACHING)) != 0) {
	584	goto done;
	585	}
	586	/*
	587	* This thread is doing the detach
	588	*/
	589	d->bd_flags \|= BPF_DETACHING;
	590
	591	ifp = d->bd_bif->bif_ifp;
	592	bp = d->bd_bif;
	593
	594	if (bpf_debug != 0) {
	595	printf("%s: %llx %s%s\n",
	596	__func__, (uint64_t)VM_KERNEL_ADDRPERM(d),
	597	if_name(ifp), closing ? " closing" : "");
	598	}
	599
	600	/* Remove d from the interface's descriptor list. */
	601	p = &bp->bif_dlist;
	602	while (*p != d) {
	603	p = &(*p)->bd_next;
	604	if (*p == 0) {
	605	panic("bpf_detachd: descriptor not in list");
	606	}
	607	}
	608	p = (p)->bd_next;
	609	if (bp->bif_dlist == 0) {
	610	/*
	611	* Let the driver know that there are no more listeners.
	612	*/
	613	/* Only call dlil_set_bpf_tap for primary dlt */
	614	if (bp->bif_ifp->if_bpf == bp) {
	615	dlil_set_bpf_tap(ifp, BPF_TAP_DISABLE, NULL);
	616	}
	617	if (bp->bif_tap) {
	618	bp->bif_tap(ifp, bp->bif_dlt, BPF_TAP_DISABLE);
	619	}
	620
	621	for (bp = bpf_iflist; bp; bp = bp->bif_next) {
	622	if (bp->bif_ifp == ifp && bp->bif_dlist != 0) {
	623	break;
	624	}
	625	}
	626	if (bp == NULL) {
	627	ifp->if_bpf = NULL;
	628	}
	629	}
	630	d->bd_bif = NULL;
	631	/*
	632	* Check if this descriptor had requested promiscuous mode.
	633	* If so, turn it off.
	634	*/
	635	if (d->bd_promisc) {
	636	d->bd_promisc = 0;
	637	lck_mtx_unlock(bpf_mlock);
	638	if (ifnet_set_promiscuous(ifp, 0)) {
	639	/*
	640	* Something is really wrong if we were able to put
	641	* the driver into promiscuous mode, but can't
	642	* take it out.
	643	* Most likely the network interface is gone.
	644	*/
	645	printf("%s: ifnet_set_promiscuous failed\n", __func__);
	646	}
	647	lck_mtx_lock(bpf_mlock);
	648	}
	649
	650	/*
	651	* Wake up other thread that are waiting for this thread to finish
	652	* detaching
	653	*/
	654	d->bd_flags &= ~BPF_DETACHING;
	655	d->bd_flags \|= BPF_DETACHED;
	656
	657	/* Refresh the local variable as d could have been modified */
	658	bpf_closed = d->bd_flags & BPF_CLOSING;
	659	/*
	660	* Note that We've kept the reference because we may have dropped
	661	* the lock when turning off promiscuous mode
	662	*/
	663	bpf_release_d(d);
	664
	665	done:
	666	/*
	667	* When closing makes sure no other thread refer to the bpf_d
	668	*/
	669	if (bpf_debug != 0) {
	670	printf("%s: %llx done\n",
	671	__func__, (uint64_t)VM_KERNEL_ADDRPERM(d));
	672	}
	673	/*
	674	* Let the caller know the bpf_d is closed
	675	*/
	676	if (bpf_closed) {
	677	return 1;
	678	} else {
	679	return 0;
	680	}
	681	}
	682
	683	/*
	684	* Start asynchronous timer, if necessary.
	685	* Must be called with bpf_mlock held.
	686	*/
	687	static void
	688	bpf_start_timer(struct bpf_d *d)
	689	{
	690	uint64_t deadline;
	691	struct timeval tv;
	692
	693	if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
	694	tv.tv_sec = d->bd_rtout / hz;
	695	tv.tv_usec = (d->bd_rtout % hz) * tick;
	696
	697	clock_interval_to_deadline(
	698	(uint64_t)tv.tv_sec * USEC_PER_SEC + tv.tv_usec,
	699	NSEC_PER_USEC, &deadline);
	700	/*
	701	* The state is BPF_IDLE, so the timer hasn't
	702	* been started yet, and hasn't gone off yet;
	703	* there is no thread call scheduled, so this
	704	* won't change the schedule.
	705	*
	706	* XXX - what if, by the time it gets entered,
	707	* the deadline has already passed?
	708	*/
	709	thread_call_enter_delayed(d->bd_thread_call, deadline);
	710	d->bd_state = BPF_WAITING;
	711	}
	712	}
	713
	714	/*
	715	* Cancel asynchronous timer.
	716	* Must be called with bpf_mlock held.
	717	*/
	718	static boolean_t
	719	bpf_stop_timer(struct bpf_d *d)
	720	{
	721	/*
	722	* If the timer has already gone off, this does nothing.
	723	* Our caller is expected to set d->bd_state to BPF_IDLE,
	724	* with the bpf_mlock, after we are called. bpf_timed_out()
	725	* also grabs bpf_mlock, so, if the timer has gone off and
	726	* bpf_timed_out() hasn't finished, it's waiting for the
	727	* lock; when this thread releases the lock, it will
	728	* find the state is BPF_IDLE, and just release the
	729	* lock and return.
	730	*/
	731	return thread_call_cancel(d->bd_thread_call);
	732	}
	733
	734	void
	735	bpf_acquire_d(struct bpf_d *d)
	736	{
	737	void *lr_saved = __builtin_return_address(0);
	738
	739	LCK_MTX_ASSERT(bpf_mlock, LCK_MTX_ASSERT_OWNED);
	740
	741	d->bd_refcnt += 1;
	742
	743	d->bd_ref_lr[d->bd_next_ref_lr] = lr_saved;
	744	d->bd_next_ref_lr = (d->bd_next_ref_lr + 1) % BPF_REF_HIST;
	745	}
	746
	747	void
	748	bpf_release_d(struct bpf_d *d)
	749	{
	750	void *lr_saved = __builtin_return_address(0);
	751
	752	LCK_MTX_ASSERT(bpf_mlock, LCK_MTX_ASSERT_OWNED);
	753
	754	if (d->bd_refcnt <= 0) {
	755	panic("%s: %p refcnt <= 0", __func__, d);
	756	}
	757
	758	d->bd_refcnt -= 1;
	759
	760	d->bd_unref_lr[d->bd_next_unref_lr] = lr_saved;
	761	d->bd_next_unref_lr = (d->bd_next_unref_lr + 1) % BPF_REF_HIST;
	762
	763	if (d->bd_refcnt == 0) {
	764	/* Assert the device is detached */
	765	if ((d->bd_flags & BPF_DETACHED) == 0) {
	766	panic("%s: %p BPF_DETACHED not set", __func__, d);
	767	}
	768
	769	_FREE(d, M_DEVBUF);
	770	}
	771	}
	772
	773	/*
	774	* Open ethernet device. Returns ENXIO for illegal minor device number,
	775	* EBUSY if file is open by another process.
	776	*/
	777	/* ARGSUSED */
	778	int
	779	bpfopen(dev_t dev, int flags, __unused int fmt,
	780	struct proc *p)
	781	{
	782	struct bpf_d *d;
	783
	784	lck_mtx_lock(bpf_mlock);
	785	if ((unsigned int) minor(dev) >= nbpfilter) {
	786	lck_mtx_unlock(bpf_mlock);
	787	return ENXIO;
	788	}
	789	/*
	790	* New device nodes are created on demand when opening the last one.
	791	* The programming model is for processes to loop on the minor starting
	792	* at 0 as long as EBUSY is returned. The loop stops when either the
	793	* open succeeds or an error other that EBUSY is returned. That means
	794	* that bpf_make_dev_t() must block all processes that are opening the
	795	* last node. If not all processes are blocked, they could unexpectedly
	796	* get ENOENT and abort their opening loop.
	797	*/
	798	if ((unsigned int) minor(dev) == (nbpfilter - 1)) {
	799	bpf_make_dev_t(major(dev));
	800	}
	801
	802	/*
	803	* Each minor can be opened by only one process. If the requested
	804	* minor is in use, return EBUSY.
	805	*
	806	* Important: bpfopen() and bpfclose() have to check and set the status
	807	* of a device in the same lockin context otherwise the device may be
	808	* leaked because the vnode use count will be unpextectly greater than 1
	809	* when close() is called.
	810	*/
	811	if (bpf_dtab[minor(dev)] == NULL) {
	812	/* Reserve while opening */
	813	bpf_dtab[minor(dev)] = BPF_DEV_RESERVED;
	814	} else {
	815	lck_mtx_unlock(bpf_mlock);
	816	return EBUSY;
	817	}
	818	d = (struct bpf_d *)_MALLOC(sizeof(struct bpf_d), M_DEVBUF,
	819	M_WAIT \| M_ZERO);
	820	if (d == NULL) {
	821	/* this really is a catastrophic failure */
	822	printf("bpfopen: malloc bpf_d failed\n");
	823	bpf_dtab[minor(dev)] = NULL;
	824	lck_mtx_unlock(bpf_mlock);
	825	return ENOMEM;
	826	}
	827
	828	/* Mark "in use" and do most initialization. */
	829	bpf_acquire_d(d);
	830	d->bd_bufsize = bpf_bufsize;
	831	d->bd_sig = SIGIO;
	832	d->bd_seesent = 1;
	833	d->bd_oflags = flags;
	834	d->bd_state = BPF_IDLE;
	835	d->bd_traffic_class = SO_TC_BE;
	836	d->bd_flags \|= BPF_DETACHED;
	837	if (bpf_wantpktap) {
	838	d->bd_flags \|= BPF_WANT_PKTAP;
	839	} else {
	840	d->bd_flags &= ~BPF_WANT_PKTAP;
	841	}
	842	d->bd_thread_call = thread_call_allocate(bpf_timed_out, d);
	843	if (d->bd_thread_call == NULL) {
	844	printf("bpfopen: malloc thread call failed\n");
	845	bpf_dtab[minor(dev)] = NULL;
	846	bpf_release_d(d);
	847	lck_mtx_unlock(bpf_mlock);
	848
	849	return ENOMEM;
	850	}
	851	d->bd_opened_by = p;
	852	uuid_generate(d->bd_uuid);
	853
	854	#if CONFIG_MACF_NET
	855	mac_bpfdesc_label_init(d);
	856	mac_bpfdesc_label_associate(kauth_cred_get(), d);
	857	#endif
	858	bpf_dtab[minor(dev)] = d; /* Mark opened */
	859	lck_mtx_unlock(bpf_mlock);
	860
	861	return 0;
	862	}
	863
	864	/*
	865	* Close the descriptor by detaching it from its interface,
	866	* deallocating its buffers, and marking it free.
	867	*/
	868	/* ARGSUSED */
	869	int
	870	bpfclose(dev_t dev, __unused int flags, __unused int fmt,
	871	__unused struct proc *p)
	872	{
	873	struct bpf_d *d;
	874
	875	/* Take BPF lock to ensure no other thread is using the device */
	876	lck_mtx_lock(bpf_mlock);
	877
	878	d = bpf_dtab[minor(dev)];
	879	if (d == NULL \|\| d == BPF_DEV_RESERVED) {
	880	lck_mtx_unlock(bpf_mlock);
	881	return ENXIO;
	882	}
	883
	884	/*
	885	* Other threads may call bpd_detachd() if we drop the bpf_mlock
	886	*/
	887	d->bd_flags \|= BPF_CLOSING;
	888
	889	if (bpf_debug != 0) {
	890	printf("%s: %llx\n",
	891	__func__, (uint64_t)VM_KERNEL_ADDRPERM(d));
	892	}
	893
	894	bpf_dtab[minor(dev)] = BPF_DEV_RESERVED; /* Reserve while closing */
	895
	896	/*
	897	* Deal with any in-progress timeouts.
	898	*/
	899	switch (d->bd_state) {
	900	case BPF_IDLE:
	901	/*
	902	* Not waiting for a timeout, and no timeout happened.
	903	*/
	904	break;
	905
	906	case BPF_WAITING:
	907	/*
	908	* Waiting for a timeout.
	909	* Cancel any timer that has yet to go off,
	910	* and mark the state as "closing".
	911	* Then drop the lock to allow any timers that
	912	* have gone off to run to completion, and wait
	913	* for them to finish.
	914	*/
	915	if (!bpf_stop_timer(d)) {
	916	/*
	917	* There was no pending call, so the call must
	918	* have been in progress. Wait for the call to
	919	* complete; we have to drop the lock while
	920	* waiting. to let the in-progrss call complete
	921	*/
	922	d->bd_state = BPF_DRAINING;
	923	while (d->bd_state == BPF_DRAINING) {
	924	msleep((caddr_t)d, bpf_mlock, PRINET,
	925	"bpfdraining", NULL);
	926	}
	927	}
	928	d->bd_state = BPF_IDLE;
	929	break;
	930
	931	case BPF_TIMED_OUT:
	932	/*
	933	* Timer went off, and the timeout routine finished.
	934	*/
	935	d->bd_state = BPF_IDLE;
	936	break;
	937
	938	case BPF_DRAINING:
	939	/*
	940	* Another thread is blocked on a close waiting for
	941	* a timeout to finish.
	942	* This "shouldn't happen", as the first thread to enter
	943	* bpfclose() will set bpf_dtab[minor(dev)] to 1, and
	944	* all subsequent threads should see that and fail with
	945	* ENXIO.
	946	*/
	947	panic("Two threads blocked in a BPF close");
	948	break;
	949	}
	950
	951	if (d->bd_bif) {
	952	bpf_detachd(d, 1);
	953	}
	954	selthreadclear(&d->bd_sel);
	955	#if CONFIG_MACF_NET
	956	mac_bpfdesc_label_destroy(d);
	957	#endif
	958	thread_call_free(d->bd_thread_call);
	959
	960	while (d->bd_hbuf_read != 0) {
	961	msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
	962	}
	963
	964	bpf_freed(d);
	965
	966	/* Mark free in same context as bpfopen comes to check */
	967	bpf_dtab[minor(dev)] = NULL; /* Mark closed */
	968
	969	bpf_release_d(d);
	970
	971	lck_mtx_unlock(bpf_mlock);
	972
	973	return 0;
	974	}
	975
	976	#define BPF_SLEEP bpf_sleep
	977
	978	static int
	979	bpf_sleep(struct bpf_d d, int pri, const char wmesg, int timo)
	980	{
	981	u_int64_t abstime = 0;
	982
	983	if (timo != 0) {
	984	clock_interval_to_deadline(timo, NSEC_PER_SEC / hz, &abstime);
	985	}
	986
	987	return msleep1((caddr_t)d, bpf_mlock, pri, wmesg, abstime);
	988	}
	989
	990	static void
	991	bpf_finalize_pktap(struct bpf_hdr hp, struct pktap_header pktaphdr)
	992	{
	993	if (pktaphdr->pth_flags & PTH_FLAG_V2_HDR) {
	994	struct pktap_v2_hdr *pktap_v2_hdr;
	995
	996	pktap_v2_hdr = (struct pktap_v2_hdr *)pktaphdr;
	997
	998	if (pktap_v2_hdr->pth_flags & PTH_FLAG_DELAY_PKTAP) {
	999	pktap_v2_finalize_proc_info(pktap_v2_hdr);
	1000	}
	1001	} else {
	1002	if (pktaphdr->pth_flags & PTH_FLAG_DELAY_PKTAP) {
	1003	pktap_finalize_proc_info(pktaphdr);
	1004	}
	1005
	1006	if (pktaphdr->pth_flags & PTH_FLAG_TSTAMP) {
	1007	hp->bh_tstamp.tv_sec = pktaphdr->pth_tstamp.tv_sec;
	1008	hp->bh_tstamp.tv_usec = pktaphdr->pth_tstamp.tv_usec;
	1009	}
	1010	}
	1011	}
	1012
	/*
	 * Rotate the packet buffers in descriptor d. Move the store buffer
	 * into the hold slot, and the free buffer into the store slot.
	 * Zero the length of the new store buffer.
	 *
	 * Panics if the hold buffer is being read (bd_hbuf_read != 0).
	 * Expands to a single statement, so it is safe in an unbraced if/else;
	 * use it as "ROTATE_BUFFERS(d);".
	 */
	#define ROTATE_BUFFERS(d) \
		do { \
			if ((d)->bd_hbuf_read != 0) { \
				panic("rotating bpf buffers during read"); \
			} \
			(d)->bd_hbuf = (d)->bd_sbuf; \
			(d)->bd_hlen = (d)->bd_slen; \
			(d)->bd_hcnt = (d)->bd_scnt; \
			(d)->bd_sbuf = (d)->bd_fbuf; \
			(d)->bd_slen = 0; \
			(d)->bd_scnt = 0; \
			(d)->bd_fbuf = NULL; \
		} while (0)
	1028	/*
	1029	* bpfread - read next chunk of packets from buffers
	1030	*/
	1031	int
	1032	bpfread(dev_t dev, struct uio *uio, int ioflag)
	1033	{
	1034	struct bpf_d *d;
	1035	caddr_t hbuf;
	1036	int timed_out, hbuf_len;
	1037	int error;
	1038	int flags;
	1039
	1040	lck_mtx_lock(bpf_mlock);
	1041
	1042	d = bpf_dtab[minor(dev)];
	1043	if (d == NULL \|\| d == BPF_DEV_RESERVED \|\|
	1044	(d->bd_flags & BPF_CLOSING) != 0) {
	1045	lck_mtx_unlock(bpf_mlock);
	1046	return ENXIO;
	1047	}
	1048
	1049	bpf_acquire_d(d);
	1050
	1051	/*
	1052	* Restrict application to use a buffer the same size as
	1053	* as kernel buffers.
	1054	*/
	1055	if (uio_resid(uio) != d->bd_bufsize) {
	1056	bpf_release_d(d);
	1057	lck_mtx_unlock(bpf_mlock);
	1058	return EINVAL;
	1059	}
	1060
	1061	if (d->bd_state == BPF_WAITING) {
	1062	bpf_stop_timer(d);
	1063	}
	1064
	1065	timed_out = (d->bd_state == BPF_TIMED_OUT);
	1066	d->bd_state = BPF_IDLE;
	1067
	1068	while (d->bd_hbuf_read != 0) {
	1069	msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
	1070	}
	1071
	1072	if ((d->bd_flags & BPF_CLOSING) != 0) {
	1073	bpf_release_d(d);
	1074	lck_mtx_unlock(bpf_mlock);
	1075	return ENXIO;
	1076	}
	1077	/*
	1078	* If the hold buffer is empty, then do a timed sleep, which
	1079	* ends when the timeout expires or when enough packets
	1080	* have arrived to fill the store buffer.
	1081	*/
	1082	while (d->bd_hbuf == 0) {
	1083	if ((d->bd_immediate \|\| timed_out \|\| (ioflag & IO_NDELAY)) &&
	1084	d->bd_slen != 0) {
	1085	/*
	1086	* We're in immediate mode, or are reading
	1087	* in non-blocking mode, or a timer was
	1088	* started before the read (e.g., by select()
	1089	* or poll()) and has expired and a packet(s)
	1090	* either arrived since the previous
	1091	* read or arrived while we were asleep.
	1092	* Rotate the buffers and return what's here.
	1093	*/
	1094	ROTATE_BUFFERS(d);
	1095	break;
	1096	}
	1097
	1098	/*
	1099	* No data is available, check to see if the bpf device
	1100	* is still pointed at a real interface. If not, return
	1101	* ENXIO so that the userland process knows to rebind
	1102	* it before using it again.
	1103	*/
	1104	if (d->bd_bif == NULL) {
	1105	bpf_release_d(d);
	1106	lck_mtx_unlock(bpf_mlock);
	1107	return ENXIO;
	1108	}
	1109	if (ioflag & IO_NDELAY) {
	1110	bpf_release_d(d);
	1111	lck_mtx_unlock(bpf_mlock);
	1112	return EWOULDBLOCK;
	1113	}
	1114	error = BPF_SLEEP(d, PRINET \| PCATCH, "bpf", d->bd_rtout);
	1115	/*
	1116	* Make sure device is still opened
	1117	*/
	1118	if ((d->bd_flags & BPF_CLOSING) != 0) {
	1119	bpf_release_d(d);
	1120	lck_mtx_unlock(bpf_mlock);
	1121	return ENXIO;
	1122	}
	1123
	1124	while (d->bd_hbuf_read != 0) {
	1125	msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading",
	1126	NULL);
	1127	}
	1128
	1129	if ((d->bd_flags & BPF_CLOSING) != 0) {
	1130	bpf_release_d(d);
	1131	lck_mtx_unlock(bpf_mlock);
	1132	return ENXIO;
	1133	}
	1134
	1135	if (error == EINTR \|\| error == ERESTART) {
	1136	if (d->bd_hbuf != NULL) {
	1137	/*
	1138	* Because we msleep, the hold buffer might
	1139	* be filled when we wake up. Avoid rotating
	1140	* in this case.
	1141	*/
	1142	break;
	1143	}
	1144	if (d->bd_slen != 0) {
	1145	/*
	1146	* Sometimes we may be interrupted often and
	1147	* the sleep above will not timeout.
	1148	* Regardless, we should rotate the buffers
	1149	* if there's any new data pending and
	1150	* return it.
	1151	*/
	1152	ROTATE_BUFFERS(d);
	1153	break;
	1154	}
	1155	bpf_release_d(d);
	1156	lck_mtx_unlock(bpf_mlock);
	1157	if (error == ERESTART) {
	1158	printf("%s: %llx ERESTART to EINTR\n",
	1159	__func__, (uint64_t)VM_KERNEL_ADDRPERM(d));
	1160	error = EINTR;
	1161	}
	1162	return error;
	1163	}
	1164	if (error == EWOULDBLOCK) {
	1165	/*
	1166	* On a timeout, return what's in the buffer,
	1167	* which may be nothing. If there is something
	1168	* in the store buffer, we can rotate the buffers.
	1169	*/
	1170	if (d->bd_hbuf) {
	1171	/*
	1172	* We filled up the buffer in between
	1173	* getting the timeout and arriving
	1174	* here, so we don't need to rotate.
	1175	*/
	1176	break;
	1177	}
	1178
	1179	if (d->bd_slen == 0) {
	1180	bpf_release_d(d);
	1181	lck_mtx_unlock(bpf_mlock);
	1182	return 0;
	1183	}
	1184	ROTATE_BUFFERS(d);
	1185	break;
	1186	}
	1187	}
	1188	/*
	1189	* At this point, we know we have something in the hold slot.
	1190	*/
	1191
	1192	/*
	1193	* Set the hold buffer read. So we do not
	1194	* rotate the buffers until the hold buffer
	1195	* read is complete. Also to avoid issues resulting
	1196	* from page faults during disk sleep (<rdar://problem/13436396>).
	1197	*/
	1198	d->bd_hbuf_read = 1;
	1199	hbuf = d->bd_hbuf;
	1200	hbuf_len = d->bd_hlen;
	1201	flags = d->bd_flags;
	1202	lck_mtx_unlock(bpf_mlock);
	1203
	1204	#ifdef __APPLE__
	1205	/*
	1206	* Before we move data to userland, we fill out the extended
	1207	* header fields.
	1208	*/
	1209	if (flags & BPF_EXTENDED_HDR) {
	1210	char *p;
	1211
	1212	p = hbuf;
	1213	while (p < hbuf + hbuf_len) {
	1214	struct bpf_hdr_ext *ehp;
	1215	uint32_t flowid;
	1216	struct so_procinfo soprocinfo;
	1217	int found = 0;
	1218
	1219	ehp = (struct bpf_hdr_ext )(void )p;
	1220	if ((flowid = ehp->bh_flowid) != 0) {
	1221	if (ehp->bh_proto == IPPROTO_TCP) {
	1222	found = inp_findinpcb_procinfo(&tcbinfo,
	1223	flowid, &soprocinfo);
	1224	} else if (ehp->bh_proto == IPPROTO_UDP) {
	1225	found = inp_findinpcb_procinfo(&udbinfo,
	1226	flowid, &soprocinfo);
	1227	}
	1228	if (found == 1) {
	1229	ehp->bh_pid = soprocinfo.spi_pid;
	1230	strlcpy(&ehp->bh_comm[0], &soprocinfo.spi_proc_name[0], sizeof(ehp->bh_comm));
	1231	}
	1232	ehp->bh_flowid = 0;
	1233	}
	1234
	1235	if (flags & BPF_FINALIZE_PKTAP) {
	1236	struct pktap_header *pktaphdr;
	1237
	1238	pktaphdr = (struct pktap_header )(void )
	1239	(p + BPF_WORDALIGN(ehp->bh_hdrlen));
	1240
	1241	bpf_finalize_pktap((struct bpf_hdr *) ehp,
	1242	pktaphdr);
	1243	}
	1244	p += BPF_WORDALIGN(ehp->bh_hdrlen + ehp->bh_caplen);
	1245	}
	1246	} else if (flags & BPF_FINALIZE_PKTAP) {
	1247	char *p;
	1248
	1249	p = hbuf;
	1250	while (p < hbuf + hbuf_len) {
	1251	struct bpf_hdr *hp;
	1252	struct pktap_header *pktaphdr;
	1253
	1254	hp = (struct bpf_hdr )(void )p;
	1255	pktaphdr = (struct pktap_header )(void )
	1256	(p + BPF_WORDALIGN(hp->bh_hdrlen));
	1257
	1258	bpf_finalize_pktap(hp, pktaphdr);
	1259
	1260	p += BPF_WORDALIGN(hp->bh_hdrlen + hp->bh_caplen);
	1261	}
	1262	}
	1263	#endif
	1264
	1265	/*
	1266	* Move data from hold buffer into user space.
	1267	* We know the entire buffer is transferred since
	1268	* we checked above that the read buffer is bpf_bufsize bytes.
	1269	*/
	1270	error = UIOMOVE(hbuf, hbuf_len, UIO_READ, uio);
	1271
	1272	lck_mtx_lock(bpf_mlock);
	1273	/*
	1274	* Make sure device is still opened
	1275	*/
	1276	if ((d->bd_flags & BPF_CLOSING) != 0) {
	1277	bpf_release_d(d);
	1278	lck_mtx_unlock(bpf_mlock);
	1279	return ENXIO;
	1280	}
	1281
	1282	d->bd_hbuf_read = 0;
	1283	d->bd_fbuf = d->bd_hbuf;
	1284	d->bd_hbuf = NULL;
	1285	d->bd_hlen = 0;
	1286	d->bd_hcnt = 0;
	1287	wakeup((caddr_t)d);
	1288
	1289	bpf_release_d(d);
	1290	lck_mtx_unlock(bpf_mlock);
	1291	return error;
	1292	}
	1293
	1294	/*
	1295	* If there are processes sleeping on this descriptor, wake them up.
	1296	*/
	1297	static void
	1298	bpf_wakeup(struct bpf_d *d)
	1299	{
	1300	if (d->bd_state == BPF_WAITING) {
	1301	bpf_stop_timer(d);
	1302	d->bd_state = BPF_IDLE;
	1303	}
	1304	wakeup((caddr_t)d);
	1305	if (d->bd_async && d->bd_sig && d->bd_sigio) {
	1306	pgsigio(d->bd_sigio, d->bd_sig);
	1307	}
	1308
	1309	selwakeup(&d->bd_sel);
	1310	if ((d->bd_flags & BPF_KNOTE)) {
	1311	KNOTE(&d->bd_sel.si_note, 1);
	1312	}
	1313	}
	1314
	1315	static void
	1316	bpf_timed_out(void arg, __unused void dummy)
	1317	{
	1318	struct bpf_d d = (struct bpf_d )arg;
	1319
	1320	lck_mtx_lock(bpf_mlock);
	1321	if (d->bd_state == BPF_WAITING) {
	1322	/*
	1323	* There's a select or kqueue waiting for this; if there's
	1324	* now stuff to read, wake it up.
	1325	*/
	1326	d->bd_state = BPF_TIMED_OUT;
	1327	if (d->bd_slen != 0) {
	1328	bpf_wakeup(d);
	1329	}
	1330	} else if (d->bd_state == BPF_DRAINING) {
	1331	/*
	1332	* A close is waiting for this to finish.
	1333	* Mark it as finished, and wake the close up.
	1334	*/
	1335	d->bd_state = BPF_IDLE;
	1336	bpf_wakeup(d);
	1337	}
	1338	lck_mtx_unlock(bpf_mlock);
	1339	}
	1340
	1341	/* keep in sync with bpf_movein above: */
	1342	#define MAX_DATALINK_HDR_LEN (sizeof(struct firewire_header))
	1343
	1344	int
	1345	bpfwrite(dev_t dev, struct uio *uio, __unused int ioflag)
	1346	{
	1347	struct bpf_d *d;
	1348	struct ifnet *ifp;
	1349	struct mbuf *m = NULL;
	1350	int error;
	1351	char dst_buf[SOCKADDR_HDR_LEN + MAX_DATALINK_HDR_LEN];
	1352	int datlen = 0;
	1353	int bif_dlt;
	1354	int bd_hdrcmplt;
	1355
	1356	lck_mtx_lock(bpf_mlock);
	1357
	1358	d = bpf_dtab[minor(dev)];
	1359	if (d == NULL \|\| d == BPF_DEV_RESERVED \|\|
	1360	(d->bd_flags & BPF_CLOSING) != 0) {
	1361	lck_mtx_unlock(bpf_mlock);
	1362	return ENXIO;
	1363	}
	1364
	1365	bpf_acquire_d(d);
	1366
	1367	if (d->bd_bif == 0) {
	1368	bpf_release_d(d);
	1369	lck_mtx_unlock(bpf_mlock);
	1370	return ENXIO;
	1371	}
	1372
	1373	ifp = d->bd_bif->bif_ifp;
	1374
	1375	if ((ifp->if_flags & IFF_UP) == 0) {
	1376	bpf_release_d(d);
	1377	lck_mtx_unlock(bpf_mlock);
	1378	return ENETDOWN;
	1379	}
	1380	if (uio_resid(uio) == 0) {
	1381	bpf_release_d(d);
	1382	lck_mtx_unlock(bpf_mlock);
	1383	return 0;
	1384	}
	1385	((struct sockaddr *)dst_buf)->sa_len = sizeof(dst_buf);
	1386
	1387	/*
	1388	* fix for PR-6849527
	1389	* geting variables onto stack before dropping lock for bpf_movein()
	1390	*/
	1391	bif_dlt = (int)d->bd_bif->bif_dlt;
	1392	bd_hdrcmplt = d->bd_hdrcmplt;
	1393
	1394	/* bpf_movein allocating mbufs; drop lock */
	1395	lck_mtx_unlock(bpf_mlock);
	1396
	1397	error = bpf_movein(uio, bif_dlt, &m,
	1398	bd_hdrcmplt ? NULL : (struct sockaddr *)dst_buf,
	1399	&datlen);
	1400
	1401	/* take the lock again */
	1402	lck_mtx_lock(bpf_mlock);
	1403	if (error) {
	1404	bpf_release_d(d);
	1405	lck_mtx_unlock(bpf_mlock);
	1406	return error;
	1407	}
	1408
	1409	/* verify the device is still open */
	1410	if ((d->bd_flags & BPF_CLOSING) != 0) {
	1411	bpf_release_d(d);
	1412	lck_mtx_unlock(bpf_mlock);
	1413	m_freem(m);
	1414	return ENXIO;
	1415	}
	1416
	1417	if (d->bd_bif == NULL) {
	1418	bpf_release_d(d);
	1419	lck_mtx_unlock(bpf_mlock);
	1420	m_free(m);
	1421	return ENXIO;
	1422	}
	1423
	1424	if ((unsigned)datlen > ifp->if_mtu) {
	1425	bpf_release_d(d);
	1426	lck_mtx_unlock(bpf_mlock);
	1427	m_freem(m);
	1428	return EMSGSIZE;
	1429	}
	1430
	1431	#if CONFIG_MACF_NET
	1432	mac_mbuf_label_associate_bpfdesc(d, m);
	1433	#endif
	1434
	1435	bpf_set_packet_service_class(m, d->bd_traffic_class);
	1436
	1437	lck_mtx_unlock(bpf_mlock);
	1438
	1439	/*
	1440	* The driver frees the mbuf.
	1441	*/
	1442	if (d->bd_hdrcmplt) {
	1443	if (d->bd_bif->bif_send) {
	1444	error = d->bd_bif->bif_send(ifp, d->bd_bif->bif_dlt, m);
	1445	} else {
	1446	error = dlil_output(ifp, 0, m, NULL, NULL, 1, NULL);
	1447	}
	1448	} else {
	1449	error = dlil_output(ifp, PF_INET, m, NULL,
	1450	(struct sockaddr *)dst_buf, 0, NULL);
	1451	}
	1452
	1453	lck_mtx_lock(bpf_mlock);
	1454	bpf_release_d(d);
	1455	lck_mtx_unlock(bpf_mlock);
	1456
	1457	return error;
	1458	}
	1459
	1460	/*
	1461	* Reset a descriptor by flushing its packet buffer and clearing the
	1462	* receive and drop counts.
	1463	*/
	1464	static void
	1465	reset_d(struct bpf_d *d)
	1466	{
	1467	if (d->bd_hbuf_read != 0) {
	1468	panic("resetting buffers during read");
	1469	}
	1470
	1471	if (d->bd_hbuf) {
	1472	/* Free the hold buffer. */
	1473	d->bd_fbuf = d->bd_hbuf;
	1474	d->bd_hbuf = NULL;
	1475	}
	1476	d->bd_slen = 0;
	1477	d->bd_hlen = 0;
	1478	d->bd_scnt = 0;
	1479	d->bd_hcnt = 0;
	1480	d->bd_rcount = 0;
	1481	d->bd_dcount = 0;
	1482	}
	1483
	1484	static struct bpf_d *
	1485	bpf_get_device_from_uuid(uuid_t uuid)
	1486	{
	1487	unsigned int i;
	1488
	1489	for (i = 0; i < nbpfilter; i++) {
	1490	struct bpf_d *d = bpf_dtab[i];
	1491
	1492	if (d == NULL \|\| d == BPF_DEV_RESERVED \|\|
	1493	(d->bd_flags & BPF_CLOSING) != 0) {
	1494	continue;
	1495	}
	1496	if (uuid_compare(uuid, d->bd_uuid) == 0) {
	1497	return d;
	1498	}
	1499	}
	1500
	1501	return NULL;
	1502	}
	1503
	1504	/*
	1505	* The BIOCSETUP command "atomically" attach to the interface and
	1506	* copy the buffer from another interface. This minimizes the risk
	1507	* of missing packet because this is done while holding
	1508	* the BPF global lock
	1509	*/
	1510	static int
	1511	bpf_setup(struct bpf_d *d_to, uuid_t uuid_from, ifnet_t ifp)
	1512	{
	1513	struct bpf_d *d_from;
	1514	int error = 0;
	1515
	1516	LCK_MTX_ASSERT(bpf_mlock, LCK_MTX_ASSERT_OWNED);
	1517
	1518	/*
	1519	* Sanity checks
	1520	*/
	1521	d_from = bpf_get_device_from_uuid(uuid_from);
	1522	if (d_from == NULL) {
	1523	error = ENOENT;
	1524	os_log_info(OS_LOG_DEFAULT,
	1525	"%s: uuids not found error %d",
	1526	__func__, error);
	1527	return error;
	1528	}
	1529	if (d_from->bd_opened_by != d_to->bd_opened_by) {
	1530	error = EACCES;
	1531	os_log_info(OS_LOG_DEFAULT,
	1532	"%s: processes not matching error %d",
	1533	__func__, error);
	1534	return error;
	1535	}
	1536
	1537	/*
	1538	* Prevent any read while copying
	1539	*/
	1540	while (d_to->bd_hbuf_read != 0) {
	1541	msleep((caddr_t)d_to, bpf_mlock, PRINET, __func__, NULL);
	1542	}
	1543	d_to->bd_hbuf_read = 1;
	1544
	1545	while (d_from->bd_hbuf_read != 0) {
	1546	msleep((caddr_t)d_from, bpf_mlock, PRINET, __func__, NULL);
	1547	}
	1548	d_from->bd_hbuf_read = 1;
	1549
	1550	/*
	1551	* Verify the devices have not been closed
	1552	*/
	1553	if (d_to->bd_flags & BPF_CLOSING) {
	1554	error = ENXIO;
	1555	os_log_info(OS_LOG_DEFAULT,
	1556	"%s: d_to is closing error %d",
	1557	__func__, error);
	1558	goto done;
	1559	}
	1560	if (d_from->bd_flags & BPF_CLOSING) {
	1561	error = ENXIO;
	1562	os_log_info(OS_LOG_DEFAULT,
	1563	"%s: d_from is closing error %d",
	1564	__func__, error);
	1565	goto done;
	1566	}
	1567
	1568	/*
	1569	* For now require the same buffer size
	1570	*/
	1571	if (d_from->bd_bufsize != d_to->bd_bufsize) {
	1572	error = EINVAL;
	1573	os_log_info(OS_LOG_DEFAULT,
	1574	"%s: bufsizes not matching error %d",
	1575	__func__, error);
	1576	goto done;
	1577	}
	1578
	1579	/*
	1580	* Attach to the interface
	1581	*/
	1582	error = bpf_setif(d_to, ifp, false, true);
	1583	if (error != 0) {
	1584	os_log_info(OS_LOG_DEFAULT,
	1585	"%s: bpf_setif() failed error %d",
	1586	__func__, error);
	1587	goto done;
	1588	}
	1589
	1590	/*
	1591	* Make sure the buffers are setup as expected by bpf_setif()
	1592	*/
	1593	ASSERT(d_to->bd_hbuf == NULL);
	1594	ASSERT(d_to->bd_sbuf != NULL);
	1595	ASSERT(d_to->bd_fbuf != NULL);
	1596
	1597	/*
	1598	* Copy the buffers and update the pointers and counts
	1599	*/
	1600	memcpy(d_to->bd_sbuf, d_from->bd_sbuf, d_from->bd_slen);
	1601	d_to->bd_slen = d_from->bd_slen;
	1602	d_to->bd_scnt = d_from->bd_scnt;
	1603
	1604	if (d_from->bd_hbuf != NULL) {
	1605	d_to->bd_hbuf = d_to->bd_fbuf;
	1606	d_to->bd_fbuf = NULL;
	1607	memcpy(d_to->bd_hbuf, d_from->bd_hbuf, d_from->bd_hlen);
	1608	}
	1609	d_to->bd_hlen = d_from->bd_hlen;
	1610	d_to->bd_hcnt = d_from->bd_hcnt;
	1611
	1612	if (bpf_debug > 0) {
	1613	os_log_info(OS_LOG_DEFAULT,
	1614	"%s: done slen %u scnt %u hlen %u hcnt %u",
	1615	__func__, d_to->bd_slen, d_to->bd_scnt,
	1616	d_to->bd_hlen, d_to->bd_hcnt);
	1617	}
	1618	done:
	1619	d_from->bd_hbuf_read = 0;
	1620	wakeup((caddr_t)d_from);
	1621
	1622	d_to->bd_hbuf_read = 0;
	1623	wakeup((caddr_t)d_to);
	1624
	1625	return error;
	1626	}
	1627
	1628	/*
	1629	* FIONREAD Check for read packet available.
	1630	* SIOCGIFADDR Get interface address - convenient hook to driver.
	1631	* BIOCGBLEN Get buffer len [for read()].
	1632	* BIOCSETF Set ethernet read filter.
	1633	* BIOCFLUSH Flush read packet buffer.
	1634	* BIOCPROMISC Put interface into promiscuous mode.
	1635	* BIOCGDLT Get link layer type.
	1636	* BIOCGETIF Get interface name.
	1637	* BIOCSETIF Set interface.
	1638	* BIOCSRTIMEOUT Set read timeout.
	1639	* BIOCGRTIMEOUT Get read timeout.
	1640	* BIOCGSTATS Get packet stats.
	1641	* BIOCIMMEDIATE Set immediate mode.
	1642	* BIOCVERSION Get filter language version.
	1643	* BIOCGHDRCMPLT Get "header already complete" flag
	1644	* BIOCSHDRCMPLT Set "header already complete" flag
	1645	* BIOCGSEESENT Get "see packets sent" flag
	1646	* BIOCSSEESENT Set "see packets sent" flag
	1647	* BIOCSETTC Set traffic class.
	1648	* BIOCGETTC Get traffic class.
	1649	* BIOCSEXTHDR Set "extended header" flag
	1650	* BIOCSHEADDROP Drop head of the buffer if user is not reading
	1651	* BIOCGHEADDROP Get "head-drop" flag
	1652	*/
	1653	/* ARGSUSED */
	1654	int
	1655	bpfioctl(dev_t dev, u_long cmd, caddr_t addr, __unused int flags,
	1656	struct proc *p)
	1657	{
	1658	struct bpf_d *d;
	1659	int error = 0;
	1660	u_int int_arg;
	1661	struct ifreq ifr;
	1662
	1663	lck_mtx_lock(bpf_mlock);
	1664
	1665	d = bpf_dtab[minor(dev)];
	1666	if (d == NULL \|\| d == BPF_DEV_RESERVED \|\|
	1667	(d->bd_flags & BPF_CLOSING) != 0) {
	1668	lck_mtx_unlock(bpf_mlock);
	1669	return ENXIO;
	1670	}
	1671
	1672	bpf_acquire_d(d);
	1673
	1674	if (d->bd_state == BPF_WAITING) {
	1675	bpf_stop_timer(d);
	1676	}
	1677	d->bd_state = BPF_IDLE;
	1678
	1679	switch (cmd) {
	1680	default:
	1681	error = EINVAL;
	1682	break;
	1683
	1684	/*
	1685	* Check for read packet available.
	1686	*/
	1687	case FIONREAD: /* int */
	1688	{
	1689	int n;
	1690
	1691	n = d->bd_slen;
	1692	if (d->bd_hbuf && d->bd_hbuf_read == 0) {
	1693	n += d->bd_hlen;
	1694	}
	1695
	1696	bcopy(&n, addr, sizeof(n));
	1697	break;
	1698	}
	1699
	1700	case SIOCGIFADDR: /* struct ifreq */
	1701	{
	1702	struct ifnet *ifp;
	1703
	1704	if (d->bd_bif == 0) {
	1705	error = EINVAL;
	1706	} else {
	1707	ifp = d->bd_bif->bif_ifp;
	1708	error = ifnet_ioctl(ifp, 0, cmd, addr);
	1709	}
	1710	break;
	1711	}
	1712
	1713	/*
	1714	* Get buffer len [for read()].
	1715	*/
	1716	case BIOCGBLEN: /* u_int */
	1717	bcopy(&d->bd_bufsize, addr, sizeof(u_int));
	1718	break;
	1719
	1720	/*
	1721	* Set buffer length.
	1722	*/
	1723	case BIOCSBLEN: { /* u_int */
	1724	u_int size;
	1725	unsigned int maxbufsize = bpf_maxbufsize;
	1726
	1727	/*
	1728	* Allow larger buffer in head drop mode to with the
	1729	* assumption the reading process may be low priority but
	1730	* is interested in the most recent traffic
	1731	*/
	1732	if (d->bd_headdrop != 0) {
	1733	maxbufsize = 2 * bpf_maxbufsize;
	1734	}
	1735
	1736	if (d->bd_bif != 0 \|\| (d->bd_flags & BPF_DETACHING)) {
	1737	/*
	1738	* Interface already attached, unable to change buffers
	1739	*/
	1740	error = EINVAL;
	1741	break;
	1742	}
	1743	bcopy(addr, &size, sizeof(size));
	1744
	1745	if (size > maxbufsize) {
	1746	d->bd_bufsize = maxbufsize;
	1747
	1748	os_log_info(OS_LOG_DEFAULT,
	1749	"%s bufsize capped to %u from %u",
	1750	__func__, d->bd_bufsize, size);
	1751	} else if (size < BPF_MINBUFSIZE) {
	1752	d->bd_bufsize = BPF_MINBUFSIZE;
	1753
	1754	os_log_info(OS_LOG_DEFAULT,
	1755	"%s bufsize bumped to %u from %u",
	1756	__func__, d->bd_bufsize, size);
	1757	} else {
	1758	d->bd_bufsize = size;
	1759	}
	1760
	1761	/* It's a read/write ioctl */
	1762	bcopy(&d->bd_bufsize, addr, sizeof(u_int));
	1763	break;
	1764	}
	1765	/*
	1766	* Set link layer read filter.
	1767	*/
	1768	case BIOCSETF32:
	1769	case BIOCSETFNR32: { /* struct bpf_program32 */
	1770	struct bpf_program32 prg32;
	1771
	1772	bcopy(addr, &prg32, sizeof(prg32));
	1773	error = bpf_setf(d, prg32.bf_len,
	1774	CAST_USER_ADDR_T(prg32.bf_insns), cmd);
	1775	break;
	1776	}
	1777
	1778	case BIOCSETF64:
	1779	case BIOCSETFNR64: { /* struct bpf_program64 */
	1780	struct bpf_program64 prg64;
	1781
	1782	bcopy(addr, &prg64, sizeof(prg64));
	1783	error = bpf_setf(d, prg64.bf_len, prg64.bf_insns, cmd);
	1784	break;
	1785	}
	1786
	1787	/*
	1788	* Flush read packet buffer.
	1789	*/
	1790	case BIOCFLUSH:
	1791	while (d->bd_hbuf_read != 0) {
	1792	msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading",
	1793	NULL);
	1794	}
	1795	if ((d->bd_flags & BPF_CLOSING) != 0) {
	1796	error = ENXIO;
	1797	break;
	1798	}
	1799	reset_d(d);
	1800	break;
	1801
	1802	/*
	1803	* Put interface into promiscuous mode.
	1804	*/
	1805	case BIOCPROMISC:
	1806	if (d->bd_bif == 0) {
	1807	/*
	1808	* No interface attached yet.
	1809	*/
	1810	error = EINVAL;
	1811	break;
	1812	}
	1813	if (d->bd_promisc == 0) {
	1814	lck_mtx_unlock(bpf_mlock);
	1815	error = ifnet_set_promiscuous(d->bd_bif->bif_ifp, 1);
	1816	lck_mtx_lock(bpf_mlock);
	1817	if (error == 0) {
	1818	d->bd_promisc = 1;
	1819	}
	1820	}
	1821	break;
	1822
	1823	/*
	1824	* Get device parameters.
	1825	*/
	1826	case BIOCGDLT: /* u_int */
	1827	if (d->bd_bif == 0) {
	1828	error = EINVAL;
	1829	} else {
	1830	bcopy(&d->bd_bif->bif_dlt, addr, sizeof(u_int));
	1831	}
	1832	break;
	1833
	1834	/*
	1835	* Get a list of supported data link types.
	1836	*/
	1837	case BIOCGDLTLIST: /* struct bpf_dltlist */
	1838	if (d->bd_bif == NULL) {
	1839	error = EINVAL;
	1840	} else {
	1841	error = bpf_getdltlist(d, addr, p);
	1842	}
	1843	break;
	1844
	1845	/*
	1846	* Set data link type.
	1847	*/
	1848	case BIOCSDLT: /* u_int */
	1849	if (d->bd_bif == NULL) {
	1850	error = EINVAL;
	1851	} else {
	1852	u_int dlt;
	1853
	1854	bcopy(addr, &dlt, sizeof(dlt));
	1855
	1856	if (dlt == DLT_PKTAP &&
	1857	!(d->bd_flags & BPF_WANT_PKTAP)) {
	1858	dlt = DLT_RAW;
	1859	}
	1860	error = bpf_setdlt(d, dlt);
	1861	}
	1862	break;
	1863
	1864	/*
	1865	* Get interface name.
	1866	*/
	1867	case BIOCGETIF: /* struct ifreq */
	1868	if (d->bd_bif == 0) {
	1869	error = EINVAL;
	1870	} else {
	1871	struct ifnet *const ifp = d->bd_bif->bif_ifp;
	1872
	1873	snprintf(((struct ifreq )(void )addr)->ifr_name,
	1874	sizeof(ifr.ifr_name), "%s", if_name(ifp));
	1875	}
	1876	break;
	1877
	1878	/*
	1879	* Set interface.
	1880	*/
	1881	case BIOCSETIF: { /* struct ifreq */
	1882	ifnet_t ifp;
	1883
	1884	bcopy(addr, &ifr, sizeof(ifr));
	1885	ifr.ifr_name[IFNAMSIZ - 1] = '\0';
	1886	ifp = ifunit(ifr.ifr_name);
	1887	if (ifp == NULL) {
	1888	error = ENXIO;
	1889	} else {
	1890	error = bpf_setif(d, ifp, true, false);
	1891	}
	1892	break;
	1893	}
	1894
	1895	/*
	1896	* Set read timeout.
	1897	*/
	1898	case BIOCSRTIMEOUT32: { /* struct user32_timeval */
	1899	struct user32_timeval _tv;
	1900	struct timeval tv;
	1901
	1902	bcopy(addr, &_tv, sizeof(_tv));
	1903	tv.tv_sec = _tv.tv_sec;
	1904	tv.tv_usec = _tv.tv_usec;
	1905
	1906	/*
	1907	* Subtract 1 tick from tvtohz() since this isn't
	1908	* a one-shot timer.
	1909	*/
	1910	if ((error = itimerfix(&tv)) == 0) {
	1911	d->bd_rtout = tvtohz(&tv) - 1;
	1912	}
	1913	break;
	1914	}
	1915
	1916	case BIOCSRTIMEOUT64: { /* struct user64_timeval */
	1917	struct user64_timeval _tv;
	1918	struct timeval tv;
	1919
	1920	bcopy(addr, &_tv, sizeof(_tv));
	1921	tv.tv_sec = _tv.tv_sec;
	1922	tv.tv_usec = _tv.tv_usec;
	1923
	1924	/*
	1925	* Subtract 1 tick from tvtohz() since this isn't
	1926	* a one-shot timer.
	1927	*/
	1928	if ((error = itimerfix(&tv)) == 0) {
	1929	d->bd_rtout = tvtohz(&tv) - 1;
	1930	}
	1931	break;
	1932	}
	1933
	1934	/*
	1935	* Get read timeout.
	1936	*/
	1937	case BIOCGRTIMEOUT32: { /* struct user32_timeval */
	1938	struct user32_timeval tv;
	1939
	1940	bzero(&tv, sizeof(tv));
	1941	tv.tv_sec = d->bd_rtout / hz;
	1942	tv.tv_usec = (d->bd_rtout % hz) * tick;
	1943	bcopy(&tv, addr, sizeof(tv));
	1944	break;
	1945	}
	1946
	1947	case BIOCGRTIMEOUT64: { /* struct user64_timeval */
	1948	struct user64_timeval tv;
	1949
	1950	bzero(&tv, sizeof(tv));
	1951	tv.tv_sec = d->bd_rtout / hz;
	1952	tv.tv_usec = (d->bd_rtout % hz) * tick;
	1953	bcopy(&tv, addr, sizeof(tv));
	1954	break;
	1955	}
	1956
	1957	/*
	1958	* Get packet stats.
	1959	*/
	1960	case BIOCGSTATS: { /* struct bpf_stat */
	1961	struct bpf_stat bs;
	1962
	1963	bzero(&bs, sizeof(bs));
	1964	bs.bs_recv = d->bd_rcount;
	1965	bs.bs_drop = d->bd_dcount;
	1966	bcopy(&bs, addr, sizeof(bs));
	1967	break;
	1968	}
	1969
	1970	/*
	1971	* Set immediate mode.
	1972	*/
	1973	case BIOCIMMEDIATE: /* u_int */
	1974	d->bd_immediate = (u_int )(void *)addr;
	1975	break;
	1976
	1977	case BIOCVERSION: { /* struct bpf_version */
	1978	struct bpf_version bv;
	1979
	1980	bzero(&bv, sizeof(bv));
	1981	bv.bv_major = BPF_MAJOR_VERSION;
	1982	bv.bv_minor = BPF_MINOR_VERSION;
	1983	bcopy(&bv, addr, sizeof(bv));
	1984	break;
	1985	}
	1986
	1987	/*
	1988	* Get "header already complete" flag
	1989	*/
	1990	case BIOCGHDRCMPLT: /* u_int */
	1991	bcopy(&d->bd_hdrcmplt, addr, sizeof(u_int));
	1992	break;
	1993
	1994	/*
	1995	* Set "header already complete" flag
	1996	*/
	1997	case BIOCSHDRCMPLT: /* u_int */
	1998	bcopy(addr, &int_arg, sizeof(int_arg));
	1999	d->bd_hdrcmplt = int_arg ? 1 : 0;
	2000	break;
	2001
	2002	/*
	2003	* Get "see sent packets" flag
	2004	*/
	2005	case BIOCGSEESENT: /* u_int */
	2006	bcopy(&d->bd_seesent, addr, sizeof(u_int));
	2007	break;
	2008
	2009	/*
	2010	* Set "see sent packets" flag
	2011	*/
	2012	case BIOCSSEESENT: /* u_int */
	2013	bcopy(addr, &d->bd_seesent, sizeof(u_int));
	2014	break;
	2015
	2016	/*
	2017	* Set traffic service class
	2018	*/
	2019	case BIOCSETTC: { /* int */
	2020	int tc;
	2021
	2022	bcopy(addr, &tc, sizeof(int));
	2023	error = bpf_set_traffic_class(d, tc);
	2024	break;
	2025	}
	2026
	2027	/*
	2028	* Get traffic service class
	2029	*/
	2030	case BIOCGETTC: /* int */
	2031	bcopy(&d->bd_traffic_class, addr, sizeof(int));
	2032	break;
	2033
	2034	case FIONBIO: /* Non-blocking I/O; int */
	2035	break;
	2036
	2037	case FIOASYNC: /* Send signal on receive packets; int */
	2038	bcopy(addr, &d->bd_async, sizeof(int));
	2039	break;
	2040	#ifndef __APPLE__
	2041	case FIOSETOWN:
	2042	error = fsetown((int )addr, &d->bd_sigio);
	2043	break;
	2044
	2045	case FIOGETOWN:
	2046	(int )addr = fgetown(d->bd_sigio);
	2047	break;
	2048
	2049	/* This is deprecated, FIOSETOWN should be used instead. */
	2050	case TIOCSPGRP:
	2051	error = fsetown(-((int )addr), &d->bd_sigio);
	2052	break;
	2053
	2054	/* This is deprecated, FIOGETOWN should be used instead. */
	2055	case TIOCGPGRP:
	2056	(int )addr = -fgetown(d->bd_sigio);
	2057	break;
	2058	#endif
	2059	case BIOCSRSIG: { /* Set receive signal; u_int */
	2060	u_int sig;
	2061
	2062	bcopy(addr, &sig, sizeof(u_int));
	2063
	2064	if (sig >= NSIG) {
	2065	error = EINVAL;
	2066	} else {
	2067	d->bd_sig = sig;
	2068	}
	2069	break;
	2070	}
	2071	case BIOCGRSIG: /* u_int */
	2072	bcopy(&d->bd_sig, addr, sizeof(u_int));
	2073	break;
	2074	#ifdef __APPLE__
	2075	case BIOCSEXTHDR: /* u_int */
	2076	bcopy(addr, &int_arg, sizeof(int_arg));
	2077	if (int_arg) {
	2078	d->bd_flags \|= BPF_EXTENDED_HDR;
	2079	} else {
	2080	d->bd_flags &= ~BPF_EXTENDED_HDR;
	2081	}
	2082	break;
	2083
	2084	case BIOCGIFATTACHCOUNT: { /* struct ifreq */
	2085	ifnet_t ifp;
	2086	struct bpf_if *bp;
	2087
	2088	bcopy(addr, &ifr, sizeof(ifr));
	2089	ifr.ifr_name[IFNAMSIZ - 1] = '\0';
	2090	ifp = ifunit(ifr.ifr_name);
	2091	if (ifp == NULL) {
	2092	error = ENXIO;
	2093	break;
	2094	}
	2095	ifr.ifr_intval = 0;
	2096	for (bp = bpf_iflist; bp != 0; bp = bp->bif_next) {
	2097	struct bpf_d *bpf_d;
	2098
	2099	if (bp->bif_ifp == NULL \|\| bp->bif_ifp != ifp) {
	2100	continue;
	2101	}
	2102	for (bpf_d = bp->bif_dlist; bpf_d;
	2103	bpf_d = bpf_d->bd_next) {
	2104	ifr.ifr_intval += 1;
	2105	}
	2106	}
	2107	bcopy(&ifr, addr, sizeof(ifr));
	2108	break;
	2109	}
	2110	case BIOCGWANTPKTAP: /* u_int */
	2111	int_arg = d->bd_flags & BPF_WANT_PKTAP ? 1 : 0;
	2112	bcopy(&int_arg, addr, sizeof(int_arg));
	2113	break;
	2114
	2115	case BIOCSWANTPKTAP: /* u_int */
	2116	bcopy(addr, &int_arg, sizeof(int_arg));
	2117	if (int_arg) {
	2118	d->bd_flags \|= BPF_WANT_PKTAP;
	2119	} else {
	2120	d->bd_flags &= ~BPF_WANT_PKTAP;
	2121	}
	2122	break;
	2123	#endif
	2124
	2125	case BIOCSHEADDROP:
	2126	bcopy(addr, &int_arg, sizeof(int_arg));
	2127	d->bd_headdrop = int_arg ? 1 : 0;
	2128	break;
	2129
	2130	case BIOCGHEADDROP:
	2131	bcopy(&d->bd_headdrop, addr, sizeof(int));
	2132	break;
	2133
	2134	case BIOCSTRUNCATE:
	2135	bcopy(addr, &int_arg, sizeof(int_arg));
	2136	if (int_arg) {
	2137	d->bd_flags \|= BPF_TRUNCATE;
	2138	} else {
	2139	d->bd_flags &= ~BPF_TRUNCATE;
	2140	}
	2141	break;
	2142
	2143	case BIOCGETUUID:
	2144	bcopy(&d->bd_uuid, addr, sizeof(uuid_t));
	2145	break;
	2146
	2147	case BIOCSETUP: {
	2148	struct bpf_setup_args bsa;
	2149	ifnet_t ifp;
	2150
	2151	bcopy(addr, &bsa, sizeof(struct bpf_setup_args));
	2152	bsa.bsa_ifname[IFNAMSIZ - 1] = 0;
	2153	ifp = ifunit(bsa.bsa_ifname);
	2154	if (ifp == NULL) {
	2155	error = ENXIO;
	2156	os_log_info(OS_LOG_DEFAULT,
	2157	"%s: ifnet not found for %s error %d",
	2158	__func__, bsa.bsa_ifname, error);
	2159	break;
	2160	}
	2161
	2162	error = bpf_setup(d, bsa.bsa_uuid, ifp);
	2163	break;
	2164	}
	2165	case BIOCSPKTHDRV2:
	2166	bcopy(addr, &int_arg, sizeof(int_arg));
	2167	if (int_arg != 0) {
	2168	d->bd_flags \|= BPF_PKTHDRV2;
	2169	} else {
	2170	d->bd_flags &= ~BPF_PKTHDRV2;
	2171	}
	2172	break;
	2173
	2174	case BIOCGPKTHDRV2:
	2175	int_arg = d->bd_flags & BPF_PKTHDRV2 ? 1 : 0;
	2176	bcopy(&int_arg, addr, sizeof(int));
	2177	break;
	2178	}
	2179
	2180	bpf_release_d(d);
	2181	lck_mtx_unlock(bpf_mlock);
	2182
	2183	return error;
	2184	}
	2185
	2186	/*
	2187	* Set d's packet filter program to fp. If this file already has a filter,
	2188	* free it and replace it. Returns EINVAL for bogus requests.
	2189	*/
	2190	static int
	2191	bpf_setf(struct bpf_d *d, u_int bf_len, user_addr_t bf_insns,
	2192	u_long cmd)
	2193	{
	2194	struct bpf_insn fcode, old;
	2195	u_int flen, size;
	2196
	2197	while (d->bd_hbuf_read != 0) {
	2198	msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
	2199	}
	2200
	2201	if ((d->bd_flags & BPF_CLOSING) != 0) {
	2202	return ENXIO;
	2203	}
	2204
	2205	old = d->bd_filter;
	2206	if (bf_insns == USER_ADDR_NULL) {
	2207	if (bf_len != 0) {
	2208	return EINVAL;
	2209	}
	2210	d->bd_filter = NULL;
	2211	reset_d(d);
	2212	if (old != 0) {
	2213	FREE(old, M_DEVBUF);
	2214	}
	2215	return 0;
	2216	}
	2217	flen = bf_len;
	2218	if (flen > BPF_MAXINSNS) {
	2219	return EINVAL;
	2220	}
	2221
	2222	size = flen * sizeof(struct bpf_insn);
	2223	fcode = (struct bpf_insn *) _MALLOC(size, M_DEVBUF, M_WAIT);
	2224	#ifdef __APPLE__
	2225	if (fcode == NULL) {
	2226	return ENOBUFS;
	2227	}
	2228	#endif
	2229	if (copyin(bf_insns, (caddr_t)fcode, size) == 0 &&
	2230	bpf_validate(fcode, (int)flen)) {
	2231	d->bd_filter = fcode;
	2232
	2233	if (cmd == BIOCSETF32 \|\| cmd == BIOCSETF64) {
	2234	reset_d(d);
	2235	}
	2236
	2237	if (old != 0) {
	2238	FREE(old, M_DEVBUF);
	2239	}
	2240
	2241	return 0;
	2242	}
	2243	FREE(fcode, M_DEVBUF);
	2244	return EINVAL;
	2245	}
	2246
	2247	/*
	2248	* Detach a file from its current interface (if attached at all) and attach
	2249	* to the interface indicated by the name stored in ifr.
	2250	* Return an errno or 0.
	2251	*/
	2252	static int
	2253	bpf_setif(struct bpf_d *d, ifnet_t theywant, bool do_reset, bool has_hbuf_read)
	2254	{
	2255	struct bpf_if *bp;
	2256	int error;
	2257
	2258	while (d->bd_hbuf_read != 0 && !has_hbuf_read) {
	2259	msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
	2260	}
	2261
	2262	if ((d->bd_flags & BPF_CLOSING) != 0) {
	2263	return ENXIO;
	2264	}
	2265
	2266	/*
	2267	* Look through attached interfaces for the named one.
	2268	*/
	2269	for (bp = bpf_iflist; bp != 0; bp = bp->bif_next) {
	2270	struct ifnet *ifp = bp->bif_ifp;
	2271
	2272	if (ifp == 0 \|\| ifp != theywant) {
	2273	continue;
	2274	}
	2275	/*
	2276	* Do not use DLT_PKTAP, unless requested explicitly
	2277	*/
	2278	if (bp->bif_dlt == DLT_PKTAP && !(d->bd_flags & BPF_WANT_PKTAP)) {
	2279	continue;
	2280	}
	2281	/*
	2282	* Skip the coprocessor interface
	2283	*/
	2284	if (!intcoproc_unrestricted && IFNET_IS_INTCOPROC(ifp)) {
	2285	continue;
	2286	}
	2287	/*
	2288	* We found the requested interface.
	2289	* Allocate the packet buffers.
	2290	*/
	2291	error = bpf_allocbufs(d);
	2292	if (error != 0) {
	2293	return error;
	2294	}
	2295	/*
	2296	* Detach if attached to something else.
	2297	*/
	2298	if (bp != d->bd_bif) {
	2299	if (d->bd_bif != NULL) {
	2300	if (bpf_detachd(d, 0) != 0) {
	2301	return ENXIO;
	2302	}
	2303	}
	2304	if (bpf_attachd(d, bp) != 0) {
	2305	return ENXIO;
	2306	}
	2307	}
	2308	if (do_reset) {
	2309	reset_d(d);
	2310	}
	2311	return 0;
	2312	}
	2313	/* Not found. */
	2314	return ENXIO;
	2315	}
	2316
	2317	/*
	2318	* Get a list of available data link type of the interface.
	2319	*/
	2320	static int
	2321	bpf_getdltlist(struct bpf_d d, caddr_t addr, struct proc p)
	2322	{
	2323	u_int n;
	2324	int error;
	2325	struct ifnet *ifp;
	2326	struct bpf_if *bp;
	2327	user_addr_t dlist;
	2328	struct bpf_dltlist bfl;
	2329
	2330	bcopy(addr, &bfl, sizeof(bfl));
	2331	if (proc_is64bit(p)) {
	2332	dlist = (user_addr_t)bfl.bfl_u.bflu_pad;
	2333	} else {
	2334	dlist = CAST_USER_ADDR_T(bfl.bfl_u.bflu_list);
	2335	}
	2336
	2337	ifp = d->bd_bif->bif_ifp;
	2338	n = 0;
	2339	error = 0;
	2340
	2341	for (bp = bpf_iflist; bp; bp = bp->bif_next) {
	2342	if (bp->bif_ifp != ifp) {
	2343	continue;
	2344	}
	2345	/*
	2346	* Do not use DLT_PKTAP, unless requested explicitly
	2347	*/
	2348	if (bp->bif_dlt == DLT_PKTAP && !(d->bd_flags & BPF_WANT_PKTAP)) {
	2349	continue;
	2350	}
	2351	if (dlist != USER_ADDR_NULL) {
	2352	if (n >= bfl.bfl_len) {
	2353	return ENOMEM;
	2354	}
	2355	error = copyout(&bp->bif_dlt, dlist,
	2356	sizeof(bp->bif_dlt));
	2357	if (error != 0) {
	2358	break;
	2359	}
	2360	dlist += sizeof(bp->bif_dlt);
	2361	}
	2362	n++;
	2363	}
	2364	bfl.bfl_len = n;
	2365	bcopy(&bfl, addr, sizeof(bfl));
	2366
	2367	return error;
	2368	}
	2369
	2370	/*
	2371	* Set the data link type of a BPF instance.
	2372	*/
	2373	static int
	2374	bpf_setdlt(struct bpf_d *d, uint32_t dlt)
	2375	{
	2376	int error, opromisc;
	2377	struct ifnet *ifp;
	2378	struct bpf_if *bp;
	2379
	2380	if (d->bd_bif->bif_dlt == dlt) {
	2381	return 0;
	2382	}
	2383
	2384	while (d->bd_hbuf_read != 0) {
	2385	msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
	2386	}
	2387
	2388	if ((d->bd_flags & BPF_CLOSING) != 0) {
	2389	return ENXIO;
	2390	}
	2391
	2392	ifp = d->bd_bif->bif_ifp;
	2393	for (bp = bpf_iflist; bp; bp = bp->bif_next) {
	2394	if (bp->bif_ifp == ifp && bp->bif_dlt == dlt) {
	2395	/*
	2396	* Do not use DLT_PKTAP, unless requested explicitly
	2397	*/
	2398	if (bp->bif_dlt == DLT_PKTAP &&
	2399	!(d->bd_flags & BPF_WANT_PKTAP)) {
	2400	continue;
	2401	}
	2402	break;
	2403	}
	2404	}
	2405	if (bp != NULL) {
	2406	opromisc = d->bd_promisc;
	2407	if (bpf_detachd(d, 0) != 0) {
	2408	return ENXIO;
	2409	}
	2410	error = bpf_attachd(d, bp);
	2411	if (error) {
	2412	printf("bpf_setdlt: bpf_attachd %s%d failed (%d)\n",
	2413	ifnet_name(bp->bif_ifp), ifnet_unit(bp->bif_ifp),
	2414	error);
	2415	return error;
	2416	}
	2417	reset_d(d);
	2418	if (opromisc) {
	2419	lck_mtx_unlock(bpf_mlock);
	2420	error = ifnet_set_promiscuous(bp->bif_ifp, 1);
	2421	lck_mtx_lock(bpf_mlock);
	2422	if (error) {
	2423	printf("%s: ifpromisc %s%d failed (%d)\n",
	2424	__func__, ifnet_name(bp->bif_ifp),
	2425	ifnet_unit(bp->bif_ifp), error);
	2426	} else {
	2427	d->bd_promisc = 1;
	2428	}
	2429	}
	2430	}
	2431	return bp == NULL ? EINVAL : 0;
	2432	}
	2433
	2434	static int
	2435	bpf_set_traffic_class(struct bpf_d *d, int tc)
	2436	{
	2437	int error = 0;
	2438
	2439	if (!SO_VALID_TC(tc)) {
	2440	error = EINVAL;
	2441	} else {
	2442	d->bd_traffic_class = tc;
	2443	}
	2444
	2445	return error;
	2446	}
	2447
	2448	static void
	2449	bpf_set_packet_service_class(struct mbuf *m, int tc)
	2450	{
	2451	if (!(m->m_flags & M_PKTHDR)) {
	2452	return;
	2453	}
	2454
	2455	VERIFY(SO_VALID_TC(tc));
	2456	(void) m_set_service_class(m, so_tc2msc(tc));
	2457	}
	2458
	2459	/*
	2460	* Support for select()
	2461	*
	2462	* Return true iff the specific operation will not block indefinitely.
	2463	* Otherwise, return false but make a note that a selwakeup() must be done.
	2464	*/
	2465	int
	2466	bpfselect(dev_t dev, int which, void * wql, struct proc *p)
	2467	{
	2468	struct bpf_d *d;
	2469	int ret = 0;
	2470
	2471	lck_mtx_lock(bpf_mlock);
	2472
	2473	d = bpf_dtab[minor(dev)];
	2474	if (d == NULL \|\| d == BPF_DEV_RESERVED \|\|
	2475	(d->bd_flags & BPF_CLOSING) != 0) {
	2476	lck_mtx_unlock(bpf_mlock);
	2477	return ENXIO;
	2478	}
	2479
	2480	bpf_acquire_d(d);
	2481
	2482	if (d->bd_bif == NULL) {
	2483	bpf_release_d(d);
	2484	lck_mtx_unlock(bpf_mlock);
	2485	return ENXIO;
	2486	}
	2487
	2488	while (d->bd_hbuf_read != 0) {
	2489	msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
	2490	}
	2491
	2492	if ((d->bd_flags & BPF_CLOSING) != 0) {
	2493	bpf_release_d(d);
	2494	lck_mtx_unlock(bpf_mlock);
	2495	return ENXIO;
	2496	}
	2497
	2498	switch (which) {
	2499	case FREAD:
	2500	if (d->bd_hlen != 0 \|\|
	2501	((d->bd_immediate \|\|
	2502	d->bd_state == BPF_TIMED_OUT) && d->bd_slen != 0)) {
	2503	ret = 1; /* read has data to return */
	2504	} else {
	2505	/*
	2506	* Read has no data to return.
	2507	* Make the select wait, and start a timer if
	2508	* necessary.
	2509	*/
	2510	selrecord(p, &d->bd_sel, wql);
	2511	bpf_start_timer(d);
	2512	}
	2513	break;
	2514
	2515	case FWRITE:
	2516	/* can't determine whether a write would block */
	2517	ret = 1;
	2518	break;
	2519	}
	2520
	2521	bpf_release_d(d);
	2522	lck_mtx_unlock(bpf_mlock);
	2523
	2524	return ret;
	2525	}
	2526
	2527	/*
	2528	* Support for kevent() system call. Register EVFILT_READ filters and
	2529	* reject all others.
	2530	*/
	2531	int bpfkqfilter(dev_t dev, struct knote *kn);
	2532	static void filt_bpfdetach(struct knote *);
	2533	static int filt_bpfread(struct knote *, long);
	2534	static int filt_bpftouch(struct knote kn, struct kevent_qos_s kev);
	2535	static int filt_bpfprocess(struct knote kn, struct kevent_qos_s kev);
	2536
	2537	SECURITY_READ_ONLY_EARLY(struct filterops) bpfread_filtops = {
	2538	.f_isfd = 1,
	2539	.f_detach = filt_bpfdetach,
	2540	.f_event = filt_bpfread,
	2541	.f_touch = filt_bpftouch,
	2542	.f_process = filt_bpfprocess,
	2543	};
	2544
	2545	static int
	2546	filt_bpfread_common(struct knote kn, struct kevent_qos_s kev, struct bpf_d *d)
	2547	{
	2548	int ready = 0;
	2549	int64_t data = 0;
	2550
	2551	if (d->bd_immediate) {
	2552	/*
	2553	* If there's data in the hold buffer, it's the
	2554	* amount of data a read will return.
	2555	*
	2556	* If there's no data in the hold buffer, but
	2557	* there's data in the store buffer, a read will
	2558	* immediately rotate the store buffer to the
	2559	* hold buffer, the amount of data in the store
	2560	* buffer is the amount of data a read will
	2561	* return.
	2562	*
	2563	* If there's no data in either buffer, we're not
	2564	* ready to read.
	2565	*/
	2566	data = (d->bd_hlen == 0 \|\| d->bd_hbuf_read != 0 ?
	2567	d->bd_slen : d->bd_hlen);
	2568	int64_t lowwat = knote_low_watermark(kn);
	2569	if (lowwat > d->bd_bufsize) {
	2570	lowwat = d->bd_bufsize;
	2571	}
	2572	ready = (data >= lowwat);
	2573	} else {
	2574	/*
	2575	* If there's data in the hold buffer, it's the
	2576	* amount of data a read will return.
	2577	*
	2578	* If there's no data in the hold buffer, but
	2579	* there's data in the store buffer, if the
	2580	* timer has expired a read will immediately
	2581	* rotate the store buffer to the hold buffer,
	2582	* so the amount of data in the store buffer is
	2583	* the amount of data a read will return.
	2584	*
	2585	* If there's no data in either buffer, or there's
	2586	* no data in the hold buffer and the timer hasn't
	2587	* expired, we're not ready to read.
	2588	*/
	2589	data = ((d->bd_hlen == 0 \|\| d->bd_hbuf_read != 0) &&
	2590	d->bd_state == BPF_TIMED_OUT ? d->bd_slen : d->bd_hlen);
	2591	ready = (data > 0);
	2592	}
	2593	if (!ready) {
	2594	bpf_start_timer(d);
	2595	} else if (kev) {
	2596	knote_fill_kevent(kn, kev, data);
	2597	}
	2598
	2599	return ready;
	2600	}
	2601
	2602	int
	2603	bpfkqfilter(dev_t dev, struct knote *kn)
	2604	{
	2605	struct bpf_d *d;
	2606	int res;
	2607
	2608	/*
	2609	* Is this device a bpf?
	2610	*/
	2611	if (major(dev) != CDEV_MAJOR \|\| kn->kn_filter != EVFILT_READ) {
	2612	knote_set_error(kn, EINVAL);
	2613	return 0;
	2614	}
	2615
	2616	lck_mtx_lock(bpf_mlock);
	2617
	2618	d = bpf_dtab[minor(dev)];
	2619
	2620	if (d == NULL \|\| d == BPF_DEV_RESERVED \|\|
	2621	(d->bd_flags & BPF_CLOSING) != 0 \|\|
	2622	d->bd_bif == NULL) {
	2623	lck_mtx_unlock(bpf_mlock);
	2624	knote_set_error(kn, ENXIO);
	2625	return 0;
	2626	}
	2627
	2628	kn->kn_hook = d;
	2629	kn->kn_filtid = EVFILTID_BPFREAD;
	2630	KNOTE_ATTACH(&d->bd_sel.si_note, kn);
	2631	d->bd_flags \|= BPF_KNOTE;
	2632
	2633	/* capture the current state */
	2634	res = filt_bpfread_common(kn, NULL, d);
	2635
	2636	lck_mtx_unlock(bpf_mlock);
	2637
	2638	return res;
	2639	}
	2640
	2641	static void
	2642	filt_bpfdetach(struct knote *kn)
	2643	{
	2644	struct bpf_d d = (struct bpf_d )kn->kn_hook;
	2645
	2646	lck_mtx_lock(bpf_mlock);
	2647	if (d->bd_flags & BPF_KNOTE) {
	2648	KNOTE_DETACH(&d->bd_sel.si_note, kn);
	2649	d->bd_flags &= ~BPF_KNOTE;
	2650	}
	2651	lck_mtx_unlock(bpf_mlock);
	2652	}
	2653
	2654	static int
	2655	filt_bpfread(struct knote *kn, long hint)
	2656	{
	2657	#pragma unused(hint)
	2658	struct bpf_d d = (struct bpf_d )kn->kn_hook;
	2659
	2660	return filt_bpfread_common(kn, NULL, d);
	2661	}
	2662
	2663	static int
	2664	filt_bpftouch(struct knote kn, struct kevent_qos_s kev)
	2665	{
	2666	struct bpf_d d = (struct bpf_d )kn->kn_hook;
	2667	int res;
	2668
	2669	lck_mtx_lock(bpf_mlock);
	2670
	2671	/* save off the lowat threshold and flag */
	2672	kn->kn_sdata = kev->data;
	2673	kn->kn_sfflags = kev->fflags;
	2674
	2675	/* output data will be re-generated here */
	2676	res = filt_bpfread_common(kn, NULL, d);
	2677
	2678	lck_mtx_unlock(bpf_mlock);
	2679
	2680	return res;
	2681	}
	2682
	2683	static int
	2684	filt_bpfprocess(struct knote kn, struct kevent_qos_s kev)
	2685	{
	2686	struct bpf_d d = (struct bpf_d )kn->kn_hook;
	2687	int res;
	2688
	2689	lck_mtx_lock(bpf_mlock);
	2690	res = filt_bpfread_common(kn, kev, d);
	2691	lck_mtx_unlock(bpf_mlock);
	2692
	2693	return res;
	2694	}
	2695
	2696	/*
	2697	* Copy data from an mbuf chain into a buffer. This code is derived
	2698	* from m_copydata in kern/uipc_mbuf.c.
	2699	*/
	2700	static void
	2701	bpf_mcopy(struct mbuf * m, void *dst_arg, size_t len)
	2702	{
	2703	u_int count;
	2704	u_char *dst;
	2705
	2706	dst = dst_arg;
	2707	while (len > 0) {
	2708	if (m == 0) {
	2709	panic("bpf_mcopy");
	2710	}
	2711	count = min(m->m_len, len);
	2712	bcopy(mbuf_data(m), dst, count);
	2713	m = m->m_next;
	2714	dst += count;
	2715	len -= count;
	2716	}
	2717	}
	2718
	2719	static inline void
	2720	bpf_tap_imp(
	2721	ifnet_t ifp,
	2722	u_int32_t dlt,
	2723	struct bpf_packet *bpf_pkt,
	2724	int outbound)
	2725	{
	2726	struct bpf_d *d;
	2727	u_int slen;
	2728	struct bpf_if *bp;
	2729
	2730	/*
	2731	* It's possible that we get here after the bpf descriptor has been
	2732	* detached from the interface; in such a case we simply return.
	2733	* Lock ordering is important since we can be called asynchronously
	2734	* (from IOKit) to process an inbound packet; when that happens
	2735	* we would have been holding its "gateLock" and will be acquiring
	2736	* "bpf_mlock" upon entering this routine. Due to that, we release
	2737	* "bpf_mlock" prior to calling ifnet_set_promiscuous (which will
	2738	* acquire "gateLock" in the IOKit), in order to avoid a deadlock
	2739	* when a ifnet_set_promiscuous request simultaneously collides with
	2740	* an inbound packet being passed into the tap callback.
	2741	*/
	2742	lck_mtx_lock(bpf_mlock);
	2743	if (ifp->if_bpf == NULL) {
	2744	lck_mtx_unlock(bpf_mlock);
	2745	return;
	2746	}
	2747	for (bp = ifp->if_bpf; bp != NULL; bp = bp->bif_next) {
	2748	if (bp->bif_ifp != ifp) {
	2749	/* wrong interface */
	2750	bp = NULL;
	2751	break;
	2752	}
	2753	if (dlt == 0 \|\| bp->bif_dlt == dlt) {
	2754	/* tapping default DLT or DLT matches */
	2755	break;
	2756	}
	2757	}
	2758	if (bp == NULL) {
	2759	goto done;
	2760	}
	2761	for (d = bp->bif_dlist; d; d = d->bd_next) {
	2762	struct bpf_packet *bpf_pkt_saved = bpf_pkt;
	2763	struct bpf_packet bpf_pkt_tmp;
	2764	struct pktap_header_buffer bpfp_header_tmp;
	2765
	2766	if (outbound && !d->bd_seesent) {
	2767	continue;
	2768	}
	2769
	2770	++d->bd_rcount;
	2771	slen = bpf_filter(d->bd_filter, (u_char *)bpf_pkt,
	2772	bpf_pkt->bpfp_total_length, 0);
	2773	if (bp->bif_ifp->if_type == IFT_PKTAP &&
	2774	bp->bif_dlt == DLT_PKTAP) {
	2775	/*
	2776	* Need to copy the bpf_pkt because the conversion
	2777	* to v2 pktap header modifies the content of the
	2778	* bpfp_header
	2779	*/
	2780	if ((d->bd_flags & BPF_PKTHDRV2) &&
	2781	bpf_pkt->bpfp_header_length <= sizeof(bpfp_header_tmp)) {
	2782	bpf_pkt_tmp = *bpf_pkt;
	2783
	2784	bpf_pkt = &bpf_pkt_tmp;
	2785
	2786	memcpy(&bpfp_header_tmp, bpf_pkt->bpfp_header,
	2787	bpf_pkt->bpfp_header_length);
	2788
	2789	bpf_pkt->bpfp_header = &bpfp_header_tmp;
	2790
	2791	convert_to_pktap_header_to_v2(bpf_pkt,
	2792	!!(d->bd_flags & BPF_TRUNCATE));
	2793	}
	2794
	2795	if (d->bd_flags & BPF_TRUNCATE) {
	2796	slen = min(slen,
	2797	get_pkt_trunc_len((u_char *)bpf_pkt,
	2798	bpf_pkt->bpfp_total_length));
	2799	}
	2800	}
	2801	if (slen != 0) {
	2802	#if CONFIG_MACF_NET
	2803	if (mac_bpfdesc_check_receive(d, bp->bif_ifp) != 0) {
	2804	continue;
	2805	}
	2806	#endif
	2807	catchpacket(d, bpf_pkt, slen, outbound);
	2808	}
	2809	bpf_pkt = bpf_pkt_saved;
	2810	}
	2811
	2812	done:
	2813	lck_mtx_unlock(bpf_mlock);
	2814	}
	2815
	2816	static inline void
	2817	bpf_tap_mbuf(
	2818	ifnet_t ifp,
	2819	u_int32_t dlt,
	2820	mbuf_t m,
	2821	void* hdr,
	2822	size_t hlen,
	2823	int outbound)
	2824	{
	2825	struct bpf_packet bpf_pkt;
	2826	struct mbuf *m0;
	2827
	2828	if (ifp->if_bpf == NULL) {
	2829	/* quickly check without taking lock */
	2830	return;
	2831	}
	2832	bpf_pkt.bpfp_type = BPF_PACKET_TYPE_MBUF;
	2833	bpf_pkt.bpfp_mbuf = m;
	2834	bpf_pkt.bpfp_total_length = 0;
	2835	for (m0 = m; m0 != NULL; m0 = m0->m_next) {
	2836	bpf_pkt.bpfp_total_length += m0->m_len;
	2837	}
	2838	bpf_pkt.bpfp_header = hdr;
	2839	if (hdr != NULL) {
	2840	bpf_pkt.bpfp_total_length += hlen;
	2841	bpf_pkt.bpfp_header_length = hlen;
	2842	} else {
	2843	bpf_pkt.bpfp_header_length = 0;
	2844	}
	2845	bpf_tap_imp(ifp, dlt, &bpf_pkt, outbound);
	2846	}
	2847
	2848	void
	2849	bpf_tap_out(
	2850	ifnet_t ifp,
	2851	u_int32_t dlt,
	2852	mbuf_t m,
	2853	void* hdr,
	2854	size_t hlen)
	2855	{
	2856	bpf_tap_mbuf(ifp, dlt, m, hdr, hlen, 1);
	2857	}
	2858
	2859	void
	2860	bpf_tap_in(
	2861	ifnet_t ifp,
	2862	u_int32_t dlt,
	2863	mbuf_t m,
	2864	void* hdr,
	2865	size_t hlen)
	2866	{
	2867	bpf_tap_mbuf(ifp, dlt, m, hdr, hlen, 0);
	2868	}
	2869
	/*
	 * Callback registered with Ethernet driver.
	 *
	 * Taps `m` on the interface's default DLT (dlt 0) with no extra header.
	 * The packet is treated as outbound when it has no receive interface.
	 * Always returns 0.
	 */
	static int
	bpf_tap_callback(struct ifnet *ifp, struct mbuf *m)
	{
		bpf_tap_mbuf(ifp, 0, m, NULL, 0, mbuf_pkthdr_rcvif(m) == NULL);

		return 0;
	}
	2878
	2879
	2880	static errno_t
	2881	bpf_copydata(struct bpf_packet pkt, size_t off, size_t len, void out_data)
	2882	{
	2883	errno_t err = 0;
	2884	if (pkt->bpfp_type == BPF_PACKET_TYPE_MBUF) {
	2885	err = mbuf_copydata(pkt->bpfp_mbuf, off, len, out_data);
	2886	} else {
	2887	err = EINVAL;
	2888	}
	2889
	2890	return err;
	2891	}
	2892
	2893	static void
	2894	copy_bpf_packet(struct bpf_packet * pkt, void * dst, size_t len)
	2895	{
	2896	/* copy the optional header */
	2897	if (pkt->bpfp_header_length != 0) {
	2898	size_t count = min(len, pkt->bpfp_header_length);
	2899	bcopy(pkt->bpfp_header, dst, count);
	2900	len -= count;
	2901	dst += count;
	2902	}
	2903	if (len == 0) {
	2904	/* nothing past the header */
	2905	return;
	2906	}
	2907	/* copy the packet */
	2908	switch (pkt->bpfp_type) {
	2909	case BPF_PACKET_TYPE_MBUF:
	2910	bpf_mcopy(pkt->bpfp_mbuf, dst, len);
	2911	break;
	2912	default:
	2913	break;
	2914	}
	2915	}
	2916
	2917	static uint16_t
	2918	get_esp_trunc_len(__unused struct bpf_packet *pkt, __unused uint16_t off,
	2919	const uint16_t remaining_caplen)
	2920	{
	2921	/*
	2922	* For some reason tcpdump expects to have one byte beyond the ESP header
	2923	*/
	2924	uint16_t trunc_len = ESP_HDR_SIZE + 1;
	2925
	2926	if (trunc_len > remaining_caplen) {
	2927	return remaining_caplen;
	2928	}
	2929
	2930	return trunc_len;
	2931	}
	2932
	2933	static uint16_t
	2934	get_isakmp_trunc_len(__unused struct bpf_packet *pkt, __unused uint16_t off,
	2935	const uint16_t remaining_caplen)
	2936	{
	2937	/*
	2938	* Include the payload generic header
	2939	*/
	2940	uint16_t trunc_len = ISAKMP_HDR_SIZE;
	2941
	2942	if (trunc_len > remaining_caplen) {
	2943	return remaining_caplen;
	2944	}
	2945
	2946	return trunc_len;
	2947	}
	2948
	/*
	 * Number of bytes to keep from the UDP payload of a NAT-traversal
	 * (port PORT_ISAKMP_NATT) packet starting at offset `off`.
	 *
	 * Only the first four payload bytes are inspected, so they are read
	 * into a small fixed buffer instead of a stack array sized by
	 * `remaining_caplen` (which can be as large as 64 KB).
	 *
	 * Returns `remaining_caplen` when the payload cannot be read; the
	 * result is never larger than `remaining_caplen`.
	 */
	static uint16_t
	get_isakmp_natt_trunc_len(struct bpf_packet *pkt, uint16_t off,
	    const uint16_t remaining_caplen)
	{
		char marker[4];
		uint16_t peek_len = sizeof(marker);
		uint16_t trunc_len = 0;
		int err = 0;

		if (remaining_caplen == 0) {
			/* nothing to look at and nothing to keep */
			return 0;
		}
		if (peek_len > remaining_caplen) {
			peek_len = remaining_caplen;
		}

		err = bpf_copydata(pkt, off, peek_len, marker);
		if (err != 0) {
			return remaining_caplen;
		}
		/*
		 * There are three cases:
		 * - IKE: payload starts with a 4 byte header set to zero before
		 *   the ISAKMP header
		 * - keep alive: 1 byte payload
		 * - otherwise it's ESP
		 */
		if (remaining_caplen >= sizeof(marker) &&
		    marker[0] == 0 && marker[1] == 0 &&
		    marker[2] == 0 && marker[3] == 0) {
			trunc_len = 4 + get_isakmp_trunc_len(pkt, off + 4, remaining_caplen - 4);
		} else if (remaining_caplen == 1) {
			trunc_len = 1;
		} else {
			trunc_len = get_esp_trunc_len(pkt, off, remaining_caplen);
		}

		if (trunc_len > remaining_caplen) {
			return remaining_caplen;
		}

		return trunc_len;
	}
	2983
	2984	static uint16_t
	2985	get_udp_trunc_len(struct bpf_packet *pkt, uint16_t off, const uint16_t remaining_caplen)
	2986	{
	2987	int err = 0;
	2988	uint16_t trunc_len = sizeof(struct udphdr); /* By default no UDP payload */
	2989
	2990	if (trunc_len >= remaining_caplen) {
	2991	return remaining_caplen;
	2992	}
	2993
	2994	struct udphdr udphdr;
	2995	err = bpf_copydata(pkt, off, sizeof(struct udphdr), &udphdr);
	2996	if (err != 0) {
	2997	return remaining_caplen;
	2998	}
	2999
	3000	u_short sport, dport;
	3001
	3002	sport = EXTRACT_SHORT(&udphdr.uh_sport);
	3003	dport = EXTRACT_SHORT(&udphdr.uh_dport);
	3004
	3005	if (dport == PORT_DNS \|\| sport == PORT_DNS) {
	3006	/*
	3007	* Full UDP payload for DNS
	3008	*/
	3009	trunc_len = remaining_caplen;
	3010	} else if ((sport == PORT_BOOTPS && dport == PORT_BOOTPC) \|\|
	3011	(sport == PORT_BOOTPC && dport == PORT_BOOTPS)) {
	3012	/*
	3013	* Full UDP payload for BOOTP and DHCP
	3014	*/
	3015	trunc_len = remaining_caplen;
	3016	} else if (dport == PORT_ISAKMP && sport == PORT_ISAKMP) {
	3017	/*
	3018	* Return the ISAKMP header
	3019	*/
	3020	trunc_len += get_isakmp_trunc_len(pkt, off + sizeof(struct udphdr),
	3021	remaining_caplen - sizeof(struct udphdr));
	3022	} else if (dport == PORT_ISAKMP_NATT && sport == PORT_ISAKMP_NATT) {
	3023	trunc_len += get_isakmp_natt_trunc_len(pkt, off + sizeof(struct udphdr),
	3024	remaining_caplen - sizeof(struct udphdr));
	3025	}
	3026	if (trunc_len >= remaining_caplen) {
	3027	return remaining_caplen;
	3028	}
	3029
	3030	return trunc_len;
	3031	}
	3032
	3033	static uint16_t
	3034	get_tcp_trunc_len(struct bpf_packet *pkt, uint16_t off, const uint16_t remaining_caplen)
	3035	{
	3036	int err = 0;
	3037	uint16_t trunc_len = sizeof(struct tcphdr); /* By default no TCP payload */
	3038	if (trunc_len >= remaining_caplen) {
	3039	return remaining_caplen;
	3040	}
	3041
	3042	struct tcphdr tcphdr;
	3043	err = bpf_copydata(pkt, off, sizeof(struct tcphdr), &tcphdr);
	3044	if (err != 0) {
	3045	return remaining_caplen;
	3046	}
	3047
	3048	u_short sport, dport;
	3049	sport = EXTRACT_SHORT(&tcphdr.th_sport);
	3050	dport = EXTRACT_SHORT(&tcphdr.th_dport);
	3051
	3052	if (dport == PORT_DNS \|\| sport == PORT_DNS) {
	3053	/*
	3054	* Full TCP payload for DNS
	3055	*/
	3056	trunc_len = remaining_caplen;
	3057	} else {
	3058	trunc_len = tcphdr.th_off << 2;
	3059	}
	3060	if (trunc_len >= remaining_caplen) {
	3061	return remaining_caplen;
	3062	}
	3063
	3064	return trunc_len;
	3065	}
	3066
	3067	static uint16_t
	3068	get_proto_trunc_len(uint8_t proto, struct bpf_packet *pkt, uint16_t off, const uint16_t remaining_caplen)
	3069	{
	3070	uint16_t trunc_len;
	3071
	3072	switch (proto) {
	3073	case IPPROTO_ICMP: {
	3074	/*
	3075	* Full IMCP payload
	3076	*/
	3077	trunc_len = remaining_caplen;
	3078	break;
	3079	}
	3080	case IPPROTO_ICMPV6: {
	3081	/*
	3082	* Full IMCPV6 payload
	3083	*/
	3084	trunc_len = remaining_caplen;
	3085	break;
	3086	}
	3087	case IPPROTO_IGMP: {
	3088	/*
	3089	* Full IGMP payload
	3090	*/
	3091	trunc_len = remaining_caplen;
	3092	break;
	3093	}
	3094	case IPPROTO_UDP: {
	3095	trunc_len = get_udp_trunc_len(pkt, off, remaining_caplen);
	3096	break;
	3097	}
	3098	case IPPROTO_TCP: {
	3099	trunc_len = get_tcp_trunc_len(pkt, off, remaining_caplen);
	3100	break;
	3101	}
	3102	case IPPROTO_ESP: {
	3103	trunc_len = get_esp_trunc_len(pkt, off, remaining_caplen);
	3104	break;
	3105	}
	3106	default: {
	3107	/*
	3108	* By default we only include the IP header
	3109	*/
	3110	trunc_len = 0;
	3111	break;
	3112	}
	3113	}
	3114	if (trunc_len >= remaining_caplen) {
	3115	return remaining_caplen;
	3116	}
	3117
	3118	return trunc_len;
	3119	}
	3120
	3121	static uint16_t
	3122	get_ip_trunc_len(struct bpf_packet *pkt, uint16_t off, const uint16_t remaining_caplen)
	3123	{
	3124	int err = 0;
	3125	uint16_t iplen = sizeof(struct ip);
	3126	if (iplen >= remaining_caplen) {
	3127	return remaining_caplen;
	3128	}
	3129
	3130	struct ip iphdr;
	3131	err = bpf_copydata(pkt, off, sizeof(struct ip), &iphdr);
	3132	if (err != 0) {
	3133	return remaining_caplen;
	3134	}
	3135
	3136	uint8_t proto = 0;
	3137
	3138	iplen = iphdr.ip_hl << 2;
	3139	if (iplen >= remaining_caplen) {
	3140	return remaining_caplen;
	3141	}
	3142
	3143	proto = iphdr.ip_p;
	3144	iplen += get_proto_trunc_len(proto, pkt, off + iplen, remaining_caplen - iplen);
	3145
	3146	if (iplen >= remaining_caplen) {
	3147	return remaining_caplen;
	3148	}
	3149
	3150	return iplen;
	3151	}
	3152
	3153	static uint16_t
	3154	get_ip6_trunc_len(struct bpf_packet *pkt, uint16_t off, const uint16_t remaining_caplen)
	3155	{
	3156	int err = 0;
	3157	uint16_t iplen = sizeof(struct ip6_hdr);
	3158	if (iplen >= remaining_caplen) {
	3159	return remaining_caplen;
	3160	}
	3161
	3162	struct ip6_hdr ip6hdr;
	3163	err = bpf_copydata(pkt, off, sizeof(struct ip6_hdr), &ip6hdr);
	3164	if (err != 0) {
	3165	return remaining_caplen;
	3166	}
	3167
	3168	uint8_t proto = 0;
	3169
	3170	/*
	3171	* TBD: process the extension headers
	3172	*/
	3173	proto = ip6hdr.ip6_nxt;
	3174	iplen += get_proto_trunc_len(proto, pkt, off + iplen, remaining_caplen - iplen);
	3175
	3176	if (iplen >= remaining_caplen) {
	3177	return remaining_caplen;
	3178	}
	3179
	3180	return iplen;
	3181	}
	3182
	3183	static uint16_t
	3184	get_ether_trunc_len(struct bpf_packet *pkt, int off, const uint16_t remaining_caplen)
	3185	{
	3186	int err = 0;
	3187	uint16_t ethlen = sizeof(struct ether_header);
	3188	if (ethlen >= remaining_caplen) {
	3189	return remaining_caplen;
	3190	}
	3191
	3192	struct ether_header eh;
	3193	u_short type;
	3194	err = bpf_copydata(pkt, off, sizeof(struct ether_header), &eh);
	3195	if (err != 0) {
	3196	return remaining_caplen;
	3197	}
	3198
	3199	type = EXTRACT_SHORT(&eh.ether_type);
	3200	/* Include full ARP */
	3201	if (type == ETHERTYPE_ARP) {
	3202	ethlen = remaining_caplen;
	3203	} else if (type != ETHERTYPE_IP && type != ETHERTYPE_IPV6) {
	3204	ethlen = min(BPF_MIN_PKT_SIZE, remaining_caplen);
	3205	} else {
	3206	if (type == ETHERTYPE_IP) {
	3207	ethlen += get_ip_trunc_len(pkt, sizeof(struct ether_header),
	3208	remaining_caplen);
	3209	} else if (type == ETHERTYPE_IPV6) {
	3210	ethlen += get_ip6_trunc_len(pkt, sizeof(struct ether_header),
	3211	remaining_caplen);
	3212	}
	3213	}
	3214	return ethlen;
	3215	}
	3216
	3217	static uint32_t
	3218	get_pkt_trunc_len(u_char *p, u_int len)
	3219	{
	3220	struct bpf_packet pkt = (struct bpf_packet )(void *) p;
	3221	struct pktap_header pktap = (struct pktap_header ) (pkt->bpfp_header);
	3222	uint32_t out_pkt_len = 0, tlen = 0;
	3223	/*
	3224	* pktap->pth_frame_pre_length is L2 header length and accounts
	3225	* for both pre and pre_adjust.
	3226	* pktap->pth_length is sizeof(pktap_header) (excl the pre/pre_adjust)
	3227	* pkt->bpfp_header_length is (pktap->pth_length + pre_adjust)
	3228	* pre is the offset to the L3 header after the bpfp_header, or length
	3229	* of L2 header after bpfp_header, if present.
	3230	*/
	3231	int32_t pre = pktap->pth_frame_pre_length -
	3232	(pkt->bpfp_header_length - pktap->pth_length);
	3233
	3234	/* Length of the input packet starting from L3 header */
	3235	uint32_t in_pkt_len = len - pkt->bpfp_header_length - pre;
	3236	if (pktap->pth_protocol_family == AF_INET \|\|
	3237	pktap->pth_protocol_family == AF_INET6) {
	3238	/* Contains L2 header */
	3239	if (pre > 0) {
	3240	if (pre < (int32_t)sizeof(struct ether_header)) {
	3241	goto too_short;
	3242	}
	3243
	3244	out_pkt_len = get_ether_trunc_len(pkt, 0, in_pkt_len);
	3245	} else if (pre == 0) {
	3246	if (pktap->pth_protocol_family == AF_INET) {
	3247	out_pkt_len = get_ip_trunc_len(pkt, pre, in_pkt_len);
	3248	} else if (pktap->pth_protocol_family == AF_INET6) {
	3249	out_pkt_len = get_ip6_trunc_len(pkt, pre, in_pkt_len);
	3250	}
	3251	} else {
	3252	/* Ideally pre should be >= 0. This is an exception */
	3253	out_pkt_len = min(BPF_MIN_PKT_SIZE, in_pkt_len);
	3254	}
	3255	} else {
	3256	if (pktap->pth_iftype == IFT_ETHER) {
	3257	if (in_pkt_len < sizeof(struct ether_header)) {
	3258	goto too_short;
	3259	}
	3260	/* At most include the Ethernet header and 16 bytes */
	3261	out_pkt_len = MIN(sizeof(struct ether_header) + 16,
	3262	in_pkt_len);
	3263	} else {
	3264	/*
	3265	* For unknown protocols include at most 16 bytes
	3266	*/
	3267	out_pkt_len = MIN(16, in_pkt_len);
	3268	}
	3269	}
	3270	done:
	3271	tlen = pkt->bpfp_header_length + out_pkt_len + pre;
	3272	return tlen;
	3273	too_short:
	3274	out_pkt_len = in_pkt_len;
	3275	goto done;
	3276	}
	3277
	3278	/*
	3279	* Move the packet data from interface memory (pkt) into the
	3280	* store buffer. Return 1 if it's time to wakeup a listener (buffer full),
	3281	* otherwise 0.
	3282	*/
	3283	static void
	3284	catchpacket(struct bpf_d d, struct bpf_packet pkt,
	3285	u_int snaplen, int outbound)
	3286	{
	3287	struct bpf_hdr *hp;
	3288	struct bpf_hdr_ext *ehp;
	3289	int totlen, curlen;
	3290	int hdrlen, caplen;
	3291	int do_wakeup = 0;
	3292	u_char *payload;
	3293	struct timeval tv;
	3294
	3295	hdrlen = (d->bd_flags & BPF_EXTENDED_HDR) ? d->bd_bif->bif_exthdrlen :
	3296	d->bd_bif->bif_hdrlen;
	3297	/*
	3298	* Figure out how many bytes to move. If the packet is
	3299	* greater or equal to the snapshot length, transfer that
	3300	* much. Otherwise, transfer the whole packet (unless
	3301	* we hit the buffer size limit).
	3302	*/
	3303	totlen = hdrlen + min(snaplen, pkt->bpfp_total_length);
	3304	if (totlen > d->bd_bufsize) {
	3305	totlen = d->bd_bufsize;
	3306	}
	3307
	3308	if (hdrlen > totlen) {
	3309	return;
	3310	}
	3311
	3312	/*
	3313	* Round up the end of the previous packet to the next longword.
	3314	*/
	3315	curlen = BPF_WORDALIGN(d->bd_slen);
	3316	if (curlen + totlen > d->bd_bufsize) {
	3317	/*
	3318	* This packet will overflow the storage buffer.
	3319	* Rotate the buffers if we can, then wakeup any
	3320	* pending reads.
	3321	*
	3322	* We cannot rotate buffers if a read is in progress
	3323	* so drop the packet
	3324	*/
	3325	if (d->bd_hbuf_read != 0) {
	3326	++d->bd_dcount;
	3327	return;
	3328	}
	3329
	3330	if (d->bd_fbuf == NULL) {
	3331	if (d->bd_headdrop == 0) {
	3332	/*
	3333	* We haven't completed the previous read yet,
	3334	* so drop the packet.
	3335	*/
	3336	++d->bd_dcount;
	3337	return;
	3338	}
	3339	/*
	3340	* Drop the hold buffer as it contains older packets
	3341	*/
	3342	d->bd_dcount += d->bd_hcnt;
	3343	d->bd_fbuf = d->bd_hbuf;
	3344	ROTATE_BUFFERS(d);
	3345	} else {
	3346	ROTATE_BUFFERS(d);
	3347	}
	3348	do_wakeup = 1;
	3349	curlen = 0;
	3350	} else if (d->bd_immediate \|\| d->bd_state == BPF_TIMED_OUT) {
	3351	/*
	3352	* Immediate mode is set, or the read timeout has
	3353	* already expired during a select call. A packet
	3354	* arrived, so the reader should be woken up.
	3355	*/
	3356	do_wakeup = 1;
	3357	}
	3358
	3359	/*
	3360	* Append the bpf header.
	3361	*/
	3362	microtime(&tv);
	3363	if (d->bd_flags & BPF_EXTENDED_HDR) {
	3364	struct mbuf *m;
	3365
	3366	m = (pkt->bpfp_type == BPF_PACKET_TYPE_MBUF)
	3367	? pkt->bpfp_mbuf : NULL;
	3368	ehp = (struct bpf_hdr_ext )(void )(d->bd_sbuf + curlen);
	3369	memset(ehp, 0, sizeof(*ehp));
	3370	ehp->bh_tstamp.tv_sec = tv.tv_sec;
	3371	ehp->bh_tstamp.tv_usec = tv.tv_usec;
	3372
	3373	ehp->bh_datalen = pkt->bpfp_total_length;
	3374	ehp->bh_hdrlen = hdrlen;
	3375	caplen = ehp->bh_caplen = totlen - hdrlen;
	3376	if (m == NULL) {
	3377	if (outbound) {
	3378	ehp->bh_flags \|= BPF_HDR_EXT_FLAGS_DIR_OUT;
	3379	} else {
	3380	ehp->bh_flags \|= BPF_HDR_EXT_FLAGS_DIR_IN;
	3381	}
	3382	} else if (outbound) {
	3383	ehp->bh_flags \|= BPF_HDR_EXT_FLAGS_DIR_OUT;
	3384
	3385	/* only do lookups on non-raw INPCB */
	3386	if ((m->m_pkthdr.pkt_flags & (PKTF_FLOW_ID \|
	3387	PKTF_FLOW_LOCALSRC \| PKTF_FLOW_RAWSOCK)) ==
	3388	(PKTF_FLOW_ID \| PKTF_FLOW_LOCALSRC) &&
	3389	m->m_pkthdr.pkt_flowsrc == FLOWSRC_INPCB) {
	3390	ehp->bh_flowid = m->m_pkthdr.pkt_flowid;
	3391	ehp->bh_proto = m->m_pkthdr.pkt_proto;
	3392	}
	3393	ehp->bh_svc = so_svc2tc(m->m_pkthdr.pkt_svc);
	3394	if (m->m_pkthdr.pkt_flags & PKTF_TCP_REXMT) {
	3395	ehp->bh_pktflags \|= BPF_PKTFLAGS_TCP_REXMT;
	3396	}
	3397	if (m->m_pkthdr.pkt_flags & PKTF_START_SEQ) {
	3398	ehp->bh_pktflags \|= BPF_PKTFLAGS_START_SEQ;
	3399	}
	3400	if (m->m_pkthdr.pkt_flags & PKTF_LAST_PKT) {
	3401	ehp->bh_pktflags \|= BPF_PKTFLAGS_LAST_PKT;
	3402	}
	3403	if (m->m_pkthdr.pkt_flags & PKTF_VALID_UNSENT_DATA) {
	3404	ehp->bh_unsent_bytes =
	3405	m->m_pkthdr.bufstatus_if;
	3406	ehp->bh_unsent_snd =
	3407	m->m_pkthdr.bufstatus_sndbuf;
	3408	}
	3409	} else {
	3410	ehp->bh_flags \|= BPF_HDR_EXT_FLAGS_DIR_IN;
	3411	}
	3412	payload = (u_char *)ehp + hdrlen;
	3413	} else {
	3414	hp = (struct bpf_hdr )(void )(d->bd_sbuf + curlen);
	3415	hp->bh_tstamp.tv_sec = tv.tv_sec;
	3416	hp->bh_tstamp.tv_usec = tv.tv_usec;
	3417	hp->bh_datalen = pkt->bpfp_total_length;
	3418	hp->bh_hdrlen = hdrlen;
	3419	caplen = hp->bh_caplen = totlen - hdrlen;
	3420	payload = (u_char *)hp + hdrlen;
	3421	}
	3422	/*
	3423	* Copy the packet data into the store buffer and update its length.
	3424	*/
	3425	copy_bpf_packet(pkt, payload, caplen);
	3426	d->bd_slen = curlen + totlen;
	3427	d->bd_scnt += 1;
	3428
	3429	if (do_wakeup) {
	3430	bpf_wakeup(d);
	3431	}
	3432	}
	3433
	3434	/*
	3435	* Initialize all nonzero fields of a descriptor.
	3436	*/
	3437	static int
	3438	bpf_allocbufs(struct bpf_d *d)
	3439	{
	3440	if (d->bd_sbuf != NULL) {
	3441	FREE(d->bd_sbuf, M_DEVBUF);
	3442	d->bd_sbuf = NULL;
	3443	}
	3444	if (d->bd_hbuf != NULL) {
	3445	FREE(d->bd_hbuf, M_DEVBUF);
	3446	d->bd_hbuf = NULL;
	3447	}
	3448	if (d->bd_fbuf != NULL) {
	3449	FREE(d->bd_fbuf, M_DEVBUF);
	3450	d->bd_fbuf = NULL;
	3451	}
	3452
	3453	d->bd_fbuf = (caddr_t) _MALLOC(d->bd_bufsize, M_DEVBUF, M_WAIT);
	3454	if (d->bd_fbuf == NULL) {
	3455	return ENOBUFS;
	3456	}
	3457
	3458	d->bd_sbuf = (caddr_t) _MALLOC(d->bd_bufsize, M_DEVBUF, M_WAIT);
	3459	if (d->bd_sbuf == NULL) {
	3460	FREE(d->bd_fbuf, M_DEVBUF);
	3461	d->bd_fbuf = NULL;
	3462	return ENOBUFS;
	3463	}
	3464	d->bd_slen = 0;
	3465	d->bd_hlen = 0;
	3466	d->bd_scnt = 0;
	3467	d->bd_hcnt = 0;
	3468	return 0;
	3469	}
	3470
	3471	/*
	3472	* Free buffers currently in use by a descriptor.
	3473	* Called on close.
	3474	*/
	3475	static void
	3476	bpf_freed(struct bpf_d *d)
	3477	{
	3478	/*
	3479	* We don't need to lock out interrupts since this descriptor has
	3480	* been detached from its interface and it yet hasn't been marked
	3481	* free.
	3482	*/
	3483	if (d->bd_hbuf_read != 0) {
	3484	panic("bpf buffer freed during read");
	3485	}
	3486
	3487	if (d->bd_sbuf != 0) {
	3488	FREE(d->bd_sbuf, M_DEVBUF);
	3489	if (d->bd_hbuf != 0) {
	3490	FREE(d->bd_hbuf, M_DEVBUF);
	3491	}
	3492	if (d->bd_fbuf != 0) {
	3493	FREE(d->bd_fbuf, M_DEVBUF);
	3494	}
	3495	}
	3496	if (d->bd_filter) {
	3497	FREE(d->bd_filter, M_DEVBUF);
	3498	}
	3499	}
	3500
	3501	/*
	3502	* Attach an interface to bpf. driverp is a pointer to a (struct bpf_if *)
	3503	* in the driver's softc; dlt is the link layer type; hdrlen is the fixed
	3504	* size of the link header (variable length headers not yet supported).
	3505	*/
	3506	void
	3507	bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen)
	3508	{
	3509	bpf_attach(ifp, dlt, hdrlen, NULL, NULL);
	3510	}
	3511
	3512	errno_t
	3513	bpf_attach(
	3514	ifnet_t ifp,
	3515	u_int32_t dlt,
	3516	u_int32_t hdrlen,
	3517	bpf_send_func send,
	3518	bpf_tap_func tap)
	3519	{
	3520	struct bpf_if *bp;
	3521	struct bpf_if *bp_new;
	3522	struct bpf_if *bp_before_first = NULL;
	3523	struct bpf_if *bp_first = NULL;
	3524	struct bpf_if *bp_last = NULL;
	3525	boolean_t found;
	3526
	3527	bp_new = (struct bpf_if ) _MALLOC(sizeof(bp_new), M_DEVBUF,
	3528	M_WAIT \| M_ZERO);
	3529	if (bp_new == 0) {
	3530	panic("bpfattach");
	3531	}
	3532
	3533	lck_mtx_lock(bpf_mlock);
	3534
	3535	/*
	3536	* Check if this interface/dlt is already attached. Remember the
	3537	* first and last attachment for this interface, as well as the
	3538	* element before the first attachment.
	3539	*/
	3540	found = FALSE;
	3541	for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
	3542	if (bp->bif_ifp != ifp) {
	3543	if (bp_first != NULL) {
	3544	/* no more elements for this interface */
	3545	break;
	3546	}
	3547	bp_before_first = bp;
	3548	} else {
	3549	if (bp->bif_dlt == dlt) {
	3550	found = TRUE;
	3551	break;
	3552	}
	3553	if (bp_first == NULL) {
	3554	bp_first = bp;
	3555	}
	3556	bp_last = bp;
	3557	}
	3558	}
	3559	if (found) {
	3560	lck_mtx_unlock(bpf_mlock);
	3561	printf("bpfattach - %s with dlt %d is already attached\n",
	3562	if_name(ifp), dlt);
	3563	FREE(bp_new, M_DEVBUF);
	3564	return EEXIST;
	3565	}
	3566
	3567	bp_new->bif_ifp = ifp;
	3568	bp_new->bif_dlt = dlt;
	3569	bp_new->bif_send = send;
	3570	bp_new->bif_tap = tap;
	3571
	3572	if (bp_first == NULL) {
	3573	/* No other entries for this ifp */
	3574	bp_new->bif_next = bpf_iflist;
	3575	bpf_iflist = bp_new;
	3576	} else {
	3577	if (ifnet_type(ifp) == IFT_ETHER && dlt == DLT_EN10MB) {
	3578	/* Make this the first entry for this interface */
	3579	if (bp_before_first != NULL) {
	3580	/* point the previous to us */
	3581	bp_before_first->bif_next = bp_new;
	3582	} else {
	3583	/* we're the new head */
	3584	bpf_iflist = bp_new;
	3585	}
	3586	bp_new->bif_next = bp_first;
	3587	} else {
	3588	/* Add this after the last entry for this interface */
	3589	bp_new->bif_next = bp_last->bif_next;
	3590	bp_last->bif_next = bp_new;
	3591	}
	3592	}
	3593
	3594	/*
	3595	* Compute the length of the bpf header. This is not necessarily
	3596	* equal to SIZEOF_BPF_HDR because we want to insert spacing such
	3597	* that the network layer header begins on a longword boundary (for
	3598	* performance reasons and to alleviate alignment restrictions).
	3599	*/
	3600	bp_new->bif_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen;
	3601	bp_new->bif_exthdrlen = BPF_WORDALIGN(hdrlen +
	3602	sizeof(struct bpf_hdr_ext)) - hdrlen;
	3603
	3604	/* Take a reference on the interface */
	3605	ifnet_reference(ifp);
	3606
	3607	lck_mtx_unlock(bpf_mlock);
	3608
	3609	#ifndef __APPLE__
	3610	if (bootverbose) {
	3611	printf("bpf: %s attached\n", if_name(ifp));
	3612	}
	3613	#endif
	3614
	3615	return 0;
	3616	}
	3617
	3618	/*
	3619	* Detach bpf from an interface. This involves detaching each descriptor
	3620	* associated with the interface, and leaving bd_bif NULL. Notify each
	3621	* descriptor as it's detached so that any sleepers wake up and get
	3622	* ENXIO.
	3623	*/
	3624	void
	3625	bpfdetach(struct ifnet *ifp)
	3626	{
	3627	struct bpf_if bp, bp_prev, *bp_next;
	3628	struct bpf_d *d;
	3629
	3630	if (bpf_debug != 0) {
	3631	printf("%s: %s\n", __func__, if_name(ifp));
	3632	}
	3633
	3634	lck_mtx_lock(bpf_mlock);
	3635
	3636	/*
	3637	* Build the list of devices attached to that interface
	3638	* that we need to free while keeping the lock to maintain
	3639	* the integrity of the interface list
	3640	*/
	3641	bp_prev = NULL;
	3642	for (bp = bpf_iflist; bp != NULL; bp = bp_next) {
	3643	bp_next = bp->bif_next;
	3644
	3645	if (ifp != bp->bif_ifp) {
	3646	bp_prev = bp;
	3647	continue;
	3648	}
	3649	/* Unlink from the interface list */
	3650	if (bp_prev) {
	3651	bp_prev->bif_next = bp->bif_next;
	3652	} else {
	3653	bpf_iflist = bp->bif_next;
	3654	}
	3655
	3656	/* Detach the devices attached to the interface */
	3657	while ((d = bp->bif_dlist) != NULL) {
	3658	/*
	3659	* Take an extra reference to prevent the device
	3660	* from being freed when bpf_detachd() releases
	3661	* the reference for the interface list
	3662	*/
	3663	bpf_acquire_d(d);
	3664	bpf_detachd(d, 0);
	3665	bpf_wakeup(d);
	3666	bpf_release_d(d);
	3667	}
	3668	ifnet_release(ifp);
	3669	}
	3670
	3671	lck_mtx_unlock(bpf_mlock);
	3672	}
	3673
	3674	void
	3675	bpf_init(__unused void *unused)
	3676	{
	3677	#ifdef __APPLE__
	3678	int i;
	3679	int maj;
	3680
	3681	if (bpf_devsw_installed == 0) {
	3682	bpf_devsw_installed = 1;
	3683	bpf_mlock_grp_attr = lck_grp_attr_alloc_init();
	3684	bpf_mlock_grp = lck_grp_alloc_init("bpf", bpf_mlock_grp_attr);
	3685	bpf_mlock_attr = lck_attr_alloc_init();
	3686	lck_mtx_init(bpf_mlock, bpf_mlock_grp, bpf_mlock_attr);
	3687	maj = cdevsw_add(CDEV_MAJOR, &bpf_cdevsw);
	3688	if (maj == -1) {
	3689	if (bpf_mlock_attr) {
	3690	lck_attr_free(bpf_mlock_attr);
	3691	}
	3692	if (bpf_mlock_grp) {
	3693	lck_grp_free(bpf_mlock_grp);
	3694	}
	3695	if (bpf_mlock_grp_attr) {
	3696	lck_grp_attr_free(bpf_mlock_grp_attr);
	3697	}
	3698
	3699	bpf_mlock = NULL;
	3700	bpf_mlock_attr = NULL;
	3701	bpf_mlock_grp = NULL;
	3702	bpf_mlock_grp_attr = NULL;
	3703	bpf_devsw_installed = 0;
	3704	printf("bpf_init: failed to allocate a major number\n");
	3705	return;
	3706	}
	3707
	3708	for (i = 0; i < NBPFILTER; i++) {
	3709	bpf_make_dev_t(maj);
	3710	}
	3711	}
	3712	#else
	3713	cdevsw_add(&bpf_cdevsw);
	3714	#endif
	3715	}
	3716
	3717	#ifndef __APPLE__
	3718	SYSINIT(bpfdev, SI_SUB_DRIVERS, SI_ORDER_MIDDLE + CDEV_MAJOR, bpf_drvinit, NULL);
	3719	#endif
	3720
	3721	#if CONFIG_MACF_NET
	3722	struct label *
	3723	mac_bpfdesc_label_get(struct bpf_d *d)
	3724	{
	3725	return d->bd_label;
	3726	}
	3727
	3728	void
	3729	mac_bpfdesc_label_set(struct bpf_d d, struct label label)
	3730	{
	3731	d->bd_label = label;
	3732	}
	3733	#endif
	3734
	3735	static int
	3736	sysctl_bpf_maxbufsize SYSCTL_HANDLER_ARGS
	3737	{
	3738	#pragma unused(arg1, arg2)
	3739	int i, err;
	3740
	3741	i = bpf_maxbufsize;
	3742
	3743	err = sysctl_handle_int(oidp, &i, 0, req);
	3744	if (err != 0 \|\| req->newptr == USER_ADDR_NULL) {
	3745	return err;
	3746	}
	3747
	3748	if (i < 0 \|\| i > BPF_MAXSIZE_CAP) {
	3749	i = BPF_MAXSIZE_CAP;
	3750	}
	3751
	3752	bpf_maxbufsize = i;
	3753	return err;
	3754	}