/* bsd/net/dlil.c (apple/xnu, xnu-3247.10.11) */
/*
 * Copyright (c) 1999-2015 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
 * support for mandatory and extensible security protections. This notice
 * is included in support of clause 2.2 (b) of the Apple Public License,
 * Version 2.0.
 */
#include <stddef.h>

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/domain.h>
#include <sys/user.h>
#include <sys/random.h>
#include <sys/socketvar.h>
#include <net/if_dl.h>
#include <net/if.h>
#include <net/route.h>
#include <net/if_var.h>
#include <net/dlil.h>
#include <net/if_arp.h>
#include <net/iptap.h>
#include <net/pktap.h>
#include <sys/kern_event.h>
#include <sys/kdebug.h>
#include <sys/mcache.h>
#include <sys/syslog.h>
#include <sys/protosw.h>
#include <sys/priv.h>

#include <kern/assert.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/sched_prim.h>
#include <kern/locks.h>
#include <kern/zalloc.h>

#include <net/kpi_protocol.h>
#include <net/if_types.h>
#include <net/if_llreach.h>
#include <net/kpi_interfacefilter.h>
#include <net/classq/classq.h>
#include <net/classq/classq_sfb.h>
#include <net/flowhash.h>
#include <net/ntstat.h>

#if INET
#include <netinet/in_var.h>
#include <netinet/igmp_var.h>
#include <netinet/ip_var.h>
#include <netinet/tcp.h>
#include <netinet/tcp_var.h>
#include <netinet/udp.h>
#include <netinet/udp_var.h>
#include <netinet/if_ether.h>
#include <netinet/in_pcb.h>
#endif /* INET */

#if INET6
#include <netinet6/in6_var.h>
#include <netinet6/nd6.h>
#include <netinet6/mld6_var.h>
#include <netinet6/scope6_var.h>
#endif /* INET6 */

#include <libkern/OSAtomic.h>
#include <libkern/tree.h>

#include <dev/random/randomdev.h>
#include <machine/machine_routines.h>

#include <mach/thread_act.h>
#include <mach/sdt.h>

#if CONFIG_MACF
#include <sys/kauth.h>
#include <security/mac_framework.h>
#include <net/ethernet.h>
#include <net/firewire.h>
#endif

#if PF
#include <net/pfvar.h>
#endif /* PF */
#if PF_ALTQ
#include <net/altq/altq.h>
#endif /* PF_ALTQ */
#include <net/pktsched/pktsched.h>

#define	DBG_LAYER_BEG		DLILDBG_CODE(DBG_DLIL_STATIC, 0)
#define	DBG_LAYER_END		DLILDBG_CODE(DBG_DLIL_STATIC, 2)
#define	DBG_FNC_DLIL_INPUT	DLILDBG_CODE(DBG_DLIL_STATIC, (1 << 8))
#define	DBG_FNC_DLIL_OUTPUT	DLILDBG_CODE(DBG_DLIL_STATIC, (2 << 8))
#define	DBG_FNC_DLIL_IFOUT	DLILDBG_CODE(DBG_DLIL_STATIC, (3 << 8))

#define	MAX_FRAME_TYPE_SIZE	4 /* LONGWORDS */
#define	MAX_LINKADDR		4 /* LONGWORDS */
#define	M_NKE			M_IFADDR

#if 1
#define	DLIL_PRINTF	printf
#else
#define	DLIL_PRINTF	kprintf
#endif

#define	IF_DATA_REQUIRE_ALIGNED_64(f)	\
	_CASSERT(!(offsetof(struct if_data_internal, f) % sizeof (u_int64_t)))

#define	IFNET_IF_DATA_REQUIRE_ALIGNED_64(f)	\
	_CASSERT(!(offsetof(struct ifnet, if_data.f) % sizeof (u_int64_t)))

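/*
 * For example, IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes) expands to
 *
 *	_CASSERT(!(offsetof(struct if_data_internal, ifi_ibytes) %
 *	    sizeof (u_int64_t)))
 *
 * i.e. a compile-time check that the 64-bit counter is naturally
 * aligned and can therefore be updated with atomic operations;
 * dlil_init() below applies this to every 64-bit if_data field.
 */
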
enum {
	kProtoKPI_v1	= 1,
	kProtoKPI_v2	= 2
};

/*
 * List of if_proto structures in if_proto_hash[] is protected by
 * the ifnet lock. The rest of the fields are initialized at protocol
 * attach time and never change, thus no lock required as long as
 * a reference to it is valid, via if_proto_ref().
 */
struct if_proto {
	SLIST_ENTRY(if_proto)	next_hash;
	u_int32_t		refcount;
	u_int32_t		detached;
	struct ifnet		*ifp;
	protocol_family_t	protocol_family;
	int			proto_kpi;
	union {
		struct {
			proto_media_input	input;
			proto_media_preout	pre_output;
			proto_media_event	event;
			proto_media_ioctl	ioctl;
			proto_media_detached	detached;
			proto_media_resolve_multi resolve_multi;
			proto_media_send_arp	send_arp;
		} v1;
		struct {
			proto_media_input_v2	input;
			proto_media_preout	pre_output;
			proto_media_event	event;
			proto_media_ioctl	ioctl;
			proto_media_detached	detached;
			proto_media_resolve_multi resolve_multi;
			proto_media_send_arp	send_arp;
		} v2;
	} kpi;
};

SLIST_HEAD(proto_hash_entry, if_proto);

#define	DLIL_SDLMAXLEN	64
#define	DLIL_SDLDATALEN	\
	(DLIL_SDLMAXLEN - offsetof(struct sockaddr_dl, sdl_data[0]))
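
/*
 * DLIL_SDLMAXLEN bounds the sockaddr_dl storage embedded in struct
 * dlil_ifnet below (dl_if_lladdr.asdl/msdl); DLIL_SDLDATALEN is the
 * room left for sdl_data once the fixed sockaddr_dl header preceding
 * sdl_data[0] is accounted for.
 */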

struct dlil_ifnet {
	struct ifnet	dl_if;			/* public ifnet */
	/*
	 * DLIL private fields, protected by dl_if_lock
	 */
	decl_lck_mtx_data(, dl_if_lock);
	TAILQ_ENTRY(dlil_ifnet) dl_if_link;	/* dlil_ifnet link */
	u_int32_t dl_if_flags;			/* flags (below) */
	u_int32_t dl_if_refcnt;			/* refcnt */
	void (*dl_if_trace)(struct dlil_ifnet *, int); /* ref trace callback */
	void	*dl_if_uniqueid;		/* unique interface id */
	size_t	dl_if_uniqueid_len;		/* length of the unique id */
	char	dl_if_namestorage[IFNAMSIZ];	/* interface name storage */
	char	dl_if_xnamestorage[IFXNAMSIZ];	/* external name storage */
	struct {
		struct ifaddr	ifa;		/* lladdr ifa */
		u_int8_t	asdl[DLIL_SDLMAXLEN]; /* addr storage */
		u_int8_t	msdl[DLIL_SDLMAXLEN]; /* mask storage */
	} dl_if_lladdr;
	u_int8_t dl_if_descstorage[IF_DESCSIZE]; /* desc storage */
	struct dlil_threading_info dl_if_inpstorage; /* input thread storage */
	ctrace_t	dl_if_attach;		/* attach PC stacktrace */
	ctrace_t	dl_if_detach;		/* detach PC stacktrace */
};

/* Values for dl_if_flags (private to DLIL) */
#define	DLIF_INUSE	0x1	/* DLIL ifnet recycler, ifnet in use */
#define	DLIF_REUSE	0x2	/* DLIL ifnet recycles, ifnet is not new */
#define	DLIF_DEBUG	0x4	/* has debugging info */

#define	IF_REF_TRACE_HIST_SIZE	8	/* size of ref trace history */

/* For gdb */
__private_extern__ unsigned int if_ref_trace_hist_size = IF_REF_TRACE_HIST_SIZE;

struct dlil_ifnet_dbg {
	struct dlil_ifnet	dldbg_dlif;		/* dlil_ifnet */
	u_int16_t		dldbg_if_refhold_cnt;	/* # ifnet references */
	u_int16_t		dldbg_if_refrele_cnt;	/* # ifnet releases */
	/*
	 * Circular lists of ifnet_{reference,release} callers.
	 */
	ctrace_t		dldbg_if_refhold[IF_REF_TRACE_HIST_SIZE];
	ctrace_t		dldbg_if_refrele[IF_REF_TRACE_HIST_SIZE];
};

#define	DLIL_TO_IFP(s)	(&s->dl_if)
#define	IFP_TO_DLIL(s)	((struct dlil_ifnet *)s)

struct ifnet_filter {
	TAILQ_ENTRY(ifnet_filter)	filt_next;
	u_int32_t			filt_skip;
	u_int32_t			filt_flags;
	ifnet_t				filt_ifp;
	const char			*filt_name;
	void				*filt_cookie;
	protocol_family_t		filt_protocol;
	iff_input_func			filt_input;
	iff_output_func			filt_output;
	iff_event_func			filt_event;
	iff_ioctl_func			filt_ioctl;
	iff_detached_func		filt_detached;
};

struct proto_input_entry;

static TAILQ_HEAD(, dlil_ifnet) dlil_ifnet_head;
static lck_grp_t *dlil_lock_group;
lck_grp_t *ifnet_lock_group;
static lck_grp_t *ifnet_head_lock_group;
static lck_grp_t *ifnet_snd_lock_group;
static lck_grp_t *ifnet_rcv_lock_group;
lck_attr_t *ifnet_lock_attr;
decl_lck_rw_data(static, ifnet_head_lock);
decl_lck_mtx_data(static, dlil_ifnet_lock);
u_int32_t dlil_filter_disable_tso_count = 0;

#if DEBUG
static unsigned int ifnet_debug = 1;	/* debugging (enabled) */
#else
static unsigned int ifnet_debug;	/* debugging (disabled) */
#endif /* !DEBUG */
static unsigned int dlif_size;		/* size of dlil_ifnet to allocate */
static unsigned int dlif_bufsize;	/* size of dlif_size + headroom */
static struct zone *dlif_zone;		/* zone for dlil_ifnet */

#define	DLIF_ZONE_MAX		64		/* maximum elements in zone */
#define	DLIF_ZONE_NAME		"ifnet"		/* zone name */

static unsigned int dlif_filt_size;	/* size of ifnet_filter */
static struct zone *dlif_filt_zone;	/* zone for ifnet_filter */

#define	DLIF_FILT_ZONE_MAX	8		/* maximum elements in zone */
#define	DLIF_FILT_ZONE_NAME	"ifnet_filter"	/* zone name */

static unsigned int dlif_phash_size;	/* size of ifnet proto hash table */
static struct zone *dlif_phash_zone;	/* zone for ifnet proto hash table */

#define	DLIF_PHASH_ZONE_MAX	DLIF_ZONE_MAX	/* maximum elements in zone */
#define	DLIF_PHASH_ZONE_NAME	"ifnet_proto_hash" /* zone name */

static unsigned int dlif_proto_size;	/* size of if_proto */
static struct zone *dlif_proto_zone;	/* zone for if_proto */

#define	DLIF_PROTO_ZONE_MAX	(DLIF_ZONE_MAX*2)	/* maximum elements in zone */
#define	DLIF_PROTO_ZONE_NAME	"ifnet_proto"	/* zone name */

static unsigned int dlif_tcpstat_size;	/* size of tcpstat_local to allocate */
static unsigned int dlif_tcpstat_bufsize; /* size of dlif_tcpstat_size + headroom */
static struct zone *dlif_tcpstat_zone;	/* zone for tcpstat_local */

#define	DLIF_TCPSTAT_ZONE_MAX	1		/* maximum elements in zone */
#define	DLIF_TCPSTAT_ZONE_NAME	"ifnet_tcpstat"	/* zone name */

static unsigned int dlif_udpstat_size;	/* size of udpstat_local to allocate */
static unsigned int dlif_udpstat_bufsize; /* size of dlif_udpstat_size + headroom */
static struct zone *dlif_udpstat_zone;	/* zone for udpstat_local */

#define	DLIF_UDPSTAT_ZONE_MAX	1		/* maximum elements in zone */
#define	DLIF_UDPSTAT_ZONE_NAME	"ifnet_udpstat"	/* zone name */

/*
 * Updating this variable should be done by first acquiring the global
 * radix node head (rnh_lock), in tandem with setting/clearing the
 * PR_AGGDRAIN for routedomain.
 */
u_int32_t ifnet_aggressive_drainers;
static u_int32_t net_rtref;

static struct dlil_main_threading_info dlil_main_input_thread_info;
__private_extern__ struct dlil_threading_info *dlil_main_input_thread =
    (struct dlil_threading_info *)&dlil_main_input_thread_info;

static int dlil_event_internal(struct ifnet *ifp, struct kev_msg *msg);
static int dlil_detach_filter_internal(interface_filter_t filter, int detached);
static void dlil_if_trace(struct dlil_ifnet *, int);
static void if_proto_ref(struct if_proto *);
static void if_proto_free(struct if_proto *);
static struct if_proto *find_attached_proto(struct ifnet *, u_int32_t);
static int dlil_ifp_proto_count(struct ifnet *);
static void if_flt_monitor_busy(struct ifnet *);
static void if_flt_monitor_unbusy(struct ifnet *);
static void if_flt_monitor_enter(struct ifnet *);
static void if_flt_monitor_leave(struct ifnet *);
static int dlil_interface_filters_input(struct ifnet *, struct mbuf **,
    char **, protocol_family_t);
static int dlil_interface_filters_output(struct ifnet *, struct mbuf **,
    protocol_family_t);
static struct ifaddr *dlil_alloc_lladdr(struct ifnet *,
    const struct sockaddr_dl *);
static int ifnet_lookup(struct ifnet *);
static void if_purgeaddrs(struct ifnet *);

static errno_t ifproto_media_input_v1(struct ifnet *, protocol_family_t,
    struct mbuf *, char *);
static errno_t ifproto_media_input_v2(struct ifnet *, protocol_family_t,
    struct mbuf *);
static errno_t ifproto_media_preout(struct ifnet *, protocol_family_t,
    mbuf_t *, const struct sockaddr *, void *, char *, char *);
static void ifproto_media_event(struct ifnet *, protocol_family_t,
    const struct kev_msg *);
static errno_t ifproto_media_ioctl(struct ifnet *, protocol_family_t,
    unsigned long, void *);
static errno_t ifproto_media_resolve_multi(ifnet_t, const struct sockaddr *,
    struct sockaddr_dl *, size_t);
static errno_t ifproto_media_send_arp(struct ifnet *, u_short,
    const struct sockaddr_dl *, const struct sockaddr *,
    const struct sockaddr_dl *, const struct sockaddr *);

static errno_t ifp_if_output(struct ifnet *, struct mbuf *);
static void ifp_if_start(struct ifnet *);
static void ifp_if_input_poll(struct ifnet *, u_int32_t, u_int32_t,
    struct mbuf **, struct mbuf **, u_int32_t *, u_int32_t *);
static errno_t ifp_if_ctl(struct ifnet *, ifnet_ctl_cmd_t, u_int32_t, void *);
static errno_t ifp_if_demux(struct ifnet *, struct mbuf *, char *,
    protocol_family_t *);
static errno_t ifp_if_add_proto(struct ifnet *, protocol_family_t,
    const struct ifnet_demux_desc *, u_int32_t);
static errno_t ifp_if_del_proto(struct ifnet *, protocol_family_t);
static errno_t ifp_if_check_multi(struct ifnet *, const struct sockaddr *);
static errno_t ifp_if_framer(struct ifnet *, struct mbuf **,
    const struct sockaddr *, const char *, const char *);
static errno_t ifp_if_framer_extended(struct ifnet *, struct mbuf **,
    const struct sockaddr *, const char *, const char *,
    u_int32_t *, u_int32_t *);
static errno_t ifp_if_set_bpf_tap(struct ifnet *, bpf_tap_mode, bpf_packet_func);
static void ifp_if_free(struct ifnet *);
static void ifp_if_event(struct ifnet *, const struct kev_msg *);
static __inline void ifp_inc_traffic_class_in(struct ifnet *, struct mbuf *);
static __inline void ifp_inc_traffic_class_out(struct ifnet *, struct mbuf *);

static void dlil_main_input_thread_func(void *, wait_result_t);
static void dlil_input_thread_func(void *, wait_result_t);
static void dlil_rxpoll_input_thread_func(void *, wait_result_t);
static int dlil_create_input_thread(ifnet_t, struct dlil_threading_info *);
static void dlil_terminate_input_thread(struct dlil_threading_info *);
static void dlil_input_stats_add(const struct ifnet_stat_increment_param *,
    struct dlil_threading_info *, boolean_t);
static void dlil_input_stats_sync(struct ifnet *, struct dlil_threading_info *);
static void dlil_input_packet_list_common(struct ifnet *, struct mbuf *,
    u_int32_t, ifnet_model_t, boolean_t);
static errno_t ifnet_input_common(struct ifnet *, struct mbuf *, struct mbuf *,
    const struct ifnet_stat_increment_param *, boolean_t, boolean_t);

#if DEBUG
static void dlil_verify_sum16(void);
#endif /* DEBUG */
static void dlil_output_cksum_dbg(struct ifnet *, struct mbuf *, uint32_t,
    protocol_family_t);
static void dlil_input_cksum_dbg(struct ifnet *, struct mbuf *, char *,
    protocol_family_t);

static void ifnet_detacher_thread_func(void *, wait_result_t);
static int ifnet_detacher_thread_cont(int);
static void ifnet_detach_final(struct ifnet *);
static void ifnet_detaching_enqueue(struct ifnet *);
static struct ifnet *ifnet_detaching_dequeue(void);

static void ifnet_start_thread_fn(void *, wait_result_t);
static void ifnet_poll_thread_fn(void *, wait_result_t);
static void ifnet_poll(struct ifnet *);

static void ifp_src_route_copyout(struct ifnet *, struct route *);
static void ifp_src_route_copyin(struct ifnet *, struct route *);
#if INET6
static void ifp_src_route6_copyout(struct ifnet *, struct route_in6 *);
static void ifp_src_route6_copyin(struct ifnet *, struct route_in6 *);
#endif /* INET6 */

static int sysctl_rxpoll SYSCTL_HANDLER_ARGS;
static int sysctl_rxpoll_mode_holdtime SYSCTL_HANDLER_ARGS;
static int sysctl_rxpoll_sample_holdtime SYSCTL_HANDLER_ARGS;
static int sysctl_rxpoll_interval_time SYSCTL_HANDLER_ARGS;
static int sysctl_rxpoll_wlowat SYSCTL_HANDLER_ARGS;
static int sysctl_rxpoll_whiwat SYSCTL_HANDLER_ARGS;
static int sysctl_sndq_maxlen SYSCTL_HANDLER_ARGS;
static int sysctl_rcvq_maxlen SYSCTL_HANDLER_ARGS;
static int sysctl_hwcksum_dbg_mode SYSCTL_HANDLER_ARGS;
static int sysctl_hwcksum_dbg_partial_rxoff_forced SYSCTL_HANDLER_ARGS;
static int sysctl_hwcksum_dbg_partial_rxoff_adj SYSCTL_HANDLER_ARGS;
static int sysctl_get_ports_used SYSCTL_HANDLER_ARGS;

struct chain_len_stats tx_chain_len_stats;
static int sysctl_tx_chain_len_stats SYSCTL_HANDLER_ARGS;

/* The following are protected by dlil_ifnet_lock */
static TAILQ_HEAD(, ifnet) ifnet_detaching_head;
static u_int32_t ifnet_detaching_cnt;
static void *ifnet_delayed_run;	/* wait channel for detaching thread */

decl_lck_mtx_data(static, ifnet_fc_lock);

static uint32_t ifnet_flowhash_seed;

struct ifnet_flowhash_key {
	char		ifk_name[IFNAMSIZ];
	uint32_t	ifk_unit;
	uint32_t	ifk_flags;
	uint32_t	ifk_eflags;
	uint32_t	ifk_capabilities;
	uint32_t	ifk_capenable;
	uint32_t	ifk_output_sched_model;
	uint32_t	ifk_rand1;
	uint32_t	ifk_rand2;
};
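
/*
 * A sketch of how this fits together (the implementation lives later
 * in this file): ifnet_calc_flowhash() fills the key above from the
 * ifnet and hashes it, seeded with ifnet_flowhash_seed, to produce the
 * 32-bit ifce_flowhash that keys the ifnet_fc_tree below.
 */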

/* Flow control entry per interface */
struct ifnet_fc_entry {
	RB_ENTRY(ifnet_fc_entry) ifce_entry;
	u_int32_t	ifce_flowhash;
	struct ifnet	*ifce_ifp;
};

static uint32_t ifnet_calc_flowhash(struct ifnet *);
static int ifce_cmp(const struct ifnet_fc_entry *,
    const struct ifnet_fc_entry *);
static int ifnet_fc_add(struct ifnet *);
static struct ifnet_fc_entry *ifnet_fc_get(u_int32_t);
static void ifnet_fc_entry_free(struct ifnet_fc_entry *);

/* protected by ifnet_fc_lock */
RB_HEAD(ifnet_fc_tree, ifnet_fc_entry) ifnet_fc_tree;
RB_PROTOTYPE(ifnet_fc_tree, ifnet_fc_entry, ifce_entry, ifce_cmp);
RB_GENERATE(ifnet_fc_tree, ifnet_fc_entry, ifce_entry, ifce_cmp);

static unsigned int ifnet_fc_zone_size;	/* sizeof ifnet_fc_entry */
static struct zone *ifnet_fc_zone;	/* ifnet_fc_entry zone */

#define	IFNET_FC_ZONE_NAME	"ifnet_fc_zone"
#define	IFNET_FC_ZONE_MAX	32

extern void bpfdetach(struct ifnet *);
extern void proto_input_run(void);

extern uint32_t udp_count_opportunistic(unsigned int ifindex,
    u_int32_t flags);
extern uint32_t tcp_count_opportunistic(unsigned int ifindex,
    u_int32_t flags);

__private_extern__ void link_rtrequest(int, struct rtentry *, struct sockaddr *);

#if CONFIG_MACF
int dlil_lladdr_ckreq = 0;
#endif

#if DEBUG
int dlil_verbose = 1;
#else
int dlil_verbose = 0;
#endif /* DEBUG */
#if IFNET_INPUT_SANITY_CHK
/* sanity checking of input packet lists received */
static u_int32_t dlil_input_sanity_check = 0;
#endif /* IFNET_INPUT_SANITY_CHK */
/* rate limit debug messages */
struct timespec dlil_dbgrate = { 1, 0 };

SYSCTL_DECL(_net_link_generic_system);

#if CONFIG_MACF
SYSCTL_INT(_net_link_generic_system, OID_AUTO, dlil_lladdr_ckreq,
    CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_lladdr_ckreq, 0,
    "Require MACF system info check to expose link-layer address");
#endif

SYSCTL_INT(_net_link_generic_system, OID_AUTO, dlil_verbose,
    CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_verbose, 0, "Log DLIL error messages");

#define	IF_SNDQ_MINLEN	32
u_int32_t if_sndq_maxlen = IFQ_MAXLEN;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, sndq_maxlen,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_sndq_maxlen, IFQ_MAXLEN,
    sysctl_sndq_maxlen, "I", "Default transmit queue max length");

#define	IF_RCVQ_MINLEN	32
#define	IF_RCVQ_MAXLEN	256
u_int32_t if_rcvq_maxlen = IF_RCVQ_MAXLEN;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rcvq_maxlen,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rcvq_maxlen, IFQ_MAXLEN,
    sysctl_rcvq_maxlen, "I", "Default receive queue max length");

#define	IF_RXPOLL_DECAY	2	/* ilog2 of EWMA decay rate (4) */
static u_int32_t if_rxpoll_decay = IF_RXPOLL_DECAY;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_decay,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_decay, IF_RXPOLL_DECAY,
    "ilog2 of EWMA decay rate of avg inbound packets");

#define	IF_RXPOLL_MODE_HOLDTIME_MIN	(10ULL * 1000 * 1000)	/* 10 ms */
#define	IF_RXPOLL_MODE_HOLDTIME		(1000ULL * 1000 * 1000)	/* 1 sec */
static u_int64_t if_rxpoll_mode_holdtime = IF_RXPOLL_MODE_HOLDTIME;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_freeze_time,
    CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_mode_holdtime,
    IF_RXPOLL_MODE_HOLDTIME, sysctl_rxpoll_mode_holdtime,
    "Q", "input poll mode freeze time");

#define	IF_RXPOLL_SAMPLETIME_MIN	(1ULL * 1000 * 1000)	/* 1 ms */
#define	IF_RXPOLL_SAMPLETIME		(10ULL * 1000 * 1000)	/* 10 ms */
static u_int64_t if_rxpoll_sample_holdtime = IF_RXPOLL_SAMPLETIME;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_sample_time,
    CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_sample_holdtime,
    IF_RXPOLL_SAMPLETIME, sysctl_rxpoll_sample_holdtime,
    "Q", "input poll sampling time");

#define	IF_RXPOLL_INTERVALTIME_MIN	(1ULL * 1000)		/* 1 us */
#define	IF_RXPOLL_INTERVALTIME		(1ULL * 1000 * 1000)	/* 1 ms */
static u_int64_t if_rxpoll_interval_time = IF_RXPOLL_INTERVALTIME;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_interval_time,
    CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_interval_time,
    IF_RXPOLL_INTERVALTIME, sysctl_rxpoll_interval_time,
    "Q", "input poll interval (time)");

#define	IF_RXPOLL_INTERVAL_PKTS	0	/* 0 (disabled) */
static u_int32_t if_rxpoll_interval_pkts = IF_RXPOLL_INTERVAL_PKTS;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_interval_pkts,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_interval_pkts,
    IF_RXPOLL_INTERVAL_PKTS, "input poll interval (packets)");

#define	IF_RXPOLL_WLOWAT	10
static u_int32_t if_rxpoll_wlowat = IF_RXPOLL_WLOWAT;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_wakeups_lowat,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_wlowat,
    IF_RXPOLL_WLOWAT, sysctl_rxpoll_wlowat,
    "I", "input poll wakeup low watermark");

#define	IF_RXPOLL_WHIWAT	100
static u_int32_t if_rxpoll_whiwat = IF_RXPOLL_WHIWAT;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_wakeups_hiwat,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_whiwat,
    IF_RXPOLL_WHIWAT, sysctl_rxpoll_whiwat,
    "I", "input poll wakeup high watermark");

static u_int32_t if_rxpoll_max = 0;	/* 0 (automatic) */
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_max,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_max, 0,
    "max packets per poll call");

static u_int32_t if_rxpoll = 1;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll, 0,
    sysctl_rxpoll, "I", "enable opportunistic input polling");

u_int32_t if_bw_smoothing_val = 3;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, if_bw_smoothing_val,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_bw_smoothing_val, 0, "");

u_int32_t if_bw_measure_size = 10;
SYSCTL_INT(_net_link_generic_system, OID_AUTO, if_bw_measure_size,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_bw_measure_size, 0, "");

static u_int32_t cur_dlil_input_threads = 0;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, dlil_input_threads,
    CTLFLAG_RD | CTLFLAG_LOCKED, &cur_dlil_input_threads, 0,
    "Current number of DLIL input threads");

#if IFNET_INPUT_SANITY_CHK
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, dlil_input_sanity_check,
    CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_input_sanity_check, 0,
    "Turn on sanity checking in DLIL input");
#endif /* IFNET_INPUT_SANITY_CHK */

static u_int32_t if_flowadv = 1;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, flow_advisory,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_flowadv, 1,
    "enable flow-advisory mechanism");

static u_int32_t if_delaybased_queue = 1;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, delaybased_queue,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_delaybased_queue, 1,
    "enable delay based dynamic queue sizing");

static uint64_t hwcksum_in_invalidated = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_in_invalidated, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_in_invalidated, "inbound packets with invalidated hardware cksum");

uint32_t hwcksum_dbg = 0;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_dbg,
    CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg, 0,
    "enable hardware cksum debugging");

u_int32_t ifnet_start_delayed = 0;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, start_delayed,
    CTLFLAG_RW | CTLFLAG_LOCKED, &ifnet_start_delayed, 0,
    "number of times start was delayed");

u_int32_t ifnet_delay_start_disabled = 0;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, start_delay_disabled,
    CTLFLAG_RW | CTLFLAG_LOCKED, &ifnet_delay_start_disabled, 0,
    "number of times delayed start was disabled");

#define	HWCKSUM_DBG_PARTIAL_FORCED	0x1	/* forced partial checksum */
#define	HWCKSUM_DBG_PARTIAL_RXOFF_ADJ	0x2	/* adjust start offset */
#define	HWCKSUM_DBG_FINALIZE_FORCED	0x10	/* forced finalize */
#define	HWCKSUM_DBG_MASK \
	(HWCKSUM_DBG_PARTIAL_FORCED | HWCKSUM_DBG_PARTIAL_RXOFF_ADJ |	\
	HWCKSUM_DBG_FINALIZE_FORCED)

static uint32_t hwcksum_dbg_mode = 0;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, hwcksum_dbg_mode,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg_mode,
    0, sysctl_hwcksum_dbg_mode, "I", "hardware cksum debugging mode");

static uint64_t hwcksum_dbg_partial_forced = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_partial_forced, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_partial_forced, "packets forced using partial cksum");

static uint64_t hwcksum_dbg_partial_forced_bytes = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_partial_forced_bytes, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_partial_forced_bytes, "bytes forced using partial cksum");

static uint32_t hwcksum_dbg_partial_rxoff_forced = 0;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_partial_rxoff_forced, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    &hwcksum_dbg_partial_rxoff_forced, 0,
    sysctl_hwcksum_dbg_partial_rxoff_forced, "I",
    "forced partial cksum rx offset");

static uint32_t hwcksum_dbg_partial_rxoff_adj = 0;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, hwcksum_dbg_partial_rxoff_adj,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg_partial_rxoff_adj,
    0, sysctl_hwcksum_dbg_partial_rxoff_adj, "I",
    "adjusted partial cksum rx offset");

static uint64_t hwcksum_dbg_verified = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_verified, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_verified, "packets verified for having good checksum");

static uint64_t hwcksum_dbg_bad_cksum = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_bad_cksum, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_bad_cksum, "packets with bad hardware calculated checksum");

static uint64_t hwcksum_dbg_bad_rxoff = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_bad_rxoff, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_bad_rxoff, "packets with invalid rxoff");

static uint64_t hwcksum_dbg_adjusted = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_adjusted, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_adjusted, "packets with rxoff adjusted");

static uint64_t hwcksum_dbg_finalized_hdr = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_finalized_hdr, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_finalized_hdr, "finalized headers");

static uint64_t hwcksum_dbg_finalized_data = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_finalized_data, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_finalized_data, "finalized payloads");

uint32_t hwcksum_tx = 1;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_tx,
    CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_tx, 0,
    "enable transmit hardware checksum offload");

uint32_t hwcksum_rx = 1;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_rx,
    CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_rx, 0,
    "enable receive hardware checksum offload");

SYSCTL_PROC(_net_link_generic_system, OID_AUTO, tx_chain_len_stats,
    CTLFLAG_RD | CTLFLAG_LOCKED, 0, 9,
    sysctl_tx_chain_len_stats, "S", "");

uint32_t tx_chain_len_count = 0;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, tx_chain_len_count,
    CTLFLAG_RW | CTLFLAG_LOCKED, &tx_chain_len_count, 0,
    "");

SYSCTL_NODE(_net_link_generic_system, OID_AUTO, get_ports_used,
    CTLFLAG_RD | CTLFLAG_LOCKED, sysctl_get_ports_used, "");

unsigned int net_rxpoll = 1;
unsigned int net_affinity = 1;
static kern_return_t dlil_affinity_set(struct thread *, u_int32_t);

extern u_int32_t inject_buckets;

static lck_grp_attr_t *dlil_grp_attributes = NULL;
static lck_attr_t *dlil_lck_attributes = NULL;

#define	DLIL_INPUT_CHECK(m, ifp) {					\
	struct ifnet *_rcvif = mbuf_pkthdr_rcvif(m);			\
	if (_rcvif == NULL || (ifp != lo_ifp && _rcvif != ifp) ||	\
	    !(mbuf_flags(m) & MBUF_PKTHDR)) {				\
		panic_plain("%s: invalid mbuf %p\n", __func__, m);	\
		/* NOTREACHED */					\
	}								\
}

#define	DLIL_EWMA(old, new, decay) do {					\
	u_int32_t _avg;							\
	if ((_avg = (old)) > 0)						\
		_avg = (((_avg << (decay)) - _avg) + (new)) >> (decay);	\
	else								\
		_avg = (new);						\
	(old) = _avg;							\
} while (0)
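
/*
 * Worked example: with decay = 2 (the new sample weighted 1/4),
 * old = 100 and new = 20 gives ((100 << 2) - 100 + 20) >> 2 =
 * 320 >> 2 = 80, i.e. avg = (3 * old + new) / 4; a zero average
 * is simply seeded with the new sample.
 */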

#define	MBPS	(1ULL * 1000 * 1000)
#define	GBPS	(MBPS * 1000)

struct rxpoll_time_tbl {
	u_int64_t	speed;		/* downlink speed */
	u_int32_t	plowat;		/* packets low watermark */
	u_int32_t	phiwat;		/* packets high watermark */
	u_int32_t	blowat;		/* bytes low watermark */
	u_int32_t	bhiwat;		/* bytes high watermark */
};

static struct rxpoll_time_tbl rxpoll_tbl[] = {
	{  10 * MBPS,	2,	8,	(1 * 1024),	(6 * 1024)	},
	{ 100 * MBPS,	10,	40,	(4 * 1024),	(64 * 1024)	},
	{   1 * GBPS,	10,	40,	(4 * 1024),	(64 * 1024)	},
	{  10 * GBPS,	10,	40,	(4 * 1024),	(64 * 1024)	},
	{ 100 * GBPS,	10,	40,	(4 * 1024),	(64 * 1024)	},
	{ 0, 0, 0, 0, 0 }
};
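
/*
 * E.g. an interface matched to the 100 Mbps row polls with a low
 * watermark of 10 packets / 4 KB and a high watermark of 40 packets /
 * 64 KB of outstanding inbound traffic; the all-zero entry terminates
 * the table.
 */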

int
proto_hash_value(u_int32_t protocol_family)
{
	/*
	 * dlil_proto_unplumb_all() depends on the mapping between
	 * the hash bucket index and the protocol family defined
	 * here; future changes must be applied there as well.
	 */
	switch (protocol_family) {
	case PF_INET:
		return (0);
	case PF_INET6:
		return (1);
	case PF_VLAN:
		return (2);
	case PF_UNSPEC:
	default:
		return (3);
	}
}
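
/*
 * I.e. the fixed mapping is PF_INET -> 0, PF_INET6 -> 1, PF_VLAN -> 2
 * and everything else (including PF_UNSPEC) -> 3, so PROTO_HASH_SLOTS
 * must be at least 4.
 */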

/*
 * Caller must already be holding ifnet lock.
 */
static struct if_proto *
find_attached_proto(struct ifnet *ifp, u_int32_t protocol_family)
{
	struct if_proto *proto = NULL;
	u_int32_t i = proto_hash_value(protocol_family);

	ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);

	if (ifp->if_proto_hash != NULL)
		proto = SLIST_FIRST(&ifp->if_proto_hash[i]);

	while (proto != NULL && proto->protocol_family != protocol_family)
		proto = SLIST_NEXT(proto, next_hash);

	if (proto != NULL)
		if_proto_ref(proto);

	return (proto);
}

static void
if_proto_ref(struct if_proto *proto)
{
	atomic_add_32(&proto->refcount, 1);
}

extern void if_rtproto_del(struct ifnet *ifp, int protocol);

static void
if_proto_free(struct if_proto *proto)
{
	u_int32_t oldval;
	struct ifnet *ifp = proto->ifp;
	u_int32_t proto_family = proto->protocol_family;
	struct kev_dl_proto_data ev_pr_data;

	oldval = atomic_add_32_ov(&proto->refcount, -1);
	if (oldval > 1)
		return;

	/* No more references on this; the protocol must have been detached */
	VERIFY(proto->detached);

	if (proto->proto_kpi == kProtoKPI_v1) {
		if (proto->kpi.v1.detached)
			proto->kpi.v1.detached(ifp, proto->protocol_family);
	}
	if (proto->proto_kpi == kProtoKPI_v2) {
		if (proto->kpi.v2.detached)
			proto->kpi.v2.detached(ifp, proto->protocol_family);
	}

	/*
	 * Cleanup routes that may still be in the routing table for that
	 * interface/protocol pair.
	 */
	if_rtproto_del(ifp, proto_family);

	/*
	 * The reserved field carries the number of protocols still
	 * attached (subject to change).
	 */
	ifnet_lock_shared(ifp);
	ev_pr_data.proto_family = proto_family;
	ev_pr_data.proto_remaining_count = dlil_ifp_proto_count(ifp);
	ifnet_lock_done(ifp);

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_DETACHED,
	    (struct net_event_data *)&ev_pr_data,
	    sizeof (struct kev_dl_proto_data));

	zfree(dlif_proto_zone, proto);
}

__private_extern__ void
ifnet_lock_assert(struct ifnet *ifp, ifnet_lock_assert_t what)
{
	unsigned int type = 0;
	int ass = 1;

	switch (what) {
	case IFNET_LCK_ASSERT_EXCLUSIVE:
		type = LCK_RW_ASSERT_EXCLUSIVE;
		break;

	case IFNET_LCK_ASSERT_SHARED:
		type = LCK_RW_ASSERT_SHARED;
		break;

	case IFNET_LCK_ASSERT_OWNED:
		type = LCK_RW_ASSERT_HELD;
		break;

	case IFNET_LCK_ASSERT_NOTOWNED:
		/* nothing to do here for RW lock; bypass assert */
		ass = 0;
		break;

	default:
		panic("bad ifnet assert type: %d", what);
		/* NOTREACHED */
	}
	if (ass)
		lck_rw_assert(&ifp->if_lock, type);
}

__private_extern__ void
ifnet_lock_shared(struct ifnet *ifp)
{
	lck_rw_lock_shared(&ifp->if_lock);
}

__private_extern__ void
ifnet_lock_exclusive(struct ifnet *ifp)
{
	lck_rw_lock_exclusive(&ifp->if_lock);
}

__private_extern__ void
ifnet_lock_done(struct ifnet *ifp)
{
	lck_rw_done(&ifp->if_lock);
}

#if INET
__private_extern__ void
if_inetdata_lock_shared(struct ifnet *ifp)
{
	lck_rw_lock_shared(&ifp->if_inetdata_lock);
}

__private_extern__ void
if_inetdata_lock_exclusive(struct ifnet *ifp)
{
	lck_rw_lock_exclusive(&ifp->if_inetdata_lock);
}

__private_extern__ void
if_inetdata_lock_done(struct ifnet *ifp)
{
	lck_rw_done(&ifp->if_inetdata_lock);
}
#endif

#if INET6
__private_extern__ void
if_inet6data_lock_shared(struct ifnet *ifp)
{
	lck_rw_lock_shared(&ifp->if_inet6data_lock);
}

__private_extern__ void
if_inet6data_lock_exclusive(struct ifnet *ifp)
{
	lck_rw_lock_exclusive(&ifp->if_inet6data_lock);
}

__private_extern__ void
if_inet6data_lock_done(struct ifnet *ifp)
{
	lck_rw_done(&ifp->if_inet6data_lock);
}
#endif

__private_extern__ void
ifnet_head_lock_shared(void)
{
	lck_rw_lock_shared(&ifnet_head_lock);
}

__private_extern__ void
ifnet_head_lock_exclusive(void)
{
	lck_rw_lock_exclusive(&ifnet_head_lock);
}

__private_extern__ void
ifnet_head_done(void)
{
	lck_rw_done(&ifnet_head_lock);
}

/*
 * Caller must already be holding ifnet lock.
 */
static int
dlil_ifp_proto_count(struct ifnet *ifp)
{
	int i, count = 0;

	ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);

	if (ifp->if_proto_hash == NULL)
		goto done;

	for (i = 0; i < PROTO_HASH_SLOTS; i++) {
		struct if_proto *proto;
		SLIST_FOREACH(proto, &ifp->if_proto_hash[i], next_hash) {
			count++;
		}
	}
done:
	return (count);
}

__private_extern__ void
dlil_post_msg(struct ifnet *ifp, u_int32_t event_subclass,
    u_int32_t event_code, struct net_event_data *event_data,
    u_int32_t event_data_len)
{
	struct net_event_data ev_data;
	struct kev_msg ev_msg;

	bzero(&ev_msg, sizeof (ev_msg));
	bzero(&ev_data, sizeof (ev_data));
	/*
	 * A net event always starts with a net_event_data structure,
	 * but the caller can generate a simple net event or
	 * provide a longer event structure to post.
	 */
	ev_msg.vendor_code = KEV_VENDOR_APPLE;
	ev_msg.kev_class = KEV_NETWORK_CLASS;
	ev_msg.kev_subclass = event_subclass;
	ev_msg.event_code = event_code;

	if (event_data == NULL) {
		event_data = &ev_data;
		event_data_len = sizeof (struct net_event_data);
	}

	strlcpy(&event_data->if_name[0], ifp->if_name, IFNAMSIZ);
	event_data->if_family = ifp->if_family;
	event_data->if_unit = (u_int32_t)ifp->if_unit;

	ev_msg.dv[0].data_length = event_data_len;
	ev_msg.dv[0].data_ptr = event_data;
	ev_msg.dv[1].data_length = 0;

	dlil_event_internal(ifp, &ev_msg);
}

__private_extern__ int
dlil_alloc_local_stats(struct ifnet *ifp)
{
	int ret = EINVAL;
	void *buf, *base, **pbuf;

	if (ifp == NULL)
		goto end;

	if (ifp->if_tcp_stat == NULL && ifp->if_udp_stat == NULL) {
		/* allocate tcpstat_local structure */
		buf = zalloc(dlif_tcpstat_zone);
		if (buf == NULL) {
			ret = ENOMEM;
			goto end;
		}
		bzero(buf, dlif_tcpstat_bufsize);

		/* Get the 64-bit aligned base address for this object */
		base = (void *)P2ROUNDUP((intptr_t)buf + sizeof (u_int64_t),
		    sizeof (u_int64_t));
		VERIFY(((intptr_t)base + dlif_tcpstat_size) <=
		    ((intptr_t)buf + dlif_tcpstat_bufsize));

		/*
		 * Wind back a pointer size from the aligned base and
		 * save the original address so we can free it later.
		 */
		pbuf = (void **)((intptr_t)base - sizeof (void *));
		*pbuf = buf;
		ifp->if_tcp_stat = base;

		/* allocate udpstat_local structure */
		buf = zalloc(dlif_udpstat_zone);
		if (buf == NULL) {
			ret = ENOMEM;
			goto end;
		}
		bzero(buf, dlif_udpstat_bufsize);

		/* Get the 64-bit aligned base address for this object */
		base = (void *)P2ROUNDUP((intptr_t)buf + sizeof (u_int64_t),
		    sizeof (u_int64_t));
		VERIFY(((intptr_t)base + dlif_udpstat_size) <=
		    ((intptr_t)buf + dlif_udpstat_bufsize));

		/*
		 * Wind back a pointer size from the aligned base and
		 * save the original address so we can free it later.
		 */
		pbuf = (void **)((intptr_t)base - sizeof (void *));
		*pbuf = buf;
		ifp->if_udp_stat = base;

		VERIFY(IS_P2ALIGNED(ifp->if_tcp_stat, sizeof (u_int64_t)) &&
		    IS_P2ALIGNED(ifp->if_udp_stat, sizeof (u_int64_t)));

		ret = 0;
	}

end:
	if (ret != 0) {
		if (ifp->if_tcp_stat != NULL) {
			pbuf = (void **)
			    ((intptr_t)ifp->if_tcp_stat - sizeof (void *));
			zfree(dlif_tcpstat_zone, *pbuf);
			ifp->if_tcp_stat = NULL;
		}
		if (ifp->if_udp_stat != NULL) {
			pbuf = (void **)
			    ((intptr_t)ifp->if_udp_stat - sizeof (void *));
			zfree(dlif_udpstat_zone, *pbuf);
			ifp->if_udp_stat = NULL;
		}
	}

	return (ret);
}
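
/*
 * A sketch of the buffer layout used above (and sized by the
 * dlif_*stat_bufsize computations in dlil_init() below), with
 * sizeof (u_int64_t) == 8:
 *
 *	buf                      base = P2ROUNDUP(buf + 8, 8)
 *	|<------ headroom ----->|<--- 64-bit aligned object --->|
 *	             ^
 *	             +-- *(void **)(base - sizeof (void *)) == buf,
 *	                 which is how the error path above (and any
 *	                 later release) recovers the address to zfree().
 */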
1116
6d2010ae 1117static int
316670eb 1118dlil_create_input_thread(ifnet_t ifp, struct dlil_threading_info *inp)
2d21ac55 1119{
316670eb
A
1120 thread_continue_t func;
1121 u_int32_t limit;
2d21ac55
A
1122 int error;
1123
316670eb
A
1124 /* NULL ifp indicates the main input thread, called at dlil_init time */
1125 if (ifp == NULL) {
1126 func = dlil_main_input_thread_func;
1127 VERIFY(inp == dlil_main_input_thread);
1128 (void) strlcat(inp->input_name,
1129 "main_input", DLIL_THREADNAME_LEN);
1130 } else if (net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) {
1131 func = dlil_rxpoll_input_thread_func;
1132 VERIFY(inp != dlil_main_input_thread);
1133 (void) snprintf(inp->input_name, DLIL_THREADNAME_LEN,
39236c6e 1134 "%s_input_poll", if_name(ifp));
6d2010ae 1135 } else {
316670eb
A
1136 func = dlil_input_thread_func;
1137 VERIFY(inp != dlil_main_input_thread);
1138 (void) snprintf(inp->input_name, DLIL_THREADNAME_LEN,
39236c6e 1139 "%s_input", if_name(ifp));
6d2010ae 1140 }
316670eb 1141 VERIFY(inp->input_thr == THREAD_NULL);
2d21ac55 1142
316670eb
A
1143 inp->lck_grp = lck_grp_alloc_init(inp->input_name, dlil_grp_attributes);
1144 lck_mtx_init(&inp->input_lck, inp->lck_grp, dlil_lck_attributes);
1145
1146 inp->mode = IFNET_MODEL_INPUT_POLL_OFF;
1147 inp->ifp = ifp; /* NULL for main input thread */
1148
1149 net_timerclear(&inp->mode_holdtime);
1150 net_timerclear(&inp->mode_lasttime);
1151 net_timerclear(&inp->sample_holdtime);
1152 net_timerclear(&inp->sample_lasttime);
1153 net_timerclear(&inp->dbg_lasttime);
1154
1155 /*
1156 * For interfaces that support opportunistic polling, set the
1157 * low and high watermarks for outstanding inbound packets/bytes.
1158 * Also define freeze times for transitioning between modes
1159 * and updating the average.
1160 */
1161 if (ifp != NULL && net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) {
1162 limit = MAX(if_rcvq_maxlen, IF_RCVQ_MINLEN);
39236c6e 1163 (void) dlil_rxpoll_set_params(ifp, NULL, FALSE);
316670eb
A
1164 } else {
1165 limit = (u_int32_t)-1;
1166 }
1167
1168 _qinit(&inp->rcvq_pkts, Q_DROPTAIL, limit);
1169 if (inp == dlil_main_input_thread) {
1170 struct dlil_main_threading_info *inpm =
1171 (struct dlil_main_threading_info *)inp;
1172 _qinit(&inpm->lo_rcvq_pkts, Q_DROPTAIL, limit);
1173 }
2d21ac55 1174
316670eb
A
1175 error = kernel_thread_start(func, inp, &inp->input_thr);
1176 if (error == KERN_SUCCESS) {
1177 ml_thread_policy(inp->input_thr, MACHINE_GROUP,
6d2010ae 1178 (MACHINE_NETWORK_GROUP|MACHINE_NETWORK_NETISR));
2d21ac55 1179 /*
316670eb
A
1180 * We create an affinity set so that the matching workloop
1181 * thread or the starter thread (for loopback) can be
1182 * scheduled on the same processor set as the input thread.
2d21ac55 1183 */
316670eb
A
1184 if (net_affinity) {
1185 struct thread *tp = inp->input_thr;
2d21ac55
A
1186 u_int32_t tag;
1187 /*
1188 * Randomize to reduce the probability
1189 * of affinity tag namespace collision.
1190 */
1191 read_random(&tag, sizeof (tag));
1192 if (dlil_affinity_set(tp, tag) == KERN_SUCCESS) {
1193 thread_reference(tp);
316670eb
A
1194 inp->tag = tag;
1195 inp->net_affinity = TRUE;
2d21ac55
A
1196 }
1197 }
316670eb
A
1198 } else if (inp == dlil_main_input_thread) {
1199 panic_plain("%s: couldn't create main input thread", __func__);
1200 /* NOTREACHED */
2d21ac55 1201 } else {
39236c6e
A
1202 panic_plain("%s: couldn't create %s input thread", __func__,
1203 if_name(ifp));
6d2010ae 1204 /* NOTREACHED */
2d21ac55 1205 }
b0d623f7 1206 OSAddAtomic(1, &cur_dlil_input_threads);
316670eb 1207
6d2010ae 1208 return (error);
2d21ac55
A
1209}
1210
316670eb
A
1211static void
1212dlil_terminate_input_thread(struct dlil_threading_info *inp)
1213{
1214 struct ifnet *ifp;
1215
1216 VERIFY(current_thread() == inp->input_thr);
1217 VERIFY(inp != dlil_main_input_thread);
1218
1219 OSAddAtomic(-1, &cur_dlil_input_threads);
1220
1221 lck_mtx_destroy(&inp->input_lck, inp->lck_grp);
1222 lck_grp_free(inp->lck_grp);
1223
1224 inp->input_waiting = 0;
1225 inp->wtot = 0;
1226 bzero(inp->input_name, sizeof (inp->input_name));
1227 ifp = inp->ifp;
1228 inp->ifp = NULL;
1229 VERIFY(qhead(&inp->rcvq_pkts) == NULL && qempty(&inp->rcvq_pkts));
1230 qlimit(&inp->rcvq_pkts) = 0;
1231 bzero(&inp->stats, sizeof (inp->stats));
1232
1233 VERIFY(!inp->net_affinity);
1234 inp->input_thr = THREAD_NULL;
1235 VERIFY(inp->wloop_thr == THREAD_NULL);
1236 VERIFY(inp->poll_thr == THREAD_NULL);
1237 VERIFY(inp->tag == 0);
1238
1239 inp->mode = IFNET_MODEL_INPUT_POLL_OFF;
1240 bzero(&inp->tstats, sizeof (inp->tstats));
1241 bzero(&inp->pstats, sizeof (inp->pstats));
1242 bzero(&inp->sstats, sizeof (inp->sstats));
1243
1244 net_timerclear(&inp->mode_holdtime);
1245 net_timerclear(&inp->mode_lasttime);
1246 net_timerclear(&inp->sample_holdtime);
1247 net_timerclear(&inp->sample_lasttime);
1248 net_timerclear(&inp->dbg_lasttime);
1249
1250#if IFNET_INPUT_SANITY_CHK
1251 inp->input_mbuf_cnt = 0;
1252#endif /* IFNET_INPUT_SANITY_CHK */
1253
1254 if (dlil_verbose) {
39236c6e
A
1255 printf("%s: input thread terminated\n",
1256 if_name(ifp));
316670eb
A
1257 }
1258
1259 /* for the extra refcnt from kernel_thread_start() */
1260 thread_deallocate(current_thread());
1261
1262 /* this is the end */
1263 thread_terminate(current_thread());
1264 /* NOTREACHED */
1265}
1266
2d21ac55
A
1267static kern_return_t
1268dlil_affinity_set(struct thread *tp, u_int32_t tag)
1269{
1270 thread_affinity_policy_data_t policy;
1271
1272 bzero(&policy, sizeof (policy));
1273 policy.affinity_tag = tag;
1274 return (thread_policy_set(tp, THREAD_AFFINITY_POLICY,
1275 (thread_policy_t)&policy, THREAD_AFFINITY_POLICY_COUNT));
1276}
1277
91447636
A
1278void
1279dlil_init(void)
1280{
6d2010ae
A
1281 thread_t thread = THREAD_NULL;
1282
1283 /*
1284 * The following fields must be 64-bit aligned for atomic operations.
1285 */
1286 IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets);
1287 IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors)
1288 IF_DATA_REQUIRE_ALIGNED_64(ifi_opackets);
1289 IF_DATA_REQUIRE_ALIGNED_64(ifi_oerrors);
1290 IF_DATA_REQUIRE_ALIGNED_64(ifi_collisions);
1291 IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes);
1292 IF_DATA_REQUIRE_ALIGNED_64(ifi_obytes);
1293 IF_DATA_REQUIRE_ALIGNED_64(ifi_imcasts);
1294 IF_DATA_REQUIRE_ALIGNED_64(ifi_omcasts);
1295 IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops);
1296 IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto);
316670eb 1297 IF_DATA_REQUIRE_ALIGNED_64(ifi_alignerrs);
39236c6e
A
1298 IF_DATA_REQUIRE_ALIGNED_64(ifi_dt_bytes);
1299 IF_DATA_REQUIRE_ALIGNED_64(ifi_fpackets);
1300 IF_DATA_REQUIRE_ALIGNED_64(ifi_fbytes);
6d2010ae
A
1301
1302 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets);
1303 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors)
1304 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_opackets);
1305 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_oerrors);
1306 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_collisions);
1307 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes);
1308 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_obytes);
1309 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_imcasts);
1310 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_omcasts);
1311 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops);
1312 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto);
316670eb 1313 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_alignerrs);
39236c6e
A
1314 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_dt_bytes);
1315 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_fpackets);
1316 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_fbytes);
6d2010ae
A
1317
1318 /*
1319 * These IF_HWASSIST_ flags must be equal to their IFNET_* counterparts.
1320 */
1321 _CASSERT(IF_HWASSIST_CSUM_IP == IFNET_CSUM_IP);
1322 _CASSERT(IF_HWASSIST_CSUM_TCP == IFNET_CSUM_TCP);
1323 _CASSERT(IF_HWASSIST_CSUM_UDP == IFNET_CSUM_UDP);
1324 _CASSERT(IF_HWASSIST_CSUM_IP_FRAGS == IFNET_CSUM_FRAGMENT);
1325 _CASSERT(IF_HWASSIST_CSUM_FRAGMENT == IFNET_IP_FRAGMENT);
39236c6e
A
1326 _CASSERT(IF_HWASSIST_CSUM_TCPIPV6 == IFNET_CSUM_TCPIPV6);
1327 _CASSERT(IF_HWASSIST_CSUM_UDPIPV6 == IFNET_CSUM_UDPIPV6);
1328 _CASSERT(IF_HWASSIST_CSUM_FRAGMENT_IPV6 == IFNET_IPV6_FRAGMENT);
1329 _CASSERT(IF_HWASSIST_CSUM_PARTIAL == IFNET_CSUM_PARTIAL);
6d2010ae
A
1330 _CASSERT(IF_HWASSIST_VLAN_TAGGING == IFNET_VLAN_TAGGING);
1331 _CASSERT(IF_HWASSIST_VLAN_MTU == IFNET_VLAN_MTU);
1332 _CASSERT(IF_HWASSIST_TSO_V4 == IFNET_TSO_IPV4);
1333 _CASSERT(IF_HWASSIST_TSO_V6 == IFNET_TSO_IPV6);
1334
39236c6e
A
1335 /*
1336 * ... as well as the mbuf checksum flags counterparts.
1337 */
1338 _CASSERT(CSUM_IP == IF_HWASSIST_CSUM_IP);
1339 _CASSERT(CSUM_TCP == IF_HWASSIST_CSUM_TCP);
1340 _CASSERT(CSUM_UDP == IF_HWASSIST_CSUM_UDP);
1341 _CASSERT(CSUM_IP_FRAGS == IF_HWASSIST_CSUM_IP_FRAGS);
1342 _CASSERT(CSUM_FRAGMENT == IF_HWASSIST_CSUM_FRAGMENT);
1343 _CASSERT(CSUM_TCPIPV6 == IF_HWASSIST_CSUM_TCPIPV6);
1344 _CASSERT(CSUM_UDPIPV6 == IF_HWASSIST_CSUM_UDPIPV6);
1345 _CASSERT(CSUM_FRAGMENT_IPV6 == IF_HWASSIST_CSUM_FRAGMENT_IPV6);
1346 _CASSERT(CSUM_PARTIAL == IF_HWASSIST_CSUM_PARTIAL);
1347 _CASSERT(CSUM_VLAN_TAG_VALID == IF_HWASSIST_VLAN_TAGGING);
1348
6d2010ae
A
1349 /*
1350 * Make sure we have at least IF_LLREACH_MAXLEN in the llreach info.
1351 */
1352 _CASSERT(IF_LLREACH_MAXLEN <= IF_LLREACHINFO_ADDRLEN);
316670eb 1353 _CASSERT(IFNET_LLREACHINFO_ADDRLEN == IF_LLREACHINFO_ADDRLEN);
6d2010ae 1354
39236c6e
A
1355 _CASSERT(IFRLOGF_DLIL == IFNET_LOGF_DLIL);
1356 _CASSERT(IFRLOGF_FAMILY == IFNET_LOGF_FAMILY);
1357 _CASSERT(IFRLOGF_DRIVER == IFNET_LOGF_DRIVER);
1358 _CASSERT(IFRLOGF_FIRMWARE == IFNET_LOGF_FIRMWARE);
1359
1360 _CASSERT(IFRLOGCAT_CONNECTIVITY == IFNET_LOGCAT_CONNECTIVITY);
1361 _CASSERT(IFRLOGCAT_QUALITY == IFNET_LOGCAT_QUALITY);
1362 _CASSERT(IFRLOGCAT_PERFORMANCE == IFNET_LOGCAT_PERFORMANCE);
1363
1364 _CASSERT(IFRTYPE_FAMILY_ANY == IFNET_FAMILY_ANY);
1365 _CASSERT(IFRTYPE_FAMILY_LOOPBACK == IFNET_FAMILY_LOOPBACK);
1366 _CASSERT(IFRTYPE_FAMILY_ETHERNET == IFNET_FAMILY_ETHERNET);
1367 _CASSERT(IFRTYPE_FAMILY_SLIP == IFNET_FAMILY_SLIP);
1368 _CASSERT(IFRTYPE_FAMILY_TUN == IFNET_FAMILY_TUN);
1369 _CASSERT(IFRTYPE_FAMILY_VLAN == IFNET_FAMILY_VLAN);
1370 _CASSERT(IFRTYPE_FAMILY_PPP == IFNET_FAMILY_PPP);
1371 _CASSERT(IFRTYPE_FAMILY_PVC == IFNET_FAMILY_PVC);
1372 _CASSERT(IFRTYPE_FAMILY_DISC == IFNET_FAMILY_DISC);
1373 _CASSERT(IFRTYPE_FAMILY_MDECAP == IFNET_FAMILY_MDECAP);
1374 _CASSERT(IFRTYPE_FAMILY_GIF == IFNET_FAMILY_GIF);
1375 _CASSERT(IFRTYPE_FAMILY_FAITH == IFNET_FAMILY_FAITH);
1376 _CASSERT(IFRTYPE_FAMILY_STF == IFNET_FAMILY_STF);
1377 _CASSERT(IFRTYPE_FAMILY_FIREWIRE == IFNET_FAMILY_FIREWIRE);
1378 _CASSERT(IFRTYPE_FAMILY_BOND == IFNET_FAMILY_BOND);
1379 _CASSERT(IFRTYPE_FAMILY_CELLULAR == IFNET_FAMILY_CELLULAR);
1380
1381 _CASSERT(IFRTYPE_SUBFAMILY_ANY == IFNET_SUBFAMILY_ANY);
1382 _CASSERT(IFRTYPE_SUBFAMILY_USB == IFNET_SUBFAMILY_USB);
1383 _CASSERT(IFRTYPE_SUBFAMILY_BLUETOOTH == IFNET_SUBFAMILY_BLUETOOTH);
1384 _CASSERT(IFRTYPE_SUBFAMILY_WIFI == IFNET_SUBFAMILY_WIFI);
1385 _CASSERT(IFRTYPE_SUBFAMILY_THUNDERBOLT == IFNET_SUBFAMILY_THUNDERBOLT);
fe8ab488 1386 _CASSERT(IFRTYPE_SUBFAMILY_RESERVED == IFNET_SUBFAMILY_RESERVED);
39236c6e
A
1387
1388 _CASSERT(DLIL_MODIDLEN == IFNET_MODIDLEN);
1389 _CASSERT(DLIL_MODARGLEN == IFNET_MODARGLEN);
1390
6d2010ae
A
1391 PE_parse_boot_argn("net_affinity", &net_affinity,
1392 sizeof (net_affinity));
b0d623f7 1393
316670eb
A
1394 PE_parse_boot_argn("net_rxpoll", &net_rxpoll, sizeof (net_rxpoll));
1395
d1ecb069 1396 PE_parse_boot_argn("net_rtref", &net_rtref, sizeof (net_rtref));
6d2010ae
A
1397
1398 PE_parse_boot_argn("ifnet_debug", &ifnet_debug, sizeof (ifnet_debug));
1399
1400 dlif_size = (ifnet_debug == 0) ? sizeof (struct dlil_ifnet) :
1401 sizeof (struct dlil_ifnet_dbg);
1402 /* Enforce 64-bit alignment for dlil_ifnet structure */
1403 dlif_bufsize = dlif_size + sizeof (void *) + sizeof (u_int64_t);
1404 dlif_bufsize = P2ROUNDUP(dlif_bufsize, sizeof (u_int64_t));
1405 dlif_zone = zinit(dlif_bufsize, DLIF_ZONE_MAX * dlif_bufsize,
1406 0, DLIF_ZONE_NAME);
1407 if (dlif_zone == NULL) {
316670eb
A
1408 panic_plain("%s: failed allocating %s", __func__,
1409 DLIF_ZONE_NAME);
6d2010ae
A
1410 /* NOTREACHED */
1411 }
1412 zone_change(dlif_zone, Z_EXPAND, TRUE);
1413 zone_change(dlif_zone, Z_CALLERACCT, FALSE);
1414
1415 dlif_filt_size = sizeof (struct ifnet_filter);
1416 dlif_filt_zone = zinit(dlif_filt_size,
1417 DLIF_FILT_ZONE_MAX * dlif_filt_size, 0, DLIF_FILT_ZONE_NAME);
1418 if (dlif_filt_zone == NULL) {
316670eb 1419 panic_plain("%s: failed allocating %s", __func__,
6d2010ae
A
1420 DLIF_FILT_ZONE_NAME);
1421 /* NOTREACHED */
1422 }
1423 zone_change(dlif_filt_zone, Z_EXPAND, TRUE);
1424 zone_change(dlif_filt_zone, Z_CALLERACCT, FALSE);
1425
6d2010ae
A
1426 dlif_phash_size = sizeof (struct proto_hash_entry) * PROTO_HASH_SLOTS;
1427 dlif_phash_zone = zinit(dlif_phash_size,
1428 DLIF_PHASH_ZONE_MAX * dlif_phash_size, 0, DLIF_PHASH_ZONE_NAME);
1429 if (dlif_phash_zone == NULL) {
316670eb 1430 panic_plain("%s: failed allocating %s", __func__,
6d2010ae
A
1431 DLIF_PHASH_ZONE_NAME);
1432 /* NOTREACHED */
1433 }
1434 zone_change(dlif_phash_zone, Z_EXPAND, TRUE);
1435 zone_change(dlif_phash_zone, Z_CALLERACCT, FALSE);
1436
1437 dlif_proto_size = sizeof (struct if_proto);
1438 dlif_proto_zone = zinit(dlif_proto_size,
1439 DLIF_PROTO_ZONE_MAX * dlif_proto_size, 0, DLIF_PROTO_ZONE_NAME);
1440 if (dlif_proto_zone == NULL) {
316670eb 1441 panic_plain("%s: failed allocating %s", __func__,
6d2010ae
A
1442 DLIF_PROTO_ZONE_NAME);
1443 /* NOTREACHED */
1444 }
1445 zone_change(dlif_proto_zone, Z_EXPAND, TRUE);
1446 zone_change(dlif_proto_zone, Z_CALLERACCT, FALSE);
1447
316670eb
A
1448 dlif_tcpstat_size = sizeof (struct tcpstat_local);
1449 /* Enforce 64-bit alignment for tcpstat_local structure */
1450 dlif_tcpstat_bufsize =
1451 dlif_tcpstat_size + sizeof (void *) + sizeof (u_int64_t);
1452 dlif_tcpstat_bufsize =
1453 P2ROUNDUP(dlif_tcpstat_bufsize, sizeof (u_int64_t));
1454 dlif_tcpstat_zone = zinit(dlif_tcpstat_bufsize,
1455 DLIF_TCPSTAT_ZONE_MAX * dlif_tcpstat_bufsize, 0,
1456 DLIF_TCPSTAT_ZONE_NAME);
1457 if (dlif_tcpstat_zone == NULL) {
1458 panic_plain("%s: failed allocating %s", __func__,
1459 DLIF_TCPSTAT_ZONE_NAME);
1460 /* NOTREACHED */
1461 }
1462 zone_change(dlif_tcpstat_zone, Z_EXPAND, TRUE);
1463 zone_change(dlif_tcpstat_zone, Z_CALLERACCT, FALSE);
1464
1465 dlif_udpstat_size = sizeof (struct udpstat_local);
1466 /* Enforce 64-bit alignment for udpstat_local structure */
1467 dlif_udpstat_bufsize =
1468 dlif_udpstat_size + sizeof (void *) + sizeof (u_int64_t);
1469 dlif_udpstat_bufsize =
1470 P2ROUNDUP(dlif_udpstat_bufsize, sizeof (u_int64_t));
1471 dlif_udpstat_zone = zinit(dlif_udpstat_bufsize,
1472 DLIF_TCPSTAT_ZONE_MAX * dlif_udpstat_bufsize, 0,
1473 DLIF_UDPSTAT_ZONE_NAME);
1474 if (dlif_udpstat_zone == NULL) {
1475 panic_plain("%s: failed allocating %s", __func__,
1476 DLIF_UDPSTAT_ZONE_NAME);
1477 /* NOTREACHED */
1478 }
1479 zone_change(dlif_udpstat_zone, Z_EXPAND, TRUE);
1480 zone_change(dlif_udpstat_zone, Z_CALLERACCT, FALSE);
1481
6d2010ae 1482 ifnet_llreach_init();
d1ecb069 1483
91447636 1484 TAILQ_INIT(&dlil_ifnet_head);
91447636 1485 TAILQ_INIT(&ifnet_head);
6d2010ae
A
1486 TAILQ_INIT(&ifnet_detaching_head);
1487
91447636 1488 /* Setup the lock groups we will use */
2d21ac55 1489 dlil_grp_attributes = lck_grp_attr_alloc_init();
91447636 1490
316670eb 1491 dlil_lock_group = lck_grp_alloc_init("DLIL internal locks",
6d2010ae
A
1492 dlil_grp_attributes);
1493 ifnet_lock_group = lck_grp_alloc_init("ifnet locks",
1494 dlil_grp_attributes);
1495 ifnet_head_lock_group = lck_grp_alloc_init("ifnet head lock",
1496 dlil_grp_attributes);
316670eb
A
1497 ifnet_rcv_lock_group = lck_grp_alloc_init("ifnet rcv locks",
1498 dlil_grp_attributes);
1499 ifnet_snd_lock_group = lck_grp_alloc_init("ifnet snd locks",
6d2010ae
A
1500 dlil_grp_attributes);
1501
91447636 1502 /* Setup the lock attributes we will use */
2d21ac55 1503 dlil_lck_attributes = lck_attr_alloc_init();
6d2010ae 1504
91447636 1505 ifnet_lock_attr = lck_attr_alloc_init();
6d2010ae
A
1506
1507 lck_rw_init(&ifnet_head_lock, ifnet_head_lock_group,
1508 dlil_lck_attributes);
1509 lck_mtx_init(&dlil_ifnet_lock, dlil_lock_group, dlil_lck_attributes);
1510
39236c6e
A
1511 /* Setup interface flow control related items */
1512 lck_mtx_init(&ifnet_fc_lock, dlil_lock_group, dlil_lck_attributes);
316670eb 1513
39236c6e
A
1514 ifnet_fc_zone_size = sizeof (struct ifnet_fc_entry);
1515 ifnet_fc_zone = zinit(ifnet_fc_zone_size,
1516 IFNET_FC_ZONE_MAX * ifnet_fc_zone_size, 0, IFNET_FC_ZONE_NAME);
1517 if (ifnet_fc_zone == NULL) {
1518 panic_plain("%s: failed allocating %s", __func__,
1519 IFNET_FC_ZONE_NAME);
1520 /* NOTREACHED */
1521 }
1522 zone_change(ifnet_fc_zone, Z_EXPAND, TRUE);
1523 zone_change(ifnet_fc_zone, Z_CALLERACCT, FALSE);
6d2010ae 1524
39236c6e 1525 /* Initialize interface address subsystem */
6d2010ae 1526 ifa_init();
39236c6e
A
1527
1528#if PF
1529 /* Initialize the packet filter */
1530 pfinit();
1531#endif /* PF */
1532
1533 /* Initialize queue algorithms */
1534 classq_init();
1535
1536 /* Initialize packet schedulers */
1537 pktsched_init();
1538
1539 /* Initialize flow advisory subsystem */
1540 flowadv_init();
1541
1542 /* Initialize the pktap virtual interface */
1543 pktap_init();
1544
1545#if DEBUG
1546 /* Run self-tests */
1547 dlil_verify_sum16();
1548#endif /* DEBUG */
1549
91447636 1550 /*
316670eb
A
1551 * Create and start up the main DLIL input thread and the interface
 1552	 * detacher thread once everything is initialized.
91447636 1553 */
316670eb 1554 dlil_create_input_thread(NULL, dlil_main_input_thread);
2d21ac55 1555
316670eb
A
1556 if (kernel_thread_start(ifnet_detacher_thread_func,
1557 NULL, &thread) != KERN_SUCCESS) {
1558 panic_plain("%s: couldn't create detacher thread", __func__);
6d2010ae
A
1559 /* NOTREACHED */
1560 }
b0d623f7 1561 thread_deallocate(thread);
91447636 1562}
1c79356b 1563
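The buffer sizing above over-allocates by sizeof (void *) + sizeof (u_int64_t) and then rounds up to a multiple of 8, so the start of the embedded structure can always be slid to an 8-byte boundary with room left to stash the original zone pointer. A minimal user-space sketch of the same arithmetic (the demo size is made up; P2ROUNDUP here mirrors the kernel macro):

#include <stdio.h>
#include <stdint.h>

/* Round x up to the next multiple of align (a power of 2), as the
 * kernel's P2ROUNDUP() does for dlif_bufsize above. */
#define P2ROUNDUP(x, align) \
	(-(-(uintptr_t)(x) & -(uintptr_t)(align)))

int
main(void)
{
	size_t dlif_size = 1000;	/* hypothetical structure size */
	size_t bufsize;

	bufsize = dlif_size + sizeof (void *) + sizeof (uint64_t);
	bufsize = P2ROUNDUP(bufsize, sizeof (uint64_t));
	printf("element %zu -> zone buffer %zu\n", dlif_size, bufsize);
	return (0);
}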
6d2010ae
A
1564static void
1565if_flt_monitor_busy(struct ifnet *ifp)
1566{
1567 lck_mtx_assert(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
1568
1569 ++ifp->if_flt_busy;
1570 VERIFY(ifp->if_flt_busy != 0);
1571}
1572
1573static void
1574if_flt_monitor_unbusy(struct ifnet *ifp)
1575{
1576 if_flt_monitor_leave(ifp);
1577}
1578
1579static void
1580if_flt_monitor_enter(struct ifnet *ifp)
1581{
1582 lck_mtx_assert(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
1583
1584 while (ifp->if_flt_busy) {
1585 ++ifp->if_flt_waiters;
1586 (void) msleep(&ifp->if_flt_head, &ifp->if_flt_lock,
1587 (PZERO - 1), "if_flt_monitor", NULL);
1588 }
1589 if_flt_monitor_busy(ifp);
1590}
1591
1592static void
1593if_flt_monitor_leave(struct ifnet *ifp)
1594{
1595 lck_mtx_assert(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
1596
1597 VERIFY(ifp->if_flt_busy != 0);
1598 --ifp->if_flt_busy;
1599
1600 if (ifp->if_flt_busy == 0 && ifp->if_flt_waiters > 0) {
1601 ifp->if_flt_waiters = 0;
1602 wakeup(&ifp->if_flt_head);
1603 }
1604}
1605
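The four helpers above form a small busy/waiter monitor around the interface filter list: a walker marks the list busy (if_flt_monitor_busy), may drop if_flt_lock while calling out to a filter, and the last one out (if_flt_monitor_leave) wakes any modifier blocked in if_flt_monitor_enter. A self-contained pthreads sketch of the same pattern, with illustrative names (none of these are kernel API):

#include <pthread.h>

struct flt_monitor {
	pthread_mutex_t lock;
	pthread_cond_t	cv;
	unsigned	busy;		/* active list walkers */
	unsigned	waiters;	/* threads blocked in enter */
};

/* Walker: mark the list busy; the caller may then drop the lock. */
static void
monitor_busy(struct flt_monitor *m)	/* called with lock held */
{
	m->busy++;
}

/* Modifier: wait until no walker is active, then become busy. */
static void
monitor_enter(struct flt_monitor *m)	/* called with lock held */
{
	while (m->busy != 0) {
		m->waiters++;
		pthread_cond_wait(&m->cv, &m->lock);
	}
	m->busy++;
}

static void
monitor_leave(struct flt_monitor *m)	/* called with lock held */
{
	if (--m->busy == 0 && m->waiters != 0) {
		m->waiters = 0;
		pthread_cond_broadcast(&m->cv);
	}
}

The kernel variant resets waiters to 0 before the wakeup because wakeup() rouses every sleeper on the channel; the pthreads sketch gets the same effect from pthread_cond_broadcast().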
2d21ac55 1606__private_extern__ int
6d2010ae 1607dlil_attach_filter(struct ifnet *ifp, const struct iff_filter *if_filter,
39236c6e 1608 interface_filter_t *filter_ref, u_int32_t flags)
6d2010ae
A
1609{
1610 int retval = 0;
1611 struct ifnet_filter *filter = NULL;
9bccf70c 1612
6d2010ae
A
1613 ifnet_head_lock_shared();
1614 /* Check that the interface is in the global list */
1615 if (!ifnet_lookup(ifp)) {
1616 retval = ENXIO;
1617 goto done;
1618 }
1619
1620 filter = zalloc(dlif_filt_zone);
1621 if (filter == NULL) {
1622 retval = ENOMEM;
1623 goto done;
1624 }
1625 bzero(filter, dlif_filt_size);
1626
1627 /* refcnt held above during lookup */
39236c6e 1628 filter->filt_flags = flags;
91447636
A
1629 filter->filt_ifp = ifp;
1630 filter->filt_cookie = if_filter->iff_cookie;
1631 filter->filt_name = if_filter->iff_name;
1632 filter->filt_protocol = if_filter->iff_protocol;
1633 filter->filt_input = if_filter->iff_input;
1634 filter->filt_output = if_filter->iff_output;
1635 filter->filt_event = if_filter->iff_event;
1636 filter->filt_ioctl = if_filter->iff_ioctl;
1637 filter->filt_detached = if_filter->iff_detached;
6d2010ae
A
1638
1639 lck_mtx_lock(&ifp->if_flt_lock);
1640 if_flt_monitor_enter(ifp);
1641
1642 lck_mtx_assert(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
91447636 1643 TAILQ_INSERT_TAIL(&ifp->if_flt_head, filter, filt_next);
6d2010ae
A
1644
1645 if_flt_monitor_leave(ifp);
1646 lck_mtx_unlock(&ifp->if_flt_lock);
1647
91447636 1648 *filter_ref = filter;
b0d623f7
A
1649
1650 /*
1651 * Bump filter count and route_generation ID to let TCP
1652 * know it shouldn't do TSO on this connection
1653 */
39236c6e
A
1654 if ((filter->filt_flags & DLIL_IFF_TSO) == 0) {
1655 OSAddAtomic(1, &dlil_filter_disable_tso_count);
b0d623f7 1656 routegenid_update();
39236c6e 1657 }
6d2010ae 1658 if (dlil_verbose) {
39236c6e
A
1659 printf("%s: %s filter attached\n", if_name(ifp),
1660 if_filter->iff_name);
6d2010ae
A
1661 }
 1662 done:
1663 ifnet_head_done();
1664 if (retval != 0 && ifp != NULL) {
39236c6e
A
1665 DLIL_PRINTF("%s: failed to attach %s (err=%d)\n",
1666 if_name(ifp), if_filter->iff_name, retval);
6d2010ae
A
1667 }
1668 if (retval != 0 && filter != NULL)
1669 zfree(dlif_filt_zone, filter);
1670
1671 return (retval);
1c79356b
A
1672}
1673
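dlil_attach_filter() is the backend of the interface filter KPI in net/kpi_interfacefilter.h; a kext normally reaches it through iflt_attach(). A hedged sketch of a minimal pass-through filter (the callback body, cookie, and name are illustrative):

#include <sys/systm.h>
#include <net/kpi_interfacefilter.h>

static interface_filter_t my_filt_ref;	/* hypothetical */

/* Pass-through input callback: returning 0 lets the packet continue. */
static errno_t
my_iff_input(void *cookie, ifnet_t ifp, protocol_family_t protocol,
    mbuf_t *data, char **frame_ptr)
{
#pragma unused(cookie, ifp, protocol, data, frame_ptr)
	return (0);
}

static errno_t
my_filter_attach(ifnet_t ifp)
{
	struct iff_filter flt;

	bzero(&flt, sizeof (flt));
	flt.iff_cookie = NULL;
	flt.iff_name = "com.example.myfilter";	/* hypothetical */
	flt.iff_protocol = 0;			/* 0 = all protocols */
	flt.iff_input = my_iff_input;
	return (iflt_attach(ifp, &flt, &my_filt_ref));
}

The matching teardown is iflt_detach(my_filt_ref), which funnels into dlil_detach_filter() below. Note that a filter which does not set DLIL_IFF_TSO bumps dlil_filter_disable_tso_count on attach, disabling TSO for as long as it stays attached.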
91447636 1674static int
6d2010ae 1675dlil_detach_filter_internal(interface_filter_t filter, int detached)
1c79356b 1676{
91447636 1677 int retval = 0;
6d2010ae 1678
3a60a9f5 1679 if (detached == 0) {
6d2010ae
A
1680 ifnet_t ifp = NULL;
1681
3a60a9f5
A
1682 ifnet_head_lock_shared();
1683 TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
6d2010ae
A
1684 interface_filter_t entry = NULL;
1685
1686 lck_mtx_lock(&ifp->if_flt_lock);
3a60a9f5 1687 TAILQ_FOREACH(entry, &ifp->if_flt_head, filt_next) {
6d2010ae
A
1688 if (entry != filter || entry->filt_skip)
1689 continue;
1690 /*
1691 * We've found a match; since it's possible
1692 * that the thread gets blocked in the monitor,
1693 * we do the lock dance. Interface should
1694 * not be detached since we still have a use
1695 * count held during filter attach.
1696 */
1697 entry->filt_skip = 1; /* skip input/output */
1698 lck_mtx_unlock(&ifp->if_flt_lock);
1699 ifnet_head_done();
1700
1701 lck_mtx_lock(&ifp->if_flt_lock);
1702 if_flt_monitor_enter(ifp);
1703 lck_mtx_assert(&ifp->if_flt_lock,
1704 LCK_MTX_ASSERT_OWNED);
1705
1706 /* Remove the filter from the list */
1707 TAILQ_REMOVE(&ifp->if_flt_head, filter,
1708 filt_next);
1709
1710 if_flt_monitor_leave(ifp);
1711 lck_mtx_unlock(&ifp->if_flt_lock);
1712 if (dlil_verbose) {
39236c6e
A
1713 printf("%s: %s filter detached\n",
1714 if_name(ifp), filter->filt_name);
6d2010ae
A
1715 }
1716 goto destroy;
3a60a9f5 1717 }
6d2010ae 1718 lck_mtx_unlock(&ifp->if_flt_lock);
3a60a9f5
A
1719 }
1720 ifnet_head_done();
6d2010ae
A
1721
1722 /* filter parameter is not a valid filter ref */
1723 retval = EINVAL;
1724 goto done;
3a60a9f5 1725 }
6d2010ae
A
1726
1727 if (dlil_verbose)
1728 printf("%s filter detached\n", filter->filt_name);
1729
 1730 destroy:
1731
1732 /* Call the detached function if there is one */
91447636
A
1733 if (filter->filt_detached)
1734 filter->filt_detached(filter->filt_cookie, filter->filt_ifp);
9bccf70c 1735
b0d623f7
A
 1736	/*
 1737	 * Decrease filter count and route_generation ID to let TCP
 1738	 * know it should reevaluate doing TSO or not; read filt_flags
 1739	 * before the filter is freed below (use-after-free otherwise.)
 1740	 */
39236c6e
A
 1741	if ((filter->filt_flags & DLIL_IFF_TSO) == 0) {
 1742		OSAddAtomic(-1, &dlil_filter_disable_tso_count);
b0d623f7 1743		routegenid_update();
39236c6e 1744	}
3a60a9f5 1745
6d2010ae
A
 1746	zfree(dlif_filt_zone, filter);		/* free the filter */
6d2010ae
A
 1747 done:
1748 if (retval != 0) {
1749 DLIL_PRINTF("failed to detach %s filter (err=%d)\n",
1750 filter->filt_name, retval);
1751 }
1752 return (retval);
1c79356b
A
1753}
1754
2d21ac55 1755__private_extern__ void
91447636
A
1756dlil_detach_filter(interface_filter_t filter)
1757{
3a60a9f5
A
1758 if (filter == NULL)
1759 return;
91447636
A
1760 dlil_detach_filter_internal(filter, 0);
1761}
1c79356b 1762
316670eb
A
1763/*
1764 * Main input thread:
1765 *
1766 * a) handles all inbound packets for lo0
1767 * b) handles all inbound packets for interfaces with no dedicated
1768 * input thread (e.g. anything but Ethernet/PDP or those that support
1769 * opportunistic polling.)
1770 * c) protocol registrations
1771 * d) packet injections
1772 */
91447636 1773static void
316670eb 1774dlil_main_input_thread_func(void *v, wait_result_t w)
91447636 1775{
316670eb
A
1776#pragma unused(w)
1777 struct dlil_main_threading_info *inpm = v;
1778 struct dlil_threading_info *inp = v;
1779
1780 VERIFY(inp == dlil_main_input_thread);
1781 VERIFY(inp->ifp == NULL);
1782 VERIFY(inp->mode == IFNET_MODEL_INPUT_POLL_OFF);
1783
91447636 1784 while (1) {
2d21ac55 1785 struct mbuf *m = NULL, *m_loop = NULL;
316670eb
A
1786 u_int32_t m_cnt, m_cnt_loop;
1787 boolean_t proto_req;
6d2010ae 1788
316670eb 1789 lck_mtx_lock_spin(&inp->input_lck);
6d2010ae 1790
2d21ac55 1791 /* Wait until there is work to be done */
316670eb
A
1792 while (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) {
1793 inp->input_waiting &= ~DLIL_INPUT_RUNNING;
1794 (void) msleep(&inp->input_waiting, &inp->input_lck,
1795 (PZERO - 1) | PSPIN, inp->input_name, NULL);
2d21ac55
A
1796 }
1797
316670eb
A
1798 inp->input_waiting |= DLIL_INPUT_RUNNING;
1799 inp->input_waiting &= ~DLIL_INPUT_WAITING;
2d21ac55 1800
316670eb
A
1801 /* Main input thread cannot be terminated */
1802 VERIFY(!(inp->input_waiting & DLIL_INPUT_TERMINATE));
2d21ac55 1803
316670eb
A
1804 proto_req = (inp->input_waiting &
1805 (DLIL_PROTO_WAITING | DLIL_PROTO_REGISTER));
6d2010ae 1806
316670eb
A
1807 /* Packets for non-dedicated interfaces other than lo0 */
1808 m_cnt = qlen(&inp->rcvq_pkts);
1809 m = _getq_all(&inp->rcvq_pkts);
6d2010ae 1810
39236c6e 1811 /* Packets exclusive to lo0 */
316670eb
A
1812 m_cnt_loop = qlen(&inpm->lo_rcvq_pkts);
1813 m_loop = _getq_all(&inpm->lo_rcvq_pkts);
6d2010ae 1814
316670eb 1815 inp->wtot = 0;
6d2010ae 1816
316670eb 1817 lck_mtx_unlock(&inp->input_lck);
6d2010ae 1818
316670eb
A
1819 /*
1820 * NOTE warning %%% attention !!!!
1821 * We should think about putting some thread starvation
1822 * safeguards if we deal with long chains of packets.
1823 */
1824 if (m_loop != NULL)
1825 dlil_input_packet_list_extended(lo_ifp, m_loop,
1826 m_cnt_loop, inp->mode);
6d2010ae 1827
316670eb
A
1828 if (m != NULL)
1829 dlil_input_packet_list_extended(NULL, m,
1830 m_cnt, inp->mode);
1831
1832 if (proto_req)
1833 proto_input_run();
1834 }
1835
1836 /* NOTREACHED */
1837 VERIFY(0); /* we should never get here */
1838}
1839
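The sleep/wakeup handshake in the loop above has its producer half in ifnet_input_common(), several hundred lines below; the two halves are collected here for reference (a condensed restatement of existing code, not new logic):

/* Producer (ifnet_input_common), input_lck held: */
inp->input_waiting |= DLIL_INPUT_WAITING;
if (!(inp->input_waiting & DLIL_INPUT_RUNNING)) {
	inp->wtot++;			/* wakeups feed rxpoll_wavg */
	wakeup_one((caddr_t)&inp->input_waiting);
}

/* Consumer (input thread), input_lck held: */
while (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) {
	inp->input_waiting &= ~DLIL_INPUT_RUNNING;
	(void) msleep(&inp->input_waiting, &inp->input_lck,
	    (PZERO - 1) | PSPIN, inp->input_name, NULL);
}
inp->input_waiting |= DLIL_INPUT_RUNNING;
inp->input_waiting &= ~DLIL_INPUT_WAITING;

Clearing DLIL_INPUT_RUNNING only just before each msleep() means a producer that races with the drain still leaves the WAITING bit set, so the thread loops once more instead of going to sleep on pending work.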
1840/*
1841 * Input thread for interfaces with legacy input model.
1842 */
1843static void
1844dlil_input_thread_func(void *v, wait_result_t w)
1845{
1846#pragma unused(w)
1847 struct dlil_threading_info *inp = v;
1848 struct ifnet *ifp = inp->ifp;
1849
1850 VERIFY(inp != dlil_main_input_thread);
1851 VERIFY(ifp != NULL);
1852 VERIFY(!(ifp->if_eflags & IFEF_RXPOLL) || !net_rxpoll);
1853 VERIFY(inp->mode == IFNET_MODEL_INPUT_POLL_OFF);
2d21ac55 1854
316670eb
A
1855 while (1) {
1856 struct mbuf *m = NULL;
1857 u_int32_t m_cnt;
1858
1859 lck_mtx_lock_spin(&inp->input_lck);
2d21ac55 1860
316670eb
A
1861 /* Wait until there is work to be done */
1862 while (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) {
1863 inp->input_waiting &= ~DLIL_INPUT_RUNNING;
1864 (void) msleep(&inp->input_waiting, &inp->input_lck,
1865 (PZERO - 1) | PSPIN, inp->input_name, NULL);
2d21ac55
A
1866 }
1867
316670eb
A
1868 inp->input_waiting |= DLIL_INPUT_RUNNING;
1869 inp->input_waiting &= ~DLIL_INPUT_WAITING;
6d2010ae 1870
316670eb
A
1871 /*
1872 * Protocol registration and injection must always use
1873 * the main input thread; in theory the latter can utilize
1874 * the corresponding input thread where the packet arrived
1875 * on, but that requires our knowing the interface in advance
 1876	 * (and the benefits might not be worth the trouble.)
1877 */
1878 VERIFY(!(inp->input_waiting &
1879 (DLIL_PROTO_WAITING|DLIL_PROTO_REGISTER)));
6d2010ae 1880
316670eb
A
1881 /* Packets for this interface */
1882 m_cnt = qlen(&inp->rcvq_pkts);
1883 m = _getq_all(&inp->rcvq_pkts);
6d2010ae 1884
316670eb
A
1885 if (inp->input_waiting & DLIL_INPUT_TERMINATE) {
1886 lck_mtx_unlock(&inp->input_lck);
1887
1888 /* Free up pending packets */
1889 if (m != NULL)
1890 mbuf_freem_list(m);
1891
1892 dlil_terminate_input_thread(inp);
1893 /* NOTREACHED */
1894 return;
2d21ac55
A
1895 }
1896
316670eb
A
1897 inp->wtot = 0;
1898
1899 dlil_input_stats_sync(ifp, inp);
1900
1901 lck_mtx_unlock(&inp->input_lck);
2d21ac55 1902
91447636
A
1903 /*
1904 * NOTE warning %%% attention !!!!
6d2010ae
A
1905 * We should think about putting some thread starvation
1906 * safeguards if we deal with long chains of packets.
91447636 1907 */
6d2010ae 1908 if (m != NULL)
316670eb
A
1909 dlil_input_packet_list_extended(NULL, m,
1910 m_cnt, inp->mode);
2d21ac55 1911 }
316670eb
A
1912
1913 /* NOTREACHED */
1914 VERIFY(0); /* we should never get here */
2d21ac55
A
1915}
1916
316670eb
A
1917/*
1918 * Input thread for interfaces with opportunistic polling input model.
1919 */
1920static void
1921dlil_rxpoll_input_thread_func(void *v, wait_result_t w)
2d21ac55 1922{
316670eb
A
1923#pragma unused(w)
1924 struct dlil_threading_info *inp = v;
1925 struct ifnet *ifp = inp->ifp;
1926 struct timespec ts;
2d21ac55 1927
316670eb
A
1928 VERIFY(inp != dlil_main_input_thread);
1929 VERIFY(ifp != NULL && (ifp->if_eflags & IFEF_RXPOLL));
2d21ac55 1930
2d21ac55 1931 while (1) {
316670eb
A
1932 struct mbuf *m = NULL;
1933 u_int32_t m_cnt, m_size, poll_req = 0;
1934 ifnet_model_t mode;
1935 struct timespec now, delta;
39236c6e 1936 u_int64_t ival;
6d2010ae 1937
316670eb 1938 lck_mtx_lock_spin(&inp->input_lck);
6d2010ae 1939
39236c6e
A
1940 if ((ival = inp->rxpoll_ival) < IF_RXPOLL_INTERVALTIME_MIN)
1941 ival = IF_RXPOLL_INTERVALTIME_MIN;
1942
316670eb
A
1943 /* Link parameters changed? */
1944 if (ifp->if_poll_update != 0) {
1945 ifp->if_poll_update = 0;
39236c6e 1946 (void) dlil_rxpoll_set_params(ifp, NULL, TRUE);
91447636 1947 }
1c79356b 1948
316670eb
A
1949 /* Current operating mode */
1950 mode = inp->mode;
1c79356b 1951
316670eb 1952 /* Wait until there is work to be done */
39236c6e 1953 while (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) {
316670eb
A
1954 inp->input_waiting &= ~DLIL_INPUT_RUNNING;
1955 (void) msleep(&inp->input_waiting, &inp->input_lck,
1956 (PZERO - 1) | PSPIN, inp->input_name, NULL);
1957 }
2d21ac55 1958
316670eb
A
1959 inp->input_waiting |= DLIL_INPUT_RUNNING;
1960 inp->input_waiting &= ~DLIL_INPUT_WAITING;
2d21ac55
A
1961
1962 /*
316670eb
A
1963 * Protocol registration and injection must always use
1964 * the main input thread; in theory the latter can utilize
1965 * the corresponding input thread where the packet arrived
1966 * on, but that requires our knowing the interface in advance
 1967	 * (and the benefits might not be worth the trouble.)
2d21ac55 1968 */
316670eb
A
1969 VERIFY(!(inp->input_waiting &
1970 (DLIL_PROTO_WAITING|DLIL_PROTO_REGISTER)));
2d21ac55 1971
316670eb
A
1972 if (inp->input_waiting & DLIL_INPUT_TERMINATE) {
1973 /* Free up pending packets */
1974 _flushq(&inp->rcvq_pkts);
1975 lck_mtx_unlock(&inp->input_lck);
2d21ac55 1976
316670eb
A
1977 dlil_terminate_input_thread(inp);
1978 /* NOTREACHED */
1979 return;
2d21ac55 1980 }
2d21ac55 1981
316670eb
A
1982 /* Total count of all packets */
1983 m_cnt = qlen(&inp->rcvq_pkts);
1984
1985 /* Total bytes of all packets */
1986 m_size = qsize(&inp->rcvq_pkts);
1987
1988 /* Packets for this interface */
1989 m = _getq_all(&inp->rcvq_pkts);
1990 VERIFY(m != NULL || m_cnt == 0);
1991
1992 nanouptime(&now);
1993 if (!net_timerisset(&inp->sample_lasttime))
1994 *(&inp->sample_lasttime) = *(&now);
1995
1996 net_timersub(&now, &inp->sample_lasttime, &delta);
1997 if (if_rxpoll && net_timerisset(&inp->sample_holdtime)) {
1998 u_int32_t ptot, btot;
1999
2000 /* Accumulate statistics for current sampling */
2001 PKTCNTR_ADD(&inp->sstats, m_cnt, m_size);
2002
2003 if (net_timercmp(&delta, &inp->sample_holdtime, <))
2004 goto skip;
2005
2006 *(&inp->sample_lasttime) = *(&now);
2007
2008 /* Calculate min/max of inbound bytes */
2009 btot = (u_int32_t)inp->sstats.bytes;
2010 if (inp->rxpoll_bmin == 0 || inp->rxpoll_bmin > btot)
2011 inp->rxpoll_bmin = btot;
2012 if (btot > inp->rxpoll_bmax)
2013 inp->rxpoll_bmax = btot;
2014
2015 /* Calculate EWMA of inbound bytes */
2016 DLIL_EWMA(inp->rxpoll_bavg, btot, if_rxpoll_decay);
2017
2018 /* Calculate min/max of inbound packets */
2019 ptot = (u_int32_t)inp->sstats.packets;
2020 if (inp->rxpoll_pmin == 0 || inp->rxpoll_pmin > ptot)
2021 inp->rxpoll_pmin = ptot;
2022 if (ptot > inp->rxpoll_pmax)
2023 inp->rxpoll_pmax = ptot;
2024
2025 /* Calculate EWMA of inbound packets */
2026 DLIL_EWMA(inp->rxpoll_pavg, ptot, if_rxpoll_decay);
2027
2028 /* Reset sampling statistics */
2029 PKTCNTR_CLEAR(&inp->sstats);
2030
2031 /* Calculate EWMA of wakeup requests */
2032 DLIL_EWMA(inp->rxpoll_wavg, inp->wtot, if_rxpoll_decay);
2033 inp->wtot = 0;
2034
2035 if (dlil_verbose) {
2036 if (!net_timerisset(&inp->dbg_lasttime))
2037 *(&inp->dbg_lasttime) = *(&now);
2038 net_timersub(&now, &inp->dbg_lasttime, &delta);
2039 if (net_timercmp(&delta, &dlil_dbgrate, >=)) {
2040 *(&inp->dbg_lasttime) = *(&now);
39236c6e 2041 printf("%s: [%s] pkts avg %d max %d "
316670eb
A
2042 "limits [%d/%d], wreq avg %d "
2043 "limits [%d/%d], bytes avg %d "
39236c6e
A
2044 "limits [%d/%d]\n", if_name(ifp),
2045 (inp->mode ==
316670eb
A
2046 IFNET_MODEL_INPUT_POLL_ON) ?
2047 "ON" : "OFF", inp->rxpoll_pavg,
2048 inp->rxpoll_pmax,
2049 inp->rxpoll_plowat,
2050 inp->rxpoll_phiwat,
2051 inp->rxpoll_wavg,
2052 inp->rxpoll_wlowat,
2053 inp->rxpoll_whiwat,
2054 inp->rxpoll_bavg,
2055 inp->rxpoll_blowat,
2056 inp->rxpoll_bhiwat);
2057 }
2058 }
2d21ac55 2059
316670eb
A
2060 /* Perform mode transition, if necessary */
2061 if (!net_timerisset(&inp->mode_lasttime))
2062 *(&inp->mode_lasttime) = *(&now);
2063
2064 net_timersub(&now, &inp->mode_lasttime, &delta);
2065 if (net_timercmp(&delta, &inp->mode_holdtime, <))
2066 goto skip;
2067
2068 if (inp->rxpoll_pavg <= inp->rxpoll_plowat &&
2069 inp->rxpoll_bavg <= inp->rxpoll_blowat &&
316670eb
A
2070 inp->mode != IFNET_MODEL_INPUT_POLL_OFF) {
2071 mode = IFNET_MODEL_INPUT_POLL_OFF;
2072 } else if (inp->rxpoll_pavg >= inp->rxpoll_phiwat &&
2073 (inp->rxpoll_bavg >= inp->rxpoll_bhiwat ||
2074 inp->rxpoll_wavg >= inp->rxpoll_whiwat) &&
2075 inp->mode != IFNET_MODEL_INPUT_POLL_ON) {
2076 mode = IFNET_MODEL_INPUT_POLL_ON;
2077 }
6d2010ae 2078
316670eb
A
2079 if (mode != inp->mode) {
2080 inp->mode = mode;
2081 *(&inp->mode_lasttime) = *(&now);
2082 poll_req++;
2083 }
2084 }
 2085 skip:
2086 dlil_input_stats_sync(ifp, inp);
6d2010ae 2087
316670eb 2088 lck_mtx_unlock(&inp->input_lck);
6d2010ae 2089
316670eb
A
2090 /*
2091 * If there's a mode change and interface is still attached,
2092 * perform a downcall to the driver for the new mode. Also
2093 * hold an IO refcnt on the interface to prevent it from
 2094	 * being detached (will be released below.)
2095 */
2096 if (poll_req != 0 && ifnet_is_attached(ifp, 1)) {
2097 struct ifnet_model_params p = { mode, { 0 } };
2098 errno_t err;
2099
2100 if (dlil_verbose) {
39236c6e 2101 printf("%s: polling is now %s, "
316670eb
A
2102 "pkts avg %d max %d limits [%d/%d], "
2103 "wreq avg %d limits [%d/%d], "
2104 "bytes avg %d limits [%d/%d]\n",
39236c6e 2105 if_name(ifp),
316670eb
A
2106 (mode == IFNET_MODEL_INPUT_POLL_ON) ?
2107 "ON" : "OFF", inp->rxpoll_pavg,
2108 inp->rxpoll_pmax, inp->rxpoll_plowat,
2109 inp->rxpoll_phiwat, inp->rxpoll_wavg,
2110 inp->rxpoll_wlowat, inp->rxpoll_whiwat,
2111 inp->rxpoll_bavg, inp->rxpoll_blowat,
2112 inp->rxpoll_bhiwat);
2113 }
2d21ac55 2114
316670eb
A
2115 if ((err = ((*ifp->if_input_ctl)(ifp,
2116 IFNET_CTL_SET_INPUT_MODEL, sizeof (p), &p))) != 0) {
39236c6e
A
2117 printf("%s: error setting polling mode "
2118 "to %s (%d)\n", if_name(ifp),
316670eb
A
2119 (mode == IFNET_MODEL_INPUT_POLL_ON) ?
2120 "ON" : "OFF", err);
2121 }
1c79356b 2122
316670eb
A
2123 switch (mode) {
2124 case IFNET_MODEL_INPUT_POLL_OFF:
2125 ifnet_set_poll_cycle(ifp, NULL);
2126 inp->rxpoll_offreq++;
2127 if (err != 0)
2128 inp->rxpoll_offerr++;
2129 break;
2d21ac55 2130
316670eb 2131 case IFNET_MODEL_INPUT_POLL_ON:
39236c6e 2132 net_nsectimer(&ival, &ts);
316670eb
A
2133 ifnet_set_poll_cycle(ifp, &ts);
2134 ifnet_poll(ifp);
2135 inp->rxpoll_onreq++;
2136 if (err != 0)
2137 inp->rxpoll_onerr++;
2138 break;
2139
2140 default:
2141 VERIFY(0);
2142 /* NOTREACHED */
2143 }
2144
2145 /* Release the IO refcnt */
2146 ifnet_decr_iorefcnt(ifp);
2147 }
2148
2149 /*
2150 * NOTE warning %%% attention !!!!
2151 * We should think about putting some thread starvation
2152 * safeguards if we deal with long chains of packets.
2153 */
2154 if (m != NULL)
2155 dlil_input_packet_list_extended(NULL, m, m_cnt, mode);
2156 }
2157
2158 /* NOTREACHED */
2159 VERIFY(0); /* we should never get here */
2160}
2161
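The mode transitions above hinge on exponentially weighted moving averages of packets, bytes, and wakeup requests per sample window. A standalone sketch of a power-of-two EWMA in the spirit of DLIL_EWMA (an assumption — the kernel macro's exact form may differ; with decay = 2 each new sample carries weight 1/4):

#include <stdio.h>
#include <stdint.h>

/* Power-of-two EWMA: new average =
 * (old * (2^decay - 1) + sample) / 2^decay. */
static void
ewma(uint32_t *avg, uint32_t sample, uint32_t decay)
{
	if (*avg == 0)
		*avg = sample;		/* prime the average */
	else
		*avg = ((*avg << decay) - *avg + sample) >> decay;
}

int
main(void)
{
	uint32_t pavg = 0;
	uint32_t samples[] = { 100, 120, 80, 3000, 90 };

	for (unsigned i = 0; i < 5; i++) {
		ewma(&pavg, samples[i], 2);
		printf("sample %4u -> pavg %u\n", samples[i], pavg);
	}
	return (0);
}

A one-off burst (3000 above) lifts the average without immediately dominating it, which together with mode_holdtime keeps the poller from flapping between ON and OFF.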
39236c6e
A
2162/*
2163 * Must be called on an attached ifnet (caller is expected to check.)
2164 * Caller may pass NULL for poll parameters to indicate "auto-tuning."
2165 */
2166errno_t
2167dlil_rxpoll_set_params(struct ifnet *ifp, struct ifnet_poll_params *p,
2168 boolean_t locked)
316670eb 2169{
39236c6e 2170 struct dlil_threading_info *inp;
316670eb
A
2171 u_int64_t sample_holdtime, inbw;
2172
39236c6e
A
2173 VERIFY(ifp != NULL);
2174 if (!(ifp->if_eflags & IFEF_RXPOLL) || (inp = ifp->if_inp) == NULL)
2175 return (ENXIO);
2176
2177 if (p != NULL) {
2178 if ((p->packets_lowat == 0 && p->packets_hiwat != 0) ||
2179 (p->packets_lowat != 0 && p->packets_hiwat == 0))
2180 return (EINVAL);
2181 if (p->packets_lowat != 0 && /* hiwat must be non-zero */
2182 p->packets_lowat >= p->packets_hiwat)
2183 return (EINVAL);
2184 if ((p->bytes_lowat == 0 && p->bytes_hiwat != 0) ||
2185 (p->bytes_lowat != 0 && p->bytes_hiwat == 0))
2186 return (EINVAL);
2187 if (p->bytes_lowat != 0 && /* hiwat must be non-zero */
2188 p->bytes_lowat >= p->bytes_hiwat)
2189 return (EINVAL);
2190 if (p->interval_time != 0 &&
2191 p->interval_time < IF_RXPOLL_INTERVALTIME_MIN)
2192 p->interval_time = IF_RXPOLL_INTERVALTIME_MIN;
2193 }
2194
2195 if (!locked)
2196 lck_mtx_lock(&inp->input_lck);
2197
2198 lck_mtx_assert(&inp->input_lck, LCK_MTX_ASSERT_OWNED);
2199
2200 /*
2201 * Normally, we'd reset the parameters to the auto-tuned values
 2202	 * if the input thread detects a change in link rate. If the
2203 * driver provides its own parameters right after a link rate
2204 * changes, but before the input thread gets to run, we want to
2205 * make sure to keep the driver's values. Clearing if_poll_update
2206 * will achieve that.
2207 */
2208 if (p != NULL && !locked && ifp->if_poll_update != 0)
2209 ifp->if_poll_update = 0;
316670eb 2210
39236c6e 2211 if ((inbw = ifnet_input_linkrate(ifp)) == 0 && p == NULL) {
316670eb
A
2212 sample_holdtime = 0; /* polling is disabled */
2213 inp->rxpoll_wlowat = inp->rxpoll_plowat =
2214 inp->rxpoll_blowat = 0;
2215 inp->rxpoll_whiwat = inp->rxpoll_phiwat =
2216 inp->rxpoll_bhiwat = (u_int32_t)-1;
39236c6e
A
2217 inp->rxpoll_plim = 0;
2218 inp->rxpoll_ival = IF_RXPOLL_INTERVALTIME_MIN;
316670eb 2219 } else {
39236c6e
A
2220 u_int32_t plowat, phiwat, blowat, bhiwat, plim;
2221 u_int64_t ival;
316670eb
A
2222 unsigned int n, i;
2223
39236c6e 2224 for (n = 0, i = 0; rxpoll_tbl[i].speed != 0; i++) {
316670eb
A
2225 if (inbw < rxpoll_tbl[i].speed)
2226 break;
2227 n = i;
2228 }
39236c6e
A
2229 /* auto-tune if caller didn't specify a value */
2230 plowat = ((p == NULL || p->packets_lowat == 0) ?
2231 rxpoll_tbl[n].plowat : p->packets_lowat);
2232 phiwat = ((p == NULL || p->packets_hiwat == 0) ?
2233 rxpoll_tbl[n].phiwat : p->packets_hiwat);
2234 blowat = ((p == NULL || p->bytes_lowat == 0) ?
2235 rxpoll_tbl[n].blowat : p->bytes_lowat);
2236 bhiwat = ((p == NULL || p->bytes_hiwat == 0) ?
2237 rxpoll_tbl[n].bhiwat : p->bytes_hiwat);
2238 plim = ((p == NULL || p->packets_limit == 0) ?
2239 if_rxpoll_max : p->packets_limit);
2240 ival = ((p == NULL || p->interval_time == 0) ?
2241 if_rxpoll_interval_time : p->interval_time);
2242
2243 VERIFY(plowat != 0 && phiwat != 0);
2244 VERIFY(blowat != 0 && bhiwat != 0);
2245 VERIFY(ival >= IF_RXPOLL_INTERVALTIME_MIN);
2246
316670eb
A
2247 sample_holdtime = if_rxpoll_sample_holdtime;
2248 inp->rxpoll_wlowat = if_rxpoll_wlowat;
2249 inp->rxpoll_whiwat = if_rxpoll_whiwat;
39236c6e
A
2250 inp->rxpoll_plowat = plowat;
2251 inp->rxpoll_phiwat = phiwat;
2252 inp->rxpoll_blowat = blowat;
2253 inp->rxpoll_bhiwat = bhiwat;
2254 inp->rxpoll_plim = plim;
2255 inp->rxpoll_ival = ival;
316670eb
A
2256 }
2257
2258 net_nsectimer(&if_rxpoll_mode_holdtime, &inp->mode_holdtime);
2259 net_nsectimer(&sample_holdtime, &inp->sample_holdtime);
2260
2261 if (dlil_verbose) {
39236c6e
A
2262 printf("%s: speed %llu bps, sample per %llu nsec, "
2263 "poll interval %llu nsec, pkts per poll %u, "
2264 "pkt limits [%u/%u], wreq limits [%u/%u], "
2265 "bytes limits [%u/%u]\n", if_name(ifp),
2266 inbw, sample_holdtime, inp->rxpoll_ival, inp->rxpoll_plim,
2267 inp->rxpoll_plowat, inp->rxpoll_phiwat, inp->rxpoll_wlowat,
2268 inp->rxpoll_whiwat, inp->rxpoll_blowat, inp->rxpoll_bhiwat);
316670eb 2269 }
39236c6e
A
2270
2271 if (!locked)
2272 lck_mtx_unlock(&inp->input_lck);
2273
2274 return (0);
2275}
2276
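Drivers reach this routine through the kernel-private ifnet_set_poll_params() KPI; any field left at zero keeps the auto-tuned value derived from rxpoll_tbl. A hedged sketch with illustrative numbers:

#include <sys/systm.h>
#include <net/kpi_interface.h>

/* Pin the packet watermarks for a hypothetical driver; the zeroed
 * fields (bytes_*, interval_time) stay auto-tuned. */
static errno_t
my_driver_tune_poll(ifnet_t ifp)
{
	struct ifnet_poll_params p;

	bzero(&p, sizeof (p));
	p.packets_lowat = 8;	/* leave poll mode below this */
	p.packets_hiwat = 64;	/* enter poll mode above this */
	p.packets_limit = 32;	/* cap packets per poll downcall */
	return (ifnet_set_poll_params(ifp, &p));
}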
2277/*
2278 * Must be called on an attached ifnet (caller is expected to check.)
2279 */
2280errno_t
2281dlil_rxpoll_get_params(struct ifnet *ifp, struct ifnet_poll_params *p)
2282{
2283 struct dlil_threading_info *inp;
2284
2285 VERIFY(ifp != NULL && p != NULL);
2286 if (!(ifp->if_eflags & IFEF_RXPOLL) || (inp = ifp->if_inp) == NULL)
2287 return (ENXIO);
2288
2289 bzero(p, sizeof (*p));
2290
2291 lck_mtx_lock(&inp->input_lck);
2292 p->packets_limit = inp->rxpoll_plim;
2293 p->packets_lowat = inp->rxpoll_plowat;
2294 p->packets_hiwat = inp->rxpoll_phiwat;
2295 p->bytes_lowat = inp->rxpoll_blowat;
2296 p->bytes_hiwat = inp->rxpoll_bhiwat;
2297 p->interval_time = inp->rxpoll_ival;
2298 lck_mtx_unlock(&inp->input_lck);
2299
2300 return (0);
316670eb
A
2301}
2302
2303errno_t
2304ifnet_input(struct ifnet *ifp, struct mbuf *m_head,
2305 const struct ifnet_stat_increment_param *s)
2306{
2307 return (ifnet_input_common(ifp, m_head, NULL, s, FALSE, FALSE));
2308}
2309
2310errno_t
2311ifnet_input_extended(struct ifnet *ifp, struct mbuf *m_head,
2312 struct mbuf *m_tail, const struct ifnet_stat_increment_param *s)
2313{
2314 return (ifnet_input_common(ifp, m_head, m_tail, s, TRUE, FALSE));
2315}
2316
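From a driver's receive path the flow looks like the sketch below; my_rx_deliver() and its parameters are hypothetical. With the extended variant the packet count must be exact, while the byte count may be approximate (see the assertions in ifnet_input_common() below):

#include <sys/systm.h>
#include <net/kpi_interface.h>

/* Hand a driver-built mbuf chain to DLIL.  packets_in must match the
 * chain length exactly; bytes_in may include link-layer headers. */
static void
my_rx_deliver(ifnet_t ifp, mbuf_t head, mbuf_t tail,
    u_int32_t cnt, u_int32_t bytes)
{
	struct ifnet_stat_increment_param s;

	bzero(&s, sizeof (s));
	s.packets_in = cnt;
	s.bytes_in = bytes;

	/* on error the chain has already been freed by DLIL */
	(void) ifnet_input_extended(ifp, head, tail, &s);
}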
2317static errno_t
2318ifnet_input_common(struct ifnet *ifp, struct mbuf *m_head, struct mbuf *m_tail,
2319 const struct ifnet_stat_increment_param *s, boolean_t ext, boolean_t poll)
2320{
2321 struct thread *tp = current_thread();
2322 struct mbuf *last;
2323 struct dlil_threading_info *inp;
2324 u_int32_t m_cnt = 0, m_size = 0;
2325
39236c6e
A
2326 if ((m_head == NULL && !poll) || (s == NULL && ext)) {
2327 if (m_head != NULL)
2328 mbuf_freem_list(m_head);
2329 return (EINVAL);
2330 }
2331
2332 VERIFY(m_head != NULL || (s == NULL && m_tail == NULL && !ext && poll));
2333 VERIFY(m_tail == NULL || ext);
2334 VERIFY(s != NULL || !ext);
2335
316670eb
A
2336 /*
2337 * Drop the packet(s) if the parameters are invalid, or if the
2338 * interface is no longer attached; else hold an IO refcnt to
2339 * prevent it from being detached (will be released below.)
2340 */
39236c6e 2341 if (ifp == NULL || (ifp != lo_ifp && !ifnet_is_attached(ifp, 1))) {
316670eb
A
2342 if (m_head != NULL)
2343 mbuf_freem_list(m_head);
2344 return (EINVAL);
2345 }
2346
316670eb
A
2347 if (m_tail == NULL) {
2348 last = m_head;
39236c6e 2349 while (m_head != NULL) {
316670eb
A
2350#if IFNET_INPUT_SANITY_CHK
2351 if (dlil_input_sanity_check != 0)
2352 DLIL_INPUT_CHECK(last, ifp);
2353#endif /* IFNET_INPUT_SANITY_CHK */
2354 m_cnt++;
2355 m_size += m_length(last);
2356 if (mbuf_nextpkt(last) == NULL)
2357 break;
2358 last = mbuf_nextpkt(last);
2359 }
2360 m_tail = last;
2361 } else {
2362#if IFNET_INPUT_SANITY_CHK
2363 if (dlil_input_sanity_check != 0) {
2364 last = m_head;
2365 while (1) {
2366 DLIL_INPUT_CHECK(last, ifp);
2367 m_cnt++;
2368 m_size += m_length(last);
2369 if (mbuf_nextpkt(last) == NULL)
2370 break;
2371 last = mbuf_nextpkt(last);
2372 }
2373 } else {
2374 m_cnt = s->packets_in;
2375 m_size = s->bytes_in;
2376 last = m_tail;
2377 }
2378#else
2379 m_cnt = s->packets_in;
2380 m_size = s->bytes_in;
2381 last = m_tail;
2382#endif /* IFNET_INPUT_SANITY_CHK */
2383 }
2384
2385 if (last != m_tail) {
39236c6e
A
2386 panic_plain("%s: invalid input packet chain for %s, "
2387 "tail mbuf %p instead of %p\n", __func__, if_name(ifp),
2388 m_tail, last);
316670eb
A
2389 }
2390
2391 /*
2392 * Assert packet count only for the extended variant, for backwards
2393 * compatibility, since this came directly from the device driver.
2394 * Relax this assertion for input bytes, as the driver may have
2395 * included the link-layer headers in the computation; hence
2396 * m_size is just an approximation.
2397 */
2398 if (ext && s->packets_in != m_cnt) {
39236c6e
A
2399 panic_plain("%s: input packet count mismatch for %s, "
2400 "%d instead of %d\n", __func__, if_name(ifp),
2401 s->packets_in, m_cnt);
316670eb
A
2402 }
2403
2404 if ((inp = ifp->if_inp) == NULL)
2405 inp = dlil_main_input_thread;
2406
2407 /*
2408 * If there is a matching DLIL input thread associated with an
2409 * affinity set, associate this thread with the same set. We
2410 * will only do this once.
2411 */
2412 lck_mtx_lock_spin(&inp->input_lck);
2413 if (inp != dlil_main_input_thread && inp->net_affinity &&
2414 ((!poll && inp->wloop_thr == THREAD_NULL) ||
2415 (poll && inp->poll_thr == THREAD_NULL))) {
2416 u_int32_t tag = inp->tag;
2417
2418 if (poll) {
2419 VERIFY(inp->poll_thr == THREAD_NULL);
2420 inp->poll_thr = tp;
2421 } else {
2422 VERIFY(inp->wloop_thr == THREAD_NULL);
2423 inp->wloop_thr = tp;
2424 }
2425 lck_mtx_unlock(&inp->input_lck);
2426
2427 /* Associate the current thread with the new affinity tag */
2428 (void) dlil_affinity_set(tp, tag);
2429
2430 /*
2431 * Take a reference on the current thread; during detach,
 2432	 * we will need to refer to it in order to tear down its
2433 * affinity.
2434 */
2435 thread_reference(tp);
2436 lck_mtx_lock_spin(&inp->input_lck);
2437 }
2438
39236c6e
A
2439 VERIFY(m_head != NULL || (m_tail == NULL && m_cnt == 0));
2440
316670eb
A
2441 /*
2442 * Because of loopbacked multicast we cannot stuff the ifp in
2443 * the rcvif of the packet header: loopback (lo0) packets use a
2444 * dedicated list so that we can later associate them with lo_ifp
2445 * on their way up the stack. Packets for other interfaces without
2446 * dedicated input threads go to the regular list.
2447 */
39236c6e
A
2448 if (m_head != NULL) {
2449 if (inp == dlil_main_input_thread && ifp == lo_ifp) {
2450 struct dlil_main_threading_info *inpm =
2451 (struct dlil_main_threading_info *)inp;
2452 _addq_multi(&inpm->lo_rcvq_pkts, m_head, m_tail,
2453 m_cnt, m_size);
2454 } else {
2455 _addq_multi(&inp->rcvq_pkts, m_head, m_tail,
2456 m_cnt, m_size);
2457 }
316670eb
A
2458 }
2459
2460#if IFNET_INPUT_SANITY_CHK
2461 if (dlil_input_sanity_check != 0) {
2462 u_int32_t count;
2463 struct mbuf *m0;
2464
2465 for (m0 = m_head, count = 0; m0; m0 = mbuf_nextpkt(m0))
2466 count++;
2467
2468 if (count != m_cnt) {
39236c6e
A
2469 panic_plain("%s: invalid packet count %d "
2470 "(expected %d)\n", if_name(ifp),
316670eb
A
2471 count, m_cnt);
2472 /* NOTREACHED */
2473 }
2474
2475 inp->input_mbuf_cnt += m_cnt;
2476 }
2477#endif /* IFNET_INPUT_SANITY_CHK */
2478
2479 if (s != NULL) {
2480 dlil_input_stats_add(s, inp, poll);
2481 /*
2482 * If we're using the main input thread, synchronize the
2483 * stats now since we have the interface context. All
2484 * other cases involving dedicated input threads will
2485 * have their stats synchronized there.
2486 */
2487 if (inp == dlil_main_input_thread)
2488 dlil_input_stats_sync(ifp, inp);
2489 }
2490
2491 inp->input_waiting |= DLIL_INPUT_WAITING;
2492 if (!(inp->input_waiting & DLIL_INPUT_RUNNING)) {
2493 inp->wtot++;
2494 wakeup_one((caddr_t)&inp->input_waiting);
2495 }
2496 lck_mtx_unlock(&inp->input_lck);
2497
2498 if (ifp != lo_ifp) {
2499 /* Release the IO refcnt */
2500 ifnet_decr_iorefcnt(ifp);
2501 }
2502
2503 return (0);
2504}
2505
39236c6e
A
2506static void
2507ifnet_start_common(struct ifnet *ifp, int resetfc)
316670eb 2508{
39236c6e
A
2509 if (!(ifp->if_eflags & IFEF_TXSTART))
2510 return;
316670eb 2511 /*
39236c6e
A
2512 * If the starter thread is inactive, signal it to do work,
2513 * unless the interface is being flow controlled from below,
2514 * e.g. a virtual interface being flow controlled by a real
2515 * network interface beneath it.
316670eb
A
2516 */
2517 lck_mtx_lock_spin(&ifp->if_start_lock);
39236c6e
A
2518 if (resetfc) {
2519 ifp->if_start_flags &= ~IFSF_FLOW_CONTROLLED;
2520 } else if (ifp->if_start_flags & IFSF_FLOW_CONTROLLED) {
2521 lck_mtx_unlock(&ifp->if_start_lock);
2522 return;
2523 }
316670eb 2524 ifp->if_start_req++;
3e170ce0
A
2525 if (!ifp->if_start_active && ifp->if_start_thread != THREAD_NULL &&
2526 (resetfc || !(ifp->if_eflags & IFEF_ENQUEUE_MULTI) ||
2527 IFCQ_LEN(&ifp->if_snd) >= ifp->if_start_delay_qlen
2528 || ifp->if_start_delayed == 0)) {
316670eb
A
2529 wakeup_one((caddr_t)&ifp->if_start_thread);
2530 }
2531 lck_mtx_unlock(&ifp->if_start_lock);
2532}
2533
39236c6e
A
2534void
2535ifnet_start(struct ifnet *ifp)
2536{
2537 ifnet_start_common(ifp, 0);
2538}
2539
316670eb
A
2540static void
2541ifnet_start_thread_fn(void *v, wait_result_t w)
2542{
2543#pragma unused(w)
2544 struct ifnet *ifp = v;
2545 char ifname[IFNAMSIZ + 1];
2546 struct timespec *ts = NULL;
2547 struct ifclassq *ifq = &ifp->if_snd;
3e170ce0 2548 struct timespec delay_start_ts;
316670eb
A
2549
2550 /*
2551 * Treat the dedicated starter thread for lo0 as equivalent to
2552 * the driver workloop thread; if net_affinity is enabled for
2553 * the main input thread, associate this starter thread to it
2554 * by binding them with the same affinity tag. This is done
2555 * only once (as we only have one lo_ifp which never goes away.)
2556 */
2557 if (ifp == lo_ifp) {
2558 struct dlil_threading_info *inp = dlil_main_input_thread;
2559 struct thread *tp = current_thread();
2560
2561 lck_mtx_lock(&inp->input_lck);
2562 if (inp->net_affinity) {
2563 u_int32_t tag = inp->tag;
2564
2565 VERIFY(inp->wloop_thr == THREAD_NULL);
2566 VERIFY(inp->poll_thr == THREAD_NULL);
2567 inp->wloop_thr = tp;
2568 lck_mtx_unlock(&inp->input_lck);
2569
2570 /* Associate this thread with the affinity tag */
2571 (void) dlil_affinity_set(tp, tag);
2572 } else {
2573 lck_mtx_unlock(&inp->input_lck);
2574 }
2575 }
2576
39236c6e
A
2577 snprintf(ifname, sizeof (ifname), "%s_starter",
2578 if_name(ifp));
316670eb
A
2579
2580 lck_mtx_lock_spin(&ifp->if_start_lock);
2581
2582 for (;;) {
3e170ce0
A
2583 if (ifp->if_start_thread != NULL)
2584 (void) msleep(&ifp->if_start_thread, &ifp->if_start_lock,
2585 (PZERO - 1) | PSPIN, ifname, ts);
316670eb
A
2586
2587 /* interface is detached? */
2588 if (ifp->if_start_thread == THREAD_NULL) {
2589 ifnet_set_start_cycle(ifp, NULL);
2590 lck_mtx_unlock(&ifp->if_start_lock);
2591 ifnet_purge(ifp);
2592
2593 if (dlil_verbose) {
39236c6e
A
2594 printf("%s: starter thread terminated\n",
2595 if_name(ifp));
316670eb
A
2596 }
2597
2598 /* for the extra refcnt from kernel_thread_start() */
2599 thread_deallocate(current_thread());
2600 /* this is the end */
2601 thread_terminate(current_thread());
2602 /* NOTREACHED */
2603 return;
2604 }
2605
2606 ifp->if_start_active = 1;
3e170ce0 2607
316670eb
A
2608 for (;;) {
2609 u_int32_t req = ifp->if_start_req;
3e170ce0
A
2610 if (!IFCQ_IS_EMPTY(ifq) &&
2611 (ifp->if_eflags & IFEF_ENQUEUE_MULTI) &&
2612 ifp->if_start_delayed == 0 &&
2613 IFCQ_LEN(ifq) < ifp->if_start_delay_qlen &&
2614 (ifp->if_eflags & IFEF_DELAY_START)) {
2615 ifp->if_start_delayed = 1;
2616 ifnet_start_delayed++;
2617 break;
2618 } else {
2619 ifp->if_start_delayed = 0;
2620 }
316670eb 2621 lck_mtx_unlock(&ifp->if_start_lock);
3e170ce0
A
2622
2623 /*
2624 * If no longer attached, don't call start because ifp
2625 * is being destroyed; else hold an IO refcnt to
2626 * prevent the interface from being detached (will be
2627 * released below.)
2628 */
2629 if (!ifnet_is_attached(ifp, 1)) {
2630 lck_mtx_lock_spin(&ifp->if_start_lock);
2631 break;
2632 }
2633
316670eb
A
2634 /* invoke the driver's start routine */
2635 ((*ifp->if_start)(ifp));
3e170ce0
A
2636
2637 /*
2638 * Release the io ref count taken by ifnet_is_attached.
2639 */
2640 ifnet_decr_iorefcnt(ifp);
2641
316670eb
A
2642 lck_mtx_lock_spin(&ifp->if_start_lock);
2643
2644 /* if there's no pending request, we're done */
2645 if (req == ifp->if_start_req)
2646 break;
2647 }
3e170ce0 2648
316670eb
A
2649 ifp->if_start_req = 0;
2650 ifp->if_start_active = 0;
3e170ce0 2651
316670eb
A
2652 /*
2653 * Wakeup N ns from now if rate-controlled by TBR, and if
2654 * there are still packets in the send queue which haven't
2655 * been dequeued so far; else sleep indefinitely (ts = NULL)
2656 * until ifnet_start() is called again.
2657 */
2658 ts = ((IFCQ_TBR_IS_ENABLED(ifq) && !IFCQ_IS_EMPTY(ifq)) ?
2659 &ifp->if_start_cycle : NULL);
2660
3e170ce0
A
2661 if (ts == NULL && ifp->if_start_delayed == 1) {
2662 delay_start_ts.tv_sec = 0;
2663 delay_start_ts.tv_nsec = ifp->if_start_delay_timeout;
2664 ts = &delay_start_ts;
2665 }
2666
316670eb
A
2667 if (ts != NULL && ts->tv_sec == 0 && ts->tv_nsec == 0)
2668 ts = NULL;
2669 }
2670
2671 /* NOTREACHED */
316670eb
A
2672}
2673
2674void
2675ifnet_set_start_cycle(struct ifnet *ifp, struct timespec *ts)
2676{
2677 if (ts == NULL)
2678 bzero(&ifp->if_start_cycle, sizeof (ifp->if_start_cycle));
2679 else
2680 *(&ifp->if_start_cycle) = *ts;
2681
2682 if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose)
39236c6e
A
2683 printf("%s: restart interval set to %lu nsec\n",
2684 if_name(ifp), ts->tv_nsec);
316670eb
A
2685}
2686
2687static void
2688ifnet_poll(struct ifnet *ifp)
2689{
2690 /*
2691 * If the poller thread is inactive, signal it to do work.
2692 */
2693 lck_mtx_lock_spin(&ifp->if_poll_lock);
2694 ifp->if_poll_req++;
2695 if (!ifp->if_poll_active && ifp->if_poll_thread != THREAD_NULL) {
2696 wakeup_one((caddr_t)&ifp->if_poll_thread);
2697 }
2698 lck_mtx_unlock(&ifp->if_poll_lock);
2699}
2700
2701static void
2702ifnet_poll_thread_fn(void *v, wait_result_t w)
2703{
2704#pragma unused(w)
2705 struct dlil_threading_info *inp;
2706 struct ifnet *ifp = v;
2707 char ifname[IFNAMSIZ + 1];
2708 struct timespec *ts = NULL;
2709 struct ifnet_stat_increment_param s;
2710
39236c6e
A
2711 snprintf(ifname, sizeof (ifname), "%s_poller",
2712 if_name(ifp));
316670eb
A
2713 bzero(&s, sizeof (s));
2714
2715 lck_mtx_lock_spin(&ifp->if_poll_lock);
2716
2717 inp = ifp->if_inp;
2718 VERIFY(inp != NULL);
2719
2720 for (;;) {
2721 if (ifp->if_poll_thread != THREAD_NULL) {
2722 (void) msleep(&ifp->if_poll_thread, &ifp->if_poll_lock,
2723 (PZERO - 1) | PSPIN, ifname, ts);
2724 }
2725
2726 /* interface is detached (maybe while asleep)? */
2727 if (ifp->if_poll_thread == THREAD_NULL) {
2728 ifnet_set_poll_cycle(ifp, NULL);
2729 lck_mtx_unlock(&ifp->if_poll_lock);
2730
2731 if (dlil_verbose) {
39236c6e
A
2732 printf("%s: poller thread terminated\n",
2733 if_name(ifp));
316670eb
A
2734 }
2735
2736 /* for the extra refcnt from kernel_thread_start() */
2737 thread_deallocate(current_thread());
2738 /* this is the end */
2739 thread_terminate(current_thread());
2740 /* NOTREACHED */
2741 return;
2742 }
2743
2744 ifp->if_poll_active = 1;
2745 for (;;) {
2746 struct mbuf *m_head, *m_tail;
2747 u_int32_t m_lim, m_cnt, m_totlen;
2748 u_int16_t req = ifp->if_poll_req;
2749
2750 lck_mtx_unlock(&ifp->if_poll_lock);
2751
2752 /*
2753 * If no longer attached, there's nothing to do;
2754 * else hold an IO refcnt to prevent the interface
2755 * from being detached (will be released below.)
2756 */
db609669
A
2757 if (!ifnet_is_attached(ifp, 1)) {
2758 lck_mtx_lock_spin(&ifp->if_poll_lock);
316670eb 2759 break;
db609669 2760 }
316670eb 2761
39236c6e 2762 m_lim = (inp->rxpoll_plim != 0) ? inp->rxpoll_plim :
316670eb
A
2763 MAX((qlimit(&inp->rcvq_pkts)),
2764 (inp->rxpoll_phiwat << 2));
2765
2766 if (dlil_verbose > 1) {
39236c6e 2767 printf("%s: polling up to %d pkts, "
316670eb
A
2768 "pkts avg %d max %d, wreq avg %d, "
2769 "bytes avg %d\n",
39236c6e 2770 if_name(ifp), m_lim,
316670eb
A
2771 inp->rxpoll_pavg, inp->rxpoll_pmax,
2772 inp->rxpoll_wavg, inp->rxpoll_bavg);
2773 }
2774
2775 /* invoke the driver's input poll routine */
2776 ((*ifp->if_input_poll)(ifp, 0, m_lim, &m_head, &m_tail,
2777 &m_cnt, &m_totlen));
2778
2779 if (m_head != NULL) {
2780 VERIFY(m_tail != NULL && m_cnt > 0);
2781
2782 if (dlil_verbose > 1) {
39236c6e 2783 printf("%s: polled %d pkts, "
316670eb
A
2784 "pkts avg %d max %d, wreq avg %d, "
2785 "bytes avg %d\n",
39236c6e 2786 if_name(ifp), m_cnt,
316670eb
A
2787 inp->rxpoll_pavg, inp->rxpoll_pmax,
2788 inp->rxpoll_wavg, inp->rxpoll_bavg);
2789 }
2790
2791 /* stats are required for extended variant */
2792 s.packets_in = m_cnt;
2793 s.bytes_in = m_totlen;
2794
2795 (void) ifnet_input_common(ifp, m_head, m_tail,
2796 &s, TRUE, TRUE);
39236c6e
A
2797 } else {
2798 if (dlil_verbose > 1) {
2799 printf("%s: no packets, "
2800 "pkts avg %d max %d, wreq avg %d, "
2801 "bytes avg %d\n",
2802 if_name(ifp), inp->rxpoll_pavg,
2803 inp->rxpoll_pmax, inp->rxpoll_wavg,
2804 inp->rxpoll_bavg);
2805 }
2806
2807 (void) ifnet_input_common(ifp, NULL, NULL,
2808 NULL, FALSE, TRUE);
316670eb
A
2809 }
2810
2811 /* Release the io ref count */
2812 ifnet_decr_iorefcnt(ifp);
2813
2814 lck_mtx_lock_spin(&ifp->if_poll_lock);
2815
2816 /* if there's no pending request, we're done */
2817 if (req == ifp->if_poll_req)
2818 break;
2819 }
2820 ifp->if_poll_req = 0;
2821 ifp->if_poll_active = 0;
2822
2823 /*
2824 * Wakeup N ns from now, else sleep indefinitely (ts = NULL)
2825 * until ifnet_poll() is called again.
2826 */
2827 ts = &ifp->if_poll_cycle;
2828 if (ts->tv_sec == 0 && ts->tv_nsec == 0)
2829 ts = NULL;
2830 }
2831
2832 /* NOTREACHED */
316670eb
A
2833}
2834
2835void
2836ifnet_set_poll_cycle(struct ifnet *ifp, struct timespec *ts)
2837{
2838 if (ts == NULL)
2839 bzero(&ifp->if_poll_cycle, sizeof (ifp->if_poll_cycle));
2840 else
2841 *(&ifp->if_poll_cycle) = *ts;
2842
2843 if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose)
39236c6e
A
2844 printf("%s: poll interval set to %lu nsec\n",
2845 if_name(ifp), ts->tv_nsec);
316670eb
A
2846}
2847
2848void
2849ifnet_purge(struct ifnet *ifp)
2850{
2851 if (ifp != NULL && (ifp->if_eflags & IFEF_TXSTART))
2852 if_qflush(ifp, 0);
2853}
2854
2855void
2856ifnet_update_sndq(struct ifclassq *ifq, cqev_t ev)
2857{
2858 IFCQ_LOCK_ASSERT_HELD(ifq);
2859
2860 if (!(IFCQ_IS_READY(ifq)))
2861 return;
2862
2863 if (IFCQ_TBR_IS_ENABLED(ifq)) {
2864 struct tb_profile tb = { ifq->ifcq_tbr.tbr_rate_raw,
2865 ifq->ifcq_tbr.tbr_percent, 0 };
2866 (void) ifclassq_tbr_set(ifq, &tb, FALSE);
2867 }
2868
2869 ifclassq_update(ifq, ev);
2870}
2871
2872void
2873ifnet_update_rcv(struct ifnet *ifp, cqev_t ev)
2874{
2875 switch (ev) {
39236c6e 2876 case CLASSQ_EV_LINK_BANDWIDTH:
316670eb
A
2877 if (net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL))
2878 ifp->if_poll_update++;
2879 break;
2880
2881 default:
2882 break;
2883 }
2884}
2885
2886errno_t
2887ifnet_set_output_sched_model(struct ifnet *ifp, u_int32_t model)
2888{
2889 struct ifclassq *ifq;
2890 u_int32_t omodel;
2891 errno_t err;
2892
2893 if (ifp == NULL || (model != IFNET_SCHED_MODEL_DRIVER_MANAGED &&
2894 model != IFNET_SCHED_MODEL_NORMAL))
2895 return (EINVAL);
2896 else if (!(ifp->if_eflags & IFEF_TXSTART))
2897 return (ENXIO);
2898
2899 ifq = &ifp->if_snd;
2900 IFCQ_LOCK(ifq);
2901 omodel = ifp->if_output_sched_model;
2902 ifp->if_output_sched_model = model;
2903 if ((err = ifclassq_pktsched_setup(ifq)) != 0)
2904 ifp->if_output_sched_model = omodel;
2905 IFCQ_UNLOCK(ifq);
2906
2907 return (err);
2908}
2909
2910errno_t
2911ifnet_set_sndq_maxlen(struct ifnet *ifp, u_int32_t maxqlen)
2912{
2913 if (ifp == NULL)
2914 return (EINVAL);
2915 else if (!(ifp->if_eflags & IFEF_TXSTART))
2916 return (ENXIO);
2917
2918 ifclassq_set_maxlen(&ifp->if_snd, maxqlen);
2919
2920 return (0);
2921}
2922
2923errno_t
2924ifnet_get_sndq_maxlen(struct ifnet *ifp, u_int32_t *maxqlen)
2925{
2926 if (ifp == NULL || maxqlen == NULL)
2927 return (EINVAL);
2928 else if (!(ifp->if_eflags & IFEF_TXSTART))
2929 return (ENXIO);
2930
2931 *maxqlen = ifclassq_get_maxlen(&ifp->if_snd);
2932
2933 return (0);
2934}
2935
2936errno_t
39236c6e 2937ifnet_get_sndq_len(struct ifnet *ifp, u_int32_t *pkts)
316670eb 2938{
39236c6e
A
2939 errno_t err;
2940
2941 if (ifp == NULL || pkts == NULL)
2942 err = EINVAL;
316670eb 2943 else if (!(ifp->if_eflags & IFEF_TXSTART))
39236c6e
A
2944 err = ENXIO;
2945 else
2946 err = ifclassq_get_len(&ifp->if_snd, MBUF_SC_UNSPEC,
2947 pkts, NULL);
316670eb 2948
39236c6e
A
2949 return (err);
2950}
316670eb 2951
39236c6e
A
2952errno_t
2953ifnet_get_service_class_sndq_len(struct ifnet *ifp, mbuf_svc_class_t sc,
2954 u_int32_t *pkts, u_int32_t *bytes)
2955{
2956 errno_t err;
2957
2958 if (ifp == NULL || !MBUF_VALID_SC(sc) ||
2959 (pkts == NULL && bytes == NULL))
2960 err = EINVAL;
2961 else if (!(ifp->if_eflags & IFEF_TXSTART))
2962 err = ENXIO;
2963 else
2964 err = ifclassq_get_len(&ifp->if_snd, sc, pkts, bytes);
2965
2966 return (err);
316670eb
A
2967}
2968
2969errno_t
2970ifnet_set_rcvq_maxlen(struct ifnet *ifp, u_int32_t maxqlen)
2971{
2972 struct dlil_threading_info *inp;
2973
2974 if (ifp == NULL)
2975 return (EINVAL);
2976 else if (!(ifp->if_eflags & IFEF_RXPOLL) || ifp->if_inp == NULL)
2977 return (ENXIO);
2978
2979 if (maxqlen == 0)
2980 maxqlen = if_rcvq_maxlen;
2981 else if (maxqlen < IF_RCVQ_MINLEN)
2982 maxqlen = IF_RCVQ_MINLEN;
2983
2984 inp = ifp->if_inp;
2985 lck_mtx_lock(&inp->input_lck);
2986 qlimit(&inp->rcvq_pkts) = maxqlen;
2987 lck_mtx_unlock(&inp->input_lck);
2988
2989 return (0);
2990}
2991
2992errno_t
2993ifnet_get_rcvq_maxlen(struct ifnet *ifp, u_int32_t *maxqlen)
2994{
2995 struct dlil_threading_info *inp;
2996
2997 if (ifp == NULL || maxqlen == NULL)
2998 return (EINVAL);
2999 else if (!(ifp->if_eflags & IFEF_RXPOLL) || ifp->if_inp == NULL)
3000 return (ENXIO);
3001
3002 inp = ifp->if_inp;
3003 lck_mtx_lock(&inp->input_lck);
3004 *maxqlen = qlimit(&inp->rcvq_pkts);
3005 lck_mtx_unlock(&inp->input_lck);
3006 return (0);
3007}
3008
3009errno_t
3010ifnet_enqueue(struct ifnet *ifp, struct mbuf *m)
3011{
3012 int error;
3e170ce0
A
3013 struct timespec now;
3014 u_int64_t now_nsec;
316670eb
A
3015
3016 if (ifp == NULL || m == NULL || !(m->m_flags & M_PKTHDR) ||
3017 m->m_nextpkt != NULL) {
3018 if (m != NULL)
3019 m_freem_list(m);
3020 return (EINVAL);
3021 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3022 !(ifp->if_refflags & IFRF_ATTACHED)) {
3023 /* flag tested without lock for performance */
3024 m_freem(m);
3025 return (ENXIO);
3026 } else if (!(ifp->if_flags & IFF_UP)) {
3027 m_freem(m);
3028 return (ENETDOWN);
316670eb
A
3029 }
3030
3e170ce0
A
3031 nanouptime(&now);
3032 net_timernsec(&now, &now_nsec);
3033 m->m_pkthdr.pkt_enqueue_ts = now_nsec;
3034
3035 if (ifp->if_eflags & IFEF_ENQUEUE_MULTI) {
3036 /*
 3037	 * If the driver chose to delay the start callback for
 3038	 * coalescing multiple packets, then use the following
 3039	 * heuristics to make sure that the start callback will
 3040	 * be delayed only when bulk data transfer is detected:
 3041	 * 1. The number of packets enqueued in (delay_win * 2) is
 3042	 * greater than or equal to the delay qlen.
3043 * 2. If delay_start is enabled it will stay enabled for
3044 * another 10 idle windows. This is to take into account
3045 * variable RTT and burst traffic.
3046 * 3. If the time elapsed since last enqueue is more
 3047	 * than 200ms, we disable delaying the start callback. This
 3048	 * is to take idle time into account.
3049 */
3050 u_int64_t dwin = (ifp->if_start_delay_timeout << 1);
3051 if (ifp->if_start_delay_swin > 0) {
3052 if ((ifp->if_start_delay_swin + dwin) > now_nsec) {
3053 ifp->if_start_delay_cnt++;
3054 } else if ((now_nsec - ifp->if_start_delay_swin)
3055 >= (200 * 1000 * 1000)) {
3056 ifp->if_start_delay_swin = now_nsec;
3057 ifp->if_start_delay_cnt = 1;
3058 ifp->if_start_delay_idle = 0;
3059 if (ifp->if_eflags & IFEF_DELAY_START) {
3060 ifp->if_eflags &=
3061 ~(IFEF_DELAY_START);
3062 ifnet_delay_start_disabled++;
3063 }
3064 } else {
3065 if (ifp->if_start_delay_cnt >=
3066 ifp->if_start_delay_qlen) {
3067 ifp->if_eflags |= IFEF_DELAY_START;
3068 ifp->if_start_delay_idle = 0;
3069 } else {
3070 if (ifp->if_start_delay_idle >= 10) {
3071 ifp->if_eflags &= ~(IFEF_DELAY_START);
3072 ifnet_delay_start_disabled++;
3073 } else {
3074 ifp->if_start_delay_idle++;
3075 }
3076 }
3077 ifp->if_start_delay_swin = now_nsec;
3078 ifp->if_start_delay_cnt = 1;
3079 }
3080 } else {
3081 ifp->if_start_delay_swin = now_nsec;
3082 ifp->if_start_delay_cnt = 1;
3083 ifp->if_start_delay_idle = 0;
3084 ifp->if_eflags &= ~(IFEF_DELAY_START);
3085 }
3086 } else {
3087 ifp->if_eflags &= ~(IFEF_DELAY_START);
3088 }
3089
316670eb
A
3090 /* enqueue the packet */
3091 error = ifclassq_enqueue(&ifp->if_snd, m);
3092
3093 /*
3094 * Tell the driver to start dequeueing; do this even when the queue
3095 * for the packet is suspended (EQSUSPENDED), as the driver could still
3096 * be dequeueing from other unsuspended queues.
3097 */
3e170ce0
A
3098 if (!(ifp->if_eflags & IFEF_ENQUEUE_MULTI) &&
3099 (error == 0 || error == EQFULL || error == EQSUSPENDED))
316670eb
A
3100 ifnet_start(ifp);
3101
3102 return (error);
3103}
3104
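The delay-start bookkeeping above is easier to see as a standalone state machine; the sketch below is an assumed simplification (the kernel keeps the delay flag in if_eflags as IFEF_DELAY_START and counts disables in ifnet_delay_start_disabled):

#include <stdint.h>
#include <stdbool.h>

struct delay_state {
	uint64_t swin;	/* start of current window, nsec (0 = none) */
	uint32_t cnt;	/* enqueues seen in the window */
	uint32_t idle;	/* consecutive non-bulk windows */
	bool	 delay;	/* delay the start callback? */
};

static void
delay_update(struct delay_state *d, uint64_t now,
    uint64_t timeout_ns, uint32_t qlen)
{
	uint64_t dwin = timeout_ns << 1;	/* window = 2x timeout */

	if (d->swin == 0) {
		d->idle = 0;			/* first enqueue */
		d->delay = false;
	} else if (d->swin + dwin > now) {
		d->cnt++;			/* still inside window */
		return;
	} else if (now - d->swin >= 200000000ULL) {
		d->idle = 0;			/* >= 200 ms idle */
		d->delay = false;
	} else if (d->cnt >= qlen) {
		d->idle = 0;			/* bulk detected */
		d->delay = true;
	} else if (d->idle >= 10) {
		d->delay = false;		/* gave bulk 10 windows */
	} else {
		d->idle++;
	}
	d->swin = now;				/* open the next window */
	d->cnt = 1;
}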
3105errno_t
3106ifnet_dequeue(struct ifnet *ifp, struct mbuf **mp)
3107{
fe8ab488 3108 errno_t rc;
316670eb
A
3109 if (ifp == NULL || mp == NULL)
3110 return (EINVAL);
3111 else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3112 (ifp->if_output_sched_model != IFNET_SCHED_MODEL_NORMAL))
3113 return (ENXIO);
fe8ab488
A
3114 if (!ifnet_is_attached(ifp, 1))
3115 return (ENXIO);
3116 rc = ifclassq_dequeue(&ifp->if_snd, 1, mp, NULL, NULL, NULL);
3117 ifnet_decr_iorefcnt(ifp);
316670eb 3118
fe8ab488 3119 return (rc);
316670eb
A
3120}
3121
3122errno_t
3123ifnet_dequeue_service_class(struct ifnet *ifp, mbuf_svc_class_t sc,
3124 struct mbuf **mp)
3125{
fe8ab488 3126 errno_t rc;
316670eb
A
3127 if (ifp == NULL || mp == NULL || !MBUF_VALID_SC(sc))
3128 return (EINVAL);
3129 else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3130 (ifp->if_output_sched_model != IFNET_SCHED_MODEL_DRIVER_MANAGED))
3131 return (ENXIO);
fe8ab488
A
3132 if (!ifnet_is_attached(ifp, 1))
3133 return (ENXIO);
3134
3135 rc = ifclassq_dequeue_sc(&ifp->if_snd, sc, 1, mp, NULL, NULL, NULL);
3136 ifnet_decr_iorefcnt(ifp);
3137 return (rc);
316670eb
A
3138}
3139
3140errno_t
3141ifnet_dequeue_multi(struct ifnet *ifp, u_int32_t limit, struct mbuf **head,
3142 struct mbuf **tail, u_int32_t *cnt, u_int32_t *len)
3143{
fe8ab488 3144 errno_t rc;
316670eb
A
3145 if (ifp == NULL || head == NULL || limit < 1)
3146 return (EINVAL);
3147 else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3148 (ifp->if_output_sched_model != IFNET_SCHED_MODEL_NORMAL))
3149 return (ENXIO);
fe8ab488
A
3150 if (!ifnet_is_attached(ifp, 1))
3151 return (ENXIO);
3152
3153 rc = ifclassq_dequeue(&ifp->if_snd, limit, head, tail, cnt, len);
3154 ifnet_decr_iorefcnt(ifp);
3155 return (rc);
316670eb
A
3156}
3157
3158errno_t
3159ifnet_dequeue_service_class_multi(struct ifnet *ifp, mbuf_svc_class_t sc,
3160 u_int32_t limit, struct mbuf **head, struct mbuf **tail, u_int32_t *cnt,
3161 u_int32_t *len)
3162{
fe8ab488 3163 errno_t rc;
316670eb
A
3164 if (ifp == NULL || head == NULL || limit < 1 || !MBUF_VALID_SC(sc))
3165 return (EINVAL);
3166 else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3167 (ifp->if_output_sched_model != IFNET_SCHED_MODEL_DRIVER_MANAGED))
3168 return (ENXIO);
fe8ab488
A
3169 if (!ifnet_is_attached(ifp, 1))
3170 return (ENXIO);
3171 rc = ifclassq_dequeue_sc(&ifp->if_snd, sc, limit, head,
3172 tail, cnt, len);
3173 ifnet_decr_iorefcnt(ifp);
3174 return (rc);
316670eb
A
3175}
3176
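These dequeue KPIs are the driver-facing half of the new output model: the stack fills if_snd through ifnet_enqueue() and the driver drains it from its start routine. A hedged sketch of an if_start for the NORMAL scheduling model (the my_hw_* helpers are hypothetical):

#include <net/kpi_interface.h>

extern boolean_t my_hw_tx_ring_full(ifnet_t);	/* hypothetical */
extern void my_hw_tx_submit(ifnet_t, mbuf_t);	/* hypothetical */

/* Start routine for IFNET_SCHED_MODEL_NORMAL: DLIL's starter thread
 * calls this whenever ifnet_start() is signalled. */
static void
my_if_start(ifnet_t ifp)
{
	mbuf_t m;

	for (;;) {
		if (my_hw_tx_ring_full(ifp))
			break;			/* resume on next start */
		if (ifnet_dequeue(ifp, &m) != 0)
			break;			/* if_snd is empty */
		my_hw_tx_submit(ifp, m);	/* consumes the mbuf */
	}
}

A driver using the DRIVER_MANAGED model would instead walk its service classes with ifnet_dequeue_service_class().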
39236c6e
A
3177errno_t
3178ifnet_framer_stub(struct ifnet *ifp, struct mbuf **m,
3179 const struct sockaddr *dest, const char *dest_linkaddr,
3180 const char *frame_type, u_int32_t *pre, u_int32_t *post)
3181{
3182 if (pre != NULL)
3183 *pre = 0;
3184 if (post != NULL)
3185 *post = 0;
3186
3187 return (ifp->if_framer_legacy(ifp, m, dest, dest_linkaddr, frame_type));
3188}
3189
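For contrast with the legacy stub above, a hedged sketch of a framer using the extended signature, prepending a hypothetical 14-byte link header (the offsets and the free-on-failure behavior of mbuf_prepend() follow M_PREPEND semantics, stated here as an assumption):

#include <sys/systm.h>
#include <sys/kpi_mbuf.h>
#include <net/kpi_interface.h>

/* Prepend a hypothetical 14-byte link header and report it via *pre.
 * Assumption: mbuf_prepend() frees the mbuf on failure, so returning
 * EJUSTRETURN tells the caller the packet is already gone. */
static errno_t
my_framer(ifnet_t ifp, mbuf_t *m, const struct sockaddr *dest,
    const char *dest_linkaddr, const char *frame_type,
    u_int32_t *pre, u_int32_t *post)
{
#pragma unused(ifp, dest)
	if (mbuf_prepend(m, 14, MBUF_DONTWAIT) != 0)
		return (EJUSTRETURN);

	bcopy(dest_linkaddr, mbuf_data(*m), 6);
	/* bytes 6..11 (source address) omitted for brevity */
	bcopy(frame_type, (char *)mbuf_data(*m) + 12, 2);

	if (pre != NULL)
		*pre = 14;
	if (post != NULL)
		*post = 0;
	return (0);
}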
316670eb
A
3190static int
3191dlil_interface_filters_input(struct ifnet *ifp, struct mbuf **m_p,
3192 char **frame_header_p, protocol_family_t protocol_family)
3193{
3194 struct ifnet_filter *filter;
3195
3196 /*
3197 * Pass the inbound packet to the interface filters
6d2010ae
A
3198 */
3199 lck_mtx_lock_spin(&ifp->if_flt_lock);
3200 /* prevent filter list from changing in case we drop the lock */
3201 if_flt_monitor_busy(ifp);
2d21ac55
A
3202 TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
3203 int result;
3204
6d2010ae
A
3205 if (!filter->filt_skip && filter->filt_input != NULL &&
3206 (filter->filt_protocol == 0 ||
3207 filter->filt_protocol == protocol_family)) {
3208 lck_mtx_unlock(&ifp->if_flt_lock);
3209
2d21ac55 3210 result = (*filter->filt_input)(filter->filt_cookie,
6d2010ae
A
3211 ifp, protocol_family, m_p, frame_header_p);
3212
3213 lck_mtx_lock_spin(&ifp->if_flt_lock);
2d21ac55 3214 if (result != 0) {
6d2010ae
A
3215 /* we're done with the filter list */
3216 if_flt_monitor_unbusy(ifp);
3217 lck_mtx_unlock(&ifp->if_flt_lock);
2d21ac55
A
3218 return (result);
3219 }
3220 }
3221 }
6d2010ae
A
3222 /* we're done with the filter list */
3223 if_flt_monitor_unbusy(ifp);
3224 lck_mtx_unlock(&ifp->if_flt_lock);
b7266188
A
3225
3226 /*
6d2010ae 3227 * Strip away M_PROTO1 bit prior to sending packet up the stack as
b7266188
A
3228 * it is meant to be local to a subsystem -- if_bridge for M_PROTO1
3229 */
3230 if (*m_p != NULL)
3231 (*m_p)->m_flags &= ~M_PROTO1;
3232
2d21ac55 3233 return (0);
1c79356b
A
3234}
3235
6d2010ae
A
3236static int
3237dlil_interface_filters_output(struct ifnet *ifp, struct mbuf **m_p,
3238 protocol_family_t protocol_family)
3239{
3240 struct ifnet_filter *filter;
3241
3242 /*
3243 * Pass the outbound packet to the interface filters
3244 */
3245 lck_mtx_lock_spin(&ifp->if_flt_lock);
3246 /* prevent filter list from changing in case we drop the lock */
3247 if_flt_monitor_busy(ifp);
3248 TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
3249 int result;
3250
3251 if (!filter->filt_skip && filter->filt_output != NULL &&
3252 (filter->filt_protocol == 0 ||
3253 filter->filt_protocol == protocol_family)) {
3254 lck_mtx_unlock(&ifp->if_flt_lock);
3255
3256 result = filter->filt_output(filter->filt_cookie, ifp,
3257 protocol_family, m_p);
3258
3259 lck_mtx_lock_spin(&ifp->if_flt_lock);
3260 if (result != 0) {
3261 /* we're done with the filter list */
3262 if_flt_monitor_unbusy(ifp);
3263 lck_mtx_unlock(&ifp->if_flt_lock);
3264 return (result);
3265 }
3266 }
3267 }
3268 /* we're done with the filter list */
3269 if_flt_monitor_unbusy(ifp);
3270 lck_mtx_unlock(&ifp->if_flt_lock);
3271
3272 return (0);
3273}
3274
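/*
 * Illustrative sketch (not part of dlil.c): a minimal interface filter as
 * consumed by the two filter walks above.  Per those loops, a filter
 * callback returns 0 to let the packet continue, EJUSTRETURN after
 * consuming the packet itself (DLIL will not free it), or any other
 * errno to have DLIL free the packet and stop the walk.  The example_*
 * names are hypothetical; the iff_filter structure and iflt_attach()
 * come from the interface filter KPI included above.
 */
#if 0
static errno_t
example_filt_input(void *cookie, ifnet_t ifp, protocol_family_t protocol,
    mbuf_t *data, char **frame_ptr)
{
#pragma unused(cookie, ifp, frame_ptr)
	if (protocol != PF_INET) {
		m_freem(*data);
		return (EJUSTRETURN);	/* consumed; DLIL must not free */
	}
	return (0);			/* continue up the stack */
}

static errno_t
example_filt_output(void *cookie, ifnet_t ifp, protocol_family_t protocol,
    mbuf_t *data)
{
#pragma unused(cookie, ifp, protocol, data)
	return (0);			/* pass all outbound packets */
}

static errno_t
example_attach_filter(ifnet_t ifp, interface_filter_t *ref)
{
	struct iff_filter flt;

	bzero(&flt, sizeof (flt));
	flt.iff_name = "example_filter";
	flt.iff_protocol = 0;		/* 0 matches every protocol */
	flt.iff_input = example_filt_input;
	flt.iff_output = example_filt_output;
	return (iflt_attach(ifp, &flt, ref));
}
#endif
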
2d21ac55
A
3275static void
3276dlil_ifproto_input(struct if_proto * ifproto, mbuf_t m)
1c79356b 3277{
2d21ac55 3278 int error;
1c79356b 3279
2d21ac55
A
3280 if (ifproto->proto_kpi == kProtoKPI_v1) {
3281 /* Version 1 protocols get one packet at a time */
3282 while (m != NULL) {
3283 char * frame_header;
3284 mbuf_t next_packet;
6d2010ae 3285
2d21ac55
A
3286 next_packet = m->m_nextpkt;
3287 m->m_nextpkt = NULL;
39236c6e
A
3288 frame_header = m->m_pkthdr.pkt_hdr;
3289 m->m_pkthdr.pkt_hdr = NULL;
6d2010ae
A
3290 error = (*ifproto->kpi.v1.input)(ifproto->ifp,
3291 ifproto->protocol_family, m, frame_header);
2d21ac55
A
3292 if (error != 0 && error != EJUSTRETURN)
3293 m_freem(m);
3294 m = next_packet;
3295 }
6d2010ae 3296 } else if (ifproto->proto_kpi == kProtoKPI_v2) {
2d21ac55
A
3297 /* Version 2 protocols support packet lists */
3298 error = (*ifproto->kpi.v2.input)(ifproto->ifp,
6d2010ae 3299 ifproto->protocol_family, m);
2d21ac55
A
3300 if (error != 0 && error != EJUSTRETURN)
3301 m_freem_list(m);
91447636 3302 }
2d21ac55
A
3303 return;
3304}
1c79356b 3305
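/*
 * Illustrative sketch (not part of dlil.c): a v2 protocol input handler
 * matching the packet-list dispatch above.  The handler walks the
 * m_nextpkt chain itself; per dlil_ifproto_input(), returning anything
 * other than 0 or EJUSTRETURN causes the remaining list to be freed by
 * the caller.  The name example_proto_input_v2 is hypothetical.
 */
#if 0
static errno_t
example_proto_input_v2(ifnet_t ifp, protocol_family_t protocol,
    mbuf_t packet_list)
{
#pragma unused(ifp, protocol)
	mbuf_t m, next;

	for (m = packet_list; m != NULL; m = next) {
		next = m->m_nextpkt;
		m->m_nextpkt = NULL;
		/* ... process the packet, then consume it ... */
		m_freem(m);
	}
	return (0);		/* list fully consumed by the handler */
}
#endif
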
316670eb
A
3306static void
3307dlil_input_stats_add(const struct ifnet_stat_increment_param *s,
3308 struct dlil_threading_info *inp, boolean_t poll)
3309{
3310 struct ifnet_stat_increment_param *d = &inp->stats;
3311
3312 if (s->packets_in != 0)
3313 d->packets_in += s->packets_in;
3314 if (s->bytes_in != 0)
3315 d->bytes_in += s->bytes_in;
3316 if (s->errors_in != 0)
3317 d->errors_in += s->errors_in;
3318
3319 if (s->packets_out != 0)
3320 d->packets_out += s->packets_out;
3321 if (s->bytes_out != 0)
3322 d->bytes_out += s->bytes_out;
3323 if (s->errors_out != 0)
3324 d->errors_out += s->errors_out;
3325
3326 if (s->collisions != 0)
3327 d->collisions += s->collisions;
3328 if (s->dropped != 0)
3329 d->dropped += s->dropped;
3330
3331 if (poll)
3332 PKTCNTR_ADD(&inp->tstats, s->packets_in, s->bytes_in);
3333}
3334
3335static void
3336dlil_input_stats_sync(struct ifnet *ifp, struct dlil_threading_info *inp)
3337{
3338 struct ifnet_stat_increment_param *s = &inp->stats;
3339
3340 /*
3341 * Use of atomic operations is unavoidable here because
3342 * these stats may also be incremented elsewhere via KPIs.
3343 */
3344 if (s->packets_in != 0) {
3345 atomic_add_64(&ifp->if_data.ifi_ipackets, s->packets_in);
3346 s->packets_in = 0;
3347 }
3348 if (s->bytes_in != 0) {
3349 atomic_add_64(&ifp->if_data.ifi_ibytes, s->bytes_in);
3350 s->bytes_in = 0;
3351 }
3352 if (s->errors_in != 0) {
3353 atomic_add_64(&ifp->if_data.ifi_ierrors, s->errors_in);
3354 s->errors_in = 0;
3355 }
3356
3357 if (s->packets_out != 0) {
3358 atomic_add_64(&ifp->if_data.ifi_opackets, s->packets_out);
3359 s->packets_out = 0;
3360 }
3361 if (s->bytes_out != 0) {
3362 atomic_add_64(&ifp->if_data.ifi_obytes, s->bytes_out);
3363 s->bytes_out = 0;
3364 }
3365 if (s->errors_out != 0) {
3366 atomic_add_64(&ifp->if_data.ifi_oerrors, s->errors_out);
3367 s->errors_out = 0;
3368 }
3369
3370 if (s->collisions != 0) {
3371 atomic_add_64(&ifp->if_data.ifi_collisions, s->collisions);
3372 s->collisions = 0;
3373 }
3374 if (s->dropped != 0) {
3375 atomic_add_64(&ifp->if_data.ifi_iqdrops, s->dropped);
3376 s->dropped = 0;
3377 }
39236c6e
A
3378 /*
3379 * If we went over the threshold, notify NetworkStatistics.
3380 */
3381 if (ifp->if_data_threshold &&
3382 (ifp->if_ibytes + ifp->if_obytes) - ifp->if_dt_bytes >
3383 ifp->if_data_threshold) {
3384 ifp->if_dt_bytes = ifp->if_ibytes + ifp->if_obytes;
3385 nstat_ifnet_threshold_reached(ifp->if_index);
3386 }
316670eb
A
3387 /*
3388 * No need for atomic operations as they are modified here
3389 * only from within the DLIL input thread context.
3390 */
3391 if (inp->tstats.packets != 0) {
3392 inp->pstats.ifi_poll_packets += inp->tstats.packets;
3393 inp->tstats.packets = 0;
3394 }
3395 if (inp->tstats.bytes != 0) {
3396 inp->pstats.ifi_poll_bytes += inp->tstats.bytes;
3397 inp->tstats.bytes = 0;
3398 }
3399}
3400
3401__private_extern__ void
3402dlil_input_packet_list(struct ifnet *ifp, struct mbuf *m)
3403{
3404 return (dlil_input_packet_list_common(ifp, m, 0,
3405 IFNET_MODEL_INPUT_POLL_OFF, FALSE));
3406}
3407
2d21ac55 3408__private_extern__ void
316670eb
A
3409dlil_input_packet_list_extended(struct ifnet *ifp, struct mbuf *m,
3410 u_int32_t cnt, ifnet_model_t mode)
3411{
3412 return (dlil_input_packet_list_common(ifp, m, cnt, mode, TRUE));
3413}
3414
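/*
 * Illustrative sketch (not part of dlil.c): how a driver typically feeds
 * this path.  It hands DLIL a packet chain plus an
 * ifnet_stat_increment_param describing it; those counters are folded
 * into the input thread's stats by dlil_input_stats_add() and synced to
 * the ifnet by dlil_input_stats_sync().  The name example_driver_rx is
 * hypothetical; ifnet_input() is the public KPI entry point.
 */
#if 0
static void
example_driver_rx(ifnet_t ifp, mbuf_t head, u_int32_t npkts,
    u_int32_t nbytes)
{
	struct ifnet_stat_increment_param stats;

	bzero(&stats, sizeof (stats));
	stats.packets_in = npkts;
	stats.bytes_in = nbytes;
	(void) ifnet_input(ifp, head, &stats);	/* DLIL now owns the chain */
}
#endif
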
3415static void
3416dlil_input_packet_list_common(struct ifnet *ifp_param, struct mbuf *m,
3417 u_int32_t cnt, ifnet_model_t mode, boolean_t ext)
2d21ac55
A
3418{
3419 int error = 0;
2d21ac55
A
3420 protocol_family_t protocol_family;
3421 mbuf_t next_packet;
3422 ifnet_t ifp = ifp_param;
3423 char * frame_header;
3424 struct if_proto * last_ifproto = NULL;
3425 mbuf_t pkt_first = NULL;
3426 mbuf_t * pkt_next = NULL;
316670eb 3427 u_int32_t poll_thresh = 0, poll_ival = 0;
2d21ac55
A
3428
3429 KERNEL_DEBUG(DBG_FNC_DLIL_INPUT | DBG_FUNC_START,0,0,0,0,0);
3430
316670eb
A
3431 if (ext && mode == IFNET_MODEL_INPUT_POLL_ON && cnt > 1 &&
3432 (poll_ival = if_rxpoll_interval_pkts) > 0)
3433 poll_thresh = cnt;
6d2010ae 3434
2d21ac55 3435 while (m != NULL) {
6d2010ae
A
3436 struct if_proto *ifproto = NULL;
3437 int iorefcnt = 0;
39236c6e 3438 uint32_t pktf_mask; /* pkt flags to preserve */
2d21ac55 3439
2d21ac55
A
3440 if (ifp_param == NULL)
3441 ifp = m->m_pkthdr.rcvif;
6d2010ae 3442
316670eb
A
3443 if ((ifp->if_eflags & IFEF_RXPOLL) && poll_thresh != 0 &&
3444 poll_ival > 0 && (--poll_thresh % poll_ival) == 0)
3445 ifnet_poll(ifp);
3446
6d2010ae 3447 /* Check if this mbuf looks valid */
316670eb 3448 MBUF_INPUT_CHECK(m, ifp);
6d2010ae
A
3449
3450 next_packet = m->m_nextpkt;
3451 m->m_nextpkt = NULL;
39236c6e
A
3452 frame_header = m->m_pkthdr.pkt_hdr;
3453 m->m_pkthdr.pkt_hdr = NULL;
2d21ac55 3454
316670eb
A
3455 /*
3456 * Get an IO reference count if the interface is not
3457 * loopback (lo0) and it is attached; lo0 never goes
3458 * away, so optimize for that.
6d2010ae
A
3459 */
3460 if (ifp != lo_ifp) {
3461 if (!ifnet_is_attached(ifp, 1)) {
3462 m_freem(m);
3463 goto next;
3464 }
3465 iorefcnt = 1;
39236c6e
A
3466 pktf_mask = 0;
3467 } else {
3468 /*
3469 * If this arrived on lo0, preserve interface addr
3470 * info to allow for connectivity between loopback
3471 * and local interface addresses.
3472 */
3473 pktf_mask = (PKTF_LOOP|PKTF_IFAINFO);
2d21ac55 3474 }
d41d1dae 3475
39236c6e
A
3476 /* make sure packet comes in clean */
3477 m_classifier_init(m, pktf_mask);
3478
316670eb 3479 ifp_inc_traffic_class_in(ifp, m);
d41d1dae 3480
2d21ac55 3481 /* find which protocol family this packet is for */
6d2010ae 3482 ifnet_lock_shared(ifp);
2d21ac55 3483 error = (*ifp->if_demux)(ifp, m, frame_header,
6d2010ae
A
3484 &protocol_family);
3485 ifnet_lock_done(ifp);
2d21ac55 3486 if (error != 0) {
6d2010ae 3487 if (error == EJUSTRETURN)
2d21ac55 3488 goto next;
2d21ac55
A
3489 protocol_family = 0;
3490 }
6d2010ae 3491
39236c6e
A
3492 if (hwcksum_dbg != 0 && !(ifp->if_flags & IFF_LOOPBACK) &&
3493 !(m->m_pkthdr.pkt_flags & PKTF_LOOP))
3494 dlil_input_cksum_dbg(ifp, m, frame_header,
3495 protocol_family);
3496
3497 /*
3498 * For partial checksum offload, we expect the driver to
3499 * set the start offset indicating the start of the span
3500 * that is covered by the hardware-computed checksum;
3501 * adjust this start offset accordingly because the data
3502 * pointer has been advanced beyond the link-layer header.
3503 *
3504 * Don't adjust if the interface is a bridge member, as
3505 * the adjustment will occur from the context of the
3506 * bridge interface during input.
3507 */
3508 if (ifp->if_bridge == NULL && (m->m_pkthdr.csum_flags &
3509 (CSUM_DATA_VALID | CSUM_PARTIAL)) ==
3510 (CSUM_DATA_VALID | CSUM_PARTIAL)) {
3511 int adj;
3512
3513 if (frame_header == NULL ||
3514 frame_header < (char *)mbuf_datastart(m) ||
3515 frame_header > (char *)m->m_data ||
3516 (adj = (m->m_data - frame_header)) >
3517 m->m_pkthdr.csum_rx_start) {
3518 m->m_pkthdr.csum_data = 0;
3519 m->m_pkthdr.csum_flags &= ~CSUM_DATA_VALID;
3520 hwcksum_in_invalidated++;
3521 } else {
3522 m->m_pkthdr.csum_rx_start -= adj;
3523 }
3524 }
3525
3526 pktap_input(ifp, protocol_family, m, frame_header);
316670eb 3527
2d21ac55 3528 if (m->m_flags & (M_BCAST|M_MCAST))
6d2010ae 3529 atomic_add_64(&ifp->if_imcasts, 1);
1c79356b 3530
2d21ac55
A
3531 /* run interface filters, exclude VLAN packets PR-3586856 */
3532 if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) == 0) {
6d2010ae
A
3533 error = dlil_interface_filters_input(ifp, &m,
3534 &frame_header, protocol_family);
3535 if (error != 0) {
3536 if (error != EJUSTRETURN)
2d21ac55 3537 m_freem(m);
2d21ac55 3538 goto next;
91447636
A
3539 }
3540 }
2d21ac55 3541		if (error != 0 || ((m->m_flags & M_PROMISC) != 0)) {
91447636 3542 m_freem(m);
2d21ac55 3543 goto next;
91447636 3544 }
6d2010ae 3545
2d21ac55
A
3546 /* Lookup the protocol attachment to this interface */
3547 if (protocol_family == 0) {
3548 ifproto = NULL;
6d2010ae
A
3549 } else if (last_ifproto != NULL && last_ifproto->ifp == ifp &&
3550 (last_ifproto->protocol_family == protocol_family)) {
3551 VERIFY(ifproto == NULL);
2d21ac55 3552 ifproto = last_ifproto;
6d2010ae
A
3553 if_proto_ref(last_ifproto);
3554 } else {
3555 VERIFY(ifproto == NULL);
3556 ifnet_lock_shared(ifp);
3557 /* callee holds a proto refcnt upon success */
2d21ac55 3558 ifproto = find_attached_proto(ifp, protocol_family);
6d2010ae 3559 ifnet_lock_done(ifp);
2d21ac55
A
3560 }
3561 if (ifproto == NULL) {
3562 /* no protocol for this packet, discard */
3563 m_freem(m);
3564 goto next;
3565 }
3566 if (ifproto != last_ifproto) {
2d21ac55
A
3567 if (last_ifproto != NULL) {
3568 /* pass up the list for the previous protocol */
2d21ac55
A
3569 dlil_ifproto_input(last_ifproto, pkt_first);
3570 pkt_first = NULL;
3571 if_proto_free(last_ifproto);
2d21ac55
A
3572 }
3573 last_ifproto = ifproto;
6d2010ae 3574 if_proto_ref(ifproto);
2d21ac55
A
3575 }
3576 /* extend the list */
39236c6e 3577 m->m_pkthdr.pkt_hdr = frame_header;
2d21ac55
A
3578 if (pkt_first == NULL) {
3579 pkt_first = m;
3580 } else {
3581 *pkt_next = m;
3582 }
3583 pkt_next = &m->m_nextpkt;
1c79356b 3584
6d2010ae 3585next:
2d21ac55
A
3586 if (next_packet == NULL && last_ifproto != NULL) {
3587 /* pass up the last list of packets */
2d21ac55
A
3588 dlil_ifproto_input(last_ifproto, pkt_first);
3589 if_proto_free(last_ifproto);
6d2010ae
A
3590 last_ifproto = NULL;
3591 }
3592 if (ifproto != NULL) {
3593 if_proto_free(ifproto);
3594 ifproto = NULL;
2d21ac55 3595 }
316670eb 3596
2d21ac55 3597 m = next_packet;
1c79356b 3598
6d2010ae
A
3599 /* update the driver's multicast filter, if needed */
3600 if (ifp->if_updatemcasts > 0 && if_mcasts_update(ifp) == 0)
3601 ifp->if_updatemcasts = 0;
3602 if (iorefcnt == 1)
3603 ifnet_decr_iorefcnt(ifp);
91447636 3604 }
6d2010ae 3605
91447636 3606 KERNEL_DEBUG(DBG_FNC_DLIL_INPUT | DBG_FUNC_END,0,0,0,0,0);
1c79356b
A
3607}
3608
6d2010ae
A
3609errno_t
3610if_mcasts_update(struct ifnet *ifp)
3611{
3612 errno_t err;
3613
3614 err = ifnet_ioctl(ifp, 0, SIOCADDMULTI, NULL);
3615 if (err == EAFNOSUPPORT)
3616 err = 0;
39236c6e
A
3617 printf("%s: %s %d suspended link-layer multicast membership(s) "
3618 "(err=%d)\n", if_name(ifp),
6d2010ae
A
3619 (err == 0 ? "successfully restored" : "failed to restore"),
3620 ifp->if_updatemcasts, err);
3621
3622 /* just return success */
3623 return (0);
3624}
3625
a1c7dba1
A
3626
3627#define TMP_IF_PROTO_ARR_SIZE 10
91447636
A
3628static int
3629dlil_event_internal(struct ifnet *ifp, struct kev_msg *event)
1c79356b 3630{
a1c7dba1
A
3631 struct ifnet_filter *filter = NULL;
3632 struct if_proto *proto = NULL;
3633 int if_proto_count = 0;
3634 struct if_proto **tmp_ifproto_arr = NULL;
3635 struct if_proto *tmp_ifproto_stack_arr[TMP_IF_PROTO_ARR_SIZE] = {NULL};
3636 int tmp_ifproto_arr_idx = 0;
3637 bool tmp_malloc = false;
6d2010ae 3638
6d2010ae
A
3639 /*
3640 * Pass the event to the interface filters
3641 */
3642 lck_mtx_lock_spin(&ifp->if_flt_lock);
3643 /* prevent filter list from changing in case we drop the lock */
3644 if_flt_monitor_busy(ifp);
3645 TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
3646 if (filter->filt_event != NULL) {
3647 lck_mtx_unlock(&ifp->if_flt_lock);
3648
3649 filter->filt_event(filter->filt_cookie, ifp,
3650 filter->filt_protocol, event);
3651
3652 lck_mtx_lock_spin(&ifp->if_flt_lock);
91447636 3653 }
6d2010ae
A
3654 }
3655 /* we're done with the filter list */
3656 if_flt_monitor_unbusy(ifp);
3657 lck_mtx_unlock(&ifp->if_flt_lock);
3658
3e170ce0
A
3659 /* Get an io ref count if the interface is attached */
3660 if (!ifnet_is_attached(ifp, 1))
3661 goto done;
3662
a1c7dba1
A
3663 /*
3664	 * An embedded tmp_list_entry in if_proto may still get
3665	 * overwritten by another thread after we give up the ifnet lock,
3666	 * so we avoid embedded list pointers here.
3667 */
6d2010ae 3668 ifnet_lock_shared(ifp);
a1c7dba1
A
3669 if_proto_count = dlil_ifp_proto_count(ifp);
3670 if (if_proto_count) {
6d2010ae 3671 int i;
a1c7dba1
A
3672 VERIFY(ifp->if_proto_hash != NULL);
3673 if (if_proto_count <= TMP_IF_PROTO_ARR_SIZE) {
3674 tmp_ifproto_arr = tmp_ifproto_stack_arr;
3675 } else {
3676 MALLOC(tmp_ifproto_arr, struct if_proto **,
3677 sizeof (*tmp_ifproto_arr) * if_proto_count,
3678 M_TEMP, M_ZERO);
3679 if (tmp_ifproto_arr == NULL) {
3680 ifnet_lock_done(ifp);
3681 goto cleanup;
3682 }
3683 tmp_malloc = true;
3684 }
6d2010ae
A
3685
3686 for (i = 0; i < PROTO_HASH_SLOTS; i++) {
6d2010ae
A
3687 SLIST_FOREACH(proto, &ifp->if_proto_hash[i],
3688 next_hash) {
a1c7dba1
A
3689 if_proto_ref(proto);
3690 tmp_ifproto_arr[tmp_ifproto_arr_idx] = proto;
3691 tmp_ifproto_arr_idx++;
91447636
A
3692 }
3693 }
a1c7dba1 3694 VERIFY(if_proto_count == tmp_ifproto_arr_idx);
91447636 3695 }
6d2010ae
A
3696 ifnet_lock_done(ifp);
3697
a1c7dba1
A
3698 for (tmp_ifproto_arr_idx = 0; tmp_ifproto_arr_idx < if_proto_count;
3699 tmp_ifproto_arr_idx++) {
3700 proto = tmp_ifproto_arr[tmp_ifproto_arr_idx];
3701 VERIFY(proto != NULL);
3702 proto_media_event eventp =
3703 (proto->proto_kpi == kProtoKPI_v1 ?
3704 proto->kpi.v1.event :
3705 proto->kpi.v2.event);
3706
3707 if (eventp != NULL) {
3708 eventp(ifp, proto->protocol_family,
3709 event);
3710 }
3711 if_proto_free(proto);
3712 }
3713
3714cleanup:
3715 if (tmp_malloc) {
3716 FREE(tmp_ifproto_arr, M_TEMP);
3717 }
3718
6d2010ae
A
3719 /* Pass the event to the interface */
3720 if (ifp->if_event != NULL)
3721 ifp->if_event(ifp, event);
3722
3723 /* Release the io ref count */
3724 ifnet_decr_iorefcnt(ifp);
6d2010ae
A
3725done:
3726 return (kev_post_msg(event));
1c79356b
A
3727}
3728
2d21ac55 3729errno_t
6d2010ae 3730ifnet_event(ifnet_t ifp, struct kern_event_msg *event)
1c79356b 3731{
91447636 3732 struct kev_msg kev_msg;
2d21ac55
A
3733 int result = 0;
3734
6d2010ae
A
3735 if (ifp == NULL || event == NULL)
3736 return (EINVAL);
1c79356b 3737
6d2010ae 3738 bzero(&kev_msg, sizeof (kev_msg));
91447636
A
3739 kev_msg.vendor_code = event->vendor_code;
3740 kev_msg.kev_class = event->kev_class;
3741 kev_msg.kev_subclass = event->kev_subclass;
3742 kev_msg.event_code = event->event_code;
3743 kev_msg.dv[0].data_ptr = &event->event_data[0];
3744 kev_msg.dv[0].data_length = event->total_size - KEV_MSG_HEADER_SIZE;
3745 kev_msg.dv[1].data_length = 0;
6d2010ae 3746
91447636 3747 result = dlil_event_internal(ifp, &kev_msg);
1c79356b 3748
6d2010ae 3749 return (result);
91447636 3750}
1c79356b 3751
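/*
 * Illustrative sketch (not part of dlil.c): a driver posting a link-up
 * event through ifnet_event(), which repackages it into the kev_msg
 * above.  KEV_VENDOR_APPLE, KEV_NETWORK_CLASS, KEV_DL_SUBCLASS and
 * KEV_DL_LINK_ON are existing kernel event constants; the function name
 * and the exact buffer layout shown are illustrative assumptions.
 */
#if 0
static void
example_post_link_up(ifnet_t ifp)
{
	char buf[sizeof (struct kern_event_msg) +
	    sizeof (struct net_event_data)];
	struct kern_event_msg *ev = (struct kern_event_msg *)(void *)buf;
	struct net_event_data *data =
	    (struct net_event_data *)(void *)ev->event_data;

	bzero(buf, sizeof (buf));
	ev->total_size = KEV_MSG_HEADER_SIZE + sizeof (*data);
	ev->vendor_code = KEV_VENDOR_APPLE;
	ev->kev_class = KEV_NETWORK_CLASS;
	ev->kev_subclass = KEV_DL_SUBCLASS;
	ev->event_code = KEV_DL_LINK_ON;
	data->if_family = ifp->if_family;
	data->if_unit = (u_int32_t)ifp->if_unit;
	(void) ifnet_event(ifp, ev);
}
#endif
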
2d21ac55
A
3752#if CONFIG_MACF_NET
3753#include <netinet/ip6.h>
3754#include <netinet/ip.h>
6d2010ae
A
3755static int
3756dlil_get_socket_type(struct mbuf **mp, int family, int raw)
2d21ac55
A
3757{
3758 struct mbuf *m;
3759 struct ip *ip;
3760 struct ip6_hdr *ip6;
3761 int type = SOCK_RAW;
3762
3763 if (!raw) {
3764 switch (family) {
3765 case PF_INET:
3766 m = m_pullup(*mp, sizeof(struct ip));
3767 if (m == NULL)
3768 break;
3769 *mp = m;
3770 ip = mtod(m, struct ip *);
3771 if (ip->ip_p == IPPROTO_TCP)
3772 type = SOCK_STREAM;
3773 else if (ip->ip_p == IPPROTO_UDP)
3774 type = SOCK_DGRAM;
3775 break;
3776 case PF_INET6:
3777 m = m_pullup(*mp, sizeof(struct ip6_hdr));
3778 if (m == NULL)
3779 break;
3780 *mp = m;
3781 ip6 = mtod(m, struct ip6_hdr *);
3782 if (ip6->ip6_nxt == IPPROTO_TCP)
3783 type = SOCK_STREAM;
3784 else if (ip6->ip6_nxt == IPPROTO_UDP)
3785 type = SOCK_DGRAM;
3786 break;
3787 }
3788 }
3789
3790 return (type);
3791}
3792#endif
3793
316670eb
A
3794/*
3795 * This is mostly called from the context of the DLIL input thread;
3796 * because of that there is no need for atomic operations.
3797 */
3798static __inline void
3799ifp_inc_traffic_class_in(struct ifnet *ifp, struct mbuf *m)
d41d1dae 3800{
d41d1dae
A
3801 if (!(m->m_flags & M_PKTHDR))
3802 return;
3803
316670eb
A
3804 switch (m_get_traffic_class(m)) {
3805 case MBUF_TC_BE:
3806 ifp->if_tc.ifi_ibepackets++;
3807 ifp->if_tc.ifi_ibebytes += m->m_pkthdr.len;
3808 break;
3809 case MBUF_TC_BK:
3810 ifp->if_tc.ifi_ibkpackets++;
3811 ifp->if_tc.ifi_ibkbytes += m->m_pkthdr.len;
3812 break;
3813 case MBUF_TC_VI:
3814 ifp->if_tc.ifi_ivipackets++;
3815 ifp->if_tc.ifi_ivibytes += m->m_pkthdr.len;
3816 break;
3817 case MBUF_TC_VO:
3818 ifp->if_tc.ifi_ivopackets++;
3819 ifp->if_tc.ifi_ivobytes += m->m_pkthdr.len;
3820 break;
3821 default:
3822 break;
3823 }
3824
3825 if (mbuf_is_traffic_class_privileged(m)) {
3826 ifp->if_tc.ifi_ipvpackets++;
3827 ifp->if_tc.ifi_ipvbytes += m->m_pkthdr.len;
3828 }
3829}
3830
3831/*
3832 * This is called from DLIL output, hence multiple threads could end
3833 * up modifying the statistics. We trade off accuracy for performance
3834 * by not using atomic operations here.
3835 */
3836static __inline void
3837ifp_inc_traffic_class_out(struct ifnet *ifp, struct mbuf *m)
3838{
3839 if (!(m->m_flags & M_PKTHDR))
3840 return;
3841
3842 switch (m_get_traffic_class(m)) {
3843 case MBUF_TC_BE:
3844 ifp->if_tc.ifi_obepackets++;
3845 ifp->if_tc.ifi_obebytes += m->m_pkthdr.len;
3846 break;
3847 case MBUF_TC_BK:
3848 ifp->if_tc.ifi_obkpackets++;
3849 ifp->if_tc.ifi_obkbytes += m->m_pkthdr.len;
3850 break;
3851 case MBUF_TC_VI:
3852 ifp->if_tc.ifi_ovipackets++;
3853 ifp->if_tc.ifi_ovibytes += m->m_pkthdr.len;
3854 break;
3855 case MBUF_TC_VO:
3856 ifp->if_tc.ifi_ovopackets++;
3857 ifp->if_tc.ifi_ovobytes += m->m_pkthdr.len;
3858 break;
3859 default:
3860 break;
3861 }
3862
3863 if (mbuf_is_traffic_class_privileged(m)) {
3864 ifp->if_tc.ifi_opvpackets++;
3865 ifp->if_tc.ifi_opvbytes += m->m_pkthdr.len;
d41d1dae 3866 }
1c79356b
A
3867}
3868
3e170ce0
A
3869static void
3870dlil_count_chain_len(mbuf_t m, struct chain_len_stats *cls)
3871{
3872 mbuf_t n = m;
3873 int chainlen = 0;
3874
3875 while (n != NULL) {
3876 chainlen++;
3877 n = n->m_next;
3878 }
3879 switch (chainlen) {
3880 case 0:
3881 break;
3882 case 1:
3883 atomic_add_64(&cls->cls_one, 1);
3884 break;
3885 case 2:
3886 atomic_add_64(&cls->cls_two, 1);
3887 break;
3888 case 3:
3889 atomic_add_64(&cls->cls_three, 1);
3890 break;
3891 case 4:
3892 atomic_add_64(&cls->cls_four, 1);
3893 break;
3894 case 5:
3895 default:
3896 atomic_add_64(&cls->cls_five_or_more, 1);
3897 break;
3898 }
3899}
3900
1c79356b 3901/*
91447636
A
3902 * dlil_output
3903 *
3904 * Caller should have a lock on the protocol domain if the protocol
3905 * doesn't support finer grained locking. In most cases, the lock
3906 * will be held from the socket layer and won't be released until
3907 * we return back to the socket layer.
3908 *
3909 * This does mean that we must take a protocol lock before we take
3910 * an interface lock if we're going to take both. This makes sense
3911 * because a protocol is likely to interact with an ifp while it
3912 * is under the protocol lock.
316670eb
A
3913 *
3914 * An advisory code will be returned if adv is not null. This
39236c6e 3915 * can be used to provide feedback about interface queues to the
316670eb 3916 * application.
1c79356b 3917 */
6d2010ae
A
3918errno_t
3919dlil_output(ifnet_t ifp, protocol_family_t proto_family, mbuf_t packetlist,
316670eb 3920 void *route, const struct sockaddr *dest, int raw, struct flowadv *adv)
6d2010ae
A
3921{
3922 char *frame_type = NULL;
3923 char *dst_linkaddr = NULL;
3924 int retval = 0;
3925 char frame_type_buffer[MAX_FRAME_TYPE_SIZE * 4];
3926 char dst_linkaddr_buffer[MAX_LINKADDR * 4];
3927 struct if_proto *proto = NULL;
2d21ac55
A
3928 mbuf_t m;
3929 mbuf_t send_head = NULL;
3930 mbuf_t *send_tail = &send_head;
6d2010ae 3931 int iorefcnt = 0;
316670eb 3932 u_int32_t pre = 0, post = 0;
39236c6e
A
3933 u_int32_t fpkts = 0, fbytes = 0;
3934 int32_t flen = 0;
6d2010ae 3935
39236c6e 3936 KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_START, 0, 0, 0, 0, 0);
6d2010ae
A
3937
3938 /* Get an io refcnt if the interface is attached to prevent ifnet_detach
3939 * from happening while this operation is in progress */
3940 if (!ifnet_is_attached(ifp, 1)) {
3941 retval = ENXIO;
3942 goto cleanup;
3943 }
3944 iorefcnt = 1;
3945
3946 /* update the driver's multicast filter, if needed */
3947 if (ifp->if_updatemcasts > 0 && if_mcasts_update(ifp) == 0)
3948 ifp->if_updatemcasts = 0;
3949
3950 frame_type = frame_type_buffer;
3951 dst_linkaddr = dst_linkaddr_buffer;
3952
91447636 3953 if (raw == 0) {
6d2010ae
A
3954 ifnet_lock_shared(ifp);
3955 /* callee holds a proto refcnt upon success */
91447636
A
3956 proto = find_attached_proto(ifp, proto_family);
3957 if (proto == NULL) {
6d2010ae 3958 ifnet_lock_done(ifp);
91447636
A
3959 retval = ENXIO;
3960 goto cleanup;
3961 }
6d2010ae 3962 ifnet_lock_done(ifp);
2d21ac55 3963 }
6d2010ae 3964
2d21ac55
A
3965preout_again:
3966 if (packetlist == NULL)
3967 goto cleanup;
6d2010ae 3968
2d21ac55
A
3969 m = packetlist;
3970 packetlist = packetlist->m_nextpkt;
3971 m->m_nextpkt = NULL;
6d2010ae 3972
2d21ac55 3973 if (raw == 0) {
6d2010ae
A
3974 proto_media_preout preoutp = (proto->proto_kpi == kProtoKPI_v1 ?
3975 proto->kpi.v1.pre_output : proto->kpi.v2.pre_output);
91447636 3976 retval = 0;
6d2010ae
A
3977 if (preoutp != NULL) {
3978 retval = preoutp(ifp, proto_family, &m, dest, route,
3979 frame_type, dst_linkaddr);
3980
3981 if (retval != 0) {
3982 if (retval == EJUSTRETURN)
3983 goto preout_again;
3984 m_freem(m);
3985 goto cleanup;
91447636 3986 }
1c79356b 3987 }
1c79356b 3988 }
2d21ac55
A
3989
3990#if CONFIG_MACF_NET
3991 retval = mac_ifnet_check_transmit(ifp, m, proto_family,
3992 dlil_get_socket_type(&m, proto_family, raw));
39236c6e 3993 if (retval != 0) {
2d21ac55
A
3994 m_freem(m);
3995 goto cleanup;
3996 }
3997#endif
3998
3999 do {
6d2010ae 4000#if CONFIG_DTRACE
316670eb 4001 if (!raw && proto_family == PF_INET) {
6d2010ae
A
4002 struct ip *ip = mtod(m, struct ip*);
4003 DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL,
4004 struct ip *, ip, struct ifnet *, ifp,
4005 struct ip *, ip, struct ip6_hdr *, NULL);
4006
316670eb 4007 } else if (!raw && proto_family == PF_INET6) {
6d2010ae
A
4008 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr*);
4009 DTRACE_IP6(send, struct mbuf*, m, struct inpcb *, NULL,
4010 struct ip6_hdr *, ip6, struct ifnet*, ifp,
4011 struct ip*, NULL, struct ip6_hdr *, ip6);
4012 }
4013#endif /* CONFIG_DTRACE */
4014
39236c6e 4015 if (raw == 0 && ifp->if_framer != NULL) {
7e4a7d39
A
4016 int rcvif_set = 0;
4017
4018 /*
4019 * If this is a broadcast packet that needs to be
4020 * looped back into the system, set the inbound ifp
4021 * to that of the outbound ifp. This will allow
4022 * us to determine that it is a legitimate packet
4023 * for the system. Only set the ifp if it's not
4024 * already set, just to be safe.
4025 */
4026 if ((m->m_flags & (M_BCAST | M_LOOP)) &&
4027 m->m_pkthdr.rcvif == NULL) {
4028 m->m_pkthdr.rcvif = ifp;
4029 rcvif_set = 1;
4030 }
4031
6d2010ae 4032 retval = ifp->if_framer(ifp, &m, dest, dst_linkaddr,
39236c6e
A
4033 frame_type, &pre, &post);
4034 if (retval != 0) {
6d2010ae 4035 if (retval != EJUSTRETURN)
2d21ac55 4036 m_freem(m);
2d21ac55 4037 goto next;
91447636 4038 }
7e4a7d39 4039
39236c6e
A
4040 /*
4041 * For partial checksum offload, adjust the start
4042 * and stuff offsets based on the prepended header.
4043 */
4044 if ((m->m_pkthdr.csum_flags &
4045 (CSUM_DATA_VALID | CSUM_PARTIAL)) ==
4046 (CSUM_DATA_VALID | CSUM_PARTIAL)) {
4047 m->m_pkthdr.csum_tx_stuff += pre;
4048 m->m_pkthdr.csum_tx_start += pre;
4049 }
4050
4051 if (hwcksum_dbg != 0 && !(ifp->if_flags & IFF_LOOPBACK))
4052 dlil_output_cksum_dbg(ifp, m, pre,
4053 proto_family);
4054
7e4a7d39
A
4055 /*
4056 * Clear the ifp if it was set above, and to be
4057 * safe, only if it is still the same as the
4058 * outbound ifp we have in context. If it was
4059 * looped back, then a copy of it was sent to the
4060 * loopback interface with the rcvif set, and we
4061 * are clearing the one that will go down to the
4062 * layer below.
4063 */
4064 if (rcvif_set && m->m_pkthdr.rcvif == ifp)
4065 m->m_pkthdr.rcvif = NULL;
91447636 4066 }
6d2010ae
A
4067
4068 /*
2d21ac55
A
4069 * Let interface filters (if any) do their thing ...
4070 */
4071 /* Do not pass VLAN tagged packets to filters PR-3586856 */
4072 if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) == 0) {
6d2010ae
A
4073 retval = dlil_interface_filters_output(ifp,
4074 &m, proto_family);
4075 if (retval != 0) {
4076 if (retval != EJUSTRETURN)
4077 m_freem(m);
4078 goto next;
1c79356b 4079 }
1c79356b 4080 }
b7266188 4081 /*
39236c6e
A
4082		 * Strip away the M_PROTO1 bit prior to sending the packet
4083		 * to the driver, as this field may be used by the driver
b7266188
A
4084 */
4085 m->m_flags &= ~M_PROTO1;
4086
2d21ac55
A
4087 /*
4088 * If the underlying interface is not capable of handling a
4089 * packet whose data portion spans across physically disjoint
4090 * pages, we need to "normalize" the packet so that we pass
4091 * down a chain of mbufs where each mbuf points to a span that
4092 * resides in the system page boundary. If the packet does
4093 * not cross page(s), the following is a no-op.
4094 */
4095 if (!(ifp->if_hwassist & IFNET_MULTIPAGES)) {
4096 if ((m = m_normalize(m)) == NULL)
4097 goto next;
4098 }
4099
6d2010ae
A
4100 /*
4101 * If this is a TSO packet, make sure the interface still
4102		 * advertises TSO capability.
b0d623f7 4103 */
39236c6e 4104 if (TSO_IPV4_NOTOK(ifp, m) || TSO_IPV6_NOTOK(ifp, m)) {
6d2010ae
A
4105 retval = EMSGSIZE;
4106 m_freem(m);
4107 goto cleanup;
b0d623f7
A
4108 }
4109
39236c6e
A
4110 /*
4111 * If the packet service class is not background,
4112 * update the timestamp to indicate recent activity
4113 * on a foreground socket.
4114 */
3e170ce0
A
4115 if ((m->m_pkthdr.pkt_flags & PKTF_FLOW_ID) &&
4116 m->m_pkthdr.pkt_flowsrc == FLOWSRC_INPCB) {
4117 if (!(m->m_pkthdr.pkt_flags & PKTF_SO_BACKGROUND))
4118 ifp->if_fg_sendts = net_uptime();
4119
4120 if (m->m_pkthdr.pkt_flags & PKTF_SO_REALTIME)
4121 ifp->if_rt_sendts = net_uptime();
4122 }
39236c6e
A
4123
4124 ifp_inc_traffic_class_out(ifp, m);
4125 pktap_output(ifp, proto_family, m, pre, post);
6d2010ae 4126
3e170ce0
A
4127 /*
4128 * Count the number of elements in the mbuf chain
4129 */
4130 if (tx_chain_len_count) {
4131 dlil_count_chain_len(m, &tx_chain_len_stats);
4132 }
4133
2d21ac55
A
4134 /*
4135 * Finally, call the driver.
4136 */
3e170ce0 4137 if (ifp->if_eflags & (IFEF_SENDLIST | IFEF_ENQUEUE_MULTI)) {
39236c6e
A
4138 if (m->m_pkthdr.pkt_flags & PKTF_FORWARDED) {
4139 flen += (m_pktlen(m) - (pre + post));
4140 m->m_pkthdr.pkt_flags &= ~PKTF_FORWARDED;
4141 }
2d21ac55
A
4142 *send_tail = m;
4143 send_tail = &m->m_nextpkt;
6d2010ae 4144 } else {
39236c6e
A
4145 if (m->m_pkthdr.pkt_flags & PKTF_FORWARDED) {
4146 flen = (m_pktlen(m) - (pre + post));
4147 m->m_pkthdr.pkt_flags &= ~PKTF_FORWARDED;
4148 } else {
4149 flen = 0;
4150 }
6d2010ae 4151 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START,
39236c6e 4152 0, 0, 0, 0, 0);
316670eb
A
4153 retval = (*ifp->if_output)(ifp, m);
4154 if (retval == EQFULL || retval == EQSUSPENDED) {
4155 if (adv != NULL && adv->code == FADV_SUCCESS) {
4156 adv->code = (retval == EQFULL ?
4157 FADV_FLOW_CONTROLLED :
4158 FADV_SUSPENDED);
4159 }
4160 retval = 0;
4161 }
39236c6e
A
4162 if (retval == 0 && flen > 0) {
4163 fbytes += flen;
4164 fpkts++;
4165 }
4166 if (retval != 0 && dlil_verbose) {
4167 printf("%s: output error on %s retval = %d\n",
4168 __func__, if_name(ifp),
6d2010ae 4169 retval);
2d21ac55 4170 }
6d2010ae 4171 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END,
39236c6e 4172 0, 0, 0, 0, 0);
2d21ac55 4173 }
39236c6e 4174 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
2d21ac55
A
4175
4176next:
4177 m = packetlist;
39236c6e 4178 if (m != NULL) {
2d21ac55
A
4179 packetlist = packetlist->m_nextpkt;
4180 m->m_nextpkt = NULL;
4181 }
39236c6e 4182 } while (m != NULL);
d41d1dae 4183
39236c6e 4184 if (send_head != NULL) {
39236c6e
A
4185 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START,
4186 0, 0, 0, 0, 0);
3e170ce0
A
4187 if (ifp->if_eflags & IFEF_SENDLIST) {
4188 retval = (*ifp->if_output)(ifp, send_head);
4189 if (retval == EQFULL || retval == EQSUSPENDED) {
4190 if (adv != NULL) {
4191 adv->code = (retval == EQFULL ?
4192 FADV_FLOW_CONTROLLED :
4193 FADV_SUSPENDED);
4194 }
4195 retval = 0;
4196 }
4197 if (retval == 0 && flen > 0) {
4198 fbytes += flen;
4199 fpkts++;
4200 }
4201 if (retval != 0 && dlil_verbose) {
4202 printf("%s: output error on %s retval = %d\n",
4203 __func__, if_name(ifp), retval);
4204 }
4205 } else {
4206 struct mbuf *send_m;
4207 int enq_cnt = 0;
4208 VERIFY(ifp->if_eflags & IFEF_ENQUEUE_MULTI);
4209 while (send_head != NULL) {
4210 send_m = send_head;
4211 send_head = send_m->m_nextpkt;
4212 send_m->m_nextpkt = NULL;
4213 retval = (*ifp->if_output)(ifp, send_m);
4214 if (retval == EQFULL || retval == EQSUSPENDED) {
4215 if (adv != NULL) {
4216 adv->code = (retval == EQFULL ?
4217 FADV_FLOW_CONTROLLED :
4218 FADV_SUSPENDED);
4219 }
4220 retval = 0;
4221 }
4222 if (retval == 0) {
4223 enq_cnt++;
4224 if (flen > 0)
4225 fpkts++;
4226 }
4227 if (retval != 0 && dlil_verbose) {
4228 printf("%s: output error on %s retval = %d\n",
4229 __func__, if_name(ifp), retval);
4230 }
4231 }
4232 if (enq_cnt > 0) {
4233 fbytes += flen;
4234 ifnet_start(ifp);
316670eb 4235 }
39236c6e
A
4236 }
4237 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
1c79356b 4238 }
6d2010ae 4239
39236c6e 4240 KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
1c79356b 4241
91447636 4242cleanup:
39236c6e
A
4243 if (fbytes > 0)
4244 ifp->if_fbytes += fbytes;
4245 if (fpkts > 0)
4246 ifp->if_fpackets += fpkts;
6d2010ae
A
4247 if (proto != NULL)
4248 if_proto_free(proto);
4249 if (packetlist) /* if any packets are left, clean up */
2d21ac55 4250 mbuf_freem_list(packetlist);
91447636
A
4251 if (retval == EJUSTRETURN)
4252 retval = 0;
6d2010ae
A
4253 if (iorefcnt == 1)
4254 ifnet_decr_iorefcnt(ifp);
4255
4256 return (retval);
1c79356b
A
4257}
4258
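/*
 * Illustrative sketch (not part of dlil.c): how an in-kernel caller can
 * consume the flow advisory that dlil_output() fills in.  FADV_SUCCESS,
 * FADV_FLOW_CONTROLLED and FADV_SUSPENDED are the codes assigned above;
 * the function name is hypothetical.
 */
#if 0
static errno_t
example_send_with_advisory(ifnet_t ifp, protocol_family_t proto, mbuf_t m,
    const struct sockaddr *dest)
{
	struct flowadv adv;
	errno_t err;

	adv.code = FADV_SUCCESS;
	err = dlil_output(ifp, proto, m, NULL, dest, 0, &adv);
	if (err == 0 && adv.code != FADV_SUCCESS) {
		/*
		 * The interface queue is flow controlled
		 * (FADV_FLOW_CONTROLLED) or suspended (FADV_SUSPENDED);
		 * a well-behaved flow would pause transmission until
		 * it is advised to resume.
		 */
	}
	return (err);
}
#endif
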
2d21ac55 4259errno_t
6d2010ae
A
4260ifnet_ioctl(ifnet_t ifp, protocol_family_t proto_fam, u_long ioctl_code,
4261 void *ioctl_arg)
4262{
4263 struct ifnet_filter *filter;
4264 int retval = EOPNOTSUPP;
4265 int result = 0;
4266
2d21ac55 4267 if (ifp == NULL || ioctl_code == 0)
6d2010ae
A
4268 return (EINVAL);
4269
4270 /* Get an io ref count if the interface is attached */
4271 if (!ifnet_is_attached(ifp, 1))
4272 return (EOPNOTSUPP);
4273
91447636
A
4274 /* Run the interface filters first.
4275 * We want to run all filters before calling the protocol,
4276 * interface family, or interface.
4277 */
6d2010ae
A
4278 lck_mtx_lock_spin(&ifp->if_flt_lock);
4279 /* prevent filter list from changing in case we drop the lock */
4280 if_flt_monitor_busy(ifp);
91447636 4281 TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
6d2010ae
A
4282 if (filter->filt_ioctl != NULL && (filter->filt_protocol == 0 ||
4283 filter->filt_protocol == proto_fam)) {
4284 lck_mtx_unlock(&ifp->if_flt_lock);
4285
4286 result = filter->filt_ioctl(filter->filt_cookie, ifp,
4287 proto_fam, ioctl_code, ioctl_arg);
4288
4289 lck_mtx_lock_spin(&ifp->if_flt_lock);
4290
91447636
A
4291 /* Only update retval if no one has handled the ioctl */
4292 if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
4293 if (result == ENOTSUP)
4294 result = EOPNOTSUPP;
4295 retval = result;
6d2010ae
A
4296 if (retval != 0 && retval != EOPNOTSUPP) {
4297 /* we're done with the filter list */
4298 if_flt_monitor_unbusy(ifp);
4299 lck_mtx_unlock(&ifp->if_flt_lock);
91447636
A
4300 goto cleanup;
4301 }
4302 }
4303 }
4304 }
6d2010ae
A
4305 /* we're done with the filter list */
4306 if_flt_monitor_unbusy(ifp);
4307 lck_mtx_unlock(&ifp->if_flt_lock);
4308
91447636 4309 /* Allow the protocol to handle the ioctl */
6d2010ae
A
4310 if (proto_fam != 0) {
4311 struct if_proto *proto;
4312
4313 /* callee holds a proto refcnt upon success */
4314 ifnet_lock_shared(ifp);
4315 proto = find_attached_proto(ifp, proto_fam);
4316 ifnet_lock_done(ifp);
4317 if (proto != NULL) {
4318 proto_media_ioctl ioctlp =
4319 (proto->proto_kpi == kProtoKPI_v1 ?
4320 proto->kpi.v1.ioctl : proto->kpi.v2.ioctl);
91447636 4321 result = EOPNOTSUPP;
6d2010ae
A
4322 if (ioctlp != NULL)
4323 result = ioctlp(ifp, proto_fam, ioctl_code,
4324 ioctl_arg);
4325 if_proto_free(proto);
4326
91447636
A
4327 /* Only update retval if no one has handled the ioctl */
4328 if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
4329 if (result == ENOTSUP)
4330 result = EOPNOTSUPP;
4331 retval = result;
6d2010ae 4332 if (retval && retval != EOPNOTSUPP)
91447636 4333 goto cleanup;
91447636
A
4334 }
4335 }
4336 }
6d2010ae 4337
91447636 4338 /* retval is either 0 or EOPNOTSUPP */
6d2010ae 4339
91447636
A
4340 /*
4341 * Let the interface handle this ioctl.
4342 * If it returns EOPNOTSUPP, ignore that, we may have
4343 * already handled this in the protocol or family.
4344 */
6d2010ae 4345 if (ifp->if_ioctl)
91447636 4346 result = (*ifp->if_ioctl)(ifp, ioctl_code, ioctl_arg);
6d2010ae 4347
91447636
A
4348 /* Only update retval if no one has handled the ioctl */
4349 if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
4350 if (result == ENOTSUP)
4351 result = EOPNOTSUPP;
4352 retval = result;
4353 if (retval && retval != EOPNOTSUPP) {
4354 goto cleanup;
4355 }
4356 }
1c79356b 4357
6d2010ae 4358cleanup:
91447636
A
4359 if (retval == EJUSTRETURN)
4360 retval = 0;
6d2010ae
A
4361
4362 ifnet_decr_iorefcnt(ifp);
4363
4364 return (retval);
91447636 4365}
1c79356b 4366
91447636 4367__private_extern__ errno_t
6d2010ae 4368dlil_set_bpf_tap(ifnet_t ifp, bpf_tap_mode mode, bpf_packet_func callback)
91447636
A
4369{
4370 errno_t error = 0;
6d2010ae
A
4371
4372
4373 if (ifp->if_set_bpf_tap) {
4374 /* Get an io reference on the interface if it is attached */
4375 if (!ifnet_is_attached(ifp, 1))
4376 return ENXIO;
91447636 4377 error = ifp->if_set_bpf_tap(ifp, mode, callback);
6d2010ae
A
4378 ifnet_decr_iorefcnt(ifp);
4379 }
4380 return (error);
1c79356b
A
4381}
4382
2d21ac55 4383errno_t
6d2010ae
A
4384dlil_resolve_multi(struct ifnet *ifp, const struct sockaddr *proto_addr,
4385 struct sockaddr *ll_addr, size_t ll_len)
1c79356b 4386{
91447636
A
4387 errno_t result = EOPNOTSUPP;
4388 struct if_proto *proto;
4389 const struct sockaddr *verify;
2d21ac55 4390 proto_media_resolve_multi resolvep;
6d2010ae
A
4391
4392 if (!ifnet_is_attached(ifp, 1))
4393 return result;
4394
91447636 4395 bzero(ll_addr, ll_len);
6d2010ae
A
4396
4397 /* Call the protocol first; callee holds a proto refcnt upon success */
4398 ifnet_lock_shared(ifp);
91447636 4399 proto = find_attached_proto(ifp, proto_addr->sa_family);
6d2010ae 4400 ifnet_lock_done(ifp);
2d21ac55 4401 if (proto != NULL) {
6d2010ae
A
4402 resolvep = (proto->proto_kpi == kProtoKPI_v1 ?
4403 proto->kpi.v1.resolve_multi : proto->kpi.v2.resolve_multi);
2d21ac55 4404 if (resolvep != NULL)
6d2010ae 4405 result = resolvep(ifp, proto_addr,
316670eb 4406 (struct sockaddr_dl*)(void *)ll_addr, ll_len);
6d2010ae 4407 if_proto_free(proto);
91447636 4408 }
6d2010ae 4409
91447636
A
4410 /* Let the interface verify the multicast address */
4411 if ((result == EOPNOTSUPP || result == 0) && ifp->if_check_multi) {
4412 if (result == 0)
4413 verify = ll_addr;
4414 else
4415 verify = proto_addr;
4416 result = ifp->if_check_multi(ifp, verify);
4417 }
6d2010ae
A
4418
4419 ifnet_decr_iorefcnt(ifp);
4420 return (result);
91447636 4421}
1c79356b 4422
91447636 4423__private_extern__ errno_t
6d2010ae
A
4424dlil_send_arp_internal(ifnet_t ifp, u_short arpop,
4425 const struct sockaddr_dl* sender_hw, const struct sockaddr* sender_proto,
4426 const struct sockaddr_dl* target_hw, const struct sockaddr* target_proto)
91447636
A
4427{
4428 struct if_proto *proto;
4429 errno_t result = 0;
6d2010ae
A
4430
4431 /* callee holds a proto refcnt upon success */
4432 ifnet_lock_shared(ifp);
91447636 4433 proto = find_attached_proto(ifp, target_proto->sa_family);
6d2010ae 4434 ifnet_lock_done(ifp);
2d21ac55 4435 if (proto == NULL) {
91447636 4436 result = ENOTSUP;
6d2010ae 4437 } else {
2d21ac55 4438 proto_media_send_arp arpp;
6d2010ae
A
4439 arpp = (proto->proto_kpi == kProtoKPI_v1 ?
4440 proto->kpi.v1.send_arp : proto->kpi.v2.send_arp);
39236c6e 4441 if (arpp == NULL) {
2d21ac55 4442 result = ENOTSUP;
39236c6e
A
4443 } else {
4444 switch (arpop) {
4445 case ARPOP_REQUEST:
4446 arpstat.txrequests++;
4447 if (target_hw != NULL)
4448 arpstat.txurequests++;
4449 break;
4450 case ARPOP_REPLY:
4451 arpstat.txreplies++;
4452 break;
4453 }
6d2010ae
A
4454 result = arpp(ifp, arpop, sender_hw, sender_proto,
4455 target_hw, target_proto);
39236c6e 4456 }
6d2010ae 4457 if_proto_free(proto);
91447636 4458 }
6d2010ae
A
4459
4460 return (result);
91447636 4461}
1c79356b 4462
39236c6e
A
4463struct net_thread_marks { };
4464static const struct net_thread_marks net_thread_marks_base = { };
4465
4466__private_extern__ const net_thread_marks_t net_thread_marks_none =
4467 &net_thread_marks_base;
4468
4469__private_extern__ net_thread_marks_t
4470net_thread_marks_push(u_int32_t push)
316670eb 4471{
39236c6e
A
4472 static const char *const base = (const void*)&net_thread_marks_base;
4473 u_int32_t pop = 0;
4474
4475 if (push != 0) {
4476 struct uthread *uth = get_bsdthread_info(current_thread());
4477
4478 pop = push & ~uth->uu_network_marks;
4479 if (pop != 0)
4480 uth->uu_network_marks |= pop;
4481 }
4482
4483 return ((net_thread_marks_t)&base[pop]);
316670eb
A
4484}
4485
39236c6e
A
4486__private_extern__ net_thread_marks_t
4487net_thread_unmarks_push(u_int32_t unpush)
316670eb 4488{
39236c6e
A
4489 static const char *const base = (const void*)&net_thread_marks_base;
4490 u_int32_t unpop = 0;
4491
4492 if (unpush != 0) {
4493 struct uthread *uth = get_bsdthread_info(current_thread());
316670eb 4494
39236c6e
A
4495 unpop = unpush & uth->uu_network_marks;
4496 if (unpop != 0)
4497 uth->uu_network_marks &= ~unpop;
4498 }
4499
4500 return ((net_thread_marks_t)&base[unpop]);
316670eb
A
4501}
4502
4503__private_extern__ void
39236c6e 4504net_thread_marks_pop(net_thread_marks_t popx)
316670eb 4505{
39236c6e 4506 static const char *const base = (const void*)&net_thread_marks_base;
3e170ce0 4507 const ptrdiff_t pop = (const char *)popx - (const char *)base;
316670eb 4508
39236c6e
A
4509 if (pop != 0) {
4510 static const ptrdiff_t ones = (ptrdiff_t)(u_int32_t)~0U;
4511 struct uthread *uth = get_bsdthread_info(current_thread());
4512
4513 VERIFY((pop & ones) == pop);
4514 VERIFY((ptrdiff_t)(uth->uu_network_marks & pop) == pop);
4515 uth->uu_network_marks &= ~pop;
4516 }
4517}
4518
4519__private_extern__ void
4520net_thread_unmarks_pop(net_thread_marks_t unpopx)
4521{
4522 static const char *const base = (const void*)&net_thread_marks_base;
3e170ce0 4523 ptrdiff_t unpop = (const char *)unpopx - (const char *)base;
39236c6e
A
4524
4525 if (unpop != 0) {
4526 static const ptrdiff_t ones = (ptrdiff_t)(u_int32_t)~0U;
4527 struct uthread *uth = get_bsdthread_info(current_thread());
4528
4529 VERIFY((unpop & ones) == unpop);
4530 VERIFY((ptrdiff_t)(uth->uu_network_marks & unpop) == 0);
4531 uth->uu_network_marks |= unpop;
4532 }
4533}
4534
4535__private_extern__ u_int32_t
4536net_thread_is_marked(u_int32_t check)
4537{
4538 if (check != 0) {
4539 struct uthread *uth = get_bsdthread_info(current_thread());
4540 return (uth->uu_network_marks & check);
4541 }
4542 else
4543 return (0);
4544}
4545
4546__private_extern__ u_int32_t
4547net_thread_is_unmarked(u_int32_t check)
4548{
4549 if (check != 0) {
4550 struct uthread *uth = get_bsdthread_info(current_thread());
4551 return (~uth->uu_network_marks & check);
4552 }
4553 else
4554 return (0);
316670eb
A
4555}
4556
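/*
 * Illustrative sketch (not part of dlil.c): the push/pop discipline for
 * the thread marks above.  Pops must mirror pushes; only the bits that
 * this push actually set get cleared by the matching pop.  The mark bit
 * NET_THREAD_EXAMPLE is hypothetical; real callers use the NET_THREAD_*
 * bits defined for uu_network_marks.
 */
#if 0
static void
example_marked_section(void)
{
	net_thread_marks_t marks;

	marks = net_thread_marks_push(NET_THREAD_EXAMPLE);
	/*
	 * ... potentially re-entrant work; net_thread_is_marked()
	 * reports the bit as set here, letting nested code detect
	 * that it is running inside this section ...
	 */
	net_thread_marks_pop(marks);
}
#endif
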
2d21ac55
A
4557static __inline__ int
4558_is_announcement(const struct sockaddr_in * sender_sin,
6d2010ae 4559 const struct sockaddr_in * target_sin)
2d21ac55
A
4560{
4561 if (sender_sin == NULL) {
6d2010ae 4562 return (FALSE);
2d21ac55
A
4563 }
4564 return (sender_sin->sin_addr.s_addr == target_sin->sin_addr.s_addr);
4565}
4566
91447636 4567__private_extern__ errno_t
6d2010ae
A
4568dlil_send_arp(ifnet_t ifp, u_short arpop, const struct sockaddr_dl* sender_hw,
4569 const struct sockaddr* sender_proto, const struct sockaddr_dl* target_hw,
316670eb 4570 const struct sockaddr* target_proto0, u_int32_t rtflags)
91447636
A
4571{
4572 errno_t result = 0;
2d21ac55
A
4573 const struct sockaddr_in * sender_sin;
4574 const struct sockaddr_in * target_sin;
316670eb
A
4575 struct sockaddr_inarp target_proto_sinarp;
4576 struct sockaddr *target_proto = (void *)(uintptr_t)target_proto0;
6d2010ae
A
4577
4578 if (target_proto == NULL || (sender_proto != NULL &&
4579 sender_proto->sa_family != target_proto->sa_family))
4580 return (EINVAL);
4581
316670eb
A
4582 /*
4583 * If the target is a (default) router, provide that
4584 * information to the send_arp callback routine.
4585 */
4586 if (rtflags & RTF_ROUTER) {
4587 bcopy(target_proto, &target_proto_sinarp,
4588 sizeof (struct sockaddr_in));
4589 target_proto_sinarp.sin_other |= SIN_ROUTER;
4590 target_proto = (struct sockaddr *)&target_proto_sinarp;
4591 }
4592
91447636
A
4593 /*
4594 * If this is an ARP request and the target IP is IPv4LL,
2d21ac55
A
4595 * send the request on all interfaces. The exception is
4596 * an announcement, which must only appear on the specific
4597 * interface.
91447636 4598 */
316670eb
A
4599 sender_sin = (struct sockaddr_in *)(void *)(uintptr_t)sender_proto;
4600 target_sin = (struct sockaddr_in *)(void *)(uintptr_t)target_proto;
6d2010ae
A
4601 if (target_proto->sa_family == AF_INET &&
4602 IN_LINKLOCAL(ntohl(target_sin->sin_addr.s_addr)) &&
4603 ipv4_ll_arp_aware != 0 && arpop == ARPOP_REQUEST &&
4604 !_is_announcement(target_sin, sender_sin)) {
91447636
A
4605 ifnet_t *ifp_list;
4606 u_int32_t count;
4607 u_int32_t ifp_on;
6d2010ae 4608
91447636
A
4609 result = ENOTSUP;
4610
4611 if (ifnet_list_get(IFNET_FAMILY_ANY, &ifp_list, &count) == 0) {
4612 for (ifp_on = 0; ifp_on < count; ifp_on++) {
6d2010ae
A
4613 errno_t new_result;
4614 ifaddr_t source_hw = NULL;
4615 ifaddr_t source_ip = NULL;
4616 struct sockaddr_in source_ip_copy;
4617 struct ifnet *cur_ifp = ifp_list[ifp_on];
4618
91447636 4619 /*
6d2010ae
A
4620 * Only arp on interfaces marked for IPv4LL
4621 * ARPing. This may mean that we don't ARP on
4622 * the interface the subnet route points to.
91447636 4623 */
6d2010ae 4624 if (!(cur_ifp->if_eflags & IFEF_ARPLL))
91447636 4625 continue;
b0d623f7 4626
91447636 4627 /* Find the source IP address */
6d2010ae
A
4628 ifnet_lock_shared(cur_ifp);
4629 source_hw = cur_ifp->if_lladdr;
4630 TAILQ_FOREACH(source_ip, &cur_ifp->if_addrhead,
4631 ifa_link) {
4632 IFA_LOCK(source_ip);
4633 if (source_ip->ifa_addr != NULL &&
4634 source_ip->ifa_addr->sa_family ==
4635 AF_INET) {
4636 /* Copy the source IP address */
4637 source_ip_copy =
4638 *(struct sockaddr_in *)
316670eb 4639 (void *)source_ip->ifa_addr;
6d2010ae 4640 IFA_UNLOCK(source_ip);
91447636
A
4641 break;
4642 }
6d2010ae 4643 IFA_UNLOCK(source_ip);
91447636 4644 }
6d2010ae 4645
91447636
A
4646 /* No IP Source, don't arp */
4647 if (source_ip == NULL) {
6d2010ae 4648 ifnet_lock_done(cur_ifp);
91447636
A
4649 continue;
4650 }
6d2010ae
A
4651
4652 IFA_ADDREF(source_hw);
4653 ifnet_lock_done(cur_ifp);
4654
91447636 4655 /* Send the ARP */
6d2010ae 4656 new_result = dlil_send_arp_internal(cur_ifp,
316670eb
A
4657 arpop, (struct sockaddr_dl *)(void *)
4658 source_hw->ifa_addr,
6d2010ae
A
4659 (struct sockaddr *)&source_ip_copy, NULL,
4660 target_proto);
b0d623f7 4661
6d2010ae 4662 IFA_REMREF(source_hw);
91447636
A
4663 if (result == ENOTSUP) {
4664 result = new_result;
4665 }
4666 }
6d2010ae 4667 ifnet_list_free(ifp_list);
91447636 4668 }
6d2010ae
A
4669 } else {
4670 result = dlil_send_arp_internal(ifp, arpop, sender_hw,
4671 sender_proto, target_hw, target_proto);
91447636 4672 }
6d2010ae
A
4673
4674 return (result);
91447636 4675}
1c79356b 4676
6d2010ae
A
4677/*
4678 * Caller must hold ifnet head lock.
4679 */
4680static int
4681ifnet_lookup(struct ifnet *ifp)
91447636 4682{
6d2010ae
A
4683 struct ifnet *_ifp;
4684
4685 lck_rw_assert(&ifnet_head_lock, LCK_RW_ASSERT_HELD);
4686 TAILQ_FOREACH(_ifp, &ifnet_head, if_link) {
4687 if (_ifp == ifp)
91447636 4688 break;
6d2010ae
A
4689 }
4690 return (_ifp != NULL);
91447636 4691}
6d2010ae
A
4692/*
4693 * Caller has to pass a non-zero refio argument to get an
4694 * I/O reference count. This prevents ifnet_detach from
4695 * completing while there are outstanding I/O references.
91447636 4696 */
6d2010ae
A
4697int
4698ifnet_is_attached(struct ifnet *ifp, int refio)
4699{
4700 int ret;
4701
4702 lck_mtx_lock_spin(&ifp->if_ref_lock);
4703 if ((ret = ((ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING)) ==
4704 IFRF_ATTACHED))) {
4705 if (refio > 0)
4706 ifp->if_refio++;
4707 }
4708 lck_mtx_unlock(&ifp->if_ref_lock);
4709
4710 return (ret);
4711}
4712
4713void
4714ifnet_decr_iorefcnt(struct ifnet *ifp)
4715{
4716 lck_mtx_lock_spin(&ifp->if_ref_lock);
4717 VERIFY(ifp->if_refio > 0);
4718 VERIFY((ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING)) != 0);
4719 ifp->if_refio--;
4720
4721 /* if there are no more outstanding io references, wakeup the
4722 * ifnet_detach thread if detaching flag is set.
4723 */
4724 if (ifp->if_refio == 0 &&
4725 (ifp->if_refflags & IFRF_DETACHING) != 0) {
6d2010ae 4726 wakeup(&(ifp->if_refio));
91447636 4727 }
6d2010ae
A
4728 lck_mtx_unlock(&ifp->if_ref_lock);
4729}
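/*
 * Illustrative sketch (not part of dlil.c): the I/O refcount discipline
 * used throughout this file.  A caller takes a reference with
 * ifnet_is_attached(ifp, 1) and must balance it with
 * ifnet_decr_iorefcnt(); a pending detach waits until the count drains
 * to zero.  The name example_with_ifp is hypothetical.
 */
#if 0
static errno_t
example_with_ifp(struct ifnet *ifp)
{
	if (!ifnet_is_attached(ifp, 1))
		return (ENXIO);		/* detached or detaching */

	/* ... safe to use ifp; detach cannot complete here ... */

	ifnet_decr_iorefcnt(ifp);	/* may wake the detaching thread */
	return (0);
}
#endif
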
b0d623f7 4730
6d2010ae
A
4731static void
4732dlil_if_trace(struct dlil_ifnet *dl_if, int refhold)
4733{
4734 struct dlil_ifnet_dbg *dl_if_dbg = (struct dlil_ifnet_dbg *)dl_if;
4735 ctrace_t *tr;
4736 u_int32_t idx;
4737 u_int16_t *cnt;
1c79356b 4738
6d2010ae
A
4739 if (!(dl_if->dl_if_flags & DLIF_DEBUG)) {
4740 panic("%s: dl_if %p has no debug structure", __func__, dl_if);
4741 /* NOTREACHED */
4742 }
4743
4744 if (refhold) {
4745 cnt = &dl_if_dbg->dldbg_if_refhold_cnt;
4746 tr = dl_if_dbg->dldbg_if_refhold;
4747 } else {
4748 cnt = &dl_if_dbg->dldbg_if_refrele_cnt;
4749 tr = dl_if_dbg->dldbg_if_refrele;
4750 }
4751
4752 idx = atomic_add_16_ov(cnt, 1) % IF_REF_TRACE_HIST_SIZE;
4753 ctrace_record(&tr[idx]);
91447636 4754}
1c79356b 4755
6d2010ae
A
4756errno_t
4757dlil_if_ref(struct ifnet *ifp)
4758{
4759 struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
4760
4761 if (dl_if == NULL)
4762 return (EINVAL);
4763
4764 lck_mtx_lock_spin(&dl_if->dl_if_lock);
4765 ++dl_if->dl_if_refcnt;
4766 if (dl_if->dl_if_refcnt == 0) {
4767 panic("%s: wraparound refcnt for ifp=%p", __func__, ifp);
4768 /* NOTREACHED */
4769 }
4770 if (dl_if->dl_if_trace != NULL)
4771 (*dl_if->dl_if_trace)(dl_if, TRUE);
4772 lck_mtx_unlock(&dl_if->dl_if_lock);
4773
4774 return (0);
91447636 4775}
1c79356b 4776
6d2010ae
A
4777errno_t
4778dlil_if_free(struct ifnet *ifp)
4779{
4780 struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
4781
4782 if (dl_if == NULL)
4783 return (EINVAL);
4784
4785 lck_mtx_lock_spin(&dl_if->dl_if_lock);
4786 if (dl_if->dl_if_refcnt == 0) {
4787 panic("%s: negative refcnt for ifp=%p", __func__, ifp);
4788 /* NOTREACHED */
4789 }
4790 --dl_if->dl_if_refcnt;
4791 if (dl_if->dl_if_trace != NULL)
4792 (*dl_if->dl_if_trace)(dl_if, FALSE);
4793 lck_mtx_unlock(&dl_if->dl_if_lock);
4794
4795 return (0);
4796}
1c79356b 4797
2d21ac55 4798static errno_t
6d2010ae
A
4799dlil_attach_protocol_internal(struct if_proto *proto,
4800 const struct ifnet_demux_desc *demux_list, u_int32_t demux_count)
91447636 4801{
6d2010ae 4802 struct kev_dl_proto_data ev_pr_data;
91447636
A
4803 struct ifnet *ifp = proto->ifp;
4804 int retval = 0;
b0d623f7 4805 u_int32_t hash_value = proto_hash_value(proto->protocol_family);
6d2010ae
A
4806 struct if_proto *prev_proto;
4807 struct if_proto *_proto;
4808
4809 /* callee holds a proto refcnt upon success */
4810 ifnet_lock_exclusive(ifp);
4811 _proto = find_attached_proto(ifp, proto->protocol_family);
4812 if (_proto != NULL) {
91447636 4813 ifnet_lock_done(ifp);
6d2010ae
A
4814 if_proto_free(_proto);
4815 return (EEXIST);
91447636 4816 }
6d2010ae 4817
91447636
A
4818 /*
4819 * Call family module add_proto routine so it can refine the
4820 * demux descriptors as it wishes.
4821 */
6d2010ae
A
4822 retval = ifp->if_add_proto(ifp, proto->protocol_family, demux_list,
4823 demux_count);
91447636 4824 if (retval) {
6d2010ae
A
4825 ifnet_lock_done(ifp);
4826 return (retval);
91447636 4827 }
6d2010ae 4828
91447636
A
4829 /*
4830 * Insert the protocol in the hash
4831 */
6d2010ae
A
4832 prev_proto = SLIST_FIRST(&ifp->if_proto_hash[hash_value]);
4833 while (prev_proto != NULL && SLIST_NEXT(prev_proto, next_hash) != NULL)
4834 prev_proto = SLIST_NEXT(prev_proto, next_hash);
4835 if (prev_proto)
4836 SLIST_INSERT_AFTER(prev_proto, proto, next_hash);
4837 else
4838 SLIST_INSERT_HEAD(&ifp->if_proto_hash[hash_value],
4839 proto, next_hash);
4840
4841 /* hold a proto refcnt for attach */
4842 if_proto_ref(proto);
1c79356b 4843
91447636 4844 /*
6d2010ae
A
4845	 * The reserved field carries the number of protocols still attached
4846	 * (subject to change)
91447636 4847 */
91447636
A
4848 ev_pr_data.proto_family = proto->protocol_family;
4849 ev_pr_data.proto_remaining_count = dlil_ifp_proto_count(ifp);
6d2010ae
A
4850 ifnet_lock_done(ifp);
4851
4852 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_ATTACHED,
4853 (struct net_event_data *)&ev_pr_data,
4854 sizeof (struct kev_dl_proto_data));
4855 return (retval);
91447636 4856}
0b4e3aa0 4857
2d21ac55
A
4858errno_t
4859ifnet_attach_protocol(ifnet_t ifp, protocol_family_t protocol,
6d2010ae 4860 const struct ifnet_attach_proto_param *proto_details)
91447636
A
4861{
4862 int retval = 0;
4863 struct if_proto *ifproto = NULL;
6d2010ae
A
4864
4865 ifnet_head_lock_shared();
4866 if (ifp == NULL || protocol == 0 || proto_details == NULL) {
4867 retval = EINVAL;
4868 goto end;
4869 }
4870 /* Check that the interface is in the global list */
4871 if (!ifnet_lookup(ifp)) {
4872 retval = ENXIO;
4873 goto end;
4874 }
4875
4876 ifproto = zalloc(dlif_proto_zone);
4877 if (ifproto == NULL) {
91447636
A
4878 retval = ENOMEM;
4879 goto end;
4880 }
6d2010ae
A
4881 bzero(ifproto, dlif_proto_size);
4882
4883 /* refcnt held above during lookup */
91447636
A
4884 ifproto->ifp = ifp;
4885 ifproto->protocol_family = protocol;
4886 ifproto->proto_kpi = kProtoKPI_v1;
4887 ifproto->kpi.v1.input = proto_details->input;
4888 ifproto->kpi.v1.pre_output = proto_details->pre_output;
4889 ifproto->kpi.v1.event = proto_details->event;
4890 ifproto->kpi.v1.ioctl = proto_details->ioctl;
4891 ifproto->kpi.v1.detached = proto_details->detached;
4892 ifproto->kpi.v1.resolve_multi = proto_details->resolve;
4893 ifproto->kpi.v1.send_arp = proto_details->send_arp;
6d2010ae 4894
2d21ac55 4895 retval = dlil_attach_protocol_internal(ifproto,
6d2010ae
A
4896 proto_details->demux_list, proto_details->demux_count);
4897
4898 if (dlil_verbose) {
39236c6e
A
4899 printf("%s: attached v1 protocol %d\n", if_name(ifp),
4900 protocol);
6d2010ae
A
4901 }
4902
9bccf70c 4903end:
6d2010ae 4904 if (retval != 0 && retval != EEXIST && ifp != NULL) {
39236c6e
A
4905 DLIL_PRINTF("%s: failed to attach v1 protocol %d (err=%d)\n",
4906 if_name(ifp), protocol, retval);
6d2010ae
A
4907 }
4908 ifnet_head_done();
4909 if (retval != 0 && ifproto != NULL)
4910 zfree(dlif_proto_zone, ifproto);
4911 return (retval);
1c79356b
A
4912}
4913
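/*
 * Illustrative sketch (not part of dlil.c): attaching a v1 protocol.
 * The field names mirror the assignments above; DLIL_DESC_ETYPE2 demux
 * descriptors match on an EtherType.  PF_EXAMPLE, the 0x88b5 EtherType
 * and the example_* callbacks (see the input sketches earlier in this
 * file) are hypothetical.  A matching ifnet_detach_protocol(ifp,
 * PF_EXAMPLE) undoes the attach.
 */
#if 0
static errno_t
example_attach_proto(ifnet_t ifp)
{
	struct ifnet_attach_proto_param param;
	struct ifnet_demux_desc desc;
	u_int16_t etype = htons(0x88b5);	/* local experimental */

	bzero(&desc, sizeof (desc));
	desc.type = DLIL_DESC_ETYPE2;
	desc.data = &etype;
	desc.datalen = sizeof (etype);

	bzero(&param, sizeof (param));
	param.demux_list = &desc;
	param.demux_count = 1;
	param.input = example_proto_input;	/* v1: one packet at a time */

	return (ifnet_attach_protocol(ifp, PF_EXAMPLE, &param));
}
#endif
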
2d21ac55
A
4914errno_t
4915ifnet_attach_protocol_v2(ifnet_t ifp, protocol_family_t protocol,
6d2010ae 4916 const struct ifnet_attach_proto_param_v2 *proto_details)
91447636 4917{
2d21ac55 4918 int retval = 0;
91447636 4919 struct if_proto *ifproto = NULL;
6d2010ae
A
4920
4921 ifnet_head_lock_shared();
4922 if (ifp == NULL || protocol == 0 || proto_details == NULL) {
4923 retval = EINVAL;
4924 goto end;
4925 }
4926 /* Check that the interface is in the global list */
4927 if (!ifnet_lookup(ifp)) {
4928 retval = ENXIO;
4929 goto end;
4930 }
4931
4932 ifproto = zalloc(dlif_proto_zone);
4933 if (ifproto == NULL) {
91447636
A
4934 retval = ENOMEM;
4935 goto end;
4936 }
2d21ac55 4937 bzero(ifproto, sizeof(*ifproto));
6d2010ae
A
4938
4939 /* refcnt held above during lookup */
2d21ac55
A
4940 ifproto->ifp = ifp;
4941 ifproto->protocol_family = protocol;
4942 ifproto->proto_kpi = kProtoKPI_v2;
4943 ifproto->kpi.v2.input = proto_details->input;
4944 ifproto->kpi.v2.pre_output = proto_details->pre_output;
4945 ifproto->kpi.v2.event = proto_details->event;
4946 ifproto->kpi.v2.ioctl = proto_details->ioctl;
4947 ifproto->kpi.v2.detached = proto_details->detached;
4948 ifproto->kpi.v2.resolve_multi = proto_details->resolve;
4949 ifproto->kpi.v2.send_arp = proto_details->send_arp;
1c79356b 4950
6d2010ae
A
4951 retval = dlil_attach_protocol_internal(ifproto,
4952 proto_details->demux_list, proto_details->demux_count);
1c79356b 4953
6d2010ae 4954 if (dlil_verbose) {
39236c6e
A
4955 printf("%s: attached v2 protocol %d\n", if_name(ifp),
4956 protocol);
91447636 4957 }
6d2010ae
A
4958
4959end:
4960 if (retval != 0 && retval != EEXIST && ifp != NULL) {
39236c6e
A
4961 DLIL_PRINTF("%s: failed to attach v2 protocol %d (err=%d)\n",
4962 if_name(ifp), protocol, retval);
2d21ac55 4963 }
6d2010ae
A
4964 ifnet_head_done();
4965 if (retval != 0 && ifproto != NULL)
4966 zfree(dlif_proto_zone, ifproto);
4967 return (retval);
91447636 4968}
1c79356b 4969
2d21ac55
A
4970errno_t
4971ifnet_detach_protocol(ifnet_t ifp, protocol_family_t proto_family)
91447636
A
4972{
4973 struct if_proto *proto = NULL;
4974 int retval = 0;
6d2010ae
A
4975
4976 if (ifp == NULL || proto_family == 0) {
4977 retval = EINVAL;
91447636
A
4978 goto end;
4979 }
6d2010ae
A
4980
4981 ifnet_lock_exclusive(ifp);
4982 /* callee holds a proto refcnt upon success */
91447636 4983 proto = find_attached_proto(ifp, proto_family);
91447636
A
4984 if (proto == NULL) {
4985 retval = ENXIO;
6d2010ae 4986 ifnet_lock_done(ifp);
91447636
A
4987 goto end;
4988 }
6d2010ae
A
4989
4990 /* call family module del_proto */
91447636
A
4991 if (ifp->if_del_proto)
4992 ifp->if_del_proto(ifp, proto->protocol_family);
1c79356b 4993
6d2010ae
A
4994 SLIST_REMOVE(&ifp->if_proto_hash[proto_hash_value(proto_family)],
4995 proto, if_proto, next_hash);
4996
4997 if (proto->proto_kpi == kProtoKPI_v1) {
4998 proto->kpi.v1.input = ifproto_media_input_v1;
4999 proto->kpi.v1.pre_output= ifproto_media_preout;
5000 proto->kpi.v1.event = ifproto_media_event;
5001 proto->kpi.v1.ioctl = ifproto_media_ioctl;
5002 proto->kpi.v1.resolve_multi = ifproto_media_resolve_multi;
5003 proto->kpi.v1.send_arp = ifproto_media_send_arp;
5004 } else {
5005 proto->kpi.v2.input = ifproto_media_input_v2;
5006 proto->kpi.v2.pre_output = ifproto_media_preout;
5007 proto->kpi.v2.event = ifproto_media_event;
5008 proto->kpi.v2.ioctl = ifproto_media_ioctl;
5009 proto->kpi.v2.resolve_multi = ifproto_media_resolve_multi;
5010 proto->kpi.v2.send_arp = ifproto_media_send_arp;
5011 }
5012 proto->detached = 1;
5013 ifnet_lock_done(ifp);
5014
5015 if (dlil_verbose) {
39236c6e
A
5016 printf("%s: detached %s protocol %d\n", if_name(ifp),
5017 (proto->proto_kpi == kProtoKPI_v1) ?
6d2010ae
A
5018 "v1" : "v2", proto_family);
5019 }
5020
5021 /* release proto refcnt held during protocol attach */
5022 if_proto_free(proto);
91447636
A
5023
5024 /*
6d2010ae
A
5025 * Release proto refcnt held during lookup; the rest of
5026 * protocol detach steps will happen when the last proto
5027 * reference is released.
91447636 5028 */
6d2010ae
A
5029 if_proto_free(proto);
5030
91447636 5031end:
6d2010ae 5032 return (retval);
91447636 5033}

static errno_t
ifproto_media_input_v1(struct ifnet *ifp, protocol_family_t protocol,
    struct mbuf *packet, char *header)
{
#pragma unused(ifp, protocol, packet, header)
	return (ENXIO);
}

static errno_t
ifproto_media_input_v2(struct ifnet *ifp, protocol_family_t protocol,
    struct mbuf *packet)
{
#pragma unused(ifp, protocol, packet)
	return (ENXIO);
}

static errno_t
ifproto_media_preout(struct ifnet *ifp, protocol_family_t protocol,
    mbuf_t *packet, const struct sockaddr *dest, void *route, char *frame_type,
    char *link_layer_dest)
{
#pragma unused(ifp, protocol, packet, dest, route, frame_type, link_layer_dest)
	return (ENXIO);
}

static void
ifproto_media_event(struct ifnet *ifp, protocol_family_t protocol,
    const struct kev_msg *event)
{
#pragma unused(ifp, protocol, event)
}

static errno_t
ifproto_media_ioctl(struct ifnet *ifp, protocol_family_t protocol,
    unsigned long command, void *argument)
{
#pragma unused(ifp, protocol, command, argument)
	return (ENXIO);
}

static errno_t
ifproto_media_resolve_multi(ifnet_t ifp, const struct sockaddr *proto_addr,
    struct sockaddr_dl *out_ll, size_t ll_len)
{
#pragma unused(ifp, proto_addr, out_ll, ll_len)
	return (ENXIO);
}

static errno_t
ifproto_media_send_arp(struct ifnet *ifp, u_short arpop,
    const struct sockaddr_dl *sender_hw, const struct sockaddr *sender_proto,
    const struct sockaddr_dl *target_hw, const struct sockaddr *target_proto)
{
#pragma unused(ifp, arpop, sender_hw, sender_proto, target_hw, target_proto)
	return (ENXIO);
}

extern int if_next_index(void);

errno_t
ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr)
{
	struct ifnet *tmp_if;
	struct ifaddr *ifa;
	struct if_data_internal if_data_saved;
	struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
	struct dlil_threading_info *dl_inp;
	u_int32_t sflags = 0;
	int err;

	if (ifp == NULL)
		return (EINVAL);

	/*
	 * Serialize ifnet attach using dlil_ifnet_lock, in order to
	 * prevent the interface from being configured while it is
	 * embryonic, as ifnet_head_lock is dropped and reacquired
	 * below prior to marking the ifnet with IFRF_ATTACHED.
	 */
	dlil_if_lock();
	ifnet_head_lock_exclusive();
	/* Verify we aren't already on the list */
	TAILQ_FOREACH(tmp_if, &ifnet_head, if_link) {
		if (tmp_if == ifp) {
			ifnet_head_done();
			dlil_if_unlock();
			return (EEXIST);
		}
	}

	lck_mtx_lock_spin(&ifp->if_ref_lock);
	if (ifp->if_refflags & IFRF_ATTACHED) {
		panic_plain("%s: flags mismatch (attached set) ifp=%p",
		    __func__, ifp);
		/* NOTREACHED */
	}
	lck_mtx_unlock(&ifp->if_ref_lock);

	ifnet_lock_exclusive(ifp);

	/* Sanity check */
	VERIFY(ifp->if_detaching_link.tqe_next == NULL);
	VERIFY(ifp->if_detaching_link.tqe_prev == NULL);

	if (ll_addr != NULL) {
		if (ifp->if_addrlen == 0) {
			ifp->if_addrlen = ll_addr->sdl_alen;
		} else if (ll_addr->sdl_alen != ifp->if_addrlen) {
			ifnet_lock_done(ifp);
			ifnet_head_done();
			dlil_if_unlock();
			return (EINVAL);
		}
	}

	/*
	 * Allow interfaces without protocol families to attach
	 * only if they have the necessary fields filled out.
	 */
	if (ifp->if_add_proto == NULL || ifp->if_del_proto == NULL) {
		DLIL_PRINTF("%s: Attempt to attach interface without "
		    "family module - %d\n", __func__, ifp->if_family);
		ifnet_lock_done(ifp);
		ifnet_head_done();
		dlil_if_unlock();
		return (ENODEV);
	}

	/* Allocate protocol hash table */
	VERIFY(ifp->if_proto_hash == NULL);
	ifp->if_proto_hash = zalloc(dlif_phash_zone);
	if (ifp->if_proto_hash == NULL) {
		ifnet_lock_done(ifp);
		ifnet_head_done();
		dlil_if_unlock();
		return (ENOBUFS);
	}
	bzero(ifp->if_proto_hash, dlif_phash_size);

	lck_mtx_lock_spin(&ifp->if_flt_lock);
	VERIFY(TAILQ_EMPTY(&ifp->if_flt_head));
	TAILQ_INIT(&ifp->if_flt_head);
	VERIFY(ifp->if_flt_busy == 0);
	VERIFY(ifp->if_flt_waiters == 0);
	lck_mtx_unlock(&ifp->if_flt_lock);

	VERIFY(TAILQ_EMPTY(&ifp->if_prefixhead));
	TAILQ_INIT(&ifp->if_prefixhead);

	if (!(dl_if->dl_if_flags & DLIF_REUSE)) {
		VERIFY(LIST_EMPTY(&ifp->if_multiaddrs));
		LIST_INIT(&ifp->if_multiaddrs);
	}

	VERIFY(ifp->if_allhostsinm == NULL);
	VERIFY(TAILQ_EMPTY(&ifp->if_addrhead));
	TAILQ_INIT(&ifp->if_addrhead);

	if (ifp->if_index == 0) {
		int idx = if_next_index();

		if (idx == -1) {
			ifp->if_index = 0;
			ifnet_lock_done(ifp);
			ifnet_head_done();
			dlil_if_unlock();
			return (ENOBUFS);
		}
		ifp->if_index = idx;
	}
	/* There should not be anything occupying this slot */
	VERIFY(ifindex2ifnet[ifp->if_index] == NULL);

	/* allocate (if needed) and initialize a link address */
	VERIFY(!(dl_if->dl_if_flags & DLIF_REUSE) || ifp->if_lladdr != NULL);
	ifa = dlil_alloc_lladdr(ifp, ll_addr);
	if (ifa == NULL) {
		ifnet_lock_done(ifp);
		ifnet_head_done();
		dlil_if_unlock();
		return (ENOBUFS);
	}

	VERIFY(ifnet_addrs[ifp->if_index - 1] == NULL);
	ifnet_addrs[ifp->if_index - 1] = ifa;

	/* make this address the first on the list */
	IFA_LOCK(ifa);
	/* hold a reference for ifnet_addrs[] */
	IFA_ADDREF_LOCKED(ifa);
	/* if_attach_link_ifa() holds a reference for ifa_link */
	if_attach_link_ifa(ifp, ifa);
	IFA_UNLOCK(ifa);

#if CONFIG_MACF_NET
	mac_ifnet_label_associate(ifp);
#endif

	TAILQ_INSERT_TAIL(&ifnet_head, ifp, if_link);
	ifindex2ifnet[ifp->if_index] = ifp;

	/* Hold a reference to the underlying dlil_ifnet */
	ifnet_reference(ifp);

	/* Clear stats (save and restore other fields that we care about) */
	if_data_saved = ifp->if_data;
	bzero(&ifp->if_data, sizeof (ifp->if_data));
	ifp->if_data.ifi_type = if_data_saved.ifi_type;
	ifp->if_data.ifi_typelen = if_data_saved.ifi_typelen;
	ifp->if_data.ifi_physical = if_data_saved.ifi_physical;
	ifp->if_data.ifi_addrlen = if_data_saved.ifi_addrlen;
	ifp->if_data.ifi_hdrlen = if_data_saved.ifi_hdrlen;
	ifp->if_data.ifi_mtu = if_data_saved.ifi_mtu;
	ifp->if_data.ifi_baudrate = if_data_saved.ifi_baudrate;
	ifp->if_data.ifi_hwassist = if_data_saved.ifi_hwassist;
	ifp->if_data.ifi_tso_v4_mtu = if_data_saved.ifi_tso_v4_mtu;
	ifp->if_data.ifi_tso_v6_mtu = if_data_saved.ifi_tso_v6_mtu;
	ifnet_touch_lastchange(ifp);

	VERIFY(ifp->if_output_sched_model == IFNET_SCHED_MODEL_NORMAL ||
	    ifp->if_output_sched_model == IFNET_SCHED_MODEL_DRIVER_MANAGED);

	/* By default, use SFB and enable flow advisory */
	sflags = PKTSCHEDF_QALG_SFB;
	if (if_flowadv)
		sflags |= PKTSCHEDF_QALG_FLOWCTL;

	if (if_delaybased_queue)
		sflags |= PKTSCHEDF_QALG_DELAYBASED;

	/* Initialize transmit queue(s) */
	err = ifclassq_setup(ifp, sflags, (dl_if->dl_if_flags & DLIF_REUSE));
	if (err != 0) {
		panic_plain("%s: ifp=%p couldn't initialize transmit queue; "
		    "err=%d", __func__, ifp, err);
		/* NOTREACHED */
	}

	/* Sanity checks on the input thread storage */
	dl_inp = &dl_if->dl_if_inpstorage;
	bzero(&dl_inp->stats, sizeof (dl_inp->stats));
	VERIFY(dl_inp->input_waiting == 0);
	VERIFY(dl_inp->wtot == 0);
	VERIFY(dl_inp->ifp == NULL);
	VERIFY(qhead(&dl_inp->rcvq_pkts) == NULL && qempty(&dl_inp->rcvq_pkts));
	VERIFY(qlimit(&dl_inp->rcvq_pkts) == 0);
	VERIFY(!dl_inp->net_affinity);
	VERIFY(ifp->if_inp == NULL);
	VERIFY(dl_inp->input_thr == THREAD_NULL);
	VERIFY(dl_inp->wloop_thr == THREAD_NULL);
	VERIFY(dl_inp->poll_thr == THREAD_NULL);
	VERIFY(dl_inp->tag == 0);
	VERIFY(dl_inp->mode == IFNET_MODEL_INPUT_POLL_OFF);
	bzero(&dl_inp->tstats, sizeof (dl_inp->tstats));
	bzero(&dl_inp->pstats, sizeof (dl_inp->pstats));
	bzero(&dl_inp->sstats, sizeof (dl_inp->sstats));
#if IFNET_INPUT_SANITY_CHK
	VERIFY(dl_inp->input_mbuf_cnt == 0);
#endif /* IFNET_INPUT_SANITY_CHK */

	/*
	 * A specific DLIL input thread is created per Ethernet/cellular
	 * interface or for an interface which supports opportunistic
	 * input polling.  Pseudo interfaces or other types of interfaces
	 * use the main input thread instead.
	 */
	if ((net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) ||
	    ifp->if_type == IFT_ETHER || ifp->if_type == IFT_CELLULAR) {
		ifp->if_inp = dl_inp;
		err = dlil_create_input_thread(ifp, ifp->if_inp);
		if (err != 0) {
			panic_plain("%s: ifp=%p couldn't get an input thread; "
			    "err=%d", __func__, ifp, err);
			/* NOTREACHED */
		}
	}

	/*
	 * If the driver supports the new transmit model, calculate flow hash
	 * and create a workloop starter thread to invoke the if_start callback
	 * where the packets may be dequeued and transmitted.
	 */
	if (ifp->if_eflags & IFEF_TXSTART) {
		ifp->if_flowhash = ifnet_calc_flowhash(ifp);
		VERIFY(ifp->if_flowhash != 0);

		VERIFY(ifp->if_start != NULL);
		VERIFY(ifp->if_start_thread == THREAD_NULL);

		ifnet_set_start_cycle(ifp, NULL);
		ifp->if_start_active = 0;
		ifp->if_start_req = 0;
		ifp->if_start_flags = 0;
		if ((err = kernel_thread_start(ifnet_start_thread_fn, ifp,
		    &ifp->if_start_thread)) != KERN_SUCCESS) {
			panic_plain("%s: ifp=%p couldn't get a start thread; "
			    "err=%d", __func__, ifp, err);
			/* NOTREACHED */
		}
		ml_thread_policy(ifp->if_start_thread, MACHINE_GROUP,
		    (MACHINE_NETWORK_GROUP|MACHINE_NETWORK_WORKLOOP));
	} else {
		ifp->if_flowhash = 0;
	}

	/*
	 * If the driver supports the new receive model, create a poller
	 * thread to invoke if_input_poll callback where the packets may
	 * be dequeued from the driver and processed for reception.
	 */
	if (ifp->if_eflags & IFEF_RXPOLL) {
		VERIFY(ifp->if_input_poll != NULL);
		VERIFY(ifp->if_input_ctl != NULL);
		VERIFY(ifp->if_poll_thread == THREAD_NULL);

		ifnet_set_poll_cycle(ifp, NULL);
		ifp->if_poll_update = 0;
		ifp->if_poll_active = 0;
		ifp->if_poll_req = 0;
		if ((err = kernel_thread_start(ifnet_poll_thread_fn, ifp,
		    &ifp->if_poll_thread)) != KERN_SUCCESS) {
			panic_plain("%s: ifp=%p couldn't get a poll thread; "
			    "err=%d", __func__, ifp, err);
			/* NOTREACHED */
		}
		ml_thread_policy(ifp->if_poll_thread, MACHINE_GROUP,
		    (MACHINE_NETWORK_GROUP|MACHINE_NETWORK_WORKLOOP));
	}

	VERIFY(ifp->if_desc.ifd_maxlen == IF_DESCSIZE);
	VERIFY(ifp->if_desc.ifd_len == 0);
	VERIFY(ifp->if_desc.ifd_desc != NULL);

	/* Record attach PC stacktrace */
	ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_attach);

	ifp->if_updatemcasts = 0;
	if (!LIST_EMPTY(&ifp->if_multiaddrs)) {
		struct ifmultiaddr *ifma;
		LIST_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
			IFMA_LOCK(ifma);
			if (ifma->ifma_addr->sa_family == AF_LINK ||
			    ifma->ifma_addr->sa_family == AF_UNSPEC)
				ifp->if_updatemcasts++;
			IFMA_UNLOCK(ifma);
		}

		printf("%s: attached with %d suspended link-layer multicast "
		    "membership(s)\n", if_name(ifp),
		    ifp->if_updatemcasts);
	}

	/* Clear logging parameters */
	bzero(&ifp->if_log, sizeof (ifp->if_log));
	ifp->if_fg_sendts = 0;

	VERIFY(ifp->if_delegated.ifp == NULL);
	VERIFY(ifp->if_delegated.type == 0);
	VERIFY(ifp->if_delegated.family == 0);
	VERIFY(ifp->if_delegated.subfamily == 0);
	VERIFY(ifp->if_delegated.expensive == 0);

	bzero(&ifp->if_agentids, sizeof(ifp->if_agentids));

	/* Reset interface state */
	bzero(&ifp->if_interface_state, sizeof(ifp->if_interface_state));
	ifp->if_interface_state.valid_bitmask |=
	    IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
	ifp->if_interface_state.interface_availability =
	    IF_INTERFACE_STATE_INTERFACE_AVAILABLE;

	/* Initialize Link Quality Metric (loopback [lo0] is always good) */
	if (ifp == lo_ifp) {
		ifp->if_interface_state.lqm_state = IFNET_LQM_THRESH_GOOD;
		ifp->if_interface_state.valid_bitmask |=
		    IF_INTERFACE_STATE_LQM_STATE_VALID;
	} else {
		ifp->if_interface_state.lqm_state = IFNET_LQM_THRESH_UNKNOWN;
	}
	ifnet_lock_done(ifp);
	ifnet_head_done();

	lck_mtx_lock(&ifp->if_cached_route_lock);
	/* Enable forwarding cached route */
	ifp->if_fwd_cacheok = 1;
	/* Clean up any existing cached routes */
	ROUTE_RELEASE(&ifp->if_fwd_route);
	bzero(&ifp->if_fwd_route, sizeof (ifp->if_fwd_route));
	ROUTE_RELEASE(&ifp->if_src_route);
	bzero(&ifp->if_src_route, sizeof (ifp->if_src_route));
	ROUTE_RELEASE(&ifp->if_src_route6);
	bzero(&ifp->if_src_route6, sizeof (ifp->if_src_route6));
	lck_mtx_unlock(&ifp->if_cached_route_lock);

	ifnet_llreach_ifattach(ifp, (dl_if->dl_if_flags & DLIF_REUSE));

	/*
	 * Allocate and attach IGMPv3/MLDv2 interface specific variables
	 * and trees; do this before the ifnet is marked as attached.
	 * The ifnet keeps the reference to the info structures even after
	 * the ifnet is detached, since the network-layer records still
	 * refer to the info structures even after that.  This also
	 * makes it possible for them to still function after the ifnet
	 * is recycled or reattached.
	 */
#if INET
	if (IGMP_IFINFO(ifp) == NULL) {
		IGMP_IFINFO(ifp) = igmp_domifattach(ifp, M_WAITOK);
		VERIFY(IGMP_IFINFO(ifp) != NULL);
	} else {
		VERIFY(IGMP_IFINFO(ifp)->igi_ifp == ifp);
		igmp_domifreattach(IGMP_IFINFO(ifp));
	}
#endif /* INET */
#if INET6
	if (MLD_IFINFO(ifp) == NULL) {
		MLD_IFINFO(ifp) = mld_domifattach(ifp, M_WAITOK);
		VERIFY(MLD_IFINFO(ifp) != NULL);
	} else {
		VERIFY(MLD_IFINFO(ifp)->mli_ifp == ifp);
		mld_domifreattach(MLD_IFINFO(ifp));
	}
#endif /* INET6 */

	VERIFY(ifp->if_data_threshold == 0);

	/*
	 * Finally, mark this ifnet as attached.
	 */
	lck_mtx_lock(rnh_lock);
	ifnet_lock_exclusive(ifp);
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	ifp->if_refflags = IFRF_ATTACHED;
	lck_mtx_unlock(&ifp->if_ref_lock);
	if (net_rtref) {
		/* boot-args override; enable idle notification */
		(void) ifnet_set_idle_flags_locked(ifp, IFRF_IDLE_NOTIFY,
		    IFRF_IDLE_NOTIFY);
	} else {
		/* apply previous request(s) to set the idle flags, if any */
		(void) ifnet_set_idle_flags_locked(ifp, ifp->if_idle_new_flags,
		    ifp->if_idle_new_flags_mask);
	}
	ifnet_lock_done(ifp);
	lck_mtx_unlock(rnh_lock);
	dlil_if_unlock();

#if PF
	/*
	 * Attach packet filter to this interface, if enabled.
	 */
	pf_ifnet_hook(ifp, 1);
#endif /* PF */

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_ATTACHED, NULL, 0);

	if (dlil_verbose) {
		printf("%s: attached%s\n", if_name(ifp),
		    (dl_if->dl_if_flags & DLIF_REUSE) ? " (recycled)" : "");
	}

	return (0);
}
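
/*
 * Illustrative driver-side sketch of the attach flow (error handling
 * omitted; all "my_*" names are hypothetical, the KPI calls are real):
 *
 *	ifnet_t ifp;
 *	struct ifnet_init_params init;
 *
 *	bzero(&init, sizeof (init));
 *	init.name = "foo";			// hypothetical name
 *	init.unit = 0;
 *	init.family = IFNET_FAMILY_ETHERNET;
 *	init.type = IFT_ETHER;
 *	init.output = my_output;		// hypothetical callback
 *	...
 *	if (ifnet_allocate(&init, &ifp) == 0)
 *		(void) ifnet_attach(ifp, my_lladdr);
 */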

/*
 * Prepare the storage for the first/permanent link address, which must
 * have the same lifetime as the ifnet itself.  Although the link
 * address gets removed from if_addrhead and ifnet_addrs[] at detach time,
 * its location in memory must never change as it may still be referred
 * to by some parts of the system afterwards (unfortunate implementation
 * artifacts inherited from BSD.)
 *
 * Caller must hold ifnet lock as writer.
 */
static struct ifaddr *
dlil_alloc_lladdr(struct ifnet *ifp, const struct sockaddr_dl *ll_addr)
{
	struct ifaddr *ifa, *oifa;
	struct sockaddr_dl *asdl, *msdl;
	char workbuf[IFNAMSIZ*2];
	int namelen, masklen, socksize;
	struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;

	ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_EXCLUSIVE);
	VERIFY(ll_addr == NULL || ll_addr->sdl_alen == ifp->if_addrlen);

	namelen = snprintf(workbuf, sizeof (workbuf), "%s",
	    if_name(ifp));
	masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + namelen;
	socksize = masklen + ifp->if_addrlen;
#define ROUNDUP(a) (1 + (((a) - 1) | (sizeof (u_int32_t) - 1)))
	if ((u_int32_t)socksize < sizeof (struct sockaddr_dl))
		socksize = sizeof(struct sockaddr_dl);
	socksize = ROUNDUP(socksize);
#undef ROUNDUP

	ifa = ifp->if_lladdr;
	if (socksize > DLIL_SDLMAXLEN ||
	    (ifa != NULL && ifa != &dl_if->dl_if_lladdr.ifa)) {
		/*
		 * Rare, but in the event that the link address requires
		 * more storage space than DLIL_SDLMAXLEN, allocate the
		 * largest possible storages for address and mask, such
		 * that we can reuse the same space when if_addrlen grows.
		 * This same space will be used when if_addrlen shrinks.
		 */
		if (ifa == NULL || ifa == &dl_if->dl_if_lladdr.ifa) {
			int ifasize = sizeof (*ifa) + 2 * SOCK_MAXADDRLEN;
			ifa = _MALLOC(ifasize, M_IFADDR, M_WAITOK | M_ZERO);
			if (ifa == NULL)
				return (NULL);
			ifa_lock_init(ifa);
			/* Don't set IFD_ALLOC, as this is permanent */
			ifa->ifa_debug = IFD_LINK;
		}
		IFA_LOCK(ifa);
		/* address and mask sockaddr_dl locations */
		asdl = (struct sockaddr_dl *)(ifa + 1);
		bzero(asdl, SOCK_MAXADDRLEN);
		msdl = (struct sockaddr_dl *)(void *)
		    ((char *)asdl + SOCK_MAXADDRLEN);
		bzero(msdl, SOCK_MAXADDRLEN);
	} else {
		VERIFY(ifa == NULL || ifa == &dl_if->dl_if_lladdr.ifa);
		/*
		 * Use the storage areas for address and mask within the
		 * dlil_ifnet structure.  This is the most common case.
		 */
		if (ifa == NULL) {
			ifa = &dl_if->dl_if_lladdr.ifa;
			ifa_lock_init(ifa);
			/* Don't set IFD_ALLOC, as this is permanent */
			ifa->ifa_debug = IFD_LINK;
		}
		IFA_LOCK(ifa);
		/* address and mask sockaddr_dl locations */
		asdl = (struct sockaddr_dl *)(void *)&dl_if->dl_if_lladdr.asdl;
		bzero(asdl, sizeof (dl_if->dl_if_lladdr.asdl));
		msdl = (struct sockaddr_dl *)(void *)&dl_if->dl_if_lladdr.msdl;
		bzero(msdl, sizeof (dl_if->dl_if_lladdr.msdl));
	}

	/* hold a permanent reference for the ifnet itself */
	IFA_ADDREF_LOCKED(ifa);
	oifa = ifp->if_lladdr;
	ifp->if_lladdr = ifa;

	VERIFY(ifa->ifa_debug == IFD_LINK);
	ifa->ifa_ifp = ifp;
	ifa->ifa_rtrequest = link_rtrequest;
	ifa->ifa_addr = (struct sockaddr *)asdl;
	asdl->sdl_len = socksize;
	asdl->sdl_family = AF_LINK;
	bcopy(workbuf, asdl->sdl_data, namelen);
	asdl->sdl_nlen = namelen;
	asdl->sdl_index = ifp->if_index;
	asdl->sdl_type = ifp->if_type;
	if (ll_addr != NULL) {
		asdl->sdl_alen = ll_addr->sdl_alen;
		bcopy(CONST_LLADDR(ll_addr), LLADDR(asdl), asdl->sdl_alen);
	} else {
		asdl->sdl_alen = 0;
	}
	ifa->ifa_netmask = (struct sockaddr *)msdl;
	msdl->sdl_len = masklen;
	while (namelen != 0)
		msdl->sdl_data[--namelen] = 0xff;
	IFA_UNLOCK(ifa);

	if (oifa != NULL)
		IFA_REMREF(oifa);

	return (ifa);
}
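
/*
 * Worked example for the ROUNDUP arithmetic above: the macro rounds up
 * to the next multiple of sizeof (u_int32_t).  For a hypothetical
 * 3-character name and a 6-byte link-layer address, masklen is
 * offsetof(struct sockaddr_dl, sdl_data[0]) + 3 and socksize is
 * masklen + 6; if that is smaller than sizeof (struct sockaddr_dl) it
 * is first bumped to the full structure size, and then, e.g.,
 * ROUNDUP(17) = 1 + (16 | 3) = 20, while ROUNDUP(20) stays 20.
 */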

static void
if_purgeaddrs(struct ifnet *ifp)
{
#if INET
	in_purgeaddrs(ifp);
#endif /* INET */
#if INET6
	in6_purgeaddrs(ifp);
#endif /* INET6 */
}

errno_t
ifnet_detach(ifnet_t ifp)
{
	struct ifnet *delegated_ifp;

	if (ifp == NULL)
		return (EINVAL);

	lck_mtx_lock(rnh_lock);
	ifnet_head_lock_exclusive();
	ifnet_lock_exclusive(ifp);

	/*
	 * Check to see if this interface has previously triggered
	 * aggressive protocol draining; if so, decrement the global
	 * refcnt and clear PR_AGGDRAIN on the route domain if
	 * there are no more such interfaces around.
	 */
	(void) ifnet_set_idle_flags_locked(ifp, 0, ~0);

	lck_mtx_lock_spin(&ifp->if_ref_lock);
	if (!(ifp->if_refflags & IFRF_ATTACHED)) {
		lck_mtx_unlock(&ifp->if_ref_lock);
		ifnet_lock_done(ifp);
		ifnet_head_done();
		lck_mtx_unlock(rnh_lock);
		return (EINVAL);
	} else if (ifp->if_refflags & IFRF_DETACHING) {
		/* Interface has already been detached */
		lck_mtx_unlock(&ifp->if_ref_lock);
		ifnet_lock_done(ifp);
		ifnet_head_done();
		lck_mtx_unlock(rnh_lock);
		return (ENXIO);
	}
	/* Indicate this interface is being detached */
	ifp->if_refflags &= ~IFRF_ATTACHED;
	ifp->if_refflags |= IFRF_DETACHING;
	lck_mtx_unlock(&ifp->if_ref_lock);

	if (dlil_verbose)
		printf("%s: detaching\n", if_name(ifp));

	/*
	 * Remove ifnet from the ifnet_head, ifindex2ifnet[]; it will
	 * no longer be visible during lookups from this point.
	 */
	VERIFY(ifindex2ifnet[ifp->if_index] == ifp);
	TAILQ_REMOVE(&ifnet_head, ifp, if_link);
	ifp->if_link.tqe_next = NULL;
	ifp->if_link.tqe_prev = NULL;
	ifindex2ifnet[ifp->if_index] = NULL;

	/* 18717626 - reset IFEF_IPV4_ROUTER and IFEF_IPV6_ROUTER */
	ifp->if_eflags &= ~(IFEF_IPV4_ROUTER | IFEF_IPV6_ROUTER);

	/* Record detach PC stacktrace */
	ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_detach);

	/* Clear logging parameters */
	bzero(&ifp->if_log, sizeof (ifp->if_log));

	/* Clear delegated interface info (reference released below) */
	delegated_ifp = ifp->if_delegated.ifp;
	bzero(&ifp->if_delegated, sizeof (ifp->if_delegated));

	/* Reset interface state */
	bzero(&ifp->if_interface_state, sizeof(ifp->if_interface_state));

	ifnet_lock_done(ifp);
	ifnet_head_done();
	lck_mtx_unlock(rnh_lock);

	/* Release reference held on the delegated interface */
	if (delegated_ifp != NULL)
		ifnet_release(delegated_ifp);

	/* Reset Link Quality Metric (unless loopback [lo0]) */
	if (ifp != lo_ifp)
		if_lqm_update(ifp, IFNET_LQM_THRESH_OFF, 0);

	/* Reset TCP local statistics */
	if (ifp->if_tcp_stat != NULL)
		bzero(ifp->if_tcp_stat, sizeof(*ifp->if_tcp_stat));

	/* Reset UDP local statistics */
	if (ifp->if_udp_stat != NULL)
		bzero(ifp->if_udp_stat, sizeof(*ifp->if_udp_stat));

	/* Release memory held for interface link status report */
	if (ifp->if_link_status != NULL) {
		FREE(ifp->if_link_status, M_TEMP);
		ifp->if_link_status = NULL;
	}

	/* Let BPF know we're detaching */
	bpfdetach(ifp);

	/* Mark the interface as DOWN */
	if_down(ifp);

	/* Disable forwarding cached route */
	lck_mtx_lock(&ifp->if_cached_route_lock);
	ifp->if_fwd_cacheok = 0;
	lck_mtx_unlock(&ifp->if_cached_route_lock);

	ifp->if_data_threshold = 0;
	/*
	 * Drain any deferred IGMPv3/MLDv2 query responses, but keep the
	 * references to the info structures and leave them attached to
	 * this ifnet.
	 */
#if INET
	igmp_domifdetach(ifp);
#endif /* INET */
#if INET6
	mld_domifdetach(ifp);
#endif /* INET6 */

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHING, NULL, 0);

	/* Let worker thread take care of the rest, to avoid reentrancy */
	dlil_if_lock();
	ifnet_detaching_enqueue(ifp);
	dlil_if_unlock();

	return (0);
}

static void
ifnet_detaching_enqueue(struct ifnet *ifp)
{
	dlil_if_lock_assert();

	++ifnet_detaching_cnt;
	VERIFY(ifnet_detaching_cnt != 0);
	TAILQ_INSERT_TAIL(&ifnet_detaching_head, ifp, if_detaching_link);
	wakeup((caddr_t)&ifnet_delayed_run);
}

static struct ifnet *
ifnet_detaching_dequeue(void)
{
	struct ifnet *ifp;

	dlil_if_lock_assert();

	ifp = TAILQ_FIRST(&ifnet_detaching_head);
	VERIFY(ifnet_detaching_cnt != 0 || ifp == NULL);
	if (ifp != NULL) {
		VERIFY(ifnet_detaching_cnt != 0);
		--ifnet_detaching_cnt;
		TAILQ_REMOVE(&ifnet_detaching_head, ifp, if_detaching_link);
		ifp->if_detaching_link.tqe_next = NULL;
		ifp->if_detaching_link.tqe_prev = NULL;
	}
	return (ifp);
}

static int
ifnet_detacher_thread_cont(int err)
{
#pragma unused(err)
	struct ifnet *ifp;

	for (;;) {
		dlil_if_lock_assert();
		while (ifnet_detaching_cnt == 0) {
			(void) msleep0(&ifnet_delayed_run, &dlil_ifnet_lock,
			    (PZERO - 1), "ifnet_detacher_cont", 0,
			    ifnet_detacher_thread_cont);
			/* NOTREACHED */
		}

		VERIFY(TAILQ_FIRST(&ifnet_detaching_head) != NULL);

		/* Take care of detaching ifnet */
		ifp = ifnet_detaching_dequeue();
		if (ifp != NULL) {
			dlil_if_unlock();
			ifnet_detach_final(ifp);
			dlil_if_lock();
		}
	}
	/* NOTREACHED */
	return (0);
}

static void
ifnet_detacher_thread_func(void *v, wait_result_t w)
{
#pragma unused(v, w)
	dlil_if_lock();
	(void) msleep0(&ifnet_delayed_run, &dlil_ifnet_lock,
	    (PZERO - 1), "ifnet_detacher", 0, ifnet_detacher_thread_cont);
	/*
	 * msleep0() shouldn't have returned as PCATCH was not set;
	 * therefore assert in this case.
	 */
	dlil_if_unlock();
	VERIFY(0);
}
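
/*
 * The detacher thread above uses the continuation form of msleep0():
 * rather than blocking while holding on to a kernel stack, the thread
 * passes ifnet_detacher_thread_cont as a continuation, so the wakeup
 * issued by ifnet_detaching_enqueue() re-enters that function from the
 * top.  This is why the statements after msleep0() are annotated
 * NOTREACHED, and why ifnet_detacher_thread_func() asserts if
 * msleep0() ever returns.
 */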

static void
ifnet_detach_final(struct ifnet *ifp)
{
	struct ifnet_filter *filter, *filter_next;
	struct ifnet_filter_head fhead;
	struct dlil_threading_info *inp;
	struct ifaddr *ifa;
	ifnet_detached_func if_free;
	int i;

	lck_mtx_lock(&ifp->if_ref_lock);
	if (!(ifp->if_refflags & IFRF_DETACHING)) {
		panic("%s: flags mismatch (detaching not set) ifp=%p",
		    __func__, ifp);
		/* NOTREACHED */
	}

	/*
	 * Wait until the existing IO references get released
	 * before we proceed with ifnet_detach.  This is not a
	 * common case, so block without using a continuation.
	 */
	while (ifp->if_refio > 0) {
		printf("%s: Waiting for IO references on %s interface "
		    "to be released\n", __func__, if_name(ifp));
		(void) msleep(&(ifp->if_refio), &ifp->if_ref_lock,
		    (PZERO - 1), "ifnet_ioref_wait", NULL);
	}
	lck_mtx_unlock(&ifp->if_ref_lock);

	/* Drain and destroy send queue */
	ifclassq_teardown(ifp);

	/* Detach interface filters */
	lck_mtx_lock(&ifp->if_flt_lock);
	if_flt_monitor_enter(ifp);

	lck_mtx_assert(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
	fhead = ifp->if_flt_head;
	TAILQ_INIT(&ifp->if_flt_head);

	for (filter = TAILQ_FIRST(&fhead); filter; filter = filter_next) {
		filter_next = TAILQ_NEXT(filter, filt_next);
		lck_mtx_unlock(&ifp->if_flt_lock);

		dlil_detach_filter_internal(filter, 1);
		lck_mtx_lock(&ifp->if_flt_lock);
	}
	if_flt_monitor_leave(ifp);
	lck_mtx_unlock(&ifp->if_flt_lock);

	/* Tell upper layers to drop their network addresses */
	if_purgeaddrs(ifp);

	ifnet_lock_exclusive(ifp);

	/* Unplumb all protocols */
	for (i = 0; i < PROTO_HASH_SLOTS; i++) {
		struct if_proto *proto;

		proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
		while (proto != NULL) {
			protocol_family_t family = proto->protocol_family;
			ifnet_lock_done(ifp);
			proto_unplumb(family, ifp);
			ifnet_lock_exclusive(ifp);
			proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
		}
		/* There should not be any protocols left */
		VERIFY(SLIST_EMPTY(&ifp->if_proto_hash[i]));
	}
	zfree(dlif_phash_zone, ifp->if_proto_hash);
	ifp->if_proto_hash = NULL;

	/* Detach (permanent) link address from if_addrhead */
	ifa = TAILQ_FIRST(&ifp->if_addrhead);
	VERIFY(ifnet_addrs[ifp->if_index - 1] == ifa);
	IFA_LOCK(ifa);
	if_detach_link_ifa(ifp, ifa);
	IFA_UNLOCK(ifa);

	/* Remove (permanent) link address from ifnet_addrs[] */
	IFA_REMREF(ifa);
	ifnet_addrs[ifp->if_index - 1] = NULL;

	/* This interface should not be on {ifnet_head,detaching} */
	VERIFY(ifp->if_link.tqe_next == NULL);
	VERIFY(ifp->if_link.tqe_prev == NULL);
	VERIFY(ifp->if_detaching_link.tqe_next == NULL);
	VERIFY(ifp->if_detaching_link.tqe_prev == NULL);

	/* Prefix list should be empty by now */
	VERIFY(TAILQ_EMPTY(&ifp->if_prefixhead));

	/* The slot should have been emptied */
	VERIFY(ifindex2ifnet[ifp->if_index] == NULL);

	/* There should not be any addresses left */
	VERIFY(TAILQ_EMPTY(&ifp->if_addrhead));

	/*
	 * Signal the starter thread to terminate itself.
	 */
	if (ifp->if_start_thread != THREAD_NULL) {
		lck_mtx_lock_spin(&ifp->if_start_lock);
		ifp->if_start_flags = 0;
		ifp->if_start_thread = THREAD_NULL;
		wakeup_one((caddr_t)&ifp->if_start_thread);
		lck_mtx_unlock(&ifp->if_start_lock);
	}

	/*
	 * Signal the poller thread to terminate itself.
	 */
	if (ifp->if_poll_thread != THREAD_NULL) {
		lck_mtx_lock_spin(&ifp->if_poll_lock);
		ifp->if_poll_thread = THREAD_NULL;
		wakeup_one((caddr_t)&ifp->if_poll_thread);
		lck_mtx_unlock(&ifp->if_poll_lock);
	}

	/*
	 * If thread affinity was set for the workloop thread, we will need
	 * to tear down the affinity and release the extra reference count
	 * taken at attach time.  Does not apply to lo0 or other interfaces
	 * without dedicated input threads.
	 */
	if ((inp = ifp->if_inp) != NULL) {
		VERIFY(inp != dlil_main_input_thread);

		if (inp->net_affinity) {
			struct thread *tp, *wtp, *ptp;

			lck_mtx_lock_spin(&inp->input_lck);
			wtp = inp->wloop_thr;
			inp->wloop_thr = THREAD_NULL;
			ptp = inp->poll_thr;
			inp->poll_thr = THREAD_NULL;
			tp = inp->input_thr;	/* don't nullify now */
			inp->tag = 0;
			inp->net_affinity = FALSE;
			lck_mtx_unlock(&inp->input_lck);

			/* Tear down poll thread affinity */
			if (ptp != NULL) {
				VERIFY(ifp->if_eflags & IFEF_RXPOLL);
				(void) dlil_affinity_set(ptp,
				    THREAD_AFFINITY_TAG_NULL);
				thread_deallocate(ptp);
			}

			/* Tear down workloop thread affinity */
			if (wtp != NULL) {
				(void) dlil_affinity_set(wtp,
				    THREAD_AFFINITY_TAG_NULL);
				thread_deallocate(wtp);
			}

			/* Tear down DLIL input thread affinity */
			(void) dlil_affinity_set(tp, THREAD_AFFINITY_TAG_NULL);
			thread_deallocate(tp);
		}

		/* disassociate ifp DLIL input thread */
		ifp->if_inp = NULL;

		lck_mtx_lock_spin(&inp->input_lck);
		inp->input_waiting |= DLIL_INPUT_TERMINATE;
		if (!(inp->input_waiting & DLIL_INPUT_RUNNING)) {
			wakeup_one((caddr_t)&inp->input_waiting);
		}
		lck_mtx_unlock(&inp->input_lck);
	}

	/* The driver might unload, so point these to ourselves */
	if_free = ifp->if_free;
	ifp->if_output = ifp_if_output;
	ifp->if_pre_enqueue = ifp_if_output;
	ifp->if_start = ifp_if_start;
	ifp->if_output_ctl = ifp_if_ctl;
	ifp->if_input_poll = ifp_if_input_poll;
	ifp->if_input_ctl = ifp_if_ctl;
	ifp->if_ioctl = ifp_if_ioctl;
	ifp->if_set_bpf_tap = ifp_if_set_bpf_tap;
	ifp->if_free = ifp_if_free;
	ifp->if_demux = ifp_if_demux;
	ifp->if_event = ifp_if_event;
	ifp->if_framer_legacy = ifp_if_framer;
	ifp->if_framer = ifp_if_framer_extended;
	ifp->if_add_proto = ifp_if_add_proto;
	ifp->if_del_proto = ifp_if_del_proto;
	ifp->if_check_multi = ifp_if_check_multi;

	/* wipe out interface description */
	VERIFY(ifp->if_desc.ifd_maxlen == IF_DESCSIZE);
	ifp->if_desc.ifd_len = 0;
	VERIFY(ifp->if_desc.ifd_desc != NULL);
	bzero(ifp->if_desc.ifd_desc, IF_DESCSIZE);

	/* there shouldn't be any delegation by now */
	VERIFY(ifp->if_delegated.ifp == NULL);
	VERIFY(ifp->if_delegated.type == 0);
	VERIFY(ifp->if_delegated.family == 0);
	VERIFY(ifp->if_delegated.subfamily == 0);
	VERIFY(ifp->if_delegated.expensive == 0);

	ifnet_lock_done(ifp);

#if PF
	/*
	 * Detach this interface from packet filter, if enabled.
	 */
	pf_ifnet_hook(ifp, 0);
#endif /* PF */

	/* Filter list should be empty */
	lck_mtx_lock_spin(&ifp->if_flt_lock);
	VERIFY(TAILQ_EMPTY(&ifp->if_flt_head));
	VERIFY(ifp->if_flt_busy == 0);
	VERIFY(ifp->if_flt_waiters == 0);
	lck_mtx_unlock(&ifp->if_flt_lock);

	/* Last chance to drain send queue */
	if_qflush(ifp, 0);

	/* Last chance to cleanup any cached route */
	lck_mtx_lock(&ifp->if_cached_route_lock);
	VERIFY(!ifp->if_fwd_cacheok);
	ROUTE_RELEASE(&ifp->if_fwd_route);
	bzero(&ifp->if_fwd_route, sizeof (ifp->if_fwd_route));
	ROUTE_RELEASE(&ifp->if_src_route);
	bzero(&ifp->if_src_route, sizeof (ifp->if_src_route));
	ROUTE_RELEASE(&ifp->if_src_route6);
	bzero(&ifp->if_src_route6, sizeof (ifp->if_src_route6));
	lck_mtx_unlock(&ifp->if_cached_route_lock);

	VERIFY(ifp->if_data_threshold == 0);

	ifnet_llreach_ifdetach(ifp);

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHED, NULL, 0);

	if (if_free != NULL)
		if_free(ifp);

	/*
	 * Finally, mark this ifnet as detached.
	 */
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	if (!(ifp->if_refflags & IFRF_DETACHING)) {
		panic("%s: flags mismatch (detaching not set) ifp=%p",
		    __func__, ifp);
		/* NOTREACHED */
	}
	ifp->if_refflags &= ~IFRF_DETACHING;
	lck_mtx_unlock(&ifp->if_ref_lock);

	if (dlil_verbose)
		printf("%s: detached\n", if_name(ifp));

	/* Release reference held during ifnet attach */
	ifnet_release(ifp);
}

static errno_t
ifp_if_output(struct ifnet *ifp, struct mbuf *m)
{
#pragma unused(ifp)
	m_freem(m);
	return (0);
}

static void
ifp_if_start(struct ifnet *ifp)
{
	ifnet_purge(ifp);
}

static void
ifp_if_input_poll(struct ifnet *ifp, u_int32_t flags, u_int32_t max_cnt,
    struct mbuf **m_head, struct mbuf **m_tail, u_int32_t *cnt, u_int32_t *len)
{
#pragma unused(ifp, flags, max_cnt)
	if (m_head != NULL)
		*m_head = NULL;
	if (m_tail != NULL)
		*m_tail = NULL;
	if (cnt != NULL)
		*cnt = 0;
	if (len != NULL)
		*len = 0;
}

static errno_t
ifp_if_ctl(struct ifnet *ifp, ifnet_ctl_cmd_t cmd, u_int32_t arglen, void *arg)
{
#pragma unused(ifp, cmd, arglen, arg)
	return (EOPNOTSUPP);
}

static errno_t
ifp_if_demux(struct ifnet *ifp, struct mbuf *m, char *fh, protocol_family_t *pf)
{
#pragma unused(ifp, fh, pf)
	m_freem(m);
	return (EJUSTRETURN);
}

static errno_t
ifp_if_add_proto(struct ifnet *ifp, protocol_family_t pf,
    const struct ifnet_demux_desc *da, u_int32_t dc)
{
#pragma unused(ifp, pf, da, dc)
	return (EINVAL);
}

static errno_t
ifp_if_del_proto(struct ifnet *ifp, protocol_family_t pf)
{
#pragma unused(ifp, pf)
	return (EINVAL);
}

static errno_t
ifp_if_check_multi(struct ifnet *ifp, const struct sockaddr *sa)
{
#pragma unused(ifp, sa)
	return (EOPNOTSUPP);
}

static errno_t
ifp_if_framer(struct ifnet *ifp, struct mbuf **m,
    const struct sockaddr *sa, const char *ll, const char *t)
{
#pragma unused(ifp, m, sa, ll, t)
	return (ifp_if_framer_extended(ifp, m, sa, ll, t, NULL, NULL));
}

static errno_t
ifp_if_framer_extended(struct ifnet *ifp, struct mbuf **m,
    const struct sockaddr *sa, const char *ll, const char *t,
    u_int32_t *pre, u_int32_t *post)
{
#pragma unused(ifp, sa, ll, t)
	m_freem(*m);
	*m = NULL;

	if (pre != NULL)
		*pre = 0;
	if (post != NULL)
		*post = 0;

	return (EJUSTRETURN);
}

errno_t
ifp_if_ioctl(struct ifnet *ifp, unsigned long cmd, void *arg)
{
#pragma unused(ifp, cmd, arg)
	return (EOPNOTSUPP);
}

static errno_t
ifp_if_set_bpf_tap(struct ifnet *ifp, bpf_tap_mode tm, bpf_packet_func f)
{
#pragma unused(ifp, tm, f)
	/* XXX not sure what to do here */
	return (0);
}

static void
ifp_if_free(struct ifnet *ifp)
{
#pragma unused(ifp)
}

static void
ifp_if_event(struct ifnet *ifp, const struct kev_msg *e)
{
#pragma unused(ifp, e)
}

__private_extern__
int dlil_if_acquire(u_int32_t family, const void *uniqueid,
    size_t uniqueid_len, struct ifnet **ifp)
{
	struct ifnet *ifp1 = NULL;
	struct dlil_ifnet *dlifp1 = NULL;
	void *buf, *base, **pbuf;
	int ret = 0;

	dlil_if_lock();
	TAILQ_FOREACH(dlifp1, &dlil_ifnet_head, dl_if_link) {
		ifp1 = (struct ifnet *)dlifp1;

		if (ifp1->if_family != family)
			continue;

		lck_mtx_lock(&dlifp1->dl_if_lock);
		/* same uniqueid and same len or no unique id specified */
		if ((uniqueid_len == dlifp1->dl_if_uniqueid_len) &&
		    !bcmp(uniqueid, dlifp1->dl_if_uniqueid, uniqueid_len)) {
			/* check for matching interface in use */
			if (dlifp1->dl_if_flags & DLIF_INUSE) {
				if (uniqueid_len) {
					ret = EBUSY;
					lck_mtx_unlock(&dlifp1->dl_if_lock);
					goto end;
				}
			} else {
				dlifp1->dl_if_flags |= (DLIF_INUSE|DLIF_REUSE);
				lck_mtx_unlock(&dlifp1->dl_if_lock);
				*ifp = ifp1;
				goto end;
			}
		}
		lck_mtx_unlock(&dlifp1->dl_if_lock);
	}

	/* no interface found, allocate a new one */
	buf = zalloc(dlif_zone);
	if (buf == NULL) {
		ret = ENOMEM;
		goto end;
	}
	bzero(buf, dlif_bufsize);

	/* Get the 64-bit aligned base address for this object */
	base = (void *)P2ROUNDUP((intptr_t)buf + sizeof (u_int64_t),
	    sizeof (u_int64_t));
	VERIFY(((intptr_t)base + dlif_size) <= ((intptr_t)buf + dlif_bufsize));

	/*
	 * Wind back a pointer size from the aligned base and
	 * save the original address so we can free it later.
	 */
	pbuf = (void **)((intptr_t)base - sizeof (void *));
	*pbuf = buf;
	dlifp1 = base;

	if (uniqueid_len) {
		MALLOC(dlifp1->dl_if_uniqueid, void *, uniqueid_len,
		    M_NKE, M_WAITOK);
		if (dlifp1->dl_if_uniqueid == NULL) {
			zfree(dlif_zone, dlifp1);
			ret = ENOMEM;
			goto end;
		}
		bcopy(uniqueid, dlifp1->dl_if_uniqueid, uniqueid_len);
		dlifp1->dl_if_uniqueid_len = uniqueid_len;
	}

	ifp1 = (struct ifnet *)dlifp1;
	dlifp1->dl_if_flags = DLIF_INUSE;
	if (ifnet_debug) {
		dlifp1->dl_if_flags |= DLIF_DEBUG;
		dlifp1->dl_if_trace = dlil_if_trace;
	}
	ifp1->if_name = dlifp1->dl_if_namestorage;
	ifp1->if_xname = dlifp1->dl_if_xnamestorage;

	/* initialize interface description */
	ifp1->if_desc.ifd_maxlen = IF_DESCSIZE;
	ifp1->if_desc.ifd_len = 0;
	ifp1->if_desc.ifd_desc = dlifp1->dl_if_descstorage;

#if CONFIG_MACF_NET
	mac_ifnet_label_init(ifp1);
#endif

	if ((ret = dlil_alloc_local_stats(ifp1)) != 0) {
		DLIL_PRINTF("%s: failed to allocate if local stats, "
		    "error: %d\n", __func__, ret);
		/* This probably shouldn't be fatal */
		ret = 0;
	}

	lck_mtx_init(&dlifp1->dl_if_lock, ifnet_lock_group, ifnet_lock_attr);
	lck_rw_init(&ifp1->if_lock, ifnet_lock_group, ifnet_lock_attr);
	lck_mtx_init(&ifp1->if_ref_lock, ifnet_lock_group, ifnet_lock_attr);
	lck_mtx_init(&ifp1->if_flt_lock, ifnet_lock_group, ifnet_lock_attr);
	lck_mtx_init(&ifp1->if_addrconfig_lock, ifnet_lock_group,
	    ifnet_lock_attr);
	lck_rw_init(&ifp1->if_llreach_lock, ifnet_lock_group, ifnet_lock_attr);
#if INET
	lck_rw_init(&ifp1->if_inetdata_lock, ifnet_lock_group,
	    ifnet_lock_attr);
	ifp1->if_inetdata = NULL;
#endif
#if INET6
	lck_rw_init(&ifp1->if_inet6data_lock, ifnet_lock_group,
	    ifnet_lock_attr);
	ifp1->if_inet6data = NULL;
#endif
	lck_rw_init(&ifp1->if_link_status_lock, ifnet_lock_group,
	    ifnet_lock_attr);
	ifp1->if_link_status = NULL;

	/* for send data paths */
	lck_mtx_init(&ifp1->if_start_lock, ifnet_snd_lock_group,
	    ifnet_lock_attr);
	lck_mtx_init(&ifp1->if_cached_route_lock, ifnet_snd_lock_group,
	    ifnet_lock_attr);
	lck_mtx_init(&ifp1->if_snd.ifcq_lock, ifnet_snd_lock_group,
	    ifnet_lock_attr);

	/* for receive data paths */
	lck_mtx_init(&ifp1->if_poll_lock, ifnet_rcv_lock_group,
	    ifnet_lock_attr);

	TAILQ_INSERT_TAIL(&dlil_ifnet_head, dlifp1, dl_if_link);

	*ifp = ifp1;

end:
	dlil_if_unlock();

	VERIFY(dlifp1 == NULL || (IS_P2ALIGNED(dlifp1, sizeof (u_int64_t)) &&
	    IS_P2ALIGNED(&ifp1->if_data, sizeof (u_int64_t))));

	return (ret);
}
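
/*
 * Layout of the zone element allocated above (illustrative):
 *
 *	buf ... [padding][saved buf pointer][8-byte aligned dlil_ifnet]
 *	                 ^--- pbuf          ^--- base == dlifp1
 *
 * The original zalloc() address is stashed in the pointer-sized slot
 * immediately preceding the aligned base, so the original allocation
 * can always be recovered from the dlil_ifnet pointer alone.
 */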

__private_extern__ void
dlil_if_release(ifnet_t ifp)
{
	struct dlil_ifnet *dlifp = (struct dlil_ifnet *)ifp;

	ifnet_lock_exclusive(ifp);
	lck_mtx_lock(&dlifp->dl_if_lock);
	dlifp->dl_if_flags &= ~DLIF_INUSE;
	strlcpy(dlifp->dl_if_namestorage, ifp->if_name, IFNAMSIZ);
	ifp->if_name = dlifp->dl_if_namestorage;
	/* Reset external name (name + unit) */
	ifp->if_xname = dlifp->dl_if_xnamestorage;
	snprintf(__DECONST(char *, ifp->if_xname), IFXNAMSIZ,
	    "%s?", ifp->if_name);
	lck_mtx_unlock(&dlifp->dl_if_lock);
#if CONFIG_MACF_NET
	/*
	 * We can either recycle the MAC label here or in dlil_if_acquire().
	 * It seems logical to do it here but this means that anything that
	 * still has a handle on ifp will now see it as unlabeled.
	 * Since the interface is "dead" that may be OK.  Revisit later.
	 */
	mac_ifnet_label_recycle(ifp);
#endif
	ifnet_lock_done(ifp);
}

__private_extern__ void
dlil_if_lock(void)
{
	lck_mtx_lock(&dlil_ifnet_lock);
}

__private_extern__ void
dlil_if_unlock(void)
{
	lck_mtx_unlock(&dlil_ifnet_lock);
}

__private_extern__ void
dlil_if_lock_assert(void)
{
	lck_mtx_assert(&dlil_ifnet_lock, LCK_MTX_ASSERT_OWNED);
}

__private_extern__ void
dlil_proto_unplumb_all(struct ifnet *ifp)
{
	/*
	 * if_proto_hash[0-2] are for PF_INET, PF_INET6 and PF_VLAN, where
	 * each bucket contains exactly one entry; PF_VLAN does not need an
	 * explicit unplumb.
	 *
	 * if_proto_hash[3] is for other protocols; we expect anything
	 * in this bucket to respond to the DETACHING event (which would
	 * have happened by now) and do the unplumb then.
	 */
	(void) proto_unplumb(PF_INET, ifp);
#if INET6
	(void) proto_unplumb(PF_INET6, ifp);
#endif /* INET6 */
}

static void
ifp_src_route_copyout(struct ifnet *ifp, struct route *dst)
{
	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
	lck_mtx_convert_spin(&ifp->if_cached_route_lock);

	route_copyout(dst, &ifp->if_src_route, sizeof (*dst));

	lck_mtx_unlock(&ifp->if_cached_route_lock);
}

static void
ifp_src_route_copyin(struct ifnet *ifp, struct route *src)
{
	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
	lck_mtx_convert_spin(&ifp->if_cached_route_lock);

	if (ifp->if_fwd_cacheok) {
		route_copyin(src, &ifp->if_src_route, sizeof (*src));
	} else {
		ROUTE_RELEASE(src);
	}
	lck_mtx_unlock(&ifp->if_cached_route_lock);
}

#if INET6
static void
ifp_src_route6_copyout(struct ifnet *ifp, struct route_in6 *dst)
{
	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
	lck_mtx_convert_spin(&ifp->if_cached_route_lock);

	route_copyout((struct route *)dst, (struct route *)&ifp->if_src_route6,
	    sizeof (*dst));

	lck_mtx_unlock(&ifp->if_cached_route_lock);
}

static void
ifp_src_route6_copyin(struct ifnet *ifp, struct route_in6 *src)
{
	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
	lck_mtx_convert_spin(&ifp->if_cached_route_lock);

	if (ifp->if_fwd_cacheok) {
		route_copyin((struct route *)src,
		    (struct route *)&ifp->if_src_route6, sizeof (*src));
	} else {
		ROUTE_RELEASE(src);
	}
	lck_mtx_unlock(&ifp->if_cached_route_lock);
}
#endif /* INET6 */

struct rtentry *
ifnet_cached_rtlookup_inet(struct ifnet *ifp, struct in_addr src_ip)
{
	struct route src_rt;
	struct sockaddr_in *dst;

	dst = (struct sockaddr_in *)(void *)(&src_rt.ro_dst);

	ifp_src_route_copyout(ifp, &src_rt);

	if (ROUTE_UNUSABLE(&src_rt) || src_ip.s_addr != dst->sin_addr.s_addr) {
		ROUTE_RELEASE(&src_rt);
		if (dst->sin_family != AF_INET) {
			bzero(&src_rt.ro_dst, sizeof (src_rt.ro_dst));
			dst->sin_len = sizeof (src_rt.ro_dst);
			dst->sin_family = AF_INET;
		}
		dst->sin_addr = src_ip;

		if (src_rt.ro_rt == NULL) {
			src_rt.ro_rt = rtalloc1_scoped((struct sockaddr *)dst,
			    0, 0, ifp->if_index);

			if (src_rt.ro_rt != NULL) {
				/* retain a ref, copyin consumes one */
				struct rtentry *rte = src_rt.ro_rt;
				RT_ADDREF(rte);
				ifp_src_route_copyin(ifp, &src_rt);
				src_rt.ro_rt = rte;
			}
		}
	}

	return (src_rt.ro_rt);
}

#if INET6
struct rtentry *
ifnet_cached_rtlookup_inet6(struct ifnet *ifp, struct in6_addr *src_ip6)
{
	struct route_in6 src_rt;

	ifp_src_route6_copyout(ifp, &src_rt);

	if (ROUTE_UNUSABLE(&src_rt) ||
	    !IN6_ARE_ADDR_EQUAL(src_ip6, &src_rt.ro_dst.sin6_addr)) {
		ROUTE_RELEASE(&src_rt);
		if (src_rt.ro_dst.sin6_family != AF_INET6) {
			bzero(&src_rt.ro_dst, sizeof (src_rt.ro_dst));
			src_rt.ro_dst.sin6_len = sizeof (src_rt.ro_dst);
			src_rt.ro_dst.sin6_family = AF_INET6;
		}
		src_rt.ro_dst.sin6_scope_id = in6_addr2scopeid(ifp, src_ip6);
		bcopy(src_ip6, &src_rt.ro_dst.sin6_addr,
		    sizeof (src_rt.ro_dst.sin6_addr));

		if (src_rt.ro_rt == NULL) {
			src_rt.ro_rt = rtalloc1_scoped(
			    (struct sockaddr *)&src_rt.ro_dst, 0, 0,
			    ifp->if_index);

			if (src_rt.ro_rt != NULL) {
				/* retain a ref, copyin consumes one */
				struct rtentry *rte = src_rt.ro_rt;
				RT_ADDREF(rte);
				ifp_src_route6_copyin(ifp, &src_rt);
				src_rt.ro_rt = rte;
			}
		}
	}

	return (src_rt.ro_rt);
}
#endif /* INET6 */
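
/*
 * Reference flow for the cached lookups above (both address families):
 * on a cache miss, rtalloc1_scoped() returns an rtentry holding one
 * reference, and RT_ADDREF adds a second before the copyin consumes
 * one for the per-ifnet cache.  The rtentry handed back to the caller
 * therefore always carries exactly one reference that the caller is
 * responsible for releasing.
 */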

void
if_lqm_update(struct ifnet *ifp, int lqm, int locked)
{
	struct kev_dl_link_quality_metric_data ev_lqm_data;

	VERIFY(lqm >= IFNET_LQM_MIN && lqm <= IFNET_LQM_MAX);

	/* Normalize to edge */
	if (lqm >= 0 && lqm <= IFNET_LQM_THRESH_BAD)
		lqm = IFNET_LQM_THRESH_BAD;
	else if (lqm > IFNET_LQM_THRESH_BAD && lqm <= IFNET_LQM_THRESH_POOR)
		lqm = IFNET_LQM_THRESH_POOR;
	else if (lqm > IFNET_LQM_THRESH_POOR && lqm <= IFNET_LQM_THRESH_GOOD)
		lqm = IFNET_LQM_THRESH_GOOD;

	/*
	 * Take the lock if needed
	 */
	if (!locked)
		ifnet_lock_exclusive(ifp);

	if (lqm == ifp->if_interface_state.lqm_state &&
	    (ifp->if_interface_state.valid_bitmask &
	    IF_INTERFACE_STATE_LQM_STATE_VALID)) {
		/*
		 * Release the lock if it was not held by the caller
		 */
		if (!locked)
			ifnet_lock_done(ifp);
		return;		/* nothing to update */
	}
	ifp->if_interface_state.valid_bitmask |=
	    IF_INTERFACE_STATE_LQM_STATE_VALID;
	ifp->if_interface_state.lqm_state = lqm;

	/*
	 * Don't want to hold the lock when issuing kernel events
	 */
	ifnet_lock_done(ifp);

	bzero(&ev_lqm_data, sizeof (ev_lqm_data));
	ev_lqm_data.link_quality_metric = lqm;

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_LINK_QUALITY_METRIC_CHANGED,
	    (struct net_event_data *)&ev_lqm_data, sizeof (ev_lqm_data));

	/*
	 * Reacquire the lock for the caller
	 */
	if (locked)
		ifnet_lock_exclusive(ifp);
}
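
/*
 * Normalization example for if_lqm_update(): a raw metric anywhere in
 * (IFNET_LQM_THRESH_BAD, IFNET_LQM_THRESH_POOR] collapses to
 * IFNET_LQM_THRESH_POOR, so consumers of the kernel event only see the
 * discrete edge values (BAD, POOR, GOOD), never raw in-between
 * readings.  Negative values (e.g. IFNET_LQM_THRESH_OFF as passed by
 * ifnet_detach) are deliberately left untouched by the edge mapping.
 */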

static void
if_rrc_state_update(struct ifnet *ifp, unsigned int rrc_state)
{
	struct kev_dl_rrc_state kev;

	if (rrc_state == ifp->if_interface_state.rrc_state &&
	    (ifp->if_interface_state.valid_bitmask &
	    IF_INTERFACE_STATE_RRC_STATE_VALID))
		return;

	ifp->if_interface_state.valid_bitmask |=
	    IF_INTERFACE_STATE_RRC_STATE_VALID;

	ifp->if_interface_state.rrc_state = rrc_state;

	/*
	 * Don't want to hold the lock when issuing kernel events
	 */
	ifnet_lock_done(ifp);

	bzero(&kev, sizeof(struct kev_dl_rrc_state));
	kev.rrc_state = rrc_state;

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_RRC_STATE_CHANGED,
	    (struct net_event_data *)&kev, sizeof(struct kev_dl_rrc_state));

	ifnet_lock_exclusive(ifp);
}

errno_t
if_state_update(struct ifnet *ifp,
    struct if_interface_state *if_interface_state)
{
	u_short if_index_available = 0;

	ifnet_lock_exclusive(ifp);

	if ((ifp->if_type != IFT_CELLULAR) &&
	    (if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_RRC_STATE_VALID)) {
		ifnet_lock_done(ifp);
		return (ENOTSUP);
	}
	if ((if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_LQM_STATE_VALID) &&
	    (if_interface_state->lqm_state < IFNET_LQM_MIN ||
	    if_interface_state->lqm_state > IFNET_LQM_MAX)) {
		ifnet_lock_done(ifp);
		return (EINVAL);
	}
	if ((if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_RRC_STATE_VALID) &&
	    if_interface_state->rrc_state !=
	    IF_INTERFACE_STATE_RRC_STATE_IDLE &&
	    if_interface_state->rrc_state !=
	    IF_INTERFACE_STATE_RRC_STATE_CONNECTED) {
		ifnet_lock_done(ifp);
		return (EINVAL);
	}

	if (if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_LQM_STATE_VALID) {
		if_lqm_update(ifp, if_interface_state->lqm_state, 1);
	}
	if (if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_RRC_STATE_VALID) {
		if_rrc_state_update(ifp, if_interface_state->rrc_state);
	}
	if (if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID) {
		ifp->if_interface_state.valid_bitmask |=
		    IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
		ifp->if_interface_state.interface_availability =
		    if_interface_state->interface_availability;

		if (ifp->if_interface_state.interface_availability ==
		    IF_INTERFACE_STATE_INTERFACE_AVAILABLE) {
			if_index_available = ifp->if_index;
		}
	}
	ifnet_lock_done(ifp);

	/*
	 * Check if the TCP connections going on this interface should be
	 * forced to send probe packets instead of waiting for TCP timers
	 * to fire.  This will be done when there is an explicit
	 * notification that the interface became available.
	 */
	if (if_index_available > 0)
		tcp_interface_send_probe(if_index_available);

	return (0);
}

void
if_get_state(struct ifnet *ifp,
    struct if_interface_state *if_interface_state)
{
	ifnet_lock_shared(ifp);

	if_interface_state->valid_bitmask = 0;

	if (ifp->if_interface_state.valid_bitmask &
	    IF_INTERFACE_STATE_RRC_STATE_VALID) {
		if_interface_state->valid_bitmask |=
		    IF_INTERFACE_STATE_RRC_STATE_VALID;
		if_interface_state->rrc_state =
		    ifp->if_interface_state.rrc_state;
	}
	if (ifp->if_interface_state.valid_bitmask &
	    IF_INTERFACE_STATE_LQM_STATE_VALID) {
		if_interface_state->valid_bitmask |=
		    IF_INTERFACE_STATE_LQM_STATE_VALID;
		if_interface_state->lqm_state =
		    ifp->if_interface_state.lqm_state;
	}
	if (ifp->if_interface_state.valid_bitmask &
	    IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID) {
		if_interface_state->valid_bitmask |=
		    IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
		if_interface_state->interface_availability =
		    ifp->if_interface_state.interface_availability;
	}

	ifnet_lock_done(ifp);
}

errno_t
if_probe_connectivity(struct ifnet *ifp, u_int32_t conn_probe)
{
	ifnet_lock_exclusive(ifp);
	if (conn_probe > 1) {
		ifnet_lock_done(ifp);
		return (EINVAL);
	}
	if (conn_probe == 0)
		ifp->if_eflags &= ~IFEF_PROBE_CONNECTIVITY;
	else
		ifp->if_eflags |= IFEF_PROBE_CONNECTIVITY;
	ifnet_lock_done(ifp);

	tcp_probe_connectivity(ifp, conn_probe);
	return (0);
}

/* for uuid.c */
int
uuid_get_ethernet(u_int8_t *node)
{
	struct ifnet *ifp;
	struct sockaddr_dl *sdl;

	ifnet_head_lock_shared();
	TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
		ifnet_lock_shared(ifp);
		IFA_LOCK_SPIN(ifp->if_lladdr);
		sdl = (struct sockaddr_dl *)(void *)ifp->if_lladdr->ifa_addr;
		if (sdl->sdl_type == IFT_ETHER) {
			memcpy(node, LLADDR(sdl), ETHER_ADDR_LEN);
			IFA_UNLOCK(ifp->if_lladdr);
			ifnet_lock_done(ifp);
			ifnet_head_done();
			return (0);
		}
		IFA_UNLOCK(ifp->if_lladdr);
		ifnet_lock_done(ifp);
	}
	ifnet_head_done();

	return (-1);
}
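
/*
 * uuid_get_ethernet() gives uuid.c a stable 6-byte node identifier for
 * time-based UUIDs: the link-layer address of the first IFT_ETHER
 * interface on ifnet_head.  A -1 return tells the caller that no
 * Ethernet interface exists, so it must fall back to another node
 * source (the fallback logic lives in uuid.c, not here).
 */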
6767
6768static int
6769sysctl_rxpoll SYSCTL_HANDLER_ARGS
6770{
6771#pragma unused(arg1, arg2)
6772 uint32_t i;
6773 int err;
6774
6775 i = if_rxpoll;
6776
6777 err = sysctl_handle_int(oidp, &i, 0, req);
6778 if (err != 0 || req->newptr == USER_ADDR_NULL)
6779 return (err);
6780
6781 if (net_rxpoll == 0)
6782 return (ENXIO);
6783
6784 if_rxpoll = i;
6785 return (err);
6786}
6787
6788static int
6789sysctl_rxpoll_mode_holdtime SYSCTL_HANDLER_ARGS
6790{
6791#pragma unused(arg1, arg2)
6792 uint64_t q;
6793 int err;
 6794
 6795	q = if_rxpoll_mode_holdtime;
 6796
 6797	err = sysctl_handle_quad(oidp, &q, 0, req);
6798 if (err != 0 || req->newptr == USER_ADDR_NULL)
6799 return (err);
6800
6801 if (q < IF_RXPOLL_MODE_HOLDTIME_MIN)
6802 q = IF_RXPOLL_MODE_HOLDTIME_MIN;
6803
6804 if_rxpoll_mode_holdtime = q;
 6805
6806 return (err);
6807}
6808
6809static int
6810sysctl_rxpoll_sample_holdtime SYSCTL_HANDLER_ARGS
6811{
6812#pragma unused(arg1, arg2)
6813 uint64_t q;
6814 int err;
 6815
 6816	q = if_rxpoll_sample_holdtime;
 6817
 6818	err = sysctl_handle_quad(oidp, &q, 0, req);
6819 if (err != 0 || req->newptr == USER_ADDR_NULL)
6820 return (err);
6821
6822 if (q < IF_RXPOLL_SAMPLETIME_MIN)
6823 q = IF_RXPOLL_SAMPLETIME_MIN;
6824
6825 if_rxpoll_sample_holdtime = q;
 6826
6827 return (err);
6828}
6829
6830static int
6831sysctl_rxpoll_interval_time SYSCTL_HANDLER_ARGS
 6832{
6833#pragma unused(arg1, arg2)
6834 uint64_t q;
6835 int err;
 6836
 6837	q = if_rxpoll_interval_time;
 6838
6839 err = sysctl_handle_quad(oidp, &q, 0, req);
6840 if (err != 0 || req->newptr == USER_ADDR_NULL)
6841 return (err);
6842
6843 if (q < IF_RXPOLL_INTERVALTIME_MIN)
6844 q = IF_RXPOLL_INTERVALTIME_MIN;
 6845
 6846	if_rxpoll_interval_time = q;
 6847
 6848	return (err);
6849}
6850
6851static int
6852sysctl_rxpoll_wlowat SYSCTL_HANDLER_ARGS
 6853{
6854#pragma unused(arg1, arg2)
6855 uint32_t i;
6856 int err;
 6857
 6858	i = if_rxpoll_wlowat;
 6859
6860 err = sysctl_handle_int(oidp, &i, 0, req);
6861 if (err != 0 || req->newptr == USER_ADDR_NULL)
6862 return (err);
 6863
6864 if (i == 0 || i >= if_rxpoll_whiwat)
6865 return (EINVAL);
6866
6867 if_rxpoll_wlowat = i;
6868 return (err);
6869}
6870
6871static int
6872sysctl_rxpoll_whiwat SYSCTL_HANDLER_ARGS
 6873{
6874#pragma unused(arg1, arg2)
6875 uint32_t i;
6876 int err;
 6877
 6878	i = if_rxpoll_whiwat;
 6879
6880 err = sysctl_handle_int(oidp, &i, 0, req);
6881 if (err != 0 || req->newptr == USER_ADDR_NULL)
6882 return (err);
 6883
6884 if (i <= if_rxpoll_wlowat)
6885 return (EINVAL);
6886
6887 if_rxpoll_whiwat = i;
6888 return (err);
6889}
6890
6891static int
6892sysctl_sndq_maxlen SYSCTL_HANDLER_ARGS
 6893{
6894#pragma unused(arg1, arg2)
6895 int i, err;
 6896
 6897	i = if_sndq_maxlen;
 6898
6899 err = sysctl_handle_int(oidp, &i, 0, req);
6900 if (err != 0 || req->newptr == USER_ADDR_NULL)
6901 return (err);
 6902
6903 if (i < IF_SNDQ_MINLEN)
6904 i = IF_SNDQ_MINLEN;
 6905
6906 if_sndq_maxlen = i;
6907 return (err);
6908}
6909
6910static int
6911sysctl_rcvq_maxlen SYSCTL_HANDLER_ARGS
 6912{
6913#pragma unused(arg1, arg2)
6914 int i, err;
6915
6916 i = if_rcvq_maxlen;
6917
6918 err = sysctl_handle_int(oidp, &i, 0, req);
6919 if (err != 0 || req->newptr == USER_ADDR_NULL)
6920 return (err);
6921
6922 if (i < IF_RCVQ_MINLEN)
6923 i = IF_RCVQ_MINLEN;
6924
6925 if_rcvq_maxlen = i;
6926 return (err);
6927}
6928
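/*
 * Report a newly seen neighbor to ND6 and post a KEV_DL_NODE_PRESENCE
 * kernel event carrying its addresses, RSSI and quality/proximity
 * metrics, plus opaque service info supplied by the driver.
 */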
6929void
6930dlil_node_present(struct ifnet *ifp, struct sockaddr *sa,
6931 int32_t rssi, int lqm, int npm, u_int8_t srvinfo[48])
6932{
6933 struct kev_dl_node_presence kev;
6934 struct sockaddr_dl *sdl;
6935 struct sockaddr_in6 *sin6;
6936
6937 VERIFY(ifp);
6938 VERIFY(sa);
6939 VERIFY(sa->sa_family == AF_LINK || sa->sa_family == AF_INET6);
6940
6941 bzero(&kev, sizeof (kev));
6942 sin6 = &kev.sin6_node_address;
6943 sdl = &kev.sdl_node_address;
6944 nd6_alt_node_addr_decompose(ifp, sa, sdl, sin6);
6945 kev.rssi = rssi;
6946 kev.link_quality_metric = lqm;
6947 kev.node_proximity_metric = npm;
6948 bcopy(srvinfo, kev.node_service_info, sizeof (kev.node_service_info));
6949
6950 nd6_alt_node_present(ifp, sin6, sdl, rssi, lqm, npm);
6951 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_PRESENCE,
6952 &kev.link_data, sizeof (kev));
6953}
6954
6955void
6956dlil_node_absent(struct ifnet *ifp, struct sockaddr *sa)
6957{
6958 struct kev_dl_node_absence kev;
6959 struct sockaddr_in6 *sin6;
6960 struct sockaddr_dl *sdl;
6961
6962 VERIFY(ifp);
6963 VERIFY(sa);
6964 VERIFY(sa->sa_family == AF_LINK || sa->sa_family == AF_INET6);
6965
6966 bzero(&kev, sizeof (kev));
6967 sin6 = &kev.sin6_node_address;
6968 sdl = &kev.sdl_node_address;
6969 nd6_alt_node_addr_decompose(ifp, sa, sdl, sin6);
6970
6971 nd6_alt_node_absent(ifp, sin6);
6972 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_ABSENCE,
6973 &kev.link_data, sizeof (kev));
6974}
6975
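/*
 * Return a pointer to the link-layer address bytes of an interface
 * address.  When MAC policy requires it and the caller lacks the
 * "net.link.addr" privilege, a fixed placeholder address is
 * substituted so the real hardware address is not disclosed.
 */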
6976const void *
6977dlil_ifaddr_bytes(const struct sockaddr_dl *sdl, size_t *sizep,
6978 kauth_cred_t *credp)
6979{
6980 const u_int8_t *bytes;
6981 size_t size;
6982
6983 bytes = CONST_LLADDR(sdl);
6984 size = sdl->sdl_alen;
6985
6986#if CONFIG_MACF
6987 if (dlil_lladdr_ckreq) {
6988 switch (sdl->sdl_type) {
6989 case IFT_ETHER:
 6990	case IFT_IEEE1394:
6991 break;
6992 default:
6993 credp = NULL;
6994 break;
 6995		}
6996
6997 if (credp && mac_system_check_info(*credp, "net.link.addr")) {
6998 static const u_int8_t unspec[FIREWIRE_EUI64_LEN] = {
6999 [0] = 2
7000 };
7001
7002 switch (sdl->sdl_type) {
7003 case IFT_ETHER:
7004 VERIFY(size == ETHER_ADDR_LEN);
7005 bytes = unspec;
7006 break;
7007 case IFT_IEEE1394:
7008 VERIFY(size == FIREWIRE_EUI64_LEN);
7009 bytes = unspec;
7010 break;
7011 default:
7012 VERIFY(FALSE);
7013 break;
 7014			}
7015 }
7016 }
7017#else
7018#pragma unused(credp)
7019#endif
7020
7021 if (sizep != NULL) *sizep = size;
7022 return (bytes);
7023}
7024
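/*
 * Post a KEV_DL_ISSUES kernel event on behalf of a module that has
 * detected a problem with this interface; the event carries the module
 * identifier, optional module-specific arguments and a timestamp.
 */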
7025void
7026dlil_report_issues(struct ifnet *ifp, u_int8_t modid[DLIL_MODIDLEN],
7027 u_int8_t info[DLIL_MODARGLEN])
7028{
7029 struct kev_dl_issues kev;
7030 struct timeval tv;
7031
7032 VERIFY(ifp != NULL);
7033 VERIFY(modid != NULL);
7034 _CASSERT(sizeof (kev.modid) == DLIL_MODIDLEN);
7035 _CASSERT(sizeof (kev.info) == DLIL_MODARGLEN);
7036
 7037	bzero(&kev, sizeof (kev));
7038
7039 microtime(&tv);
7040 kev.timestamp = tv.tv_sec;
7041 bcopy(modid, &kev.modid, DLIL_MODIDLEN);
7042 if (info != NULL)
7043 bcopy(info, &kev.info, DLIL_MODARGLEN);
7044
7045 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_ISSUES,
7046 &kev.link_data, sizeof (kev));
7047}
7048
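/*
 * SIOCSIFOPPORTUNISTIC/SIOCGIFOPPORTUNISTIC handler: set or get the
 * opportunistic throttling level of the interface and report, in
 * ifo_inuse, the number of TCP/UDP connections currently marked
 * opportunistic on it.
 */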
7049errno_t
7050ifnet_getset_opportunistic(ifnet_t ifp, u_long cmd, struct ifreq *ifr,
7051 struct proc *p)
7052{
7053 u_int32_t level = IFNET_THROTTLE_OFF;
7054 errno_t result = 0;
7055
7056 VERIFY(cmd == SIOCSIFOPPORTUNISTIC || cmd == SIOCGIFOPPORTUNISTIC);
7057
7058 if (cmd == SIOCSIFOPPORTUNISTIC) {
7059 /*
7060 * XXX: Use priv_check_cred() instead of root check?
7061 */
7062 if ((result = proc_suser(p)) != 0)
7063 return (result);
7064
7065 if (ifr->ifr_opportunistic.ifo_flags ==
7066 IFRIFOF_BLOCK_OPPORTUNISTIC)
7067 level = IFNET_THROTTLE_OPPORTUNISTIC;
7068 else if (ifr->ifr_opportunistic.ifo_flags == 0)
7069 level = IFNET_THROTTLE_OFF;
7070 else
7071 result = EINVAL;
7072
7073 if (result == 0)
7074 result = ifnet_set_throttle(ifp, level);
7075 } else if ((result = ifnet_get_throttle(ifp, &level)) == 0) {
7076 ifr->ifr_opportunistic.ifo_flags = 0;
7077 if (level == IFNET_THROTTLE_OPPORTUNISTIC) {
7078 ifr->ifr_opportunistic.ifo_flags |=
7079 IFRIFOF_BLOCK_OPPORTUNISTIC;
7080 }
7081 }
7082
7083 /*
7084 * Return the count of current opportunistic connections
7085 * over the interface.
7086 */
7087 if (result == 0) {
7088 uint32_t flags = 0;
7089 flags |= (cmd == SIOCSIFOPPORTUNISTIC) ?
7090 INPCB_OPPORTUNISTIC_SETCMD : 0;
7091 flags |= (level == IFNET_THROTTLE_OPPORTUNISTIC) ?
7092 INPCB_OPPORTUNISTIC_THROTTLEON : 0;
7093 ifr->ifr_opportunistic.ifo_inuse =
7094 udp_count_opportunistic(ifp->if_index, flags) +
7095 tcp_count_opportunistic(ifp->if_index, flags);
7096 }
7097
7098 if (result == EALREADY)
7099 result = 0;
7100
7101 return (result);
7102}
7103
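/*
 * Query the current throttling level from the interface's transmit
 * classq; only interfaces using the new output model (IFEF_TXSTART)
 * support throttling.
 */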
7104int
7105ifnet_get_throttle(struct ifnet *ifp, u_int32_t *level)
7106{
7107 struct ifclassq *ifq;
7108 int err = 0;
7109
7110 if (!(ifp->if_eflags & IFEF_TXSTART))
7111 return (ENXIO);
7112
7113 *level = IFNET_THROTTLE_OFF;
7114
7115 ifq = &ifp->if_snd;
7116 IFCQ_LOCK(ifq);
7117 /* Throttling works only for IFCQ, not ALTQ instances */
7118 if (IFCQ_IS_ENABLED(ifq))
7119 IFCQ_GET_THROTTLE(ifq, *level, err);
7120 IFCQ_UNLOCK(ifq);
7121
7122 return (err);
7123}
7124
7125int
7126ifnet_set_throttle(struct ifnet *ifp, u_int32_t level)
7127{
7128 struct ifclassq *ifq;
7129 int err = 0;
7130
7131 if (!(ifp->if_eflags & IFEF_TXSTART))
7132 return (ENXIO);
7133
7134 ifq = &ifp->if_snd;
7135
7136 switch (level) {
7137 case IFNET_THROTTLE_OFF:
7138 case IFNET_THROTTLE_OPPORTUNISTIC:
7139#if PF_ALTQ
7140 /* Throttling works only for IFCQ, not ALTQ instances */
7141 if (ALTQ_IS_ENABLED(IFCQ_ALTQ(ifq)))
7142 return (ENXIO);
7143#endif /* PF_ALTQ */
7144 break;
7145 default:
7146 return (EINVAL);
7147 }
7148
7149 IFCQ_LOCK(ifq);
7150 if (IFCQ_IS_ENABLED(ifq))
7151 IFCQ_SET_THROTTLE(ifq, level, err);
7152 IFCQ_UNLOCK(ifq);
7153
7154 if (err == 0) {
7155 printf("%s: throttling level set to %d\n", if_name(ifp),
7156 level);
7157 if (level == IFNET_THROTTLE_OFF)
7158 ifnet_start(ifp);
7159 }
7160
7161 return (err);
7162}
7163
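/*
 * SIOCSIFLOG/SIOCGIFLOG handler: validate the requested logging level
 * and facility flags (setting requires PRIV_NET_INTERFACE_CONTROL)
 * and pass them through to ifnet_set_log()/ifnet_get_log().
 */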
7164errno_t
7165ifnet_getset_log(ifnet_t ifp, u_long cmd, struct ifreq *ifr,
7166 struct proc *p)
7167{
7168#pragma unused(p)
7169 errno_t result = 0;
7170 uint32_t flags;
7171 int level, category, subcategory;
7172
7173 VERIFY(cmd == SIOCSIFLOG || cmd == SIOCGIFLOG);
7174
7175 if (cmd == SIOCSIFLOG) {
7176 if ((result = priv_check_cred(kauth_cred_get(),
7177 PRIV_NET_INTERFACE_CONTROL, 0)) != 0)
7178 return (result);
7179
7180 level = ifr->ifr_log.ifl_level;
7181 if (level < IFNET_LOG_MIN || level > IFNET_LOG_MAX)
7182 result = EINVAL;
7183
7184 flags = ifr->ifr_log.ifl_flags;
7185 if ((flags &= IFNET_LOGF_MASK) == 0)
7186 result = EINVAL;
7187
7188 category = ifr->ifr_log.ifl_category;
7189 subcategory = ifr->ifr_log.ifl_subcategory;
7190
7191 if (result == 0)
7192 result = ifnet_set_log(ifp, level, flags,
7193 category, subcategory);
7194 } else {
7195 result = ifnet_get_log(ifp, &level, &flags, &category,
7196 &subcategory);
7197 if (result == 0) {
7198 ifr->ifr_log.ifl_level = level;
7199 ifr->ifr_log.ifl_flags = flags;
7200 ifr->ifr_log.ifl_category = category;
7201 ifr->ifr_log.ifl_subcategory = subcategory;
7202 }
7203 }
7204
7205 return (result);
7206}
7207
7208int
7209ifnet_set_log(struct ifnet *ifp, int32_t level, uint32_t flags,
7210 int32_t category, int32_t subcategory)
7211{
7212 int err = 0;
7213
7214 VERIFY(level >= IFNET_LOG_MIN && level <= IFNET_LOG_MAX);
7215 VERIFY(flags & IFNET_LOGF_MASK);
7216
7217 /*
7218 * The logging level applies to all facilities; make sure to
7219 * update them all with the most current level.
7220 */
7221 flags |= ifp->if_log.flags;
7222
7223 if (ifp->if_output_ctl != NULL) {
7224 struct ifnet_log_params l;
7225
7226 bzero(&l, sizeof (l));
7227 l.level = level;
7228 l.flags = flags;
7229 l.flags &= ~IFNET_LOGF_DLIL;
7230 l.category = category;
7231 l.subcategory = subcategory;
7232
7233 /* Send this request to lower layers */
7234 if (l.flags != 0) {
7235 err = ifp->if_output_ctl(ifp, IFNET_CTL_SET_LOG,
7236 sizeof (l), &l);
7237 }
7238 } else if ((flags & ~IFNET_LOGF_DLIL) && ifp->if_output_ctl == NULL) {
7239 /*
7240 * If targeted to the lower layers without an output
7241 * control callback registered on the interface, just
7242 * silently ignore facilities other than ours.
7243 */
7244 flags &= IFNET_LOGF_DLIL;
 7245		if (flags == 0 && !(ifp->if_log.flags & IFNET_LOGF_DLIL))
7246 level = 0;
7247 }
7248
7249 if (err == 0) {
7250 if ((ifp->if_log.level = level) == IFNET_LOG_DEFAULT)
7251 ifp->if_log.flags = 0;
7252 else
7253 ifp->if_log.flags |= flags;
7254
7255 log(LOG_INFO, "%s: logging level set to %d flags=%b "
7256 "arg=%b, category=%d subcategory=%d\n", if_name(ifp),
7257 ifp->if_log.level, ifp->if_log.flags,
7258 IFNET_LOGF_BITS, flags, IFNET_LOGF_BITS,
7259 category, subcategory);
7260 }
7261
7262 return (err);
7263}
7264
7265int
7266ifnet_get_log(struct ifnet *ifp, int32_t *level, uint32_t *flags,
7267 int32_t *category, int32_t *subcategory)
7268{
7269 if (level != NULL)
7270 *level = ifp->if_log.level;
7271 if (flags != NULL)
7272 *flags = ifp->if_log.flags;
7273 if (category != NULL)
7274 *category = ifp->if_log.category;
7275 if (subcategory != NULL)
7276 *subcategory = ifp->if_log.subcategory;
7277
7278 return (0);
7279}
7280
7281int
7282ifnet_notify_address(struct ifnet *ifp, int af)
7283{
7284 struct ifnet_notify_address_params na;
7285
7286#if PF
7287 (void) pf_ifaddr_hook(ifp);
7288#endif /* PF */
7289
7290 if (ifp->if_output_ctl == NULL)
7291 return (EOPNOTSUPP);
7292
7293 bzero(&na, sizeof (na));
7294 na.address_family = af;
7295
7296 return (ifp->if_output_ctl(ifp, IFNET_CTL_NOTIFY_ADDRESS,
7297 sizeof (na), &na));
7298}
7299
7300errno_t
7301ifnet_flowid(struct ifnet *ifp, uint32_t *flowid)
7302{
7303 if (ifp == NULL || flowid == NULL) {
7304 return (EINVAL);
7305 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
7306 !(ifp->if_refflags & IFRF_ATTACHED)) {
7307 return (ENXIO);
7308 }
7309
7310 *flowid = ifp->if_flowhash;
7311
7312 return (0);
7313}
7314
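/*
 * Flow-control entry point for drivers: when the hardware cannot take
 * more packets, ifnet_disable_output() records the interface in the
 * flow-control tree (keyed by its flow hash) and marks the starter
 * thread flow-controlled; ifnet_enable_output() lifts the condition.
 */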
7315errno_t
7316ifnet_disable_output(struct ifnet *ifp)
7317{
7318 int err;
7319
7320 if (ifp == NULL) {
7321 return (EINVAL);
7322 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
7323 !(ifp->if_refflags & IFRF_ATTACHED)) {
7324 return (ENXIO);
7325 }
7326
7327 if ((err = ifnet_fc_add(ifp)) == 0) {
7328 lck_mtx_lock_spin(&ifp->if_start_lock);
7329 ifp->if_start_flags |= IFSF_FLOW_CONTROLLED;
7330 lck_mtx_unlock(&ifp->if_start_lock);
7331 }
7332 return (err);
7333}
7334
7335errno_t
7336ifnet_enable_output(struct ifnet *ifp)
7337{
7338 if (ifp == NULL) {
7339 return (EINVAL);
7340 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
7341 !(ifp->if_refflags & IFRF_ATTACHED)) {
7342 return (ENXIO);
7343 }
7344
7345 ifnet_start_common(ifp, 1);
7346 return (0);
7347}
7348
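/*
 * Called with the flow hash of an interface that previously disabled
 * output; if the hash still maps to an attached interface, transmission
 * is re-enabled.
 */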
7349void
7350ifnet_flowadv(uint32_t flowhash)
7351{
7352 struct ifnet_fc_entry *ifce;
7353 struct ifnet *ifp;
7354
7355 ifce = ifnet_fc_get(flowhash);
7356 if (ifce == NULL)
7357 return;
7358
7359 VERIFY(ifce->ifce_ifp != NULL);
7360 ifp = ifce->ifce_ifp;
7361
7362 /* flow hash gets recalculated per attach, so check */
7363 if (ifnet_is_attached(ifp, 1)) {
7364 if (ifp->if_flowhash == flowhash)
7365 (void) ifnet_enable_output(ifp);
7366 ifnet_decr_iorefcnt(ifp);
7367 }
7368 ifnet_fc_entry_free(ifce);
7369}
7370
7371/*
7372 * Function to compare ifnet_fc_entries in ifnet flow control tree
7373 */
7374static inline int
7375ifce_cmp(const struct ifnet_fc_entry *fc1, const struct ifnet_fc_entry *fc2)
7376{
7377 return (fc1->ifce_flowhash - fc2->ifce_flowhash);
7378}
7379
7380static int
7381ifnet_fc_add(struct ifnet *ifp)
7382{
7383 struct ifnet_fc_entry keyfc, *ifce;
7384 uint32_t flowhash;
7385
7386 VERIFY(ifp != NULL && (ifp->if_eflags & IFEF_TXSTART));
7387 VERIFY(ifp->if_flowhash != 0);
7388 flowhash = ifp->if_flowhash;
7389
7390 bzero(&keyfc, sizeof (keyfc));
7391 keyfc.ifce_flowhash = flowhash;
7392
7393 lck_mtx_lock_spin(&ifnet_fc_lock);
7394 ifce = RB_FIND(ifnet_fc_tree, &ifnet_fc_tree, &keyfc);
7395 if (ifce != NULL && ifce->ifce_ifp == ifp) {
7396 /* Entry is already in ifnet_fc_tree, return */
7397 lck_mtx_unlock(&ifnet_fc_lock);
7398 return (0);
7399 }
7400
7401 if (ifce != NULL) {
7402 /*
7403 * There is a different fc entry with the same flow hash
7404 * but different ifp pointer. There can be a collision
7405 * on flow hash but the probability is low. Let's just
7406 * avoid adding a second one when there is a collision.
7407 */
7408 lck_mtx_unlock(&ifnet_fc_lock);
7409 return (EAGAIN);
7410 }
7411
7412 /* become regular mutex */
7413 lck_mtx_convert_spin(&ifnet_fc_lock);
7414
7415 ifce = zalloc_noblock(ifnet_fc_zone);
7416 if (ifce == NULL) {
7417 /* memory allocation failed */
7418 lck_mtx_unlock(&ifnet_fc_lock);
7419 return (ENOMEM);
7420 }
7421 bzero(ifce, ifnet_fc_zone_size);
7422
7423 ifce->ifce_flowhash = flowhash;
7424 ifce->ifce_ifp = ifp;
7425
7426 RB_INSERT(ifnet_fc_tree, &ifnet_fc_tree, ifce);
7427 lck_mtx_unlock(&ifnet_fc_lock);
7428 return (0);
7429}
7430
7431static struct ifnet_fc_entry *
7432ifnet_fc_get(uint32_t flowhash)
7433{
7434 struct ifnet_fc_entry keyfc, *ifce;
7435 struct ifnet *ifp;
7436
7437 bzero(&keyfc, sizeof (keyfc));
7438 keyfc.ifce_flowhash = flowhash;
7439
7440 lck_mtx_lock_spin(&ifnet_fc_lock);
7441 ifce = RB_FIND(ifnet_fc_tree, &ifnet_fc_tree, &keyfc);
7442 if (ifce == NULL) {
7443 /* Entry is not present in ifnet_fc_tree, return */
7444 lck_mtx_unlock(&ifnet_fc_lock);
7445 return (NULL);
7446 }
7447
7448 RB_REMOVE(ifnet_fc_tree, &ifnet_fc_tree, ifce);
7449
7450 VERIFY(ifce->ifce_ifp != NULL);
7451 ifp = ifce->ifce_ifp;
7452
7453 /* become regular mutex */
7454 lck_mtx_convert_spin(&ifnet_fc_lock);
7455
7456 if (!ifnet_is_attached(ifp, 0)) {
7457 /*
7458 * This ifp is not attached or in the process of being
7459 * detached; just don't process it.
7460 */
7461 ifnet_fc_entry_free(ifce);
7462 ifce = NULL;
7463 }
7464 lck_mtx_unlock(&ifnet_fc_lock);
7465
7466 return (ifce);
7467}
7468
7469static void
7470ifnet_fc_entry_free(struct ifnet_fc_entry *ifce)
7471{
7472 zfree(ifnet_fc_zone, ifce);
7473}
7474
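/*
 * Compute a non-zero hash over the identity of the interface (name,
 * unit, flags, capabilities, scheduling model, plus random salt);
 * the global seed is re-rolled until the result is non-zero.
 */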
7475static uint32_t
7476ifnet_calc_flowhash(struct ifnet *ifp)
7477{
7478 struct ifnet_flowhash_key fh __attribute__((aligned(8)));
7479 uint32_t flowhash = 0;
7480
7481 if (ifnet_flowhash_seed == 0)
7482 ifnet_flowhash_seed = RandomULong();
7483
7484 bzero(&fh, sizeof (fh));
7485
7486 (void) snprintf(fh.ifk_name, sizeof (fh.ifk_name), "%s", ifp->if_name);
7487 fh.ifk_unit = ifp->if_unit;
7488 fh.ifk_flags = ifp->if_flags;
7489 fh.ifk_eflags = ifp->if_eflags;
7490 fh.ifk_capabilities = ifp->if_capabilities;
7491 fh.ifk_capenable = ifp->if_capenable;
7492 fh.ifk_output_sched_model = ifp->if_output_sched_model;
7493 fh.ifk_rand1 = RandomULong();
7494 fh.ifk_rand2 = RandomULong();
7495
7496try_again:
7497 flowhash = net_flowhash(&fh, sizeof (fh), ifnet_flowhash_seed);
7498 if (flowhash == 0) {
7499 /* try to get a non-zero flowhash */
7500 ifnet_flowhash_seed = RandomULong();
7501 goto try_again;
7502 }
7503
7504 return (flowhash);
7505}
7506
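/*
 * Store (or clear, when len is 0) the per-address-family network
 * signature of the interface; ENOMEM is returned if the protocol's
 * extra ifnet data has not been allocated.
 */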
7507int
7508ifnet_set_netsignature(struct ifnet *ifp, uint8_t family, uint8_t len,
7509 uint16_t flags, uint8_t *data)
7510{
7511#pragma unused(flags)
7512 int error = 0;
7513
7514 switch (family) {
7515 case AF_INET:
7516 if_inetdata_lock_exclusive(ifp);
7517 if (IN_IFEXTRA(ifp) != NULL) {
7518 if (len == 0) {
7519 /* Allow clearing the signature */
7520 IN_IFEXTRA(ifp)->netsig_len = 0;
7521 bzero(IN_IFEXTRA(ifp)->netsig,
7522 sizeof (IN_IFEXTRA(ifp)->netsig));
7523 if_inetdata_lock_done(ifp);
7524 break;
7525 } else if (len > sizeof (IN_IFEXTRA(ifp)->netsig)) {
7526 error = EINVAL;
7527 if_inetdata_lock_done(ifp);
7528 break;
7529 }
7530 IN_IFEXTRA(ifp)->netsig_len = len;
7531 bcopy(data, IN_IFEXTRA(ifp)->netsig, len);
7532 } else {
7533 error = ENOMEM;
7534 }
7535 if_inetdata_lock_done(ifp);
7536 break;
7537
7538 case AF_INET6:
7539 if_inet6data_lock_exclusive(ifp);
7540 if (IN6_IFEXTRA(ifp) != NULL) {
7541 if (len == 0) {
7542 /* Allow clearing the signature */
7543 IN6_IFEXTRA(ifp)->netsig_len = 0;
7544 bzero(IN6_IFEXTRA(ifp)->netsig,
7545 sizeof (IN6_IFEXTRA(ifp)->netsig));
7546 if_inet6data_lock_done(ifp);
7547 break;
7548 } else if (len > sizeof (IN6_IFEXTRA(ifp)->netsig)) {
7549 error = EINVAL;
7550 if_inet6data_lock_done(ifp);
7551 break;
7552 }
7553 IN6_IFEXTRA(ifp)->netsig_len = len;
7554 bcopy(data, IN6_IFEXTRA(ifp)->netsig, len);
7555 } else {
7556 error = ENOMEM;
7557 }
7558 if_inet6data_lock_done(ifp);
7559 break;
7560
7561 default:
7562 error = EINVAL;
7563 break;
7564 }
7565
7566 return (error);
7567}
7568
7569int
7570ifnet_get_netsignature(struct ifnet *ifp, uint8_t family, uint8_t *len,
7571 uint16_t *flags, uint8_t *data)
7572{
7573 int error = 0;
7574
7575 if (ifp == NULL || len == NULL || flags == NULL || data == NULL)
7576 return (EINVAL);
7577
7578 switch (family) {
7579 case AF_INET:
7580 if_inetdata_lock_shared(ifp);
7581 if (IN_IFEXTRA(ifp) != NULL) {
7582 if (*len == 0 || *len < IN_IFEXTRA(ifp)->netsig_len) {
7583 error = EINVAL;
7584 if_inetdata_lock_done(ifp);
7585 break;
7586 }
7587 if ((*len = IN_IFEXTRA(ifp)->netsig_len) > 0)
7588 bcopy(IN_IFEXTRA(ifp)->netsig, data, *len);
7589 else
7590 error = ENOENT;
7591 } else {
7592 error = ENOMEM;
7593 }
7594 if_inetdata_lock_done(ifp);
7595 break;
7596
7597 case AF_INET6:
7598 if_inet6data_lock_shared(ifp);
7599 if (IN6_IFEXTRA(ifp) != NULL) {
7600 if (*len == 0 || *len < IN6_IFEXTRA(ifp)->netsig_len) {
7601 error = EINVAL;
7602 if_inet6data_lock_done(ifp);
7603 break;
7604 }
7605 if ((*len = IN6_IFEXTRA(ifp)->netsig_len) > 0)
7606 bcopy(IN6_IFEXTRA(ifp)->netsig, data, *len);
7607 else
7608 error = ENOENT;
7609 } else {
7610 error = ENOMEM;
7611 }
7612 if_inet6data_lock_done(ifp);
7613 break;
7614
7615 default:
7616 error = EINVAL;
7617 break;
7618 }
7619
7620 if (error == 0)
7621 *flags = 0;
7622
7623 return (error);
7624}
7625
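/*
 * Debug hook on the output path: when HWCKSUM_DBG_FINALIZE_FORCED is
 * set, force software finalization of IPv4/IPv6 checksums (except for
 * TSO packets) and count how many headers/payloads were finalized.
 */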
7626static void
7627dlil_output_cksum_dbg(struct ifnet *ifp, struct mbuf *m, uint32_t hoff,
7628 protocol_family_t pf)
7629{
7630#pragma unused(ifp)
7631 uint32_t did_sw;
7632
7633 if (!(hwcksum_dbg_mode & HWCKSUM_DBG_FINALIZE_FORCED) ||
7634 (m->m_pkthdr.csum_flags & (CSUM_TSO_IPV4|CSUM_TSO_IPV6)))
7635 return;
7636
7637 switch (pf) {
7638 case PF_INET:
7639 did_sw = in_finalize_cksum(m, hoff, m->m_pkthdr.csum_flags);
7640 if (did_sw & CSUM_DELAY_IP)
7641 hwcksum_dbg_finalized_hdr++;
7642 if (did_sw & CSUM_DELAY_DATA)
7643 hwcksum_dbg_finalized_data++;
7644 break;
7645#if INET6
7646 case PF_INET6:
7647 /*
7648 * Checksum offload should not have been enabled when
7649 * extension headers exist; that also means that we
7650 * cannot force-finalize packets with extension headers.
 7651		 * Indicate to the callee that it should skip such cases
 7652		 * by setting optlen to -1.
7653 */
7654 did_sw = in6_finalize_cksum(m, hoff, -1, -1,
7655 m->m_pkthdr.csum_flags);
7656 if (did_sw & CSUM_DELAY_IPV6_DATA)
7657 hwcksum_dbg_finalized_data++;
7658 break;
7659#endif /* INET6 */
7660 default:
7661 return;
7662 }
7663}
7664
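/*
 * Debug hook on the input path: optionally force partial checksum
 * offload (HWCKSUM_DBG_PARTIAL_FORCED), verify the values reported by
 * hardware that does support it, and emulate different start offsets
 * (HWCKSUM_DBG_PARTIAL_RXOFF_ADJ) via m_adj_sum16().
 */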
7665static void
7666dlil_input_cksum_dbg(struct ifnet *ifp, struct mbuf *m, char *frame_header,
7667 protocol_family_t pf)
7668{
7669 uint16_t sum;
7670 uint32_t hlen;
7671
7672 if (frame_header == NULL ||
7673 frame_header < (char *)mbuf_datastart(m) ||
7674 frame_header > (char *)m->m_data) {
7675 printf("%s: frame header pointer 0x%llx out of range "
7676 "[0x%llx,0x%llx] for mbuf 0x%llx\n", if_name(ifp),
7677 (uint64_t)VM_KERNEL_ADDRPERM(frame_header),
7678 (uint64_t)VM_KERNEL_ADDRPERM(mbuf_datastart(m)),
7679 (uint64_t)VM_KERNEL_ADDRPERM(m->m_data),
7680 (uint64_t)VM_KERNEL_ADDRPERM(m));
7681 return;
7682 }
7683 hlen = (m->m_data - frame_header);
7684
7685 switch (pf) {
7686 case PF_INET:
7687#if INET6
7688 case PF_INET6:
7689#endif /* INET6 */
7690 break;
7691 default:
7692 return;
7693 }
7694
7695 /*
7696 * Force partial checksum offload; useful to simulate cases
7697 * where the hardware does not support partial checksum offload,
7698 * in order to validate correctness throughout the layers above.
7699 */
7700 if (hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED) {
7701 uint32_t foff = hwcksum_dbg_partial_rxoff_forced;
7702
7703 if (foff > (uint32_t)m->m_pkthdr.len)
7704 return;
7705
7706 m->m_pkthdr.csum_flags &= ~CSUM_RX_FLAGS;
7707
7708 /* Compute 16-bit 1's complement sum from forced offset */
7709 sum = m_sum16(m, foff, (m->m_pkthdr.len - foff));
7710
7711 m->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PARTIAL);
7712 m->m_pkthdr.csum_rx_val = sum;
7713 m->m_pkthdr.csum_rx_start = (foff + hlen);
7714
7715 hwcksum_dbg_partial_forced++;
7716 hwcksum_dbg_partial_forced_bytes += m->m_pkthdr.len;
7717 }
7718
7719 /*
7720 * Partial checksum offload verification (and adjustment);
7721 * useful to validate and test cases where the hardware
7722 * supports partial checksum offload.
7723 */
7724 if ((m->m_pkthdr.csum_flags &
7725 (CSUM_DATA_VALID | CSUM_PARTIAL | CSUM_PSEUDO_HDR)) ==
7726 (CSUM_DATA_VALID | CSUM_PARTIAL)) {
7727 uint32_t rxoff;
7728
7729 /* Start offset must begin after frame header */
7730 rxoff = m->m_pkthdr.csum_rx_start;
7731 if (hlen > rxoff) {
7732 hwcksum_dbg_bad_rxoff++;
7733 if (dlil_verbose) {
7734 printf("%s: partial cksum start offset %d "
7735 "is less than frame header length %d for "
7736 "mbuf 0x%llx\n", if_name(ifp), rxoff, hlen,
7737 (uint64_t)VM_KERNEL_ADDRPERM(m));
7738 }
7739 return;
7740 }
 7741		rxoff -= hlen;
7742
7743 if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED)) {
7744 /*
7745 * Compute the expected 16-bit 1's complement sum;
7746 * skip this if we've already computed it above
7747 * when partial checksum offload is forced.
7748 */
7749 sum = m_sum16(m, rxoff, (m->m_pkthdr.len - rxoff));
7750
7751 /* Hardware or driver is buggy */
7752 if (sum != m->m_pkthdr.csum_rx_val) {
7753 hwcksum_dbg_bad_cksum++;
7754 if (dlil_verbose) {
7755 printf("%s: bad partial cksum value "
7756 "0x%x (expected 0x%x) for mbuf "
7757 "0x%llx [rx_start %d]\n",
7758 if_name(ifp),
7759 m->m_pkthdr.csum_rx_val, sum,
7760 (uint64_t)VM_KERNEL_ADDRPERM(m),
7761 m->m_pkthdr.csum_rx_start);
7762 }
7763 return;
7764 }
7765 }
7766 hwcksum_dbg_verified++;
7767
7768 /*
7769 * This code allows us to emulate various hardwares that
7770 * perform 16-bit 1's complement sum beginning at various
7771 * start offset values.
7772 */
7773 if (hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_RXOFF_ADJ) {
7774 uint32_t aoff = hwcksum_dbg_partial_rxoff_adj;
7775
7776 if (aoff == rxoff || aoff > (uint32_t)m->m_pkthdr.len)
7777 return;
7778
7779 sum = m_adj_sum16(m, rxoff, aoff, sum);
7780
7781 m->m_pkthdr.csum_rx_val = sum;
7782 m->m_pkthdr.csum_rx_start = (aoff + hlen);
7783
7784 hwcksum_dbg_adjusted++;
7785 }
7786 }
7787}
7788
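/*
 * hwcksum_dbg_mode can only be changed while hwcksum_dbg is enabled,
 * and only bits within HWCKSUM_DBG_MASK are accepted.
 */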
7789static int
7790sysctl_hwcksum_dbg_mode SYSCTL_HANDLER_ARGS
7791{
7792#pragma unused(arg1, arg2)
7793 u_int32_t i;
7794 int err;
7795
7796 i = hwcksum_dbg_mode;
7797
7798 err = sysctl_handle_int(oidp, &i, 0, req);
7799 if (err != 0 || req->newptr == USER_ADDR_NULL)
7800 return (err);
7801
7802 if (hwcksum_dbg == 0)
7803 return (ENODEV);
7804
7805 if ((i & ~HWCKSUM_DBG_MASK) != 0)
7806 return (EINVAL);
7807
7808 hwcksum_dbg_mode = (i & HWCKSUM_DBG_MASK);
7809
7810 return (err);
7811}
7812
7813static int
7814sysctl_hwcksum_dbg_partial_rxoff_forced SYSCTL_HANDLER_ARGS
7815{
7816#pragma unused(arg1, arg2)
7817 u_int32_t i;
7818 int err;
7819
7820 i = hwcksum_dbg_partial_rxoff_forced;
7821
7822 err = sysctl_handle_int(oidp, &i, 0, req);
7823 if (err != 0 || req->newptr == USER_ADDR_NULL)
7824 return (err);
7825
7826 if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED))
7827 return (ENODEV);
7828
7829 hwcksum_dbg_partial_rxoff_forced = i;
7830
7831 return (err);
7832}
7833
7834static int
7835sysctl_hwcksum_dbg_partial_rxoff_adj SYSCTL_HANDLER_ARGS
7836{
7837#pragma unused(arg1, arg2)
7838 u_int32_t i;
7839 int err;
7840
7841 i = hwcksum_dbg_partial_rxoff_adj;
7842
7843 err = sysctl_handle_int(oidp, &i, 0, req);
7844 if (err != 0 || req->newptr == USER_ADDR_NULL)
7845 return (err);
7846
7847 if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_RXOFF_ADJ))
7848 return (ENODEV);
7849
7850 hwcksum_dbg_partial_rxoff_adj = i;
7851
7852 return (err);
7853}
7854
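/*
 * Read-only export of the transmit chain-length histogram; writes
 * are rejected with EPERM.
 */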
7855static int
7856sysctl_tx_chain_len_stats SYSCTL_HANDLER_ARGS
7857{
7858#pragma unused(oidp, arg1, arg2)
7859 int err;
7860
 7861	/* a size probe (oldptr == USER_ADDR_NULL) needs no handling here; */
 7862	/* SYSCTL_OUT() below reports the required length in that case */
 7863
7864 if (req->newptr != USER_ADDR_NULL) {
7865 return (EPERM);
7866 }
7867 err = SYSCTL_OUT(req, &tx_chain_len_stats,
7868 sizeof(struct chain_len_stats));
7869
7870 return (err);
7871}
7872
7873
7874#if DEBUG
7875/* Blob for sum16 verification */
7876static uint8_t sumdata[] = {
7877 0x1f, 0x8b, 0x08, 0x08, 0x4c, 0xe5, 0x9a, 0x4f, 0x00, 0x03,
7878 0x5f, 0x00, 0x5d, 0x91, 0x41, 0x4e, 0xc4, 0x30, 0x0c, 0x45,
7879 0xf7, 0x9c, 0xc2, 0x07, 0x18, 0xf5, 0x0e, 0xb0, 0xe2, 0x00,
7880 0x48, 0x88, 0xa5, 0xdb, 0xba, 0x49, 0x34, 0x69, 0xdc, 0x71,
7881 0x92, 0xa9, 0xc2, 0x8a, 0x6b, 0x70, 0x3d, 0x4e, 0x82, 0x93,
7882 0xb4, 0x08, 0xd8, 0xc5, 0xb1, 0xfd, 0xff, 0xb3, 0xfd, 0x4c,
7883 0x42, 0x5f, 0x1f, 0x9f, 0x11, 0x12, 0x43, 0xb2, 0x04, 0x93,
7884 0xe0, 0x7b, 0x01, 0x0e, 0x14, 0x07, 0x78, 0xd1, 0x78, 0x75,
7885 0x71, 0x71, 0xe9, 0x08, 0x84, 0x46, 0xf2, 0xc7, 0x3b, 0x09,
7886 0xe7, 0xd1, 0xd3, 0x8a, 0x57, 0x92, 0x33, 0xcd, 0x39, 0xcc,
7887 0xb0, 0x91, 0x89, 0xe0, 0x42, 0x53, 0x8b, 0xb7, 0x8c, 0x42,
7888 0x60, 0xd9, 0x9f, 0x7a, 0x55, 0x19, 0x76, 0xcb, 0x10, 0x49,
7889 0x35, 0xac, 0x0b, 0x5a, 0x3c, 0xbb, 0x65, 0x51, 0x8c, 0x90,
7890 0x7c, 0x69, 0x45, 0x45, 0x81, 0xb4, 0x2b, 0x70, 0x82, 0x85,
7891 0x55, 0x91, 0x17, 0x90, 0xdc, 0x14, 0x1e, 0x35, 0x52, 0xdd,
7892 0x02, 0x16, 0xef, 0xb5, 0x40, 0x89, 0xe2, 0x46, 0x53, 0xad,
7893 0x93, 0x6e, 0x98, 0x30, 0xe5, 0x08, 0xb7, 0xcc, 0x03, 0xbc,
7894 0x71, 0x86, 0x09, 0x43, 0x0d, 0x52, 0xf5, 0xa2, 0xf5, 0xa2,
7895 0x56, 0x11, 0x8d, 0xa8, 0xf5, 0xee, 0x92, 0x3d, 0xfe, 0x8c,
7896 0x67, 0x71, 0x8b, 0x0e, 0x2d, 0x70, 0x77, 0xbe, 0xbe, 0xea,
7897 0xbf, 0x9a, 0x8d, 0x9c, 0x53, 0x53, 0xe5, 0xe0, 0x4b, 0x87,
7898 0x85, 0xd2, 0x45, 0x95, 0x30, 0xc1, 0xcc, 0xe0, 0x74, 0x54,
7899 0x13, 0x58, 0xe8, 0xe8, 0x79, 0xa2, 0x09, 0x73, 0xa4, 0x0e,
7900 0x39, 0x59, 0x0c, 0xe6, 0x9c, 0xb2, 0x4f, 0x06, 0x5b, 0x8e,
7901 0xcd, 0x17, 0x6c, 0x5e, 0x95, 0x4d, 0x70, 0xa2, 0x0a, 0xbf,
7902 0xa3, 0xcc, 0x03, 0xbc, 0x5a, 0xe7, 0x75, 0x06, 0x5e, 0x75,
7903 0xef, 0x58, 0x8e, 0x15, 0xd1, 0x0a, 0x18, 0xff, 0xdd, 0xe6,
7904 0x02, 0x3b, 0xb5, 0xb4, 0xa1, 0xe0, 0x72, 0xfc, 0xe3, 0xab,
7905 0x07, 0xe0, 0x4d, 0x65, 0xea, 0x92, 0xeb, 0xf2, 0x7b, 0x17,
7906 0x05, 0xce, 0xc6, 0xf6, 0x2b, 0xbb, 0x70, 0x3d, 0x00, 0x95,
7907 0xe0, 0x07, 0x52, 0x3b, 0x58, 0xfc, 0x7c, 0x69, 0x4d, 0xe9,
7908 0xf7, 0xa9, 0x66, 0x1e, 0x1e, 0xbe, 0x01, 0x69, 0x98, 0xfe,
7909 0xc8, 0x28, 0x02, 0x00, 0x00
7910};
7911
7912/* Precomputed 16-bit 1's complement sums for various spans of the above data */
7913static struct {
7914 int len;
7915 uint16_t sum;
7916} sumtbl[] = {
7917 { 11, 0xcb6d },
7918 { 20, 0x20dd },
7919 { 27, 0xbabd },
7920 { 32, 0xf3e8 },
7921 { 37, 0x197d },
7922 { 43, 0x9eae },
7923 { 64, 0x4678 },
7924 { 127, 0x9399 },
7925 { 256, 0xd147 },
7926 { 325, 0x0358 }
7927};
7928#define SUMTBL_MAX ((int)sizeof (sumtbl) / (int)sizeof (sumtbl[0]))
7929
7930static void
7931dlil_verify_sum16(void)
7932{
7933 struct mbuf *m;
7934 uint8_t *buf;
7935 int n;
7936
7937 /* Make sure test data plus extra room for alignment fits in cluster */
7938 _CASSERT((sizeof (sumdata) + (sizeof (uint64_t) * 2)) <= MCLBYTES);
7939
7940 m = m_getcl(M_WAITOK, MT_DATA, M_PKTHDR);
7941 MH_ALIGN(m, sizeof (uint32_t)); /* 32-bit starting alignment */
7942 buf = mtod(m, uint8_t *); /* base address */
7943
7944 for (n = 0; n < SUMTBL_MAX; n++) {
7945 uint16_t len = sumtbl[n].len;
7946 int i;
7947
7948 /* Verify for all possible alignments */
7949 for (i = 0; i < (int)sizeof (uint64_t); i++) {
7950 uint16_t sum;
7951 uint8_t *c;
7952
7953 /* Copy over test data to mbuf */
7954 VERIFY(len <= sizeof (sumdata));
7955 c = buf + i;
7956 bcopy(sumdata, c, len);
7957
7958 /* Zero-offset test (align by data pointer) */
7959 m->m_data = (caddr_t)c;
7960 m->m_len = len;
7961 sum = m_sum16(m, 0, len);
7962
7963 /* Something is horribly broken; stop now */
7964 if (sum != sumtbl[n].sum) {
7965 panic("%s: broken m_sum16 for len=%d align=%d "
7966 "sum=0x%04x [expected=0x%04x]\n", __func__,
7967 len, i, sum, sumtbl[n].sum);
7968 /* NOTREACHED */
7969 }
7970
7971 /* Alignment test by offset (fixed data pointer) */
7972 m->m_data = (caddr_t)buf;
7973 m->m_len = i + len;
7974 sum = m_sum16(m, i, len);
7975
7976 /* Something is horribly broken; stop now */
7977 if (sum != sumtbl[n].sum) {
7978 panic("%s: broken m_sum16 for len=%d offset=%d "
7979 "sum=0x%04x [expected=0x%04x]\n", __func__,
7980 len, i, sum, sumtbl[n].sum);
7981 /* NOTREACHED */
7982 }
7983#if INET
 7984			/* Simple sum16 contiguous buffer test by alignment */
7985 sum = b_sum16(c, len);
7986
7987 /* Something is horribly broken; stop now */
7988 if (sum != sumtbl[n].sum) {
7989 panic("%s: broken b_sum16 for len=%d align=%d "
7990 "sum=0x%04x [expected=0x%04x]\n", __func__,
7991 len, i, sum, sumtbl[n].sum);
7992 /* NOTREACHED */
7993 }
7994#endif /* INET */
7995 }
7996 }
7997 m_freem(m);
7998
7999 printf("DLIL: SUM16 self-tests PASSED\n");
8000}
8001#endif /* DEBUG */
8002
8003#define CASE_STRINGIFY(x) case x: return #x
8004
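/*
 * Map a KEV_DL_* event code to its symbolic name (empty string for
 * unknown codes), for use in debug logging.
 */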
8005__private_extern__ const char *
8006dlil_kev_dl_code_str(u_int32_t event_code)
8007{
8008 switch (event_code) {
8009 CASE_STRINGIFY(KEV_DL_SIFFLAGS);
8010 CASE_STRINGIFY(KEV_DL_SIFMETRICS);
8011 CASE_STRINGIFY(KEV_DL_SIFMTU);
8012 CASE_STRINGIFY(KEV_DL_SIFPHYS);
8013 CASE_STRINGIFY(KEV_DL_SIFMEDIA);
8014 CASE_STRINGIFY(KEV_DL_SIFGENERIC);
8015 CASE_STRINGIFY(KEV_DL_ADDMULTI);
8016 CASE_STRINGIFY(KEV_DL_DELMULTI);
8017 CASE_STRINGIFY(KEV_DL_IF_ATTACHED);
8018 CASE_STRINGIFY(KEV_DL_IF_DETACHING);
8019 CASE_STRINGIFY(KEV_DL_IF_DETACHED);
8020 CASE_STRINGIFY(KEV_DL_LINK_OFF);
8021 CASE_STRINGIFY(KEV_DL_LINK_ON);
8022 CASE_STRINGIFY(KEV_DL_PROTO_ATTACHED);
8023 CASE_STRINGIFY(KEV_DL_PROTO_DETACHED);
8024 CASE_STRINGIFY(KEV_DL_LINK_ADDRESS_CHANGED);
8025 CASE_STRINGIFY(KEV_DL_WAKEFLAGS_CHANGED);
8026 CASE_STRINGIFY(KEV_DL_IF_IDLE_ROUTE_REFCNT);
8027 CASE_STRINGIFY(KEV_DL_IFCAP_CHANGED);
8028 CASE_STRINGIFY(KEV_DL_LINK_QUALITY_METRIC_CHANGED);
8029 CASE_STRINGIFY(KEV_DL_NODE_PRESENCE);
8030 CASE_STRINGIFY(KEV_DL_NODE_ABSENCE);
8031 CASE_STRINGIFY(KEV_DL_MASTER_ELECTED);
8032 CASE_STRINGIFY(KEV_DL_ISSUES);
8033 CASE_STRINGIFY(KEV_DL_IFDELEGATE_CHANGED);
8034 default:
8035 break;
8036 }
8037 return ("");
8038}
8039
8040/*
8041 * Mirror the arguments of ifnet_get_local_ports_extended()
8042 * ifindex
8043 * protocol
8044 * flags
8045 */
8046static int
8047sysctl_get_ports_used SYSCTL_HANDLER_ARGS
8048{
8049#pragma unused(oidp)
8050 int *name = (int *)arg1;
8051 int namelen = arg2;
8052 int error = 0;
8053 int idx;
8054 protocol_family_t protocol;
8055 u_int32_t flags;
8056 ifnet_t ifp = NULL;
8057 u_int8_t *bitfield = NULL;
8058
8059 if (req->newptr) {
8060 error = EPERM;
8061 goto done;
8062 }
8063 if (namelen != 3) {
8064 error = ENOENT;
8065 goto done;
8066 }
8067
8068 if (req->oldptr == USER_ADDR_NULL) {
8069 req->oldidx = bitstr_size(65536);
8070 goto done;
8071 }
8072 if (req->oldlen < bitstr_size(65536)) {
8073 error = ENOMEM;
8074 goto done;
8075 }
8076
8077 idx = name[0];
8078 protocol = name[1];
8079 flags = name[2];
8080
8081
8082 ifnet_head_lock_shared();
8083 if (idx > if_index) {
8084 ifnet_head_done();
8085 error = ENOENT;
8086 goto done;
8087 }
8088 ifp = ifindex2ifnet[idx];
8089 ifnet_head_done();
8090
8091 bitfield = _MALLOC(bitstr_size(65536), M_TEMP, M_WAITOK);
8092 if (bitfield == NULL) {
8093 error = ENOMEM;
8094 goto done;
8095 }
8096 error = ifnet_get_local_ports_extended(ifp, protocol, flags, bitfield);
8097 if (error != 0) {
8098 printf("%s: ifnet_get_local_ports_extended() error %d\n",
8099 __func__, error);
8100 goto done;
8101 }
8102 error = SYSCTL_OUT(req, bitfield, bitstr_size(65536));
8103done:
8104 if (bitfield != NULL)
8105 _FREE(bitfield, M_TEMP);
8106 return (error);
8107}
8108