/* apple/xnu (tag xnu-2422.1.72): bsd/net/dlil.c */
/*
 * Copyright (c) 1999-2013 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
 * support for mandatory and extensible security protections. This notice
 * is included in support of clause 2.2 (b) of the Apple Public License,
 * Version 2.0.
 */
#include <stddef.h>

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/domain.h>
#include <sys/user.h>
#include <sys/random.h>
#include <sys/socketvar.h>
#include <net/if_dl.h>
#include <net/if.h>
#include <net/route.h>
#include <net/if_var.h>
#include <net/dlil.h>
#include <net/if_arp.h>
#include <net/iptap.h>
#include <net/pktap.h>
#include <sys/kern_event.h>
#include <sys/kdebug.h>
#include <sys/mcache.h>
#include <sys/syslog.h>
#include <sys/protosw.h>
#include <sys/priv.h>

#include <kern/assert.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/sched_prim.h>
#include <kern/locks.h>
#include <kern/zalloc.h>

#include <net/kpi_protocol.h>
#include <net/if_types.h>
#include <net/if_llreach.h>
#include <net/kpi_interfacefilter.h>
#include <net/classq/classq.h>
#include <net/classq/classq_sfb.h>
#include <net/flowhash.h>
#include <net/ntstat.h>

#if INET
#include <netinet/in_var.h>
#include <netinet/igmp_var.h>
#include <netinet/ip_var.h>
#include <netinet/tcp.h>
#include <netinet/tcp_var.h>
#include <netinet/udp.h>
#include <netinet/udp_var.h>
#include <netinet/if_ether.h>
#include <netinet/in_pcb.h>
#endif /* INET */

#if INET6
#include <netinet6/in6_var.h>
#include <netinet6/nd6.h>
#include <netinet6/mld6_var.h>
#include <netinet6/scope6_var.h>
#endif /* INET6 */

#include <libkern/OSAtomic.h>
#include <libkern/tree.h>

#include <dev/random/randomdev.h>
#include <machine/machine_routines.h>

#include <mach/thread_act.h>
#include <mach/sdt.h>

#if CONFIG_MACF
#include <sys/kauth.h>
#include <security/mac_framework.h>
#include <net/ethernet.h>
#include <net/firewire.h>
#endif

#if PF
#include <net/pfvar.h>
#endif /* PF */
#if PF_ALTQ
#include <net/altq/altq.h>
#endif /* PF_ALTQ */
#include <net/pktsched/pktsched.h>

#define DBG_LAYER_BEG		DLILDBG_CODE(DBG_DLIL_STATIC, 0)
#define DBG_LAYER_END		DLILDBG_CODE(DBG_DLIL_STATIC, 2)
#define DBG_FNC_DLIL_INPUT	DLILDBG_CODE(DBG_DLIL_STATIC, (1 << 8))
#define DBG_FNC_DLIL_OUTPUT	DLILDBG_CODE(DBG_DLIL_STATIC, (2 << 8))
#define DBG_FNC_DLIL_IFOUT	DLILDBG_CODE(DBG_DLIL_STATIC, (3 << 8))

#define MAX_FRAME_TYPE_SIZE	4	/* LONGWORDS */
#define MAX_LINKADDR		4	/* LONGWORDS */
#define M_NKE			M_IFADDR

#if 1
#define DLIL_PRINTF	printf
#else
#define DLIL_PRINTF	kprintf
#endif

#define IF_DATA_REQUIRE_ALIGNED_64(f)	\
	_CASSERT(!(offsetof(struct if_data_internal, f) % sizeof (u_int64_t)))

#define IFNET_IF_DATA_REQUIRE_ALIGNED_64(f)	\
	_CASSERT(!(offsetof(struct ifnet, if_data.f) % sizeof (u_int64_t)))

enum {
	kProtoKPI_v1 = 1,
	kProtoKPI_v2 = 2
};

/*
 * List of if_proto structures in if_proto_hash[] is protected by
 * the ifnet lock. The rest of the fields are initialized at protocol
 * attach time and never change, thus no lock required as long as
 * a reference to it is valid, via if_proto_ref().
 */
struct if_proto {
	SLIST_ENTRY(if_proto)	next_hash;
	u_int32_t		refcount;
	u_int32_t		detached;
	struct ifnet		*ifp;
	protocol_family_t	protocol_family;
	int			proto_kpi;
	union {
		struct {
			proto_media_input	input;
			proto_media_preout	pre_output;
			proto_media_event	event;
			proto_media_ioctl	ioctl;
			proto_media_detached	detached;
			proto_media_resolve_multi resolve_multi;
			proto_media_send_arp	send_arp;
		} v1;
		struct {
			proto_media_input_v2	input;
			proto_media_preout	pre_output;
			proto_media_event	event;
			proto_media_ioctl	ioctl;
			proto_media_detached	detached;
			proto_media_resolve_multi resolve_multi;
			proto_media_send_arp	send_arp;
		} v2;
	} kpi;
};

SLIST_HEAD(proto_hash_entry, if_proto);

#define DLIL_SDLMAXLEN	64
#define DLIL_SDLDATALEN	\
	(DLIL_SDLMAXLEN - offsetof(struct sockaddr_dl, sdl_data[0]))

struct dlil_ifnet {
	struct ifnet	dl_if;			/* public ifnet */
	/*
	 * DLIL private fields, protected by dl_if_lock
	 */
	decl_lck_mtx_data(, dl_if_lock);
	TAILQ_ENTRY(dlil_ifnet) dl_if_link;	/* dlil_ifnet link */
	u_int32_t dl_if_flags;			/* flags (below) */
	u_int32_t dl_if_refcnt;			/* refcnt */
	void (*dl_if_trace)(struct dlil_ifnet *, int); /* ref trace callback */
	void	*dl_if_uniqueid;		/* unique interface id */
	size_t	dl_if_uniqueid_len;		/* length of the unique id */
	char	dl_if_namestorage[IFNAMSIZ];	/* interface name storage */
	char	dl_if_xnamestorage[IFXNAMSIZ];	/* external name storage */
	struct {
		struct ifaddr	ifa;		/* lladdr ifa */
		u_int8_t	asdl[DLIL_SDLMAXLEN]; /* addr storage */
		u_int8_t	msdl[DLIL_SDLMAXLEN]; /* mask storage */
	} dl_if_lladdr;
	u_int8_t dl_if_descstorage[IF_DESCSIZE]; /* desc storage */
	struct dlil_threading_info dl_if_inpstorage; /* input thread storage */
	ctrace_t	dl_if_attach;		/* attach PC stacktrace */
	ctrace_t	dl_if_detach;		/* detach PC stacktrace */
};

/* Values for dl_if_flags (private to DLIL) */
#define DLIF_INUSE	0x1	/* DLIL ifnet recycler, ifnet in use */
#define DLIF_REUSE	0x2	/* DLIL ifnet recycles, ifnet is not new */
#define DLIF_DEBUG	0x4	/* has debugging info */

#define IF_REF_TRACE_HIST_SIZE	8	/* size of ref trace history */

/* For gdb */
__private_extern__ unsigned int if_ref_trace_hist_size = IF_REF_TRACE_HIST_SIZE;

struct dlil_ifnet_dbg {
	struct dlil_ifnet	dldbg_dlif;		/* dlil_ifnet */
	u_int16_t		dldbg_if_refhold_cnt;	/* # ifnet references */
	u_int16_t		dldbg_if_refrele_cnt;	/* # ifnet releases */
	/*
	 * Circular lists of ifnet_{reference,release} callers.
	 */
	ctrace_t		dldbg_if_refhold[IF_REF_TRACE_HIST_SIZE];
	ctrace_t		dldbg_if_refrele[IF_REF_TRACE_HIST_SIZE];
};

#define DLIL_TO_IFP(s)	(&s->dl_if)
#define IFP_TO_DLIL(s)	((struct dlil_ifnet *)s)

struct ifnet_filter {
	TAILQ_ENTRY(ifnet_filter) filt_next;
	u_int32_t		filt_skip;
	u_int32_t		filt_flags;
	ifnet_t			filt_ifp;
	const char		*filt_name;
	void			*filt_cookie;
	protocol_family_t	filt_protocol;
	iff_input_func		filt_input;
	iff_output_func		filt_output;
	iff_event_func		filt_event;
	iff_ioctl_func		filt_ioctl;
	iff_detached_func	filt_detached;
};

struct proto_input_entry;

static TAILQ_HEAD(, dlil_ifnet) dlil_ifnet_head;
static lck_grp_t *dlil_lock_group;
lck_grp_t *ifnet_lock_group;
static lck_grp_t *ifnet_head_lock_group;
static lck_grp_t *ifnet_snd_lock_group;
static lck_grp_t *ifnet_rcv_lock_group;
lck_attr_t *ifnet_lock_attr;
decl_lck_rw_data(static, ifnet_head_lock);
decl_lck_mtx_data(static, dlil_ifnet_lock);
u_int32_t dlil_filter_disable_tso_count = 0;

#if DEBUG
static unsigned int ifnet_debug = 1;	/* debugging (enabled) */
#else
static unsigned int ifnet_debug;	/* debugging (disabled) */
#endif /* !DEBUG */
static unsigned int dlif_size;		/* size of dlil_ifnet to allocate */
static unsigned int dlif_bufsize;	/* size of dlif_size + headroom */
static struct zone *dlif_zone;		/* zone for dlil_ifnet */

#define DLIF_ZONE_MAX		64		/* maximum elements in zone */
#define DLIF_ZONE_NAME		"ifnet"		/* zone name */

static unsigned int dlif_filt_size;	/* size of ifnet_filter */
static struct zone *dlif_filt_zone;	/* zone for ifnet_filter */

#define DLIF_FILT_ZONE_MAX	8		/* maximum elements in zone */
#define DLIF_FILT_ZONE_NAME	"ifnet_filter"	/* zone name */

static unsigned int dlif_phash_size;	/* size of ifnet proto hash table */
static struct zone *dlif_phash_zone;	/* zone for ifnet proto hash table */

#define DLIF_PHASH_ZONE_MAX	DLIF_ZONE_MAX	/* maximum elements in zone */
#define DLIF_PHASH_ZONE_NAME	"ifnet_proto_hash" /* zone name */

static unsigned int dlif_proto_size;	/* size of if_proto */
static struct zone *dlif_proto_zone;	/* zone for if_proto */

#define DLIF_PROTO_ZONE_MAX	(DLIF_ZONE_MAX*2) /* maximum elements in zone */
#define DLIF_PROTO_ZONE_NAME	"ifnet_proto"	/* zone name */

static unsigned int dlif_tcpstat_size;	  /* size of tcpstat_local to allocate */
static unsigned int dlif_tcpstat_bufsize; /* size of dlif_tcpstat_size + headroom */
static struct zone *dlif_tcpstat_zone;	  /* zone for tcpstat_local */

#define DLIF_TCPSTAT_ZONE_MAX	1		/* maximum elements in zone */
#define DLIF_TCPSTAT_ZONE_NAME	"ifnet_tcpstat"	/* zone name */

static unsigned int dlif_udpstat_size;	  /* size of udpstat_local to allocate */
static unsigned int dlif_udpstat_bufsize; /* size of dlif_udpstat_size + headroom */
static struct zone *dlif_udpstat_zone;	  /* zone for udpstat_local */

#define DLIF_UDPSTAT_ZONE_MAX	1		/* maximum elements in zone */
#define DLIF_UDPSTAT_ZONE_NAME	"ifnet_udpstat"	/* zone name */

/*
 * Updating this variable should be done by first acquiring the global
 * radix node head (rnh_lock), in tandem with setting/clearing the
 * PR_AGGDRAIN for routedomain.
 */
u_int32_t ifnet_aggressive_drainers;
static u_int32_t net_rtref;

static struct dlil_main_threading_info dlil_main_input_thread_info;
__private_extern__ struct dlil_threading_info *dlil_main_input_thread =
    (struct dlil_threading_info *)&dlil_main_input_thread_info;

static int dlil_event_internal(struct ifnet *ifp, struct kev_msg *msg);
static int dlil_detach_filter_internal(interface_filter_t filter, int detached);
static void dlil_if_trace(struct dlil_ifnet *, int);
static void if_proto_ref(struct if_proto *);
static void if_proto_free(struct if_proto *);
static struct if_proto *find_attached_proto(struct ifnet *, u_int32_t);
static int dlil_ifp_proto_count(struct ifnet *);
static void if_flt_monitor_busy(struct ifnet *);
static void if_flt_monitor_unbusy(struct ifnet *);
static void if_flt_monitor_enter(struct ifnet *);
static void if_flt_monitor_leave(struct ifnet *);
static int dlil_interface_filters_input(struct ifnet *, struct mbuf **,
    char **, protocol_family_t);
static int dlil_interface_filters_output(struct ifnet *, struct mbuf **,
    protocol_family_t);
static struct ifaddr *dlil_alloc_lladdr(struct ifnet *,
    const struct sockaddr_dl *);
static int ifnet_lookup(struct ifnet *);
static void if_purgeaddrs(struct ifnet *);

static errno_t ifproto_media_input_v1(struct ifnet *, protocol_family_t,
    struct mbuf *, char *);
static errno_t ifproto_media_input_v2(struct ifnet *, protocol_family_t,
    struct mbuf *);
static errno_t ifproto_media_preout(struct ifnet *, protocol_family_t,
    mbuf_t *, const struct sockaddr *, void *, char *, char *);
static void ifproto_media_event(struct ifnet *, protocol_family_t,
    const struct kev_msg *);
static errno_t ifproto_media_ioctl(struct ifnet *, protocol_family_t,
    unsigned long, void *);
static errno_t ifproto_media_resolve_multi(ifnet_t, const struct sockaddr *,
    struct sockaddr_dl *, size_t);
static errno_t ifproto_media_send_arp(struct ifnet *, u_short,
    const struct sockaddr_dl *, const struct sockaddr *,
    const struct sockaddr_dl *, const struct sockaddr *);

static errno_t ifp_if_output(struct ifnet *, struct mbuf *);
static void ifp_if_start(struct ifnet *);
static void ifp_if_input_poll(struct ifnet *, u_int32_t, u_int32_t,
    struct mbuf **, struct mbuf **, u_int32_t *, u_int32_t *);
static errno_t ifp_if_ctl(struct ifnet *, ifnet_ctl_cmd_t, u_int32_t, void *);
static errno_t ifp_if_demux(struct ifnet *, struct mbuf *, char *,
    protocol_family_t *);
static errno_t ifp_if_add_proto(struct ifnet *, protocol_family_t,
    const struct ifnet_demux_desc *, u_int32_t);
static errno_t ifp_if_del_proto(struct ifnet *, protocol_family_t);
static errno_t ifp_if_check_multi(struct ifnet *, const struct sockaddr *);
static errno_t ifp_if_framer(struct ifnet *, struct mbuf **,
    const struct sockaddr *, const char *, const char *);
static errno_t ifp_if_framer_extended(struct ifnet *, struct mbuf **,
    const struct sockaddr *, const char *, const char *,
    u_int32_t *, u_int32_t *);
static errno_t ifp_if_set_bpf_tap(struct ifnet *, bpf_tap_mode, bpf_packet_func);
static void ifp_if_free(struct ifnet *);
static void ifp_if_event(struct ifnet *, const struct kev_msg *);
static __inline void ifp_inc_traffic_class_in(struct ifnet *, struct mbuf *);
static __inline void ifp_inc_traffic_class_out(struct ifnet *, struct mbuf *);

static void dlil_main_input_thread_func(void *, wait_result_t);
static void dlil_input_thread_func(void *, wait_result_t);
static void dlil_rxpoll_input_thread_func(void *, wait_result_t);
static int dlil_create_input_thread(ifnet_t, struct dlil_threading_info *);
static void dlil_terminate_input_thread(struct dlil_threading_info *);
static void dlil_input_stats_add(const struct ifnet_stat_increment_param *,
    struct dlil_threading_info *, boolean_t);
static void dlil_input_stats_sync(struct ifnet *, struct dlil_threading_info *);
static void dlil_input_packet_list_common(struct ifnet *, struct mbuf *,
    u_int32_t, ifnet_model_t, boolean_t);
static errno_t ifnet_input_common(struct ifnet *, struct mbuf *, struct mbuf *,
    const struct ifnet_stat_increment_param *, boolean_t, boolean_t);

#if DEBUG
static void dlil_verify_sum16(void);
#endif /* DEBUG */
static void dlil_output_cksum_dbg(struct ifnet *, struct mbuf *, uint32_t,
    protocol_family_t);
static void dlil_input_cksum_dbg(struct ifnet *, struct mbuf *, char *,
    protocol_family_t);

static void ifnet_detacher_thread_func(void *, wait_result_t);
static int ifnet_detacher_thread_cont(int);
static void ifnet_detach_final(struct ifnet *);
static void ifnet_detaching_enqueue(struct ifnet *);
static struct ifnet *ifnet_detaching_dequeue(void);

static void ifnet_start_thread_fn(void *, wait_result_t);
static void ifnet_poll_thread_fn(void *, wait_result_t);
static void ifnet_poll(struct ifnet *);

static void ifp_src_route_copyout(struct ifnet *, struct route *);
static void ifp_src_route_copyin(struct ifnet *, struct route *);
#if INET6
static void ifp_src_route6_copyout(struct ifnet *, struct route_in6 *);
static void ifp_src_route6_copyin(struct ifnet *, struct route_in6 *);
#endif /* INET6 */

static int sysctl_rxpoll SYSCTL_HANDLER_ARGS;
static int sysctl_rxpoll_mode_holdtime SYSCTL_HANDLER_ARGS;
static int sysctl_rxpoll_sample_holdtime SYSCTL_HANDLER_ARGS;
static int sysctl_rxpoll_interval_time SYSCTL_HANDLER_ARGS;
static int sysctl_rxpoll_wlowat SYSCTL_HANDLER_ARGS;
static int sysctl_rxpoll_whiwat SYSCTL_HANDLER_ARGS;
static int sysctl_sndq_maxlen SYSCTL_HANDLER_ARGS;
static int sysctl_rcvq_maxlen SYSCTL_HANDLER_ARGS;
static int sysctl_hwcksum_dbg_mode SYSCTL_HANDLER_ARGS;
static int sysctl_hwcksum_dbg_partial_rxoff_forced SYSCTL_HANDLER_ARGS;
static int sysctl_hwcksum_dbg_partial_rxoff_adj SYSCTL_HANDLER_ARGS;

/* The following are protected by dlil_ifnet_lock */
static TAILQ_HEAD(, ifnet) ifnet_detaching_head;
static u_int32_t ifnet_detaching_cnt;
static void *ifnet_delayed_run;	/* wait channel for detaching thread */

decl_lck_mtx_data(static, ifnet_fc_lock);

static uint32_t ifnet_flowhash_seed;

struct ifnet_flowhash_key {
	char		ifk_name[IFNAMSIZ];
	uint32_t	ifk_unit;
	uint32_t	ifk_flags;
	uint32_t	ifk_eflags;
	uint32_t	ifk_capabilities;
	uint32_t	ifk_capenable;
	uint32_t	ifk_output_sched_model;
	uint32_t	ifk_rand1;
	uint32_t	ifk_rand2;
};

/* Flow control entry per interface */
struct ifnet_fc_entry {
	RB_ENTRY(ifnet_fc_entry) ifce_entry;
	u_int32_t	ifce_flowhash;
	struct ifnet	*ifce_ifp;
};

static uint32_t ifnet_calc_flowhash(struct ifnet *);
static int ifce_cmp(const struct ifnet_fc_entry *,
    const struct ifnet_fc_entry *);
static int ifnet_fc_add(struct ifnet *);
static struct ifnet_fc_entry *ifnet_fc_get(u_int32_t);
static void ifnet_fc_entry_free(struct ifnet_fc_entry *);

/* protected by ifnet_fc_lock */
RB_HEAD(ifnet_fc_tree, ifnet_fc_entry) ifnet_fc_tree;
RB_PROTOTYPE(ifnet_fc_tree, ifnet_fc_entry, ifce_entry, ifce_cmp);
RB_GENERATE(ifnet_fc_tree, ifnet_fc_entry, ifce_entry, ifce_cmp);

static unsigned int ifnet_fc_zone_size;	/* sizeof ifnet_fc_entry */
static struct zone *ifnet_fc_zone;	/* ifnet_fc_entry zone */

#define IFNET_FC_ZONE_NAME	"ifnet_fc_zone"
#define IFNET_FC_ZONE_MAX	32

extern void bpfdetach(struct ifnet *);
extern void proto_input_run(void);

extern uint32_t udp_count_opportunistic(unsigned int ifindex,
    u_int32_t flags);
extern uint32_t tcp_count_opportunistic(unsigned int ifindex,
    u_int32_t flags);

__private_extern__ void link_rtrequest(int, struct rtentry *, struct sockaddr *);

#if CONFIG_MACF
int dlil_lladdr_ckreq = 0;
#endif

#if DEBUG
int dlil_verbose = 1;
#else
int dlil_verbose = 0;
#endif /* DEBUG */
#if IFNET_INPUT_SANITY_CHK
/* sanity checking of input packet lists received */
static u_int32_t dlil_input_sanity_check = 0;
#endif /* IFNET_INPUT_SANITY_CHK */
/* rate limit debug messages */
struct timespec dlil_dbgrate = { 1, 0 };

SYSCTL_DECL(_net_link_generic_system);

#if CONFIG_MACF
SYSCTL_INT(_net_link_generic_system, OID_AUTO, dlil_lladdr_ckreq,
    CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_lladdr_ckreq, 0,
    "Require MACF system info check to expose link-layer address");
#endif

SYSCTL_INT(_net_link_generic_system, OID_AUTO, dlil_verbose,
    CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_verbose, 0, "Log DLIL error messages");

#define IF_SNDQ_MINLEN	32
u_int32_t if_sndq_maxlen = IFQ_MAXLEN;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, sndq_maxlen,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_sndq_maxlen, IFQ_MAXLEN,
    sysctl_sndq_maxlen, "I", "Default transmit queue max length");

#define IF_RCVQ_MINLEN	32
#define IF_RCVQ_MAXLEN	256
u_int32_t if_rcvq_maxlen = IF_RCVQ_MAXLEN;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rcvq_maxlen,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rcvq_maxlen, IFQ_MAXLEN,
    sysctl_rcvq_maxlen, "I", "Default receive queue max length");

#define IF_RXPOLL_DECAY	2	/* ilog2 of EWMA decay rate (4) */
static u_int32_t if_rxpoll_decay = IF_RXPOLL_DECAY;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_decay,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_decay, IF_RXPOLL_DECAY,
    "ilog2 of EWMA decay rate of avg inbound packets");

#define IF_RXPOLL_MODE_HOLDTIME_MIN	(10ULL * 1000 * 1000)	/* 10 ms */
#define IF_RXPOLL_MODE_HOLDTIME		(1000ULL * 1000 * 1000)	/* 1 sec */
static u_int64_t if_rxpoll_mode_holdtime = IF_RXPOLL_MODE_HOLDTIME;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_freeze_time,
    CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_mode_holdtime,
    IF_RXPOLL_MODE_HOLDTIME, sysctl_rxpoll_mode_holdtime,
    "Q", "input poll mode freeze time");

#define IF_RXPOLL_SAMPLETIME_MIN	(1ULL * 1000 * 1000)	/* 1 ms */
#define IF_RXPOLL_SAMPLETIME		(10ULL * 1000 * 1000)	/* 10 ms */
static u_int64_t if_rxpoll_sample_holdtime = IF_RXPOLL_SAMPLETIME;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_sample_time,
    CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_sample_holdtime,
    IF_RXPOLL_SAMPLETIME, sysctl_rxpoll_sample_holdtime,
    "Q", "input poll sampling time");

#define IF_RXPOLL_INTERVALTIME_MIN	(1ULL * 1000)		/* 1 us */
#define IF_RXPOLL_INTERVALTIME		(1ULL * 1000 * 1000)	/* 1 ms */
static u_int64_t if_rxpoll_interval_time = IF_RXPOLL_INTERVALTIME;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_interval_time,
    CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_interval_time,
    IF_RXPOLL_INTERVALTIME, sysctl_rxpoll_interval_time,
    "Q", "input poll interval (time)");

#define IF_RXPOLL_INTERVAL_PKTS	0	/* 0 (disabled) */
static u_int32_t if_rxpoll_interval_pkts = IF_RXPOLL_INTERVAL_PKTS;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_interval_pkts,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_interval_pkts,
    IF_RXPOLL_INTERVAL_PKTS, "input poll interval (packets)");

#define IF_RXPOLL_WLOWAT	10
static u_int32_t if_rxpoll_wlowat = IF_RXPOLL_WLOWAT;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_wakeups_lowat,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_wlowat,
    IF_RXPOLL_WLOWAT, sysctl_rxpoll_wlowat,
    "I", "input poll wakeup low watermark");

#define IF_RXPOLL_WHIWAT	100
static u_int32_t if_rxpoll_whiwat = IF_RXPOLL_WHIWAT;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_wakeups_hiwat,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_whiwat,
    IF_RXPOLL_WHIWAT, sysctl_rxpoll_whiwat,
    "I", "input poll wakeup high watermark");

static u_int32_t if_rxpoll_max = 0;	/* 0 (automatic) */
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_max,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_max, 0,
    "max packets per poll call");

static u_int32_t if_rxpoll = 1;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll, 0,
    sysctl_rxpoll, "I", "enable opportunistic input polling");

u_int32_t if_bw_smoothing_val = 3;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, if_bw_smoothing_val,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_bw_smoothing_val, 0, "");

u_int32_t if_bw_measure_size = 10;
SYSCTL_INT(_net_link_generic_system, OID_AUTO, if_bw_measure_size,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_bw_measure_size, 0, "");

static u_int32_t cur_dlil_input_threads = 0;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, dlil_input_threads,
    CTLFLAG_RD | CTLFLAG_LOCKED, &cur_dlil_input_threads, 0,
    "Current number of DLIL input threads");

#if IFNET_INPUT_SANITY_CHK
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, dlil_input_sanity_check,
    CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_input_sanity_check, 0,
    "Turn on sanity checking in DLIL input");
#endif /* IFNET_INPUT_SANITY_CHK */

static u_int32_t if_flowadv = 1;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, flow_advisory,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_flowadv, 1,
    "enable flow-advisory mechanism");

static uint64_t hwcksum_in_invalidated = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_in_invalidated, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_in_invalidated, "inbound packets with invalidated hardware cksum");

uint32_t hwcksum_dbg = 0;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_dbg,
    CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg, 0,
    "enable hardware cksum debugging");

#define HWCKSUM_DBG_PARTIAL_FORCED	0x1	/* forced partial checksum */
#define HWCKSUM_DBG_PARTIAL_RXOFF_ADJ	0x2	/* adjust start offset */
#define HWCKSUM_DBG_FINALIZE_FORCED	0x10	/* forced finalize */
#define HWCKSUM_DBG_MASK \
	(HWCKSUM_DBG_PARTIAL_FORCED | HWCKSUM_DBG_PARTIAL_RXOFF_ADJ |	\
	HWCKSUM_DBG_FINALIZE_FORCED)

static uint32_t hwcksum_dbg_mode = 0;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, hwcksum_dbg_mode,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg_mode,
    0, sysctl_hwcksum_dbg_mode, "I", "hardware cksum debugging mode");

static uint64_t hwcksum_dbg_partial_forced = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_partial_forced, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_partial_forced, "packets forced using partial cksum");

static uint64_t hwcksum_dbg_partial_forced_bytes = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_partial_forced_bytes, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_partial_forced_bytes, "bytes forced using partial cksum");

static uint32_t hwcksum_dbg_partial_rxoff_forced = 0;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_partial_rxoff_forced, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    &hwcksum_dbg_partial_rxoff_forced, 0,
    sysctl_hwcksum_dbg_partial_rxoff_forced, "I",
    "forced partial cksum rx offset");

static uint32_t hwcksum_dbg_partial_rxoff_adj = 0;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, hwcksum_dbg_partial_rxoff_adj,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg_partial_rxoff_adj,
    0, sysctl_hwcksum_dbg_partial_rxoff_adj, "I",
    "adjusted partial cksum rx offset");

static uint64_t hwcksum_dbg_verified = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_verified, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_verified, "packets verified for having good checksum");

static uint64_t hwcksum_dbg_bad_cksum = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_bad_cksum, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_bad_cksum, "packets with bad hardware calculated checksum");

static uint64_t hwcksum_dbg_bad_rxoff = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_bad_rxoff, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_bad_rxoff, "packets with invalid rxoff");

static uint64_t hwcksum_dbg_adjusted = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_adjusted, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_adjusted, "packets with rxoff adjusted");

static uint64_t hwcksum_dbg_finalized_hdr = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_finalized_hdr, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_finalized_hdr, "finalized headers");

static uint64_t hwcksum_dbg_finalized_data = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_finalized_data, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_finalized_data, "finalized payloads");

uint32_t hwcksum_tx = 1;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_tx,
    CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_tx, 0,
    "enable transmit hardware checksum offload");

uint32_t hwcksum_rx = 1;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_rx,
    CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_rx, 0,
    "enable receive hardware checksum offload");

unsigned int net_rxpoll = 1;
unsigned int net_affinity = 1;
static kern_return_t dlil_affinity_set(struct thread *, u_int32_t);

extern u_int32_t inject_buckets;

static lck_grp_attr_t *dlil_grp_attributes = NULL;
static lck_attr_t *dlil_lck_attributes = NULL;

#define DLIL_INPUT_CHECK(m, ifp) {					\
	struct ifnet *_rcvif = mbuf_pkthdr_rcvif(m);			\
	if (_rcvif == NULL || (ifp != lo_ifp && _rcvif != ifp) ||	\
	    !(mbuf_flags(m) & MBUF_PKTHDR)) {				\
		panic_plain("%s: invalid mbuf %p\n", __func__, m);	\
		/* NOTREACHED */					\
	}								\
}

#define DLIL_EWMA(old, new, decay) do {					\
	u_int32_t _avg;							\
	if ((_avg = (old)) > 0)						\
		_avg = (((_avg << (decay)) - _avg) + (new)) >> (decay);	\
	else								\
		_avg = (new);						\
	(old) = _avg;							\
} while (0)
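
/*
 * Worked example: DLIL_EWMA computes avg' = ((2^decay - 1) * avg + new)
 * / 2^decay using shifts only.  With the default decay of 2 (see
 * IF_RXPOLL_DECAY above) this is avg' = (3 * avg + new) / 4, i.e. each
 * new sample contributes a quarter of its weight to the running average.
 */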

#define MBPS	(1ULL * 1000 * 1000)
#define GBPS	(MBPS * 1000)

struct rxpoll_time_tbl {
	u_int64_t	speed;		/* downlink speed */
	u_int32_t	plowat;		/* packets low watermark */
	u_int32_t	phiwat;		/* packets high watermark */
	u_int32_t	blowat;		/* bytes low watermark */
	u_int32_t	bhiwat;		/* bytes high watermark */
};

static struct rxpoll_time_tbl rxpoll_tbl[] = {
	{  10 * MBPS,	2,	8,	(1 * 1024),	(6 * 1024)	},
	{ 100 * MBPS,	10,	40,	(4 * 1024),	(64 * 1024)	},
	{   1 * GBPS,	10,	40,	(4 * 1024),	(64 * 1024)	},
	{  10 * GBPS,	10,	40,	(4 * 1024),	(64 * 1024)	},
	{ 100 * GBPS,	10,	40,	(4 * 1024),	(64 * 1024)	},
	{ 0, 0, 0, 0, 0 }
};
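
/*
 * Reading the table above (illustrative): on a 100 Mbps downlink the
 * packet watermarks are 10 (low) and 40 (high), and the byte watermarks
 * 4 KB and 64 KB.  These seed the per-interface thresholds that the rx
 * poller compares its EWMAs against when deciding whether to switch
 * between interrupt and polling mode (see dlil_rxpoll_set_params()).
 * The all-zero row terminates the table.
 */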

int
proto_hash_value(u_int32_t protocol_family)
{
	/*
	 * dlil_proto_unplumb_all() depends on the mapping between
	 * the hash bucket index and the protocol family defined
	 * here; future changes must be applied there as well.
	 */
	switch (protocol_family) {
	case PF_INET:
		return (0);
	case PF_INET6:
		return (1);
	case PF_VLAN:
		return (2);
	case PF_UNSPEC:
	default:
		return (3);
	}
}

/*
 * Caller must already be holding ifnet lock.
 */
static struct if_proto *
find_attached_proto(struct ifnet *ifp, u_int32_t protocol_family)
{
	struct if_proto *proto = NULL;
	u_int32_t i = proto_hash_value(protocol_family);

	ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);

	if (ifp->if_proto_hash != NULL)
		proto = SLIST_FIRST(&ifp->if_proto_hash[i]);

	while (proto != NULL && proto->protocol_family != protocol_family)
		proto = SLIST_NEXT(proto, next_hash);

	if (proto != NULL)
		if_proto_ref(proto);

	return (proto);
}

static void
if_proto_ref(struct if_proto *proto)
{
	atomic_add_32(&proto->refcount, 1);
}

extern void if_rtproto_del(struct ifnet *ifp, int protocol);

static void
if_proto_free(struct if_proto *proto)
{
	u_int32_t oldval;
	struct ifnet *ifp = proto->ifp;
	u_int32_t proto_family = proto->protocol_family;
	struct kev_dl_proto_data ev_pr_data;

	oldval = atomic_add_32_ov(&proto->refcount, -1);
	if (oldval > 1)
		return;

	/* No more reference on this, protocol must have been detached */
	VERIFY(proto->detached);

	if (proto->proto_kpi == kProtoKPI_v1) {
		if (proto->kpi.v1.detached)
			proto->kpi.v1.detached(ifp, proto->protocol_family);
	}
	if (proto->proto_kpi == kProtoKPI_v2) {
		if (proto->kpi.v2.detached)
			proto->kpi.v2.detached(ifp, proto->protocol_family);
	}

	/*
	 * Cleanup routes that may still be in the routing table for that
	 * interface/protocol pair.
	 */
	if_rtproto_del(ifp, proto_family);

	/*
	 * The reserved field carries the number of protocols still attached
	 * (subject to change)
	 */
	ifnet_lock_shared(ifp);
	ev_pr_data.proto_family = proto_family;
	ev_pr_data.proto_remaining_count = dlil_ifp_proto_count(ifp);
	ifnet_lock_done(ifp);

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_DETACHED,
	    (struct net_event_data *)&ev_pr_data,
	    sizeof (struct kev_dl_proto_data));

	zfree(dlif_proto_zone, proto);
}

__private_extern__ void
ifnet_lock_assert(struct ifnet *ifp, ifnet_lock_assert_t what)
{
	unsigned int type = 0;
	int ass = 1;

	switch (what) {
	case IFNET_LCK_ASSERT_EXCLUSIVE:
		type = LCK_RW_ASSERT_EXCLUSIVE;
		break;

	case IFNET_LCK_ASSERT_SHARED:
		type = LCK_RW_ASSERT_SHARED;
		break;

	case IFNET_LCK_ASSERT_OWNED:
		type = LCK_RW_ASSERT_HELD;
		break;

	case IFNET_LCK_ASSERT_NOTOWNED:
		/* nothing to do here for RW lock; bypass assert */
		ass = 0;
		break;

	default:
		panic("bad ifnet assert type: %d", what);
		/* NOTREACHED */
	}
	if (ass)
		lck_rw_assert(&ifp->if_lock, type);
}

__private_extern__ void
ifnet_lock_shared(struct ifnet *ifp)
{
	lck_rw_lock_shared(&ifp->if_lock);
}

__private_extern__ void
ifnet_lock_exclusive(struct ifnet *ifp)
{
	lck_rw_lock_exclusive(&ifp->if_lock);
}

__private_extern__ void
ifnet_lock_done(struct ifnet *ifp)
{
	lck_rw_done(&ifp->if_lock);
}

#if INET6
__private_extern__ void
if_inet6data_lock_shared(struct ifnet *ifp)
{
	lck_rw_lock_shared(&ifp->if_inet6data_lock);
}

__private_extern__ void
if_inet6data_lock_exclusive(struct ifnet *ifp)
{
	lck_rw_lock_exclusive(&ifp->if_inet6data_lock);
}

__private_extern__ void
if_inet6data_lock_done(struct ifnet *ifp)
{
	lck_rw_done(&ifp->if_inet6data_lock);
}
#endif

__private_extern__ void
ifnet_head_lock_shared(void)
{
	lck_rw_lock_shared(&ifnet_head_lock);
}

__private_extern__ void
ifnet_head_lock_exclusive(void)
{
	lck_rw_lock_exclusive(&ifnet_head_lock);
}

__private_extern__ void
ifnet_head_done(void)
{
	lck_rw_done(&ifnet_head_lock);
}

/*
 * Caller must already be holding ifnet lock.
 */
static int
dlil_ifp_proto_count(struct ifnet *ifp)
{
	int i, count = 0;

	ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);

	if (ifp->if_proto_hash == NULL)
		goto done;

	for (i = 0; i < PROTO_HASH_SLOTS; i++) {
		struct if_proto *proto;
		SLIST_FOREACH(proto, &ifp->if_proto_hash[i], next_hash) {
			count++;
		}
	}
done:
	return (count);
}

__private_extern__ void
dlil_post_msg(struct ifnet *ifp, u_int32_t event_subclass,
    u_int32_t event_code, struct net_event_data *event_data,
    u_int32_t event_data_len)
{
	struct net_event_data ev_data;
	struct kev_msg ev_msg;

	bzero(&ev_msg, sizeof (ev_msg));
	bzero(&ev_data, sizeof (ev_data));
	/*
	 * A net event always starts with a net_event_data structure,
	 * but the caller can generate a simple net event or
	 * provide a longer event structure to post.
	 */
	ev_msg.vendor_code = KEV_VENDOR_APPLE;
	ev_msg.kev_class = KEV_NETWORK_CLASS;
	ev_msg.kev_subclass = event_subclass;
	ev_msg.event_code = event_code;

	if (event_data == NULL) {
		event_data = &ev_data;
		event_data_len = sizeof (struct net_event_data);
	}

	strncpy(&event_data->if_name[0], ifp->if_name, IFNAMSIZ);
	event_data->if_family = ifp->if_family;
	event_data->if_unit = (u_int32_t)ifp->if_unit;

	ev_msg.dv[0].data_length = event_data_len;
	ev_msg.dv[0].data_ptr = event_data;
	ev_msg.dv[1].data_length = 0;

	dlil_event_internal(ifp, &ev_msg);
}

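/*
 * Usage sketch: passing a NULL event_data posts a minimal event that
 * carries only the interface name, family and unit, e.g. (event code
 * shown for illustration only):
 *
 *	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_LINK_ON, NULL, 0);
 */
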
__private_extern__ int
dlil_alloc_local_stats(struct ifnet *ifp)
{
	int ret = EINVAL;
	void *buf, *base, **pbuf;

	if (ifp == NULL)
		goto end;

	if (ifp->if_tcp_stat == NULL && ifp->if_udp_stat == NULL) {
		/* allocate tcpstat_local structure */
		buf = zalloc(dlif_tcpstat_zone);
		if (buf == NULL) {
			ret = ENOMEM;
			goto end;
		}
		bzero(buf, dlif_tcpstat_bufsize);

		/* Get the 64-bit aligned base address for this object */
		base = (void *)P2ROUNDUP((intptr_t)buf + sizeof (u_int64_t),
		    sizeof (u_int64_t));
		VERIFY(((intptr_t)base + dlif_tcpstat_size) <=
		    ((intptr_t)buf + dlif_tcpstat_bufsize));

		/*
		 * Wind back a pointer size from the aligned base and
		 * save the original address so we can free it later.
		 */
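		/*
		 * Illustrative buffer layout (the udpstat_local buffer
		 * below is carved out the same way):
		 *
		 *	buf                   base (64-bit aligned)
		 *	|                     |
		 *	v                     v
		 *	+----------+---------+---------------------+
		 *	|  unused  | void *  | stats object ...    |
		 *	+----------+---------+---------------------+
		 *	            ^
		 *	            pbuf = base - sizeof (void *); *pbuf = buf
		 *
		 * Since base = P2ROUNDUP(buf + 8, 8), there are always at
		 * least 8 bytes below base, enough to hold the saved
		 * original pointer that zfree() needs at teardown.
		 */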
		pbuf = (void **)((intptr_t)base - sizeof (void *));
		*pbuf = buf;
		ifp->if_tcp_stat = base;

		/* allocate udpstat_local structure */
		buf = zalloc(dlif_udpstat_zone);
		if (buf == NULL) {
			ret = ENOMEM;
			goto end;
		}
		bzero(buf, dlif_udpstat_bufsize);

		/* Get the 64-bit aligned base address for this object */
		base = (void *)P2ROUNDUP((intptr_t)buf + sizeof (u_int64_t),
		    sizeof (u_int64_t));
		VERIFY(((intptr_t)base + dlif_udpstat_size) <=
		    ((intptr_t)buf + dlif_udpstat_bufsize));

		/*
		 * Wind back a pointer size from the aligned base and
		 * save the original address so we can free it later.
		 */
		pbuf = (void **)((intptr_t)base - sizeof (void *));
		*pbuf = buf;
		ifp->if_udp_stat = base;

		VERIFY(IS_P2ALIGNED(ifp->if_tcp_stat, sizeof (u_int64_t)) &&
		    IS_P2ALIGNED(ifp->if_udp_stat, sizeof (u_int64_t)));

		ret = 0;
	}

end:
	if (ret != 0) {
		if (ifp->if_tcp_stat != NULL) {
			pbuf = (void **)
			    ((intptr_t)ifp->if_tcp_stat - sizeof (void *));
			zfree(dlif_tcpstat_zone, *pbuf);
			ifp->if_tcp_stat = NULL;
		}
		if (ifp->if_udp_stat != NULL) {
			pbuf = (void **)
			    ((intptr_t)ifp->if_udp_stat - sizeof (void *));
			zfree(dlif_udpstat_zone, *pbuf);
			ifp->if_udp_stat = NULL;
		}
	}

	return (ret);
}

static int
dlil_create_input_thread(ifnet_t ifp, struct dlil_threading_info *inp)
{
	thread_continue_t func;
	u_int32_t limit;
	int error;

	/* NULL ifp indicates the main input thread, called at dlil_init time */
	if (ifp == NULL) {
		func = dlil_main_input_thread_func;
		VERIFY(inp == dlil_main_input_thread);
		(void) strlcat(inp->input_name,
		    "main_input", DLIL_THREADNAME_LEN);
	} else if (net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) {
		func = dlil_rxpoll_input_thread_func;
		VERIFY(inp != dlil_main_input_thread);
		(void) snprintf(inp->input_name, DLIL_THREADNAME_LEN,
		    "%s_input_poll", if_name(ifp));
	} else {
		func = dlil_input_thread_func;
		VERIFY(inp != dlil_main_input_thread);
		(void) snprintf(inp->input_name, DLIL_THREADNAME_LEN,
		    "%s_input", if_name(ifp));
	}
	VERIFY(inp->input_thr == THREAD_NULL);

	inp->lck_grp = lck_grp_alloc_init(inp->input_name, dlil_grp_attributes);
	lck_mtx_init(&inp->input_lck, inp->lck_grp, dlil_lck_attributes);

	inp->mode = IFNET_MODEL_INPUT_POLL_OFF;
	inp->ifp = ifp;		/* NULL for main input thread */

	net_timerclear(&inp->mode_holdtime);
	net_timerclear(&inp->mode_lasttime);
	net_timerclear(&inp->sample_holdtime);
	net_timerclear(&inp->sample_lasttime);
	net_timerclear(&inp->dbg_lasttime);

	/*
	 * For interfaces that support opportunistic polling, set the
	 * low and high watermarks for outstanding inbound packets/bytes.
	 * Also define freeze times for transitioning between modes
	 * and updating the average.
	 */
	if (ifp != NULL && net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) {
		limit = MAX(if_rcvq_maxlen, IF_RCVQ_MINLEN);
		(void) dlil_rxpoll_set_params(ifp, NULL, FALSE);
	} else {
		limit = (u_int32_t)-1;
	}

	_qinit(&inp->rcvq_pkts, Q_DROPTAIL, limit);
	if (inp == dlil_main_input_thread) {
		struct dlil_main_threading_info *inpm =
		    (struct dlil_main_threading_info *)inp;
		_qinit(&inpm->lo_rcvq_pkts, Q_DROPTAIL, limit);
	}

	error = kernel_thread_start(func, inp, &inp->input_thr);
	if (error == KERN_SUCCESS) {
		ml_thread_policy(inp->input_thr, MACHINE_GROUP,
		    (MACHINE_NETWORK_GROUP|MACHINE_NETWORK_NETISR));
		/*
		 * We create an affinity set so that the matching workloop
		 * thread or the starter thread (for loopback) can be
		 * scheduled on the same processor set as the input thread.
		 */
		if (net_affinity) {
			struct thread *tp = inp->input_thr;
			u_int32_t tag;
			/*
			 * Randomize to reduce the probability
			 * of affinity tag namespace collision.
			 */
			read_random(&tag, sizeof (tag));
			if (dlil_affinity_set(tp, tag) == KERN_SUCCESS) {
				thread_reference(tp);
				inp->tag = tag;
				inp->net_affinity = TRUE;
			}
		}
	} else if (inp == dlil_main_input_thread) {
		panic_plain("%s: couldn't create main input thread", __func__);
		/* NOTREACHED */
	} else {
		panic_plain("%s: couldn't create %s input thread", __func__,
		    if_name(ifp));
		/* NOTREACHED */
	}
	OSAddAtomic(1, &cur_dlil_input_threads);

	return (error);
}

static void
dlil_terminate_input_thread(struct dlil_threading_info *inp)
{
	struct ifnet *ifp;

	VERIFY(current_thread() == inp->input_thr);
	VERIFY(inp != dlil_main_input_thread);

	OSAddAtomic(-1, &cur_dlil_input_threads);

	lck_mtx_destroy(&inp->input_lck, inp->lck_grp);
	lck_grp_free(inp->lck_grp);

	inp->input_waiting = 0;
	inp->wtot = 0;
	bzero(inp->input_name, sizeof (inp->input_name));
	ifp = inp->ifp;
	inp->ifp = NULL;
	VERIFY(qhead(&inp->rcvq_pkts) == NULL && qempty(&inp->rcvq_pkts));
	qlimit(&inp->rcvq_pkts) = 0;
	bzero(&inp->stats, sizeof (inp->stats));

	VERIFY(!inp->net_affinity);
	inp->input_thr = THREAD_NULL;
	VERIFY(inp->wloop_thr == THREAD_NULL);
	VERIFY(inp->poll_thr == THREAD_NULL);
	VERIFY(inp->tag == 0);

	inp->mode = IFNET_MODEL_INPUT_POLL_OFF;
	bzero(&inp->tstats, sizeof (inp->tstats));
	bzero(&inp->pstats, sizeof (inp->pstats));
	bzero(&inp->sstats, sizeof (inp->sstats));

	net_timerclear(&inp->mode_holdtime);
	net_timerclear(&inp->mode_lasttime);
	net_timerclear(&inp->sample_holdtime);
	net_timerclear(&inp->sample_lasttime);
	net_timerclear(&inp->dbg_lasttime);

#if IFNET_INPUT_SANITY_CHK
	inp->input_mbuf_cnt = 0;
#endif /* IFNET_INPUT_SANITY_CHK */

	if (dlil_verbose) {
		printf("%s: input thread terminated\n",
		    if_name(ifp));
	}

	/* for the extra refcnt from kernel_thread_start() */
	thread_deallocate(current_thread());

	/* this is the end */
	thread_terminate(current_thread());
	/* NOTREACHED */
}

static kern_return_t
dlil_affinity_set(struct thread *tp, u_int32_t tag)
{
	thread_affinity_policy_data_t policy;

	bzero(&policy, sizeof (policy));
	policy.affinity_tag = tag;
	return (thread_policy_set(tp, THREAD_AFFINITY_POLICY,
	    (thread_policy_t)&policy, THREAD_AFFINITY_POLICY_COUNT));
}

91447636
A
1227void
1228dlil_init(void)
1229{
6d2010ae
A
1230 thread_t thread = THREAD_NULL;
1231
1232 /*
1233 * The following fields must be 64-bit aligned for atomic operations.
1234 */
1235 IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets);
1236 IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors)
1237 IF_DATA_REQUIRE_ALIGNED_64(ifi_opackets);
1238 IF_DATA_REQUIRE_ALIGNED_64(ifi_oerrors);
1239 IF_DATA_REQUIRE_ALIGNED_64(ifi_collisions);
1240 IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes);
1241 IF_DATA_REQUIRE_ALIGNED_64(ifi_obytes);
1242 IF_DATA_REQUIRE_ALIGNED_64(ifi_imcasts);
1243 IF_DATA_REQUIRE_ALIGNED_64(ifi_omcasts);
1244 IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops);
1245 IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto);
316670eb 1246 IF_DATA_REQUIRE_ALIGNED_64(ifi_alignerrs);
39236c6e
A
1247 IF_DATA_REQUIRE_ALIGNED_64(ifi_dt_bytes);
1248 IF_DATA_REQUIRE_ALIGNED_64(ifi_fpackets);
1249 IF_DATA_REQUIRE_ALIGNED_64(ifi_fbytes);
6d2010ae
A
1250
1251 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets);
1252 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors)
1253 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_opackets);
1254 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_oerrors);
1255 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_collisions);
1256 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes);
1257 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_obytes);
1258 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_imcasts);
1259 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_omcasts);
1260 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops);
1261 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto);
316670eb 1262 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_alignerrs);
39236c6e
A
1263 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_dt_bytes);
1264 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_fpackets);
1265 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_fbytes);
6d2010ae
A
1266
1267 /*
1268 * These IF_HWASSIST_ flags must be equal to their IFNET_* counterparts.
1269 */
1270 _CASSERT(IF_HWASSIST_CSUM_IP == IFNET_CSUM_IP);
1271 _CASSERT(IF_HWASSIST_CSUM_TCP == IFNET_CSUM_TCP);
1272 _CASSERT(IF_HWASSIST_CSUM_UDP == IFNET_CSUM_UDP);
1273 _CASSERT(IF_HWASSIST_CSUM_IP_FRAGS == IFNET_CSUM_FRAGMENT);
1274 _CASSERT(IF_HWASSIST_CSUM_FRAGMENT == IFNET_IP_FRAGMENT);
39236c6e
A
1275 _CASSERT(IF_HWASSIST_CSUM_TCPIPV6 == IFNET_CSUM_TCPIPV6);
1276 _CASSERT(IF_HWASSIST_CSUM_UDPIPV6 == IFNET_CSUM_UDPIPV6);
1277 _CASSERT(IF_HWASSIST_CSUM_FRAGMENT_IPV6 == IFNET_IPV6_FRAGMENT);
1278 _CASSERT(IF_HWASSIST_CSUM_PARTIAL == IFNET_CSUM_PARTIAL);
6d2010ae
A
1279 _CASSERT(IF_HWASSIST_VLAN_TAGGING == IFNET_VLAN_TAGGING);
1280 _CASSERT(IF_HWASSIST_VLAN_MTU == IFNET_VLAN_MTU);
1281 _CASSERT(IF_HWASSIST_TSO_V4 == IFNET_TSO_IPV4);
1282 _CASSERT(IF_HWASSIST_TSO_V6 == IFNET_TSO_IPV6);
1283
39236c6e
A
1284 /*
1285 * ... as well as the mbuf checksum flags counterparts.
1286 */
1287 _CASSERT(CSUM_IP == IF_HWASSIST_CSUM_IP);
1288 _CASSERT(CSUM_TCP == IF_HWASSIST_CSUM_TCP);
1289 _CASSERT(CSUM_UDP == IF_HWASSIST_CSUM_UDP);
1290 _CASSERT(CSUM_IP_FRAGS == IF_HWASSIST_CSUM_IP_FRAGS);
1291 _CASSERT(CSUM_FRAGMENT == IF_HWASSIST_CSUM_FRAGMENT);
1292 _CASSERT(CSUM_TCPIPV6 == IF_HWASSIST_CSUM_TCPIPV6);
1293 _CASSERT(CSUM_UDPIPV6 == IF_HWASSIST_CSUM_UDPIPV6);
1294 _CASSERT(CSUM_FRAGMENT_IPV6 == IF_HWASSIST_CSUM_FRAGMENT_IPV6);
1295 _CASSERT(CSUM_PARTIAL == IF_HWASSIST_CSUM_PARTIAL);
1296 _CASSERT(CSUM_VLAN_TAG_VALID == IF_HWASSIST_VLAN_TAGGING);
1297
6d2010ae
A
1298 /*
1299 * Make sure we have at least IF_LLREACH_MAXLEN in the llreach info.
1300 */
1301 _CASSERT(IF_LLREACH_MAXLEN <= IF_LLREACHINFO_ADDRLEN);
316670eb 1302 _CASSERT(IFNET_LLREACHINFO_ADDRLEN == IF_LLREACHINFO_ADDRLEN);
6d2010ae 1303
39236c6e
A
1304 _CASSERT(IFRLOGF_DLIL == IFNET_LOGF_DLIL);
1305 _CASSERT(IFRLOGF_FAMILY == IFNET_LOGF_FAMILY);
1306 _CASSERT(IFRLOGF_DRIVER == IFNET_LOGF_DRIVER);
1307 _CASSERT(IFRLOGF_FIRMWARE == IFNET_LOGF_FIRMWARE);
1308
1309 _CASSERT(IFRLOGCAT_CONNECTIVITY == IFNET_LOGCAT_CONNECTIVITY);
1310 _CASSERT(IFRLOGCAT_QUALITY == IFNET_LOGCAT_QUALITY);
1311 _CASSERT(IFRLOGCAT_PERFORMANCE == IFNET_LOGCAT_PERFORMANCE);
1312
1313 _CASSERT(IFRTYPE_FAMILY_ANY == IFNET_FAMILY_ANY);
1314 _CASSERT(IFRTYPE_FAMILY_LOOPBACK == IFNET_FAMILY_LOOPBACK);
1315 _CASSERT(IFRTYPE_FAMILY_ETHERNET == IFNET_FAMILY_ETHERNET);
1316 _CASSERT(IFRTYPE_FAMILY_SLIP == IFNET_FAMILY_SLIP);
1317 _CASSERT(IFRTYPE_FAMILY_TUN == IFNET_FAMILY_TUN);
1318 _CASSERT(IFRTYPE_FAMILY_VLAN == IFNET_FAMILY_VLAN);
1319 _CASSERT(IFRTYPE_FAMILY_PPP == IFNET_FAMILY_PPP);
1320 _CASSERT(IFRTYPE_FAMILY_PVC == IFNET_FAMILY_PVC);
1321 _CASSERT(IFRTYPE_FAMILY_DISC == IFNET_FAMILY_DISC);
1322 _CASSERT(IFRTYPE_FAMILY_MDECAP == IFNET_FAMILY_MDECAP);
1323 _CASSERT(IFRTYPE_FAMILY_GIF == IFNET_FAMILY_GIF);
1324 _CASSERT(IFRTYPE_FAMILY_FAITH == IFNET_FAMILY_FAITH);
1325 _CASSERT(IFRTYPE_FAMILY_STF == IFNET_FAMILY_STF);
1326 _CASSERT(IFRTYPE_FAMILY_FIREWIRE == IFNET_FAMILY_FIREWIRE);
1327 _CASSERT(IFRTYPE_FAMILY_BOND == IFNET_FAMILY_BOND);
1328 _CASSERT(IFRTYPE_FAMILY_CELLULAR == IFNET_FAMILY_CELLULAR);
1329
1330 _CASSERT(IFRTYPE_SUBFAMILY_ANY == IFNET_SUBFAMILY_ANY);
1331 _CASSERT(IFRTYPE_SUBFAMILY_USB == IFNET_SUBFAMILY_USB);
1332 _CASSERT(IFRTYPE_SUBFAMILY_BLUETOOTH == IFNET_SUBFAMILY_BLUETOOTH);
1333 _CASSERT(IFRTYPE_SUBFAMILY_WIFI == IFNET_SUBFAMILY_WIFI);
1334 _CASSERT(IFRTYPE_SUBFAMILY_THUNDERBOLT == IFNET_SUBFAMILY_THUNDERBOLT);
1335
1336 _CASSERT(DLIL_MODIDLEN == IFNET_MODIDLEN);
1337 _CASSERT(DLIL_MODARGLEN == IFNET_MODARGLEN);
1338
6d2010ae
A
1339 PE_parse_boot_argn("net_affinity", &net_affinity,
1340 sizeof (net_affinity));
b0d623f7 1341
316670eb
A
1342 PE_parse_boot_argn("net_rxpoll", &net_rxpoll, sizeof (net_rxpoll));
1343
d1ecb069 1344 PE_parse_boot_argn("net_rtref", &net_rtref, sizeof (net_rtref));
6d2010ae
A
1345
1346 PE_parse_boot_argn("ifnet_debug", &ifnet_debug, sizeof (ifnet_debug));
1347
1348 dlif_size = (ifnet_debug == 0) ? sizeof (struct dlil_ifnet) :
1349 sizeof (struct dlil_ifnet_dbg);
1350 /* Enforce 64-bit alignment for dlil_ifnet structure */
1351 dlif_bufsize = dlif_size + sizeof (void *) + sizeof (u_int64_t);
1352 dlif_bufsize = P2ROUNDUP(dlif_bufsize, sizeof (u_int64_t));
1353 dlif_zone = zinit(dlif_bufsize, DLIF_ZONE_MAX * dlif_bufsize,
1354 0, DLIF_ZONE_NAME);
1355 if (dlif_zone == NULL) {
316670eb
A
1356 panic_plain("%s: failed allocating %s", __func__,
1357 DLIF_ZONE_NAME);
6d2010ae
A
1358 /* NOTREACHED */
1359 }
1360 zone_change(dlif_zone, Z_EXPAND, TRUE);
1361 zone_change(dlif_zone, Z_CALLERACCT, FALSE);
1362
1363 dlif_filt_size = sizeof (struct ifnet_filter);
1364 dlif_filt_zone = zinit(dlif_filt_size,
1365 DLIF_FILT_ZONE_MAX * dlif_filt_size, 0, DLIF_FILT_ZONE_NAME);
1366 if (dlif_filt_zone == NULL) {
316670eb 1367 panic_plain("%s: failed allocating %s", __func__,
6d2010ae
A
1368 DLIF_FILT_ZONE_NAME);
1369 /* NOTREACHED */
1370 }
1371 zone_change(dlif_filt_zone, Z_EXPAND, TRUE);
1372 zone_change(dlif_filt_zone, Z_CALLERACCT, FALSE);
1373
6d2010ae
A
1374 dlif_phash_size = sizeof (struct proto_hash_entry) * PROTO_HASH_SLOTS;
1375 dlif_phash_zone = zinit(dlif_phash_size,
1376 DLIF_PHASH_ZONE_MAX * dlif_phash_size, 0, DLIF_PHASH_ZONE_NAME);
1377 if (dlif_phash_zone == NULL) {
316670eb 1378 panic_plain("%s: failed allocating %s", __func__,
6d2010ae
A
1379 DLIF_PHASH_ZONE_NAME);
1380 /* NOTREACHED */
1381 }
1382 zone_change(dlif_phash_zone, Z_EXPAND, TRUE);
1383 zone_change(dlif_phash_zone, Z_CALLERACCT, FALSE);
1384
1385 dlif_proto_size = sizeof (struct if_proto);
1386 dlif_proto_zone = zinit(dlif_proto_size,
1387 DLIF_PROTO_ZONE_MAX * dlif_proto_size, 0, DLIF_PROTO_ZONE_NAME);
1388 if (dlif_proto_zone == NULL) {
316670eb 1389 panic_plain("%s: failed allocating %s", __func__,
6d2010ae
A
1390 DLIF_PROTO_ZONE_NAME);
1391 /* NOTREACHED */
1392 }
1393 zone_change(dlif_proto_zone, Z_EXPAND, TRUE);
1394 zone_change(dlif_proto_zone, Z_CALLERACCT, FALSE);
1395
316670eb
A
1396 dlif_tcpstat_size = sizeof (struct tcpstat_local);
1397 /* Enforce 64-bit alignment for tcpstat_local structure */
1398 dlif_tcpstat_bufsize =
1399 dlif_tcpstat_size + sizeof (void *) + sizeof (u_int64_t);
1400 dlif_tcpstat_bufsize =
1401 P2ROUNDUP(dlif_tcpstat_bufsize, sizeof (u_int64_t));
1402 dlif_tcpstat_zone = zinit(dlif_tcpstat_bufsize,
1403 DLIF_TCPSTAT_ZONE_MAX * dlif_tcpstat_bufsize, 0,
1404 DLIF_TCPSTAT_ZONE_NAME);
1405 if (dlif_tcpstat_zone == NULL) {
1406 panic_plain("%s: failed allocating %s", __func__,
1407 DLIF_TCPSTAT_ZONE_NAME);
1408 /* NOTREACHED */
1409 }
1410 zone_change(dlif_tcpstat_zone, Z_EXPAND, TRUE);
1411 zone_change(dlif_tcpstat_zone, Z_CALLERACCT, FALSE);
1412
1413 dlif_udpstat_size = sizeof (struct udpstat_local);
1414 /* Enforce 64-bit alignment for udpstat_local structure */
1415 dlif_udpstat_bufsize =
1416 dlif_udpstat_size + sizeof (void *) + sizeof (u_int64_t);
1417 dlif_udpstat_bufsize =
1418 P2ROUNDUP(dlif_udpstat_bufsize, sizeof (u_int64_t));
1419 dlif_udpstat_zone = zinit(dlif_udpstat_bufsize,
1420 DLIF_TCPSTAT_ZONE_MAX * dlif_udpstat_bufsize, 0,
1421 DLIF_UDPSTAT_ZONE_NAME);
1422 if (dlif_udpstat_zone == NULL) {
1423 panic_plain("%s: failed allocating %s", __func__,
1424 DLIF_UDPSTAT_ZONE_NAME);
1425 /* NOTREACHED */
1426 }
1427 zone_change(dlif_udpstat_zone, Z_EXPAND, TRUE);
1428 zone_change(dlif_udpstat_zone, Z_CALLERACCT, FALSE);
1429
6d2010ae 1430 ifnet_llreach_init();
d1ecb069 1431
91447636 1432 TAILQ_INIT(&dlil_ifnet_head);
91447636 1433 TAILQ_INIT(&ifnet_head);
6d2010ae
A
1434 TAILQ_INIT(&ifnet_detaching_head);
1435
91447636 1436 /* Setup the lock groups we will use */
2d21ac55 1437 dlil_grp_attributes = lck_grp_attr_alloc_init();
91447636 1438
316670eb 1439 dlil_lock_group = lck_grp_alloc_init("DLIL internal locks",
6d2010ae
A
1440 dlil_grp_attributes);
1441 ifnet_lock_group = lck_grp_alloc_init("ifnet locks",
1442 dlil_grp_attributes);
1443 ifnet_head_lock_group = lck_grp_alloc_init("ifnet head lock",
1444 dlil_grp_attributes);
316670eb
A
1445 ifnet_rcv_lock_group = lck_grp_alloc_init("ifnet rcv locks",
1446 dlil_grp_attributes);
1447 ifnet_snd_lock_group = lck_grp_alloc_init("ifnet snd locks",
6d2010ae
A
1448 dlil_grp_attributes);
1449
91447636 1450 /* Setup the lock attributes we will use */
2d21ac55 1451 dlil_lck_attributes = lck_attr_alloc_init();
6d2010ae 1452
91447636 1453 ifnet_lock_attr = lck_attr_alloc_init();
6d2010ae
A
1454
1455 lck_rw_init(&ifnet_head_lock, ifnet_head_lock_group,
1456 dlil_lck_attributes);
1457 lck_mtx_init(&dlil_ifnet_lock, dlil_lock_group, dlil_lck_attributes);
1458
39236c6e
A
1459 /* Setup interface flow control related items */
1460 lck_mtx_init(&ifnet_fc_lock, dlil_lock_group, dlil_lck_attributes);
316670eb 1461
39236c6e
A
1462 ifnet_fc_zone_size = sizeof (struct ifnet_fc_entry);
1463 ifnet_fc_zone = zinit(ifnet_fc_zone_size,
1464 IFNET_FC_ZONE_MAX * ifnet_fc_zone_size, 0, IFNET_FC_ZONE_NAME);
1465 if (ifnet_fc_zone == NULL) {
1466 panic_plain("%s: failed allocating %s", __func__,
1467 IFNET_FC_ZONE_NAME);
1468 /* NOTREACHED */
1469 }
1470 zone_change(ifnet_fc_zone, Z_EXPAND, TRUE);
1471 zone_change(ifnet_fc_zone, Z_CALLERACCT, FALSE);
6d2010ae 1472
39236c6e 1473 /* Initialize interface address subsystem */
6d2010ae 1474 ifa_init();
39236c6e
A
1475
1476#if PF
1477 /* Initialize the packet filter */
1478 pfinit();
1479#endif /* PF */
1480
1481 /* Initialize queue algorithms */
1482 classq_init();
1483
1484 /* Initialize packet schedulers */
1485 pktsched_init();
1486
1487 /* Initialize flow advisory subsystem */
1488 flowadv_init();
1489
1490 /* Initialize the pktap virtual interface */
1491 pktap_init();
1492
1493#if DEBUG
1494 /* Run self-tests */
1495 dlil_verify_sum16();
1496#endif /* DEBUG */
1497
91447636 1498 /*
316670eb
A
1499 * Create and start up the main DLIL input thread and the interface
1500 * detacher thread once everything is initialized.
91447636 1501 */
316670eb 1502 dlil_create_input_thread(NULL, dlil_main_input_thread);
2d21ac55 1503
316670eb
A
1504 if (kernel_thread_start(ifnet_detacher_thread_func,
1505 NULL, &thread) != KERN_SUCCESS) {
1506 panic_plain("%s: couldn't create detacher thread", __func__);
6d2010ae
A
1507 /* NOTREACHED */
1508 }
b0d623f7 1509 thread_deallocate(thread);
91447636 1510}
1c79356b 1511
6d2010ae
A
1512static void
1513if_flt_monitor_busy(struct ifnet *ifp)
1514{
1515 lck_mtx_assert(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
1516
1517 ++ifp->if_flt_busy;
1518 VERIFY(ifp->if_flt_busy != 0);
1519}
1520
1521static void
1522if_flt_monitor_unbusy(struct ifnet *ifp)
1523{
1524 if_flt_monitor_leave(ifp);
1525}
1526
1527static void
1528if_flt_monitor_enter(struct ifnet *ifp)
1529{
1530 lck_mtx_assert(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
1531
1532 while (ifp->if_flt_busy) {
1533 ++ifp->if_flt_waiters;
1534 (void) msleep(&ifp->if_flt_head, &ifp->if_flt_lock,
1535 (PZERO - 1), "if_flt_monitor", NULL);
1536 }
1537 if_flt_monitor_busy(ifp);
1538}
1539
1540static void
1541if_flt_monitor_leave(struct ifnet *ifp)
1542{
1543 lck_mtx_assert(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
1544
1545 VERIFY(ifp->if_flt_busy != 0);
1546 --ifp->if_flt_busy;
1547
1548 if (ifp->if_flt_busy == 0 && ifp->if_flt_waiters > 0) {
1549 ifp->if_flt_waiters = 0;
1550 wakeup(&ifp->if_flt_head);
1551 }
1552}
1553
2d21ac55 1554__private_extern__ int
6d2010ae 1555dlil_attach_filter(struct ifnet *ifp, const struct iff_filter *if_filter,
39236c6e 1556 interface_filter_t *filter_ref, u_int32_t flags)
6d2010ae
A
1557{
1558 int retval = 0;
1559 struct ifnet_filter *filter = NULL;
9bccf70c 1560
6d2010ae
A
1561 ifnet_head_lock_shared();
1562 /* Check that the interface is in the global list */
1563 if (!ifnet_lookup(ifp)) {
1564 retval = ENXIO;
1565 goto done;
1566 }
1567
1568 filter = zalloc(dlif_filt_zone);
1569 if (filter == NULL) {
1570 retval = ENOMEM;
1571 goto done;
1572 }
1573 bzero(filter, dlif_filt_size);
1574
1575 /* refcnt held above during lookup */
39236c6e 1576 filter->filt_flags = flags;
91447636
A
1577 filter->filt_ifp = ifp;
1578 filter->filt_cookie = if_filter->iff_cookie;
1579 filter->filt_name = if_filter->iff_name;
1580 filter->filt_protocol = if_filter->iff_protocol;
1581 filter->filt_input = if_filter->iff_input;
1582 filter->filt_output = if_filter->iff_output;
1583 filter->filt_event = if_filter->iff_event;
1584 filter->filt_ioctl = if_filter->iff_ioctl;
1585 filter->filt_detached = if_filter->iff_detached;
6d2010ae
A
1586
1587 lck_mtx_lock(&ifp->if_flt_lock);
1588 if_flt_monitor_enter(ifp);
1589
1590 lck_mtx_assert(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
91447636 1591 TAILQ_INSERT_TAIL(&ifp->if_flt_head, filter, filt_next);
6d2010ae
A
1592
1593 if_flt_monitor_leave(ifp);
1594 lck_mtx_unlock(&ifp->if_flt_lock);
1595
91447636 1596 *filter_ref = filter;
b0d623f7
A
1597
1598 /*
1599 * Bump filter count and route_generation ID to let TCP
1600 * know it shouldn't do TSO on this connection
1601 */
39236c6e
A
1602 if ((filter->filt_flags & DLIL_IFF_TSO) == 0) {
1603 OSAddAtomic(1, &dlil_filter_disable_tso_count);
b0d623f7 1604 routegenid_update();
39236c6e 1605 }
6d2010ae 1606 if (dlil_verbose) {
39236c6e
A
1607 printf("%s: %s filter attached\n", if_name(ifp),
1608 if_filter->iff_name);
6d2010ae
A
1609 }
1610done:
1611 ifnet_head_done();
1612 if (retval != 0 && ifp != NULL) {
39236c6e
A
1613 DLIL_PRINTF("%s: failed to attach %s (err=%d)\n",
1614 if_name(ifp), if_filter->iff_name, retval);
6d2010ae
A
1615 }
1616 if (retval != 0 && filter != NULL)
1617 zfree(dlif_filt_zone, filter);
1618
1619 return (retval);
1c79356b
A
1620}
1621
91447636 1622static int
6d2010ae 1623dlil_detach_filter_internal(interface_filter_t filter, int detached)
1c79356b 1624{
91447636 1625 int retval = 0;
6d2010ae 1626
3a60a9f5 1627 if (detached == 0) {
6d2010ae
A
1628 ifnet_t ifp = NULL;
1629
3a60a9f5
A
1630 ifnet_head_lock_shared();
1631 TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
6d2010ae
A
1632 interface_filter_t entry = NULL;
1633
1634 lck_mtx_lock(&ifp->if_flt_lock);
3a60a9f5 1635 TAILQ_FOREACH(entry, &ifp->if_flt_head, filt_next) {
6d2010ae
A
1636 if (entry != filter || entry->filt_skip)
1637 continue;
1638 /*
1639 * We've found a match; since it's possible
1640 * that the thread gets blocked in the monitor,
1641 * we do the lock dance. Interface should
1642 * not be detached since we still have a use
1643 * count held during filter attach.
1644 */
1645 entry->filt_skip = 1; /* skip input/output */
1646 lck_mtx_unlock(&ifp->if_flt_lock);
1647 ifnet_head_done();
1648
1649 lck_mtx_lock(&ifp->if_flt_lock);
1650 if_flt_monitor_enter(ifp);
1651 lck_mtx_assert(&ifp->if_flt_lock,
1652 LCK_MTX_ASSERT_OWNED);
1653
1654 /* Remove the filter from the list */
1655 TAILQ_REMOVE(&ifp->if_flt_head, filter,
1656 filt_next);
1657
1658 if_flt_monitor_leave(ifp);
1659 lck_mtx_unlock(&ifp->if_flt_lock);
1660 if (dlil_verbose) {
39236c6e
A
1661 printf("%s: %s filter detached\n",
1662 if_name(ifp), filter->filt_name);
6d2010ae
A
1663 }
1664 goto destroy;
3a60a9f5 1665 }
6d2010ae 1666 lck_mtx_unlock(&ifp->if_flt_lock);
3a60a9f5
A
1667 }
1668 ifnet_head_done();
6d2010ae
A
1669
1670 /* filter parameter is not a valid filter ref */
1671 retval = EINVAL;
1672 goto done;
3a60a9f5 1673 }
6d2010ae
A
1674
1675 if (dlil_verbose)
1676 printf("%s filter detached\n", filter->filt_name);
1677
1678destroy:
1679
1680 /* Call the detached function if there is one */
91447636
A
1681 if (filter->filt_detached)
1682 filter->filt_detached(filter->filt_cookie, filter->filt_ifp);
9bccf70c 1683
b0d623f7
A
1684 /*
1685 * Decrease filter count and bump route_generation ID to let TCP
1686 * know it should reevaluate doing TSO or not (filt_flags must be
1687 * read before the filter is freed below.)
1688 */
39236c6e
A
1689 if ((filter->filt_flags & DLIL_IFF_TSO) == 0) {
1690 OSAddAtomic(-1, &dlil_filter_disable_tso_count);
b0d623f7 1691 routegenid_update();
39236c6e 1692 }
3a60a9f5 1693 /* Free the filter */
6d2010ae
A
1694 zfree(dlif_filt_zone, filter);
6d2010ae
A
1695done:
1696 if (retval != 0) {
1697 DLIL_PRINTF("failed to detach %s filter (err=%d)\n",
1698 filter->filt_name, retval);
1699 }
1700 return (retval);
1c79356b
A
1701}
1702
2d21ac55 1703__private_extern__ void
91447636
A
1704dlil_detach_filter(interface_filter_t filter)
1705{
3a60a9f5
A
1706 if (filter == NULL)
1707 return;
91447636
A
1708 dlil_detach_filter_internal(filter, 0);
1709}
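/*
 * Editor's note: a hedged sketch (not part of the original file) of an
 * external client reaching dlil_attach_filter()/dlil_detach_filter()
 * through the public iflt_attach()/iflt_detach() KPI.  All names other
 * than the KPI entry points are illustrative.
 */
static errno_t
sketch_filt_input(void *cookie, ifnet_t ifp, protocol_family_t protocol,
    mbuf_t *data, char **frame_ptr)
{
#pragma unused(cookie, ifp, protocol, data, frame_ptr)
	return (0);	/* 0 == let the packet continue up the stack */
}

static errno_t
sketch_filter_attach(ifnet_t ifp, interface_filter_t *ref)
{
	struct iff_filter flt;

	bzero(&flt, sizeof (flt));
	flt.iff_name = "com.example.sketchfilt";
	flt.iff_protocol = 0;		/* 0 == see all protocol families */
	flt.iff_input = sketch_filt_input;
	/* detach later via iflt_detach(*ref) */
	return (iflt_attach(ifp, &flt, ref));
}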
1c79356b 1710
316670eb
A
1711/*
1712 * Main input thread:
1713 *
1714 * a) handles all inbound packets for lo0
1715 * b) handles all inbound packets for interfaces with no dedicated
1716 * input thread (i.e. anything other than Ethernet/PDP, and other
1717 * than interfaces that support opportunistic polling)
1718 * c) protocol registrations
1719 * d) packet injections
1720 */
91447636 1721static void
316670eb 1722dlil_main_input_thread_func(void *v, wait_result_t w)
91447636 1723{
316670eb
A
1724#pragma unused(w)
1725 struct dlil_main_threading_info *inpm = v;
1726 struct dlil_threading_info *inp = v;
1727
1728 VERIFY(inp == dlil_main_input_thread);
1729 VERIFY(inp->ifp == NULL);
1730 VERIFY(inp->mode == IFNET_MODEL_INPUT_POLL_OFF);
1731
91447636 1732 while (1) {
2d21ac55 1733 struct mbuf *m = NULL, *m_loop = NULL;
316670eb
A
1734 u_int32_t m_cnt, m_cnt_loop;
1735 boolean_t proto_req;
6d2010ae 1736
316670eb 1737 lck_mtx_lock_spin(&inp->input_lck);
6d2010ae 1738
2d21ac55 1739 /* Wait until there is work to be done */
316670eb
A
1740 while (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) {
1741 inp->input_waiting &= ~DLIL_INPUT_RUNNING;
1742 (void) msleep(&inp->input_waiting, &inp->input_lck,
1743 (PZERO - 1) | PSPIN, inp->input_name, NULL);
2d21ac55
A
1744 }
1745
316670eb
A
1746 inp->input_waiting |= DLIL_INPUT_RUNNING;
1747 inp->input_waiting &= ~DLIL_INPUT_WAITING;
2d21ac55 1748
316670eb
A
1749 /* Main input thread cannot be terminated */
1750 VERIFY(!(inp->input_waiting & DLIL_INPUT_TERMINATE));
2d21ac55 1751
316670eb
A
1752 proto_req = (inp->input_waiting &
1753 (DLIL_PROTO_WAITING | DLIL_PROTO_REGISTER));
6d2010ae 1754
316670eb
A
1755 /* Packets for non-dedicated interfaces other than lo0 */
1756 m_cnt = qlen(&inp->rcvq_pkts);
1757 m = _getq_all(&inp->rcvq_pkts);
6d2010ae 1758
39236c6e 1759 /* Packets exclusive to lo0 */
316670eb
A
1760 m_cnt_loop = qlen(&inpm->lo_rcvq_pkts);
1761 m_loop = _getq_all(&inpm->lo_rcvq_pkts);
6d2010ae 1762
316670eb 1763 inp->wtot = 0;
6d2010ae 1764
316670eb 1765 lck_mtx_unlock(&inp->input_lck);
6d2010ae 1766
316670eb
A
1767 /*
1768 * NOTE: we should consider adding thread-starvation
1769 * safeguards here, since we may be handed long
1770 * chains of packets.
1771 */
1772 if (m_loop != NULL)
1773 dlil_input_packet_list_extended(lo_ifp, m_loop,
1774 m_cnt_loop, inp->mode);
6d2010ae 1775
316670eb
A
1776 if (m != NULL)
1777 dlil_input_packet_list_extended(NULL, m,
1778 m_cnt, inp->mode);
1779
1780 if (proto_req)
1781 proto_input_run();
1782 }
1783
1784 /* NOTREACHED */
1785 VERIFY(0); /* we should never get here */
1786}
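/*
 * Editor's note (hedged): the producer half of the msleep() handshake
 * above lives in ifnet_input_common() below, which sets DLIL_INPUT_WAITING
 * and issues wakeup_one() only when DLIL_INPUT_RUNNING is clear, so an
 * already-running input thread is never needlessly signalled:
 *
 *	producer (ifnet_input_common)     consumer (input thread funcs)
 *	-----------------------------     -----------------------------
 *	lock(&inp->input_lck)             while no work bit is set:
 *	input_waiting |= WAITING              clear RUNNING, msleep()
 *	if (!(input_waiting & RUNNING))   set RUNNING, clear WAITING
 *	        wakeup_one(...)           drain rcvq_pkts, unlock, process
 *	unlock(&inp->input_lck)
 */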
1787
1788/*
1789 * Input thread for interfaces with legacy input model.
1790 */
1791static void
1792dlil_input_thread_func(void *v, wait_result_t w)
1793{
1794#pragma unused(w)
1795 struct dlil_threading_info *inp = v;
1796 struct ifnet *ifp = inp->ifp;
1797
1798 VERIFY(inp != dlil_main_input_thread);
1799 VERIFY(ifp != NULL);
1800 VERIFY(!(ifp->if_eflags & IFEF_RXPOLL) || !net_rxpoll);
1801 VERIFY(inp->mode == IFNET_MODEL_INPUT_POLL_OFF);
2d21ac55 1802
316670eb
A
1803 while (1) {
1804 struct mbuf *m = NULL;
1805 u_int32_t m_cnt;
1806
1807 lck_mtx_lock_spin(&inp->input_lck);
2d21ac55 1808
316670eb
A
1809 /* Wait until there is work to be done */
1810 while (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) {
1811 inp->input_waiting &= ~DLIL_INPUT_RUNNING;
1812 (void) msleep(&inp->input_waiting, &inp->input_lck,
1813 (PZERO - 1) | PSPIN, inp->input_name, NULL);
2d21ac55
A
1814 }
1815
316670eb
A
1816 inp->input_waiting |= DLIL_INPUT_RUNNING;
1817 inp->input_waiting &= ~DLIL_INPUT_WAITING;
6d2010ae 1818
316670eb
A
1819 /*
1820 * Protocol registration and injection must always use
1821 * the main input thread; in theory the latter can utilize
1822 * the corresponding input thread on which the packet arrived,
1823 * but that requires our knowing the interface in advance
1824 * (and the benefits might not be worth the trouble.)
1825 */
1826 VERIFY(!(inp->input_waiting &
1827 (DLIL_PROTO_WAITING|DLIL_PROTO_REGISTER)));
6d2010ae 1828
316670eb
A
1829 /* Packets for this interface */
1830 m_cnt = qlen(&inp->rcvq_pkts);
1831 m = _getq_all(&inp->rcvq_pkts);
6d2010ae 1832
316670eb
A
1833 if (inp->input_waiting & DLIL_INPUT_TERMINATE) {
1834 lck_mtx_unlock(&inp->input_lck);
1835
1836 /* Free up pending packets */
1837 if (m != NULL)
1838 mbuf_freem_list(m);
1839
1840 dlil_terminate_input_thread(inp);
1841 /* NOTREACHED */
1842 return;
2d21ac55
A
1843 }
1844
316670eb
A
1845 inp->wtot = 0;
1846
1847 dlil_input_stats_sync(ifp, inp);
1848
1849 lck_mtx_unlock(&inp->input_lck);
2d21ac55 1850
91447636
A
1851 /*
1852 * NOTE: we should consider adding thread-starvation
6d2010ae
A
1853 * safeguards here, since we may be handed long
1854 * chains of packets.
91447636 1855 */
6d2010ae 1856 if (m != NULL)
316670eb
A
1857 dlil_input_packet_list_extended(NULL, m,
1858 m_cnt, inp->mode);
2d21ac55 1859 }
316670eb
A
1860
1861 /* NOTREACHED */
1862 VERIFY(0); /* we should never get here */
2d21ac55
A
1863}
1864
316670eb
A
1865/*
1866 * Input thread for interfaces with opportunistic polling input model.
1867 */
1868static void
1869dlil_rxpoll_input_thread_func(void *v, wait_result_t w)
2d21ac55 1870{
316670eb
A
1871#pragma unused(w)
1872 struct dlil_threading_info *inp = v;
1873 struct ifnet *ifp = inp->ifp;
1874 struct timespec ts;
2d21ac55 1875
316670eb
A
1876 VERIFY(inp != dlil_main_input_thread);
1877 VERIFY(ifp != NULL && (ifp->if_eflags & IFEF_RXPOLL));
2d21ac55 1878
2d21ac55 1879 while (1) {
316670eb
A
1880 struct mbuf *m = NULL;
1881 u_int32_t m_cnt, m_size, poll_req = 0;
1882 ifnet_model_t mode;
1883 struct timespec now, delta;
39236c6e 1884 u_int64_t ival;
6d2010ae 1885
316670eb 1886 lck_mtx_lock_spin(&inp->input_lck);
6d2010ae 1887
39236c6e
A
1888 if ((ival = inp->rxpoll_ival) < IF_RXPOLL_INTERVALTIME_MIN)
1889 ival = IF_RXPOLL_INTERVALTIME_MIN;
1890
316670eb
A
1891 /* Link parameters changed? */
1892 if (ifp->if_poll_update != 0) {
1893 ifp->if_poll_update = 0;
39236c6e 1894 (void) dlil_rxpoll_set_params(ifp, NULL, TRUE);
91447636 1895 }
1c79356b 1896
316670eb
A
1897 /* Current operating mode */
1898 mode = inp->mode;
1c79356b 1899
316670eb 1900 /* Wait until there is work to be done */
39236c6e 1901 while (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) {
316670eb
A
1902 inp->input_waiting &= ~DLIL_INPUT_RUNNING;
1903 (void) msleep(&inp->input_waiting, &inp->input_lck,
1904 (PZERO - 1) | PSPIN, inp->input_name, NULL);
1905 }
2d21ac55 1906
316670eb
A
1907 inp->input_waiting |= DLIL_INPUT_RUNNING;
1908 inp->input_waiting &= ~DLIL_INPUT_WAITING;
2d21ac55
A
1909
1910 /*
316670eb
A
1911 * Protocol registration and injection must always use
1912 * the main input thread; in theory the latter can utilize
1913 * the corresponding input thread on which the packet arrived,
1914 * but that requires our knowing the interface in advance
1915 * (and the benefits might not be worth the trouble.)
2d21ac55 1916 */
316670eb
A
1917 VERIFY(!(inp->input_waiting &
1918 (DLIL_PROTO_WAITING|DLIL_PROTO_REGISTER)));
2d21ac55 1919
316670eb
A
1920 if (inp->input_waiting & DLIL_INPUT_TERMINATE) {
1921 /* Free up pending packets */
1922 _flushq(&inp->rcvq_pkts);
1923 lck_mtx_unlock(&inp->input_lck);
2d21ac55 1924
316670eb
A
1925 dlil_terminate_input_thread(inp);
1926 /* NOTREACHED */
1927 return;
2d21ac55 1928 }
2d21ac55 1929
316670eb
A
1930 /* Total count of all packets */
1931 m_cnt = qlen(&inp->rcvq_pkts);
1932
1933 /* Total bytes of all packets */
1934 m_size = qsize(&inp->rcvq_pkts);
1935
1936 /* Packets for this interface */
1937 m = _getq_all(&inp->rcvq_pkts);
1938 VERIFY(m != NULL || m_cnt == 0);
1939
1940 nanouptime(&now);
1941 if (!net_timerisset(&inp->sample_lasttime))
1942 *(&inp->sample_lasttime) = *(&now);
1943
1944 net_timersub(&now, &inp->sample_lasttime, &delta);
1945 if (if_rxpoll && net_timerisset(&inp->sample_holdtime)) {
1946 u_int32_t ptot, btot;
1947
1948 /* Accumulate statistics for current sampling */
1949 PKTCNTR_ADD(&inp->sstats, m_cnt, m_size);
1950
1951 if (net_timercmp(&delta, &inp->sample_holdtime, <))
1952 goto skip;
1953
1954 *(&inp->sample_lasttime) = *(&now);
1955
1956 /* Calculate min/max of inbound bytes */
1957 btot = (u_int32_t)inp->sstats.bytes;
1958 if (inp->rxpoll_bmin == 0 || inp->rxpoll_bmin > btot)
1959 inp->rxpoll_bmin = btot;
1960 if (btot > inp->rxpoll_bmax)
1961 inp->rxpoll_bmax = btot;
1962
1963 /* Calculate EWMA of inbound bytes */
1964 DLIL_EWMA(inp->rxpoll_bavg, btot, if_rxpoll_decay);
1965
1966 /* Calculate min/max of inbound packets */
1967 ptot = (u_int32_t)inp->sstats.packets;
1968 if (inp->rxpoll_pmin == 0 || inp->rxpoll_pmin > ptot)
1969 inp->rxpoll_pmin = ptot;
1970 if (ptot > inp->rxpoll_pmax)
1971 inp->rxpoll_pmax = ptot;
1972
1973 /* Calculate EWMA of inbound packets */
1974 DLIL_EWMA(inp->rxpoll_pavg, ptot, if_rxpoll_decay);
1975
1976 /* Reset sampling statistics */
1977 PKTCNTR_CLEAR(&inp->sstats);
1978
1979 /* Calculate EWMA of wakeup requests */
1980 DLIL_EWMA(inp->rxpoll_wavg, inp->wtot, if_rxpoll_decay);
1981 inp->wtot = 0;
1982
1983 if (dlil_verbose) {
1984 if (!net_timerisset(&inp->dbg_lasttime))
1985 *(&inp->dbg_lasttime) = *(&now);
1986 net_timersub(&now, &inp->dbg_lasttime, &delta);
1987 if (net_timercmp(&delta, &dlil_dbgrate, >=)) {
1988 *(&inp->dbg_lasttime) = *(&now);
39236c6e 1989 printf("%s: [%s] pkts avg %d max %d "
316670eb
A
1990 "limits [%d/%d], wreq avg %d "
1991 "limits [%d/%d], bytes avg %d "
39236c6e
A
1992 "limits [%d/%d]\n", if_name(ifp),
1993 (inp->mode ==
316670eb
A
1994 IFNET_MODEL_INPUT_POLL_ON) ?
1995 "ON" : "OFF", inp->rxpoll_pavg,
1996 inp->rxpoll_pmax,
1997 inp->rxpoll_plowat,
1998 inp->rxpoll_phiwat,
1999 inp->rxpoll_wavg,
2000 inp->rxpoll_wlowat,
2001 inp->rxpoll_whiwat,
2002 inp->rxpoll_bavg,
2003 inp->rxpoll_blowat,
2004 inp->rxpoll_bhiwat);
2005 }
2006 }
2d21ac55 2007
316670eb
A
2008 /* Perform mode transition, if necessary */
2009 if (!net_timerisset(&inp->mode_lasttime))
2010 *(&inp->mode_lasttime) = *(&now);
2011
2012 net_timersub(&now, &inp->mode_lasttime, &delta);
2013 if (net_timercmp(&delta, &inp->mode_holdtime, <))
2014 goto skip;
2015
2016 if (inp->rxpoll_pavg <= inp->rxpoll_plowat &&
2017 inp->rxpoll_bavg <= inp->rxpoll_blowat &&
316670eb
A
2018 inp->mode != IFNET_MODEL_INPUT_POLL_OFF) {
2019 mode = IFNET_MODEL_INPUT_POLL_OFF;
2020 } else if (inp->rxpoll_pavg >= inp->rxpoll_phiwat &&
2021 (inp->rxpoll_bavg >= inp->rxpoll_bhiwat ||
2022 inp->rxpoll_wavg >= inp->rxpoll_whiwat) &&
2023 inp->mode != IFNET_MODEL_INPUT_POLL_ON) {
2024 mode = IFNET_MODEL_INPUT_POLL_ON;
2025 }
6d2010ae 2026
316670eb
A
2027 if (mode != inp->mode) {
2028 inp->mode = mode;
2029 *(&inp->mode_lasttime) = *(&now);
2030 poll_req++;
2031 }
2032 }
2033skip:
2034 dlil_input_stats_sync(ifp, inp);
6d2010ae 2035
316670eb 2036 lck_mtx_unlock(&inp->input_lck);
6d2010ae 2037
316670eb
A
2038 /*
2039 * If there's a mode change and interface is still attached,
2040 * perform a downcall to the driver for the new mode. Also
2041 * hold an IO refcnt on the interface to prevent it from
2042 * being detached (will be released below.)
2043 */
2044 if (poll_req != 0 && ifnet_is_attached(ifp, 1)) {
2045 struct ifnet_model_params p = { mode, { 0 } };
2046 errno_t err;
2047
2048 if (dlil_verbose) {
39236c6e 2049 printf("%s: polling is now %s, "
316670eb
A
2050 "pkts avg %d max %d limits [%d/%d], "
2051 "wreq avg %d limits [%d/%d], "
2052 "bytes avg %d limits [%d/%d]\n",
39236c6e 2053 if_name(ifp),
316670eb
A
2054 (mode == IFNET_MODEL_INPUT_POLL_ON) ?
2055 "ON" : "OFF", inp->rxpoll_pavg,
2056 inp->rxpoll_pmax, inp->rxpoll_plowat,
2057 inp->rxpoll_phiwat, inp->rxpoll_wavg,
2058 inp->rxpoll_wlowat, inp->rxpoll_whiwat,
2059 inp->rxpoll_bavg, inp->rxpoll_blowat,
2060 inp->rxpoll_bhiwat);
2061 }
2d21ac55 2062
316670eb
A
2063 if ((err = ((*ifp->if_input_ctl)(ifp,
2064 IFNET_CTL_SET_INPUT_MODEL, sizeof (p), &p))) != 0) {
39236c6e
A
2065 printf("%s: error setting polling mode "
2066 "to %s (%d)\n", if_name(ifp),
316670eb
A
2067 (mode == IFNET_MODEL_INPUT_POLL_ON) ?
2068 "ON" : "OFF", err);
2069 }
1c79356b 2070
316670eb
A
2071 switch (mode) {
2072 case IFNET_MODEL_INPUT_POLL_OFF:
2073 ifnet_set_poll_cycle(ifp, NULL);
2074 inp->rxpoll_offreq++;
2075 if (err != 0)
2076 inp->rxpoll_offerr++;
2077 break;
2d21ac55 2078
316670eb 2079 case IFNET_MODEL_INPUT_POLL_ON:
39236c6e 2080 net_nsectimer(&ival, &ts);
316670eb
A
2081 ifnet_set_poll_cycle(ifp, &ts);
2082 ifnet_poll(ifp);
2083 inp->rxpoll_onreq++;
2084 if (err != 0)
2085 inp->rxpoll_onerr++;
2086 break;
2087
2088 default:
2089 VERIFY(0);
2090 /* NOTREACHED */
2091 }
2092
2093 /* Release the IO refcnt */
2094 ifnet_decr_iorefcnt(ifp);
2095 }
2096
2097 /*
2098 * NOTE: we should consider adding thread-starvation
2099 * safeguards here, since we may be handed long
2100 * chains of packets.
2101 */
2102 if (m != NULL)
2103 dlil_input_packet_list_extended(NULL, m, m_cnt, mode);
2104 }
2105
2106 /* NOTREACHED */
2107 VERIFY(0); /* we should never get here */
2108}
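/*
 * Editor's note: DLIL_EWMA() above computes an exponentially weighted
 * moving average of the sampled packet/byte/wakeup counts.  A hedged,
 * illustrative fixed-point form (not necessarily the exact macro
 * definition), with the decay expressed as a power-of-2 shift:
 */
static inline u_int32_t
ewma_sketch(u_int32_t avg, u_int32_t sample, u_int32_t decay)
{
	if (avg == 0)
		return (sample);	/* seed the average with first sample */
	/* each sample contributes 1/2^decay; history decays by the same */
	return (avg - (avg >> decay) + (sample >> decay));
}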
2109
39236c6e
A
2110/*
2111 * Must be called on an attached ifnet (caller is expected to check.)
2112 * Caller may pass NULL for poll parameters to indicate "auto-tuning."
2113 */
2114errno_t
2115dlil_rxpoll_set_params(struct ifnet *ifp, struct ifnet_poll_params *p,
2116 boolean_t locked)
316670eb 2117{
39236c6e 2118 struct dlil_threading_info *inp;
316670eb
A
2119 u_int64_t sample_holdtime, inbw;
2120
39236c6e
A
2121 VERIFY(ifp != NULL);
2122 if (!(ifp->if_eflags & IFEF_RXPOLL) || (inp = ifp->if_inp) == NULL)
2123 return (ENXIO);
2124
2125 if (p != NULL) {
2126 if ((p->packets_lowat == 0 && p->packets_hiwat != 0) ||
2127 (p->packets_lowat != 0 && p->packets_hiwat == 0))
2128 return (EINVAL);
2129 if (p->packets_lowat != 0 && /* hiwat must be non-zero */
2130 p->packets_lowat >= p->packets_hiwat)
2131 return (EINVAL);
2132 if ((p->bytes_lowat == 0 && p->bytes_hiwat != 0) ||
2133 (p->bytes_lowat != 0 && p->bytes_hiwat == 0))
2134 return (EINVAL);
2135 if (p->bytes_lowat != 0 && /* hiwat must be non-zero */
2136 p->bytes_lowat >= p->bytes_hiwat)
2137 return (EINVAL);
2138 if (p->interval_time != 0 &&
2139 p->interval_time < IF_RXPOLL_INTERVALTIME_MIN)
2140 p->interval_time = IF_RXPOLL_INTERVALTIME_MIN;
2141 }
2142
2143 if (!locked)
2144 lck_mtx_lock(&inp->input_lck);
2145
2146 lck_mtx_assert(&inp->input_lck, LCK_MTX_ASSERT_OWNED);
2147
2148 /*
2149 * Normally, we'd reset the parameters to the auto-tuned values
2150 * if the input thread detects a change in link rate. If the
2151 * driver provides its own parameters right after a link rate
2152 * changes, but before the input thread gets to run, we want to
2153 * make sure to keep the driver's values. Clearing if_poll_update
2154 * will achieve that.
2155 */
2156 if (p != NULL && !locked && ifp->if_poll_update != 0)
2157 ifp->if_poll_update = 0;
316670eb 2158
39236c6e 2159 if ((inbw = ifnet_input_linkrate(ifp)) == 0 && p == NULL) {
316670eb
A
2160 sample_holdtime = 0; /* polling is disabled */
2161 inp->rxpoll_wlowat = inp->rxpoll_plowat =
2162 inp->rxpoll_blowat = 0;
2163 inp->rxpoll_whiwat = inp->rxpoll_phiwat =
2164 inp->rxpoll_bhiwat = (u_int32_t)-1;
39236c6e
A
2165 inp->rxpoll_plim = 0;
2166 inp->rxpoll_ival = IF_RXPOLL_INTERVALTIME_MIN;
316670eb 2167 } else {
39236c6e
A
2168 u_int32_t plowat, phiwat, blowat, bhiwat, plim;
2169 u_int64_t ival;
316670eb
A
2170 unsigned int n, i;
2171
39236c6e 2172 for (n = 0, i = 0; rxpoll_tbl[i].speed != 0; i++) {
316670eb
A
2173 if (inbw < rxpoll_tbl[i].speed)
2174 break;
2175 n = i;
2176 }
39236c6e
A
2177 /* auto-tune if caller didn't specify a value */
2178 plowat = ((p == NULL || p->packets_lowat == 0) ?
2179 rxpoll_tbl[n].plowat : p->packets_lowat);
2180 phiwat = ((p == NULL || p->packets_hiwat == 0) ?
2181 rxpoll_tbl[n].phiwat : p->packets_hiwat);
2182 blowat = ((p == NULL || p->bytes_lowat == 0) ?
2183 rxpoll_tbl[n].blowat : p->bytes_lowat);
2184 bhiwat = ((p == NULL || p->bytes_hiwat == 0) ?
2185 rxpoll_tbl[n].bhiwat : p->bytes_hiwat);
2186 plim = ((p == NULL || p->packets_limit == 0) ?
2187 if_rxpoll_max : p->packets_limit);
2188 ival = ((p == NULL || p->interval_time == 0) ?
2189 if_rxpoll_interval_time : p->interval_time);
2190
2191 VERIFY(plowat != 0 && phiwat != 0);
2192 VERIFY(blowat != 0 && bhiwat != 0);
2193 VERIFY(ival >= IF_RXPOLL_INTERVALTIME_MIN);
2194
316670eb
A
2195 sample_holdtime = if_rxpoll_sample_holdtime;
2196 inp->rxpoll_wlowat = if_rxpoll_wlowat;
2197 inp->rxpoll_whiwat = if_rxpoll_whiwat;
39236c6e
A
2198 inp->rxpoll_plowat = plowat;
2199 inp->rxpoll_phiwat = phiwat;
2200 inp->rxpoll_blowat = blowat;
2201 inp->rxpoll_bhiwat = bhiwat;
2202 inp->rxpoll_plim = plim;
2203 inp->rxpoll_ival = ival;
316670eb
A
2204 }
2205
2206 net_nsectimer(&if_rxpoll_mode_holdtime, &inp->mode_holdtime);
2207 net_nsectimer(&sample_holdtime, &inp->sample_holdtime);
2208
2209 if (dlil_verbose) {
39236c6e
A
2210 printf("%s: speed %llu bps, sample per %llu nsec, "
2211 "poll interval %llu nsec, pkts per poll %u, "
2212 "pkt limits [%u/%u], wreq limits [%u/%u], "
2213 "bytes limits [%u/%u]\n", if_name(ifp),
2214 inbw, sample_holdtime, inp->rxpoll_ival, inp->rxpoll_plim,
2215 inp->rxpoll_plowat, inp->rxpoll_phiwat, inp->rxpoll_wlowat,
2216 inp->rxpoll_whiwat, inp->rxpoll_blowat, inp->rxpoll_bhiwat);
316670eb 2217 }
39236c6e
A
2218
2219 if (!locked)
2220 lck_mtx_unlock(&inp->input_lck);
2221
2222 return (0);
2223}
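/*
 * Editor's note: a hedged sketch (not part of the original file) of a
 * driver supplying its own poll parameters, subject to the validation
 * above; this assumes the ifnet_set_poll_params() KPI wrapper, which
 * calls into dlil_rxpoll_set_params().  Zeroed fields are auto-tuned
 * from rxpoll_tbl[] based on the link rate.
 */
static errno_t
sketch_set_poll_params(ifnet_t ifp)
{
	struct ifnet_poll_params p;

	bzero(&p, sizeof (p));
	p.packets_lowat = 8;		/* lowat/hiwat: both set or both 0 */
	p.packets_hiwat = 64;		/* and lowat strictly below hiwat */
	p.interval_time = 1000 * 1000;	/* 1 msec; clipped up to the min */
	return (ifnet_set_poll_params(ifp, &p));
}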
2224
2225/*
2226 * Must be called on an attached ifnet (caller is expected to check.)
2227 */
2228errno_t
2229dlil_rxpoll_get_params(struct ifnet *ifp, struct ifnet_poll_params *p)
2230{
2231 struct dlil_threading_info *inp;
2232
2233 VERIFY(ifp != NULL && p != NULL);
2234 if (!(ifp->if_eflags & IFEF_RXPOLL) || (inp = ifp->if_inp) == NULL)
2235 return (ENXIO);
2236
2237 bzero(p, sizeof (*p));
2238
2239 lck_mtx_lock(&inp->input_lck);
2240 p->packets_limit = inp->rxpoll_plim;
2241 p->packets_lowat = inp->rxpoll_plowat;
2242 p->packets_hiwat = inp->rxpoll_phiwat;
2243 p->bytes_lowat = inp->rxpoll_blowat;
2244 p->bytes_hiwat = inp->rxpoll_bhiwat;
2245 p->interval_time = inp->rxpoll_ival;
2246 lck_mtx_unlock(&inp->input_lck);
2247
2248 return (0);
316670eb
A
2249}
2250
2251errno_t
2252ifnet_input(struct ifnet *ifp, struct mbuf *m_head,
2253 const struct ifnet_stat_increment_param *s)
2254{
2255 return (ifnet_input_common(ifp, m_head, NULL, s, FALSE, FALSE));
2256}
2257
2258errno_t
2259ifnet_input_extended(struct ifnet *ifp, struct mbuf *m_head,
2260 struct mbuf *m_tail, const struct ifnet_stat_increment_param *s)
2261{
2262 return (ifnet_input_common(ifp, m_head, m_tail, s, TRUE, FALSE));
2263}
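/*
 * Editor's note: a minimal driver-side sketch (not part of the original
 * file) of handing a received chain to the extended variant above; the
 * stat increment parameter is mandatory there and packets_in must match
 * the chain exactly (bytes_in may include link-layer headers).
 */
static void
sketch_rx_deliver(ifnet_t ifp, mbuf_t head, mbuf_t tail,
    u_int32_t cnt, u_int32_t bytes)
{
	struct ifnet_stat_increment_param s;

	bzero(&s, sizeof (s));
	s.packets_in = cnt;
	s.bytes_in = bytes;
	(void) ifnet_input_extended(ifp, head, tail, &s);
}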
2264
2265static errno_t
2266ifnet_input_common(struct ifnet *ifp, struct mbuf *m_head, struct mbuf *m_tail,
2267 const struct ifnet_stat_increment_param *s, boolean_t ext, boolean_t poll)
2268{
2269 struct thread *tp = current_thread();
2270 struct mbuf *last;
2271 struct dlil_threading_info *inp;
2272 u_int32_t m_cnt = 0, m_size = 0;
2273
39236c6e
A
2274 if ((m_head == NULL && !poll) || (s == NULL && ext)) {
2275 if (m_head != NULL)
2276 mbuf_freem_list(m_head);
2277 return (EINVAL);
2278 }
2279
2280 VERIFY(m_head != NULL || (s == NULL && m_tail == NULL && !ext && poll));
2281 VERIFY(m_tail == NULL || ext);
2282 VERIFY(s != NULL || !ext);
2283
316670eb
A
2284 /*
2285 * Drop the packet(s) if the parameters are invalid, or if the
2286 * interface is no longer attached; else hold an IO refcnt to
2287 * prevent it from being detached (will be released below.)
2288 */
39236c6e 2289 if (ifp == NULL || (ifp != lo_ifp && !ifnet_is_attached(ifp, 1))) {
316670eb
A
2290 if (m_head != NULL)
2291 mbuf_freem_list(m_head);
2292 return (EINVAL);
2293 }
2294
316670eb
A
2295 if (m_tail == NULL) {
2296 last = m_head;
39236c6e 2297 while (m_head != NULL) {
316670eb
A
2298#if IFNET_INPUT_SANITY_CHK
2299 if (dlil_input_sanity_check != 0)
2300 DLIL_INPUT_CHECK(last, ifp);
2301#endif /* IFNET_INPUT_SANITY_CHK */
2302 m_cnt++;
2303 m_size += m_length(last);
2304 if (mbuf_nextpkt(last) == NULL)
2305 break;
2306 last = mbuf_nextpkt(last);
2307 }
2308 m_tail = last;
2309 } else {
2310#if IFNET_INPUT_SANITY_CHK
2311 if (dlil_input_sanity_check != 0) {
2312 last = m_head;
2313 while (1) {
2314 DLIL_INPUT_CHECK(last, ifp);
2315 m_cnt++;
2316 m_size += m_length(last);
2317 if (mbuf_nextpkt(last) == NULL)
2318 break;
2319 last = mbuf_nextpkt(last);
2320 }
2321 } else {
2322 m_cnt = s->packets_in;
2323 m_size = s->bytes_in;
2324 last = m_tail;
2325 }
2326#else
2327 m_cnt = s->packets_in;
2328 m_size = s->bytes_in;
2329 last = m_tail;
2330#endif /* IFNET_INPUT_SANITY_CHK */
2331 }
2332
2333 if (last != m_tail) {
39236c6e
A
2334 panic_plain("%s: invalid input packet chain for %s, "
2335 "tail mbuf %p instead of %p\n", __func__, if_name(ifp),
2336 m_tail, last);
316670eb
A
2337 }
2338
2339 /*
2340 * Assert packet count only for the extended variant, for backwards
2341 * compatibility, since this came directly from the device driver.
2342 * Relax this assertion for input bytes, as the driver may have
2343 * included the link-layer headers in the computation; hence
2344 * m_size is just an approximation.
2345 */
2346 if (ext && s->packets_in != m_cnt) {
39236c6e
A
2347 panic_plain("%s: input packet count mismatch for %s, "
2348 "%d instead of %d\n", __func__, if_name(ifp),
2349 s->packets_in, m_cnt);
316670eb
A
2350 }
2351
2352 if ((inp = ifp->if_inp) == NULL)
2353 inp = dlil_main_input_thread;
2354
2355 /*
2356 * If there is a matching DLIL input thread associated with an
2357 * affinity set, associate this thread with the same set. We
2358 * will only do this once.
2359 */
2360 lck_mtx_lock_spin(&inp->input_lck);
2361 if (inp != dlil_main_input_thread && inp->net_affinity &&
2362 ((!poll && inp->wloop_thr == THREAD_NULL) ||
2363 (poll && inp->poll_thr == THREAD_NULL))) {
2364 u_int32_t tag = inp->tag;
2365
2366 if (poll) {
2367 VERIFY(inp->poll_thr == THREAD_NULL);
2368 inp->poll_thr = tp;
2369 } else {
2370 VERIFY(inp->wloop_thr == THREAD_NULL);
2371 inp->wloop_thr = tp;
2372 }
2373 lck_mtx_unlock(&inp->input_lck);
2374
2375 /* Associate the current thread with the new affinity tag */
2376 (void) dlil_affinity_set(tp, tag);
2377
2378 /*
2379 * Take a reference on the current thread; during detach,
2380 * we will need to refer to it in order to tear down its
2381 * affinity.
2382 */
2383 thread_reference(tp);
2384 lck_mtx_lock_spin(&inp->input_lck);
2385 }
2386
39236c6e
A
2387 VERIFY(m_head != NULL || (m_tail == NULL && m_cnt == 0));
2388
316670eb
A
2389 /*
2390 * Because of looped-back multicast we cannot stuff the ifp in
2391 * the rcvif of the packet header: loopback (lo0) packets use a
2392 * dedicated list so that we can later associate them with lo_ifp
2393 * on their way up the stack. Packets for other interfaces without
2394 * dedicated input threads go to the regular list.
2395 */
39236c6e
A
2396 if (m_head != NULL) {
2397 if (inp == dlil_main_input_thread && ifp == lo_ifp) {
2398 struct dlil_main_threading_info *inpm =
2399 (struct dlil_main_threading_info *)inp;
2400 _addq_multi(&inpm->lo_rcvq_pkts, m_head, m_tail,
2401 m_cnt, m_size);
2402 } else {
2403 _addq_multi(&inp->rcvq_pkts, m_head, m_tail,
2404 m_cnt, m_size);
2405 }
316670eb
A
2406 }
2407
2408#if IFNET_INPUT_SANITY_CHK
2409 if (dlil_input_sanity_check != 0) {
2410 u_int32_t count;
2411 struct mbuf *m0;
2412
2413 for (m0 = m_head, count = 0; m0; m0 = mbuf_nextpkt(m0))
2414 count++;
2415
2416 if (count != m_cnt) {
39236c6e
A
2417 panic_plain("%s: invalid packet count %d "
2418 "(expected %d)\n", if_name(ifp),
316670eb
A
2419 count, m_cnt);
2420 /* NOTREACHED */
2421 }
2422
2423 inp->input_mbuf_cnt += m_cnt;
2424 }
2425#endif /* IFNET_INPUT_SANITY_CHK */
2426
2427 if (s != NULL) {
2428 dlil_input_stats_add(s, inp, poll);
2429 /*
2430 * If we're using the main input thread, synchronize the
2431 * stats now since we have the interface context. All
2432 * other cases involving dedicated input threads will
2433 * have their stats synchronized there.
2434 */
2435 if (inp == dlil_main_input_thread)
2436 dlil_input_stats_sync(ifp, inp);
2437 }
2438
2439 inp->input_waiting |= DLIL_INPUT_WAITING;
2440 if (!(inp->input_waiting & DLIL_INPUT_RUNNING)) {
2441 inp->wtot++;
2442 wakeup_one((caddr_t)&inp->input_waiting);
2443 }
2444 lck_mtx_unlock(&inp->input_lck);
2445
2446 if (ifp != lo_ifp) {
2447 /* Release the IO refcnt */
2448 ifnet_decr_iorefcnt(ifp);
2449 }
2450
2451 return (0);
2452}
2453
39236c6e
A
2454static void
2455ifnet_start_common(struct ifnet *ifp, int resetfc)
316670eb 2456{
39236c6e
A
2457 if (!(ifp->if_eflags & IFEF_TXSTART))
2458 return;
316670eb 2459 /*
39236c6e
A
2460 * If the starter thread is inactive, signal it to do work,
2461 * unless the interface is being flow controlled from below,
2462 * e.g. a virtual interface being flow controlled by a real
2463 * network interface beneath it.
316670eb
A
2464 */
2465 lck_mtx_lock_spin(&ifp->if_start_lock);
39236c6e
A
2466 if (resetfc) {
2467 ifp->if_start_flags &= ~IFSF_FLOW_CONTROLLED;
2468 } else if (ifp->if_start_flags & IFSF_FLOW_CONTROLLED) {
2469 lck_mtx_unlock(&ifp->if_start_lock);
2470 return;
2471 }
316670eb
A
2472 ifp->if_start_req++;
2473 if (!ifp->if_start_active && ifp->if_start_thread != THREAD_NULL) {
2474 wakeup_one((caddr_t)&ifp->if_start_thread);
2475 }
2476 lck_mtx_unlock(&ifp->if_start_lock);
2477}
2478
39236c6e
A
2479void
2480ifnet_start(struct ifnet *ifp)
2481{
2482 ifnet_start_common(ifp, 0);
2483}
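/*
 * Editor's note (hedged): the resetfc != 0 path above appears to be the
 * resume side of interface flow control; the flow-advisory machinery
 * that allocates ifnet_fc_entry objects from ifnet_fc_zone is expected
 * to re-enable a suspended interface by coming through
 * ifnet_start_common() with resetfc set, clearing IFSF_FLOW_CONTROLLED
 * before waking the starter thread, whereas a plain ifnet_start()
 * backs off while the flag is set.
 */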
2484
316670eb
A
2485static void
2486ifnet_start_thread_fn(void *v, wait_result_t w)
2487{
2488#pragma unused(w)
2489 struct ifnet *ifp = v;
2490 char ifname[IFNAMSIZ + 1];
2491 struct timespec *ts = NULL;
2492 struct ifclassq *ifq = &ifp->if_snd;
2493
2494 /*
2495 * Treat the dedicated starter thread for lo0 as equivalent to
2496 * the driver workloop thread; if net_affinity is enabled for
2497 * the main input thread, associate this starter thread with it
2498 * by binding them with the same affinity tag. This is done
2499 * only once (as we only have one lo_ifp which never goes away.)
2500 */
2501 if (ifp == lo_ifp) {
2502 struct dlil_threading_info *inp = dlil_main_input_thread;
2503 struct thread *tp = current_thread();
2504
2505 lck_mtx_lock(&inp->input_lck);
2506 if (inp->net_affinity) {
2507 u_int32_t tag = inp->tag;
2508
2509 VERIFY(inp->wloop_thr == THREAD_NULL);
2510 VERIFY(inp->poll_thr == THREAD_NULL);
2511 inp->wloop_thr = tp;
2512 lck_mtx_unlock(&inp->input_lck);
2513
2514 /* Associate this thread with the affinity tag */
2515 (void) dlil_affinity_set(tp, tag);
2516 } else {
2517 lck_mtx_unlock(&inp->input_lck);
2518 }
2519 }
2520
39236c6e
A
2521 snprintf(ifname, sizeof (ifname), "%s_starter",
2522 if_name(ifp));
316670eb
A
2523
2524 lck_mtx_lock_spin(&ifp->if_start_lock);
2525
2526 for (;;) {
2527 (void) msleep(&ifp->if_start_thread, &ifp->if_start_lock,
2528 (PZERO - 1) | PSPIN, ifname, ts);
2529
2530 /* interface is detached? */
2531 if (ifp->if_start_thread == THREAD_NULL) {
2532 ifnet_set_start_cycle(ifp, NULL);
2533 lck_mtx_unlock(&ifp->if_start_lock);
2534 ifnet_purge(ifp);
2535
2536 if (dlil_verbose) {
39236c6e
A
2537 printf("%s: starter thread terminated\n",
2538 if_name(ifp));
316670eb
A
2539 }
2540
2541 /* for the extra refcnt from kernel_thread_start() */
2542 thread_deallocate(current_thread());
2543 /* this is the end */
2544 thread_terminate(current_thread());
2545 /* NOTREACHED */
2546 return;
2547 }
2548
2549 ifp->if_start_active = 1;
2550 for (;;) {
2551 u_int32_t req = ifp->if_start_req;
2552
2553 lck_mtx_unlock(&ifp->if_start_lock);
2554 /* invoke the driver's start routine */
2555 ((*ifp->if_start)(ifp));
2556 lck_mtx_lock_spin(&ifp->if_start_lock);
2557
2558 /* if there's no pending request, we're done */
2559 if (req == ifp->if_start_req)
2560 break;
2561 }
2562 ifp->if_start_req = 0;
2563 ifp->if_start_active = 0;
2564 /*
2565 * Wakeup N ns from now if rate-controlled by TBR, and if
2566 * there are still packets in the send queue which haven't
2567 * been dequeued so far; else sleep indefinitely (ts = NULL)
2568 * until ifnet_start() is called again.
2569 */
2570 ts = ((IFCQ_TBR_IS_ENABLED(ifq) && !IFCQ_IS_EMPTY(ifq)) ?
2571 &ifp->if_start_cycle : NULL);
2572
2573 if (ts != NULL && ts->tv_sec == 0 && ts->tv_nsec == 0)
2574 ts = NULL;
2575 }
2576
2577 /* NOTREACHED */
2578 lck_mtx_unlock(&ifp->if_start_lock);
2579 VERIFY(0); /* we should never get here */
2580}
2581
2582void
2583ifnet_set_start_cycle(struct ifnet *ifp, struct timespec *ts)
2584{
2585 if (ts == NULL)
2586 bzero(&ifp->if_start_cycle, sizeof (ifp->if_start_cycle));
2587 else
2588 *(&ifp->if_start_cycle) = *ts;
2589
2590 if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose)
39236c6e
A
2591 printf("%s: restart interval set to %lu nsec\n",
2592 if_name(ifp), ts->tv_nsec);
316670eb
A
2593}
2594
2595static void
2596ifnet_poll(struct ifnet *ifp)
2597{
2598 /*
2599 * If the poller thread is inactive, signal it to do work.
2600 */
2601 lck_mtx_lock_spin(&ifp->if_poll_lock);
2602 ifp->if_poll_req++;
2603 if (!ifp->if_poll_active && ifp->if_poll_thread != THREAD_NULL) {
2604 wakeup_one((caddr_t)&ifp->if_poll_thread);
2605 }
2606 lck_mtx_unlock(&ifp->if_poll_lock);
2607}
2608
2609static void
2610ifnet_poll_thread_fn(void *v, wait_result_t w)
2611{
2612#pragma unused(w)
2613 struct dlil_threading_info *inp;
2614 struct ifnet *ifp = v;
2615 char ifname[IFNAMSIZ + 1];
2616 struct timespec *ts = NULL;
2617 struct ifnet_stat_increment_param s;
2618
39236c6e
A
2619 snprintf(ifname, sizeof (ifname), "%s_poller",
2620 if_name(ifp));
316670eb
A
2621 bzero(&s, sizeof (s));
2622
2623 lck_mtx_lock_spin(&ifp->if_poll_lock);
2624
2625 inp = ifp->if_inp;
2626 VERIFY(inp != NULL);
2627
2628 for (;;) {
2629 if (ifp->if_poll_thread != THREAD_NULL) {
2630 (void) msleep(&ifp->if_poll_thread, &ifp->if_poll_lock,
2631 (PZERO - 1) | PSPIN, ifname, ts);
2632 }
2633
2634 /* interface is detached (maybe while asleep)? */
2635 if (ifp->if_poll_thread == THREAD_NULL) {
2636 ifnet_set_poll_cycle(ifp, NULL);
2637 lck_mtx_unlock(&ifp->if_poll_lock);
2638
2639 if (dlil_verbose) {
39236c6e
A
2640 printf("%s: poller thread terminated\n",
2641 if_name(ifp));
316670eb
A
2642 }
2643
2644 /* for the extra refcnt from kernel_thread_start() */
2645 thread_deallocate(current_thread());
2646 /* this is the end */
2647 thread_terminate(current_thread());
2648 /* NOTREACHED */
2649 return;
2650 }
2651
2652 ifp->if_poll_active = 1;
2653 for (;;) {
2654 struct mbuf *m_head, *m_tail;
2655 u_int32_t m_lim, m_cnt, m_totlen;
2656 u_int16_t req = ifp->if_poll_req;
2657
2658 lck_mtx_unlock(&ifp->if_poll_lock);
2659
2660 /*
2661 * If no longer attached, there's nothing to do;
2662 * else hold an IO refcnt to prevent the interface
2663 * from being detached (will be released below.)
2664 */
db609669
A
2665 if (!ifnet_is_attached(ifp, 1)) {
2666 lck_mtx_lock_spin(&ifp->if_poll_lock);
316670eb 2667 break;
db609669 2668 }
316670eb 2669
39236c6e 2670 m_lim = (inp->rxpoll_plim != 0) ? inp->rxpoll_plim :
316670eb
A
2671 MAX((qlimit(&inp->rcvq_pkts)),
2672 (inp->rxpoll_phiwat << 2));
2673
2674 if (dlil_verbose > 1) {
39236c6e 2675 printf("%s: polling up to %d pkts, "
316670eb
A
2676 "pkts avg %d max %d, wreq avg %d, "
2677 "bytes avg %d\n",
39236c6e 2678 if_name(ifp), m_lim,
316670eb
A
2679 inp->rxpoll_pavg, inp->rxpoll_pmax,
2680 inp->rxpoll_wavg, inp->rxpoll_bavg);
2681 }
2682
2683 /* invoke the driver's input poll routine */
2684 ((*ifp->if_input_poll)(ifp, 0, m_lim, &m_head, &m_tail,
2685 &m_cnt, &m_totlen));
2686
2687 if (m_head != NULL) {
2688 VERIFY(m_tail != NULL && m_cnt > 0);
2689
2690 if (dlil_verbose > 1) {
39236c6e 2691 printf("%s: polled %d pkts, "
316670eb
A
2692 "pkts avg %d max %d, wreq avg %d, "
2693 "bytes avg %d\n",
39236c6e 2694 if_name(ifp), m_cnt,
316670eb
A
2695 inp->rxpoll_pavg, inp->rxpoll_pmax,
2696 inp->rxpoll_wavg, inp->rxpoll_bavg);
2697 }
2698
2699 /* stats are required for extended variant */
2700 s.packets_in = m_cnt;
2701 s.bytes_in = m_totlen;
2702
2703 (void) ifnet_input_common(ifp, m_head, m_tail,
2704 &s, TRUE, TRUE);
39236c6e
A
2705 } else {
2706 if (dlil_verbose > 1) {
2707 printf("%s: no packets, "
2708 "pkts avg %d max %d, wreq avg %d, "
2709 "bytes avg %d\n",
2710 if_name(ifp), inp->rxpoll_pavg,
2711 inp->rxpoll_pmax, inp->rxpoll_wavg,
2712 inp->rxpoll_bavg);
2713 }
2714
2715 (void) ifnet_input_common(ifp, NULL, NULL,
2716 NULL, FALSE, TRUE);
316670eb
A
2717 }
2718
2719 /* Release the io ref count */
2720 ifnet_decr_iorefcnt(ifp);
2721
2722 lck_mtx_lock_spin(&ifp->if_poll_lock);
2723
2724 /* if there's no pending request, we're done */
2725 if (req == ifp->if_poll_req)
2726 break;
2727 }
2728 ifp->if_poll_req = 0;
2729 ifp->if_poll_active = 0;
2730
2731 /*
2732 * Wakeup N ns from now, else sleep indefinitely (ts = NULL)
2733 * until ifnet_poll() is called again.
2734 */
2735 ts = &ifp->if_poll_cycle;
2736 if (ts->tv_sec == 0 && ts->tv_nsec == 0)
2737 ts = NULL;
2738 }
2739
2740 /* NOTREACHED */
2741 lck_mtx_unlock(&ifp->if_poll_lock);
2742 VERIFY(0); /* we should never get here */
2743}
2744
2745void
2746ifnet_set_poll_cycle(struct ifnet *ifp, struct timespec *ts)
2747{
2748 if (ts == NULL)
2749 bzero(&ifp->if_poll_cycle, sizeof (ifp->if_poll_cycle));
2750 else
2751 *(&ifp->if_poll_cycle) = *ts;
2752
2753 if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose)
39236c6e
A
2754 printf("%s: poll interval set to %lu nsec\n",
2755 if_name(ifp), ts->tv_nsec);
316670eb
A
2756}
2757
2758void
2759ifnet_purge(struct ifnet *ifp)
2760{
2761 if (ifp != NULL && (ifp->if_eflags & IFEF_TXSTART))
2762 if_qflush(ifp, 0);
2763}
2764
2765void
2766ifnet_update_sndq(struct ifclassq *ifq, cqev_t ev)
2767{
2768 IFCQ_LOCK_ASSERT_HELD(ifq);
2769
2770 if (!(IFCQ_IS_READY(ifq)))
2771 return;
2772
2773 if (IFCQ_TBR_IS_ENABLED(ifq)) {
2774 struct tb_profile tb = { ifq->ifcq_tbr.tbr_rate_raw,
2775 ifq->ifcq_tbr.tbr_percent, 0 };
2776 (void) ifclassq_tbr_set(ifq, &tb, FALSE);
2777 }
2778
2779 ifclassq_update(ifq, ev);
2780}
2781
2782void
2783ifnet_update_rcv(struct ifnet *ifp, cqev_t ev)
2784{
2785 switch (ev) {
39236c6e 2786 case CLASSQ_EV_LINK_BANDWIDTH:
316670eb
A
2787 if (net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL))
2788 ifp->if_poll_update++;
2789 break;
2790
2791 default:
2792 break;
2793 }
2794}
2795
2796errno_t
2797ifnet_set_output_sched_model(struct ifnet *ifp, u_int32_t model)
2798{
2799 struct ifclassq *ifq;
2800 u_int32_t omodel;
2801 errno_t err;
2802
2803 if (ifp == NULL || (model != IFNET_SCHED_MODEL_DRIVER_MANAGED &&
2804 model != IFNET_SCHED_MODEL_NORMAL))
2805 return (EINVAL);
2806 else if (!(ifp->if_eflags & IFEF_TXSTART))
2807 return (ENXIO);
2808
2809 ifq = &ifp->if_snd;
2810 IFCQ_LOCK(ifq);
2811 omodel = ifp->if_output_sched_model;
2812 ifp->if_output_sched_model = model;
2813 if ((err = ifclassq_pktsched_setup(ifq)) != 0)
2814 ifp->if_output_sched_model = omodel;
2815 IFCQ_UNLOCK(ifq);
2816
2817 return (err);
2818}
2819
2820errno_t
2821ifnet_set_sndq_maxlen(struct ifnet *ifp, u_int32_t maxqlen)
2822{
2823 if (ifp == NULL)
2824 return (EINVAL);
2825 else if (!(ifp->if_eflags & IFEF_TXSTART))
2826 return (ENXIO);
2827
2828 ifclassq_set_maxlen(&ifp->if_snd, maxqlen);
2829
2830 return (0);
2831}
2832
2833errno_t
2834ifnet_get_sndq_maxlen(struct ifnet *ifp, u_int32_t *maxqlen)
2835{
2836 if (ifp == NULL || maxqlen == NULL)
2837 return (EINVAL);
2838 else if (!(ifp->if_eflags & IFEF_TXSTART))
2839 return (ENXIO);
2840
2841 *maxqlen = ifclassq_get_maxlen(&ifp->if_snd);
2842
2843 return (0);
2844}
2845
2846errno_t
39236c6e 2847ifnet_get_sndq_len(struct ifnet *ifp, u_int32_t *pkts)
316670eb 2848{
39236c6e
A
2849 errno_t err;
2850
2851 if (ifp == NULL || pkts == NULL)
2852 err = EINVAL;
316670eb 2853 else if (!(ifp->if_eflags & IFEF_TXSTART))
39236c6e
A
2854 err = ENXIO;
2855 else
2856 err = ifclassq_get_len(&ifp->if_snd, MBUF_SC_UNSPEC,
2857 pkts, NULL);
316670eb 2858
39236c6e
A
2859 return (err);
2860}
316670eb 2861
39236c6e
A
2862errno_t
2863ifnet_get_service_class_sndq_len(struct ifnet *ifp, mbuf_svc_class_t sc,
2864 u_int32_t *pkts, u_int32_t *bytes)
2865{
2866 errno_t err;
2867
2868 if (ifp == NULL || !MBUF_VALID_SC(sc) ||
2869 (pkts == NULL && bytes == NULL))
2870 err = EINVAL;
2871 else if (!(ifp->if_eflags & IFEF_TXSTART))
2872 err = ENXIO;
2873 else
2874 err = ifclassq_get_len(&ifp->if_snd, sc, pkts, bytes);
2875
2876 return (err);
316670eb
A
2877}
2878
2879errno_t
2880ifnet_set_rcvq_maxlen(struct ifnet *ifp, u_int32_t maxqlen)
2881{
2882 struct dlil_threading_info *inp;
2883
2884 if (ifp == NULL)
2885 return (EINVAL);
2886 else if (!(ifp->if_eflags & IFEF_RXPOLL) || ifp->if_inp == NULL)
2887 return (ENXIO);
2888
2889 if (maxqlen == 0)
2890 maxqlen = if_rcvq_maxlen;
2891 else if (maxqlen < IF_RCVQ_MINLEN)
2892 maxqlen = IF_RCVQ_MINLEN;
2893
2894 inp = ifp->if_inp;
2895 lck_mtx_lock(&inp->input_lck);
2896 qlimit(&inp->rcvq_pkts) = maxqlen;
2897 lck_mtx_unlock(&inp->input_lck);
2898
2899 return (0);
2900}
2901
2902errno_t
2903ifnet_get_rcvq_maxlen(struct ifnet *ifp, u_int32_t *maxqlen)
2904{
2905 struct dlil_threading_info *inp;
2906
2907 if (ifp == NULL || maxqlen == NULL)
2908 return (EINVAL);
2909 else if (!(ifp->if_eflags & IFEF_RXPOLL) || ifp->if_inp == NULL)
2910 return (ENXIO);
2911
2912 inp = ifp->if_inp;
2913 lck_mtx_lock(&inp->input_lck);
2914 *maxqlen = qlimit(&inp->rcvq_pkts);
2915 lck_mtx_unlock(&inp->input_lck);
2916 return (0);
2917}
2918
2919errno_t
2920ifnet_enqueue(struct ifnet *ifp, struct mbuf *m)
2921{
2922 int error;
2923
2924 if (ifp == NULL || m == NULL || !(m->m_flags & M_PKTHDR) ||
2925 m->m_nextpkt != NULL) {
2926 if (m != NULL)
2927 m_freem_list(m);
2928 return (EINVAL);
2929 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
2930 !(ifp->if_refflags & IFRF_ATTACHED)) {
2931 /* flag tested without lock for performance */
2932 m_freem(m);
2933 return (ENXIO);
2934 } else if (!(ifp->if_flags & IFF_UP)) {
2935 m_freem(m);
2936 return (ENETDOWN);
316670eb
A
2937 }
2938
2939 /* enqueue the packet */
2940 error = ifclassq_enqueue(&ifp->if_snd, m);
2941
2942 /*
2943 * Tell the driver to start dequeueing; do this even when the queue
2944 * for the packet is suspended (EQSUSPENDED), as the driver could still
2945 * be dequeueing from other unsuspended queues.
2946 */
2947 if (error == 0 || error == EQFULL || error == EQSUSPENDED)
2948 ifnet_start(ifp);
2949
2950 return (error);
2951}
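/*
 * Editor's note: a hedged caller-side sketch (not part of the original
 * file).  EQFULL and EQSUSPENDED indicate the packet was handled but a
 * flow advisory applies, which is why output paths typically translate
 * them to success after raising the advisory.
 */
static errno_t
sketch_tx_enqueue(ifnet_t ifp, mbuf_t m)
{
	errno_t err = ifnet_enqueue(ifp, m);	/* m is consumed either way */

	if (err == EQFULL || err == EQSUSPENDED)
		err = 0;	/* a real caller would raise FADV_* here */
	return (err);
}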
2952
2953errno_t
2954ifnet_dequeue(struct ifnet *ifp, struct mbuf **mp)
2955{
2956 if (ifp == NULL || mp == NULL)
2957 return (EINVAL);
2958 else if (!(ifp->if_eflags & IFEF_TXSTART) ||
2959 (ifp->if_output_sched_model != IFNET_SCHED_MODEL_NORMAL))
2960 return (ENXIO);
2961
2962 return (ifclassq_dequeue(&ifp->if_snd, 1, mp, NULL, NULL, NULL));
2963}
2964
2965errno_t
2966ifnet_dequeue_service_class(struct ifnet *ifp, mbuf_svc_class_t sc,
2967 struct mbuf **mp)
2968{
2969 if (ifp == NULL || mp == NULL || !MBUF_VALID_SC(sc))
2970 return (EINVAL);
2971 else if (!(ifp->if_eflags & IFEF_TXSTART) ||
2972 (ifp->if_output_sched_model != IFNET_SCHED_MODEL_DRIVER_MANAGED))
2973 return (ENXIO);
2974
2975 return (ifclassq_dequeue_sc(&ifp->if_snd, sc, 1, mp, NULL, NULL, NULL));
2976}
2977
2978errno_t
2979ifnet_dequeue_multi(struct ifnet *ifp, u_int32_t limit, struct mbuf **head,
2980 struct mbuf **tail, u_int32_t *cnt, u_int32_t *len)
2981{
2982 if (ifp == NULL || head == NULL || limit < 1)
2983 return (EINVAL);
2984 else if (!(ifp->if_eflags & IFEF_TXSTART) ||
2985 (ifp->if_output_sched_model != IFNET_SCHED_MODEL_NORMAL))
2986 return (ENXIO);
2987
2988 return (ifclassq_dequeue(&ifp->if_snd, limit, head, tail, cnt, len));
2989}
2990
2991errno_t
2992ifnet_dequeue_service_class_multi(struct ifnet *ifp, mbuf_svc_class_t sc,
2993 u_int32_t limit, struct mbuf **head, struct mbuf **tail, u_int32_t *cnt,
2994 u_int32_t *len)
2995{
2996
2997 if (ifp == NULL || head == NULL || limit < 1 || !MBUF_VALID_SC(sc))
2998 return (EINVAL);
2999 else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3000 (ifp->if_output_sched_model != IFNET_SCHED_MODEL_DRIVER_MANAGED))
3001 return (ENXIO);
3002
3003 return (ifclassq_dequeue_sc(&ifp->if_snd, sc, limit, head,
3004 tail, cnt, len));
3005}
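/*
 * Editor's note: a hedged sketch (not part of the original file) of a
 * driver start routine draining if_snd in batches via
 * ifnet_dequeue_multi(), which per the checks above is only valid for
 * IFNET_SCHED_MODEL_NORMAL.  Names and the batch size are illustrative.
 */
static void
sketch_if_start(ifnet_t ifp)
{
	struct mbuf *head, *tail;
	u_int32_t cnt, len;

	while (ifnet_dequeue_multi(ifp, 32, &head, &tail, &cnt, &len) == 0) {
		/* hand the chain of cnt packets / len bytes to hardware */
		m_freem_list(head);	/* placeholder for actual transmit */
	}
}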
3006
39236c6e
A
3007errno_t
3008ifnet_framer_stub(struct ifnet *ifp, struct mbuf **m,
3009 const struct sockaddr *dest, const char *dest_linkaddr,
3010 const char *frame_type, u_int32_t *pre, u_int32_t *post)
3011{
3012 if (pre != NULL)
3013 *pre = 0;
3014 if (post != NULL)
3015 *post = 0;
3016
3017 return (ifp->if_framer_legacy(ifp, m, dest, dest_linkaddr, frame_type));
3018}
3019
316670eb
A
3020static int
3021dlil_interface_filters_input(struct ifnet *ifp, struct mbuf **m_p,
3022 char **frame_header_p, protocol_family_t protocol_family)
3023{
3024 struct ifnet_filter *filter;
3025
3026 /*
3027 * Pass the inbound packet to the interface filters
6d2010ae
A
3028 */
3029 lck_mtx_lock_spin(&ifp->if_flt_lock);
3030 /* prevent filter list from changing in case we drop the lock */
3031 if_flt_monitor_busy(ifp);
2d21ac55
A
3032 TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
3033 int result;
3034
6d2010ae
A
3035 if (!filter->filt_skip && filter->filt_input != NULL &&
3036 (filter->filt_protocol == 0 ||
3037 filter->filt_protocol == protocol_family)) {
3038 lck_mtx_unlock(&ifp->if_flt_lock);
3039
2d21ac55 3040 result = (*filter->filt_input)(filter->filt_cookie,
6d2010ae
A
3041 ifp, protocol_family, m_p, frame_header_p);
3042
3043 lck_mtx_lock_spin(&ifp->if_flt_lock);
2d21ac55 3044 if (result != 0) {
6d2010ae
A
3045 /* we're done with the filter list */
3046 if_flt_monitor_unbusy(ifp);
3047 lck_mtx_unlock(&ifp->if_flt_lock);
2d21ac55
A
3048 return (result);
3049 }
3050 }
3051 }
6d2010ae
A
3052 /* we're done with the filter list */
3053 if_flt_monitor_unbusy(ifp);
3054 lck_mtx_unlock(&ifp->if_flt_lock);
b7266188
A
3055
3056 /*
6d2010ae 3057 * Strip away M_PROTO1 bit prior to sending packet up the stack as
b7266188
A
3058 * it is meant to be local to a subsystem -- if_bridge for M_PROTO1
3059 */
3060 if (*m_p != NULL)
3061 (*m_p)->m_flags &= ~M_PROTO1;
3062
2d21ac55 3063 return (0);
1c79356b
A
3064}
3065
6d2010ae
A
3066static int
3067dlil_interface_filters_output(struct ifnet *ifp, struct mbuf **m_p,
3068 protocol_family_t protocol_family)
3069{
3070 struct ifnet_filter *filter;
3071
3072 /*
3073 * Pass the outbound packet to the interface filters
3074 */
3075 lck_mtx_lock_spin(&ifp->if_flt_lock);
3076 /* prevent filter list from changing in case we drop the lock */
3077 if_flt_monitor_busy(ifp);
3078 TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
3079 int result;
3080
3081 if (!filter->filt_skip && filter->filt_output != NULL &&
3082 (filter->filt_protocol == 0 ||
3083 filter->filt_protocol == protocol_family)) {
3084 lck_mtx_unlock(&ifp->if_flt_lock);
3085
3086 result = filter->filt_output(filter->filt_cookie, ifp,
3087 protocol_family, m_p);
3088
3089 lck_mtx_lock_spin(&ifp->if_flt_lock);
3090 if (result != 0) {
3091 /* we're done with the filter list */
3092 if_flt_monitor_unbusy(ifp);
3093 lck_mtx_unlock(&ifp->if_flt_lock);
3094 return (result);
3095 }
3096 }
3097 }
3098 /* we're done with the filter list */
3099 if_flt_monitor_unbusy(ifp);
3100 lck_mtx_unlock(&ifp->if_flt_lock);
3101
3102 return (0);
3103}
3104
2d21ac55
A
3105static void
3106dlil_ifproto_input(struct if_proto * ifproto, mbuf_t m)
1c79356b 3107{
2d21ac55 3108 int error;
1c79356b 3109
2d21ac55
A
3110 if (ifproto->proto_kpi == kProtoKPI_v1) {
3111 /* Version 1 protocols get one packet at a time */
3112 while (m != NULL) {
3113 char * frame_header;
3114 mbuf_t next_packet;
6d2010ae 3115
2d21ac55
A
3116 next_packet = m->m_nextpkt;
3117 m->m_nextpkt = NULL;
39236c6e
A
3118 frame_header = m->m_pkthdr.pkt_hdr;
3119 m->m_pkthdr.pkt_hdr = NULL;
6d2010ae
A
3120 error = (*ifproto->kpi.v1.input)(ifproto->ifp,
3121 ifproto->protocol_family, m, frame_header);
2d21ac55
A
3122 if (error != 0 && error != EJUSTRETURN)
3123 m_freem(m);
3124 m = next_packet;
3125 }
6d2010ae 3126 } else if (ifproto->proto_kpi == kProtoKPI_v2) {
2d21ac55
A
3127 /* Version 2 protocols support packet lists */
3128 error = (*ifproto->kpi.v2.input)(ifproto->ifp,
6d2010ae 3129 ifproto->protocol_family, m);
2d21ac55
A
3130 if (error != 0 && error != EJUSTRETURN)
3131 m_freem_list(m);
91447636 3132 }
2d21ac55
A
3133 return;
3134}
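/*
 * Editor's note: a hedged sketch (not part of the original file) of a v2
 * protocol input handler as dispatched above.  v2 handlers receive the
 * whole packet list in one call and own it unless they return an error
 * other than EJUSTRETURN, in which case the caller frees the list.
 */
static errno_t
sketch_proto_input_v2(ifnet_t ifp, protocol_family_t protocol, mbuf_t list)
{
#pragma unused(ifp, protocol)
	mbuf_t m, next;

	for (m = list; m != NULL; m = next) {
		next = m->m_nextpkt;
		m->m_nextpkt = NULL;
		/* a real handler would hand m to its protocol here */
		m_freem(m);
	}
	return (0);
}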
1c79356b 3135
316670eb
A
3136static void
3137dlil_input_stats_add(const struct ifnet_stat_increment_param *s,
3138 struct dlil_threading_info *inp, boolean_t poll)
3139{
3140 struct ifnet_stat_increment_param *d = &inp->stats;
3141
3142 if (s->packets_in != 0)
3143 d->packets_in += s->packets_in;
3144 if (s->bytes_in != 0)
3145 d->bytes_in += s->bytes_in;
3146 if (s->errors_in != 0)
3147 d->errors_in += s->errors_in;
3148
3149 if (s->packets_out != 0)
3150 d->packets_out += s->packets_out;
3151 if (s->bytes_out != 0)
3152 d->bytes_out += s->bytes_out;
3153 if (s->errors_out != 0)
3154 d->errors_out += s->errors_out;
3155
3156 if (s->collisions != 0)
3157 d->collisions += s->collisions;
3158 if (s->dropped != 0)
3159 d->dropped += s->dropped;
3160
3161 if (poll)
3162 PKTCNTR_ADD(&inp->tstats, s->packets_in, s->bytes_in);
3163}
3164
3165static void
3166dlil_input_stats_sync(struct ifnet *ifp, struct dlil_threading_info *inp)
3167{
3168 struct ifnet_stat_increment_param *s = &inp->stats;
3169
3170 /*
3171 * Use of atomic operations is unavoidable here because
3172 * these stats may also be incremented elsewhere via KPIs.
3173 */
3174 if (s->packets_in != 0) {
3175 atomic_add_64(&ifp->if_data.ifi_ipackets, s->packets_in);
3176 s->packets_in = 0;
3177 }
3178 if (s->bytes_in != 0) {
3179 atomic_add_64(&ifp->if_data.ifi_ibytes, s->bytes_in);
3180 s->bytes_in = 0;
3181 }
3182 if (s->errors_in != 0) {
3183 atomic_add_64(&ifp->if_data.ifi_ierrors, s->errors_in);
3184 s->errors_in = 0;
3185 }
3186
3187 if (s->packets_out != 0) {
3188 atomic_add_64(&ifp->if_data.ifi_opackets, s->packets_out);
3189 s->packets_out = 0;
3190 }
3191 if (s->bytes_out != 0) {
3192 atomic_add_64(&ifp->if_data.ifi_obytes, s->bytes_out);
3193 s->bytes_out = 0;
3194 }
3195 if (s->errors_out != 0) {
3196 atomic_add_64(&ifp->if_data.ifi_oerrors, s->errors_out);
3197 s->errors_out = 0;
3198 }
3199
3200 if (s->collisions != 0) {
3201 atomic_add_64(&ifp->if_data.ifi_collisions, s->collisions);
3202 s->collisions = 0;
3203 }
3204 if (s->dropped != 0) {
3205 atomic_add_64(&ifp->if_data.ifi_iqdrops, s->dropped);
3206 s->dropped = 0;
3207 }
39236c6e
A
3208 /*
3209 * If we went over the threshold, notify NetworkStatistics.
3210 */
3211 if (ifp->if_data_threshold &&
3212 (ifp->if_ibytes + ifp->if_obytes) - ifp->if_dt_bytes >
3213 ifp->if_data_threshold) {
3214 ifp->if_dt_bytes = ifp->if_ibytes + ifp->if_obytes;
3215 nstat_ifnet_threshold_reached(ifp->if_index);
3216 }
316670eb
A
3217 /*
3218 * No need for atomic operations as they are modified here
3219 * only from within the DLIL input thread context.
3220 */
3221 if (inp->tstats.packets != 0) {
3222 inp->pstats.ifi_poll_packets += inp->tstats.packets;
3223 inp->tstats.packets = 0;
3224 }
3225 if (inp->tstats.bytes != 0) {
3226 inp->pstats.ifi_poll_bytes += inp->tstats.bytes;
3227 inp->tstats.bytes = 0;
3228 }
3229}
3230
3231__private_extern__ void
3232dlil_input_packet_list(struct ifnet *ifp, struct mbuf *m)
3233{
3234 return (dlil_input_packet_list_common(ifp, m, 0,
3235 IFNET_MODEL_INPUT_POLL_OFF, FALSE));
3236}
3237
2d21ac55 3238__private_extern__ void
316670eb
A
3239dlil_input_packet_list_extended(struct ifnet *ifp, struct mbuf *m,
3240 u_int32_t cnt, ifnet_model_t mode)
3241{
3242 return (dlil_input_packet_list_common(ifp, m, cnt, mode, TRUE));
3243}

static void
dlil_input_packet_list_common(struct ifnet *ifp_param, struct mbuf *m,
    u_int32_t cnt, ifnet_model_t mode, boolean_t ext)
{
	int error = 0;
	protocol_family_t protocol_family;
	mbuf_t next_packet;
	ifnet_t ifp = ifp_param;
	char *frame_header;
	struct if_proto *last_ifproto = NULL;
	mbuf_t pkt_first = NULL;
	mbuf_t *pkt_next = NULL;
	u_int32_t poll_thresh = 0, poll_ival = 0;

	KERNEL_DEBUG(DBG_FNC_DLIL_INPUT | DBG_FUNC_START, 0, 0, 0, 0, 0);

	if (ext && mode == IFNET_MODEL_INPUT_POLL_ON && cnt > 1 &&
	    (poll_ival = if_rxpoll_interval_pkts) > 0)
		poll_thresh = cnt;

	while (m != NULL) {
		struct if_proto *ifproto = NULL;
		int iorefcnt = 0;
		uint32_t pktf_mask;	/* pkt flags to preserve */

		if (ifp_param == NULL)
			ifp = m->m_pkthdr.rcvif;

		if ((ifp->if_eflags & IFEF_RXPOLL) && poll_thresh != 0 &&
		    poll_ival > 0 && (--poll_thresh % poll_ival) == 0)
			ifnet_poll(ifp);

		/* Check if this mbuf looks valid */
		MBUF_INPUT_CHECK(m, ifp);

		next_packet = m->m_nextpkt;
		m->m_nextpkt = NULL;
		frame_header = m->m_pkthdr.pkt_hdr;
		m->m_pkthdr.pkt_hdr = NULL;

		/*
		 * Get an IO reference count if the interface is not
		 * loopback (lo0) and it is attached; lo0 never goes
		 * away, so optimize for that.
		 */
		if (ifp != lo_ifp) {
			if (!ifnet_is_attached(ifp, 1)) {
				m_freem(m);
				goto next;
			}
			iorefcnt = 1;
			pktf_mask = 0;
		} else {
			/*
			 * If this arrived on lo0, preserve interface addr
			 * info to allow for connectivity between loopback
			 * and local interface addresses.
			 */
			pktf_mask = (PKTF_LOOP|PKTF_IFAINFO);
		}

		/* make sure packet comes in clean */
		m_classifier_init(m, pktf_mask);

		ifp_inc_traffic_class_in(ifp, m);

		/* find which protocol family this packet is for */
		ifnet_lock_shared(ifp);
		error = (*ifp->if_demux)(ifp, m, frame_header,
		    &protocol_family);
		ifnet_lock_done(ifp);
		if (error != 0) {
			if (error == EJUSTRETURN)
				goto next;
			protocol_family = 0;
		}

		if (hwcksum_dbg != 0 && !(ifp->if_flags & IFF_LOOPBACK) &&
		    !(m->m_pkthdr.pkt_flags & PKTF_LOOP))
			dlil_input_cksum_dbg(ifp, m, frame_header,
			    protocol_family);

		/*
		 * For partial checksum offload, we expect the driver to
		 * set the start offset indicating the start of the span
		 * that is covered by the hardware-computed checksum;
		 * adjust this start offset accordingly because the data
		 * pointer has been advanced beyond the link-layer header.
		 *
		 * Don't adjust if the interface is a bridge member, as
		 * the adjustment will occur from the context of the
		 * bridge interface during input.
		 */
		if (ifp->if_bridge == NULL && (m->m_pkthdr.csum_flags &
		    (CSUM_DATA_VALID | CSUM_PARTIAL)) ==
		    (CSUM_DATA_VALID | CSUM_PARTIAL)) {
			int adj;

			if (frame_header == NULL ||
			    frame_header < (char *)mbuf_datastart(m) ||
			    frame_header > (char *)m->m_data ||
			    (adj = (m->m_data - frame_header)) >
			    m->m_pkthdr.csum_rx_start) {
				m->m_pkthdr.csum_data = 0;
				m->m_pkthdr.csum_flags &= ~CSUM_DATA_VALID;
				hwcksum_in_invalidated++;
			} else {
				m->m_pkthdr.csum_rx_start -= adj;
			}
		}

		pktap_input(ifp, protocol_family, m, frame_header);

		if (m->m_flags & (M_BCAST|M_MCAST))
			atomic_add_64(&ifp->if_imcasts, 1);

		/* run interface filters, exclude VLAN packets PR-3586856 */
		if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) == 0) {
			error = dlil_interface_filters_input(ifp, &m,
			    &frame_header, protocol_family);
			if (error != 0) {
				if (error != EJUSTRETURN)
					m_freem(m);
				goto next;
			}
		}
		if (error != 0 || ((m->m_flags & M_PROMISC) != 0)) {
			m_freem(m);
			goto next;
		}

		/* Lookup the protocol attachment to this interface */
		if (protocol_family == 0) {
			ifproto = NULL;
		} else if (last_ifproto != NULL && last_ifproto->ifp == ifp &&
		    (last_ifproto->protocol_family == protocol_family)) {
			VERIFY(ifproto == NULL);
			ifproto = last_ifproto;
			if_proto_ref(last_ifproto);
		} else {
			VERIFY(ifproto == NULL);
			ifnet_lock_shared(ifp);
			/* callee holds a proto refcnt upon success */
			ifproto = find_attached_proto(ifp, protocol_family);
			ifnet_lock_done(ifp);
		}
		if (ifproto == NULL) {
			/* no protocol for this packet, discard */
			m_freem(m);
			goto next;
		}
		if (ifproto != last_ifproto) {
			if (last_ifproto != NULL) {
				/* pass up the list for the previous protocol */
				dlil_ifproto_input(last_ifproto, pkt_first);
				pkt_first = NULL;
				if_proto_free(last_ifproto);
			}
			last_ifproto = ifproto;
			if_proto_ref(ifproto);
		}
		/* extend the list */
		m->m_pkthdr.pkt_hdr = frame_header;
		if (pkt_first == NULL) {
			pkt_first = m;
		} else {
			*pkt_next = m;
		}
		pkt_next = &m->m_nextpkt;

next:
		if (next_packet == NULL && last_ifproto != NULL) {
			/* pass up the last list of packets */
			dlil_ifproto_input(last_ifproto, pkt_first);
			if_proto_free(last_ifproto);
			last_ifproto = NULL;
		}
		if (ifproto != NULL) {
			if_proto_free(ifproto);
			ifproto = NULL;
		}

		m = next_packet;

		/* update the driver's multicast filter, if needed */
		if (ifp->if_updatemcasts > 0 && if_mcasts_update(ifp) == 0)
			ifp->if_updatemcasts = 0;
		if (iorefcnt == 1)
			ifnet_decr_iorefcnt(ifp);
	}

	KERNEL_DEBUG(DBG_FNC_DLIL_INPUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
}
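
/*
 * Illustrative sketch (not part of this file): the demux contract the
 * loop above relies on. An if_demux callback classifies the frame and
 * may return EJUSTRETURN to tell dlil to stop processing the mbuf
 * silently; this simplified ethertype-based classifier is hypothetical.
 *
 *	static errno_t
 *	my_demux(ifnet_t ifp, mbuf_t m, char *frame_header,
 *	    protocol_family_t *proto)
 *	{
 *		struct ether_header *eh = (struct ether_header *)frame_header;
 *
 *		switch (ntohs(eh->ether_type)) {
 *		case ETHERTYPE_IP:
 *			*proto = PF_INET;
 *			return (0);
 *		case ETHERTYPE_IPV6:
 *			*proto = PF_INET6;
 *			return (0);
 *		default:
 *			// nonzero and non-EJUSTRETURN: no family matched
 *			return (ENOENT);
 *		}
 *	}
 */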

errno_t
if_mcasts_update(struct ifnet *ifp)
{
	errno_t err;

	err = ifnet_ioctl(ifp, 0, SIOCADDMULTI, NULL);
	if (err == EAFNOSUPPORT)
		err = 0;
	printf("%s: %s %d suspended link-layer multicast membership(s) "
	    "(err=%d)\n", if_name(ifp),
	    (err == 0 ? "successfully restored" : "failed to restore"),
	    ifp->if_updatemcasts, err);

	/* just return success */
	return (0);
}

static int
dlil_event_internal(struct ifnet *ifp, struct kev_msg *event)
{
	struct ifnet_filter *filter;

	/* Get an io ref count if the interface is attached */
	if (!ifnet_is_attached(ifp, 1))
		goto done;

	/*
	 * Pass the event to the interface filters
	 */
	lck_mtx_lock_spin(&ifp->if_flt_lock);
	/* prevent filter list from changing in case we drop the lock */
	if_flt_monitor_busy(ifp);
	TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
		if (filter->filt_event != NULL) {
			lck_mtx_unlock(&ifp->if_flt_lock);

			filter->filt_event(filter->filt_cookie, ifp,
			    filter->filt_protocol, event);

			lck_mtx_lock_spin(&ifp->if_flt_lock);
		}
	}
	/* we're done with the filter list */
	if_flt_monitor_unbusy(ifp);
	lck_mtx_unlock(&ifp->if_flt_lock);

	ifnet_lock_shared(ifp);
	if (ifp->if_proto_hash != NULL) {
		int i;

		for (i = 0; i < PROTO_HASH_SLOTS; i++) {
			struct if_proto *proto;

			SLIST_FOREACH(proto, &ifp->if_proto_hash[i],
			    next_hash) {
				proto_media_event eventp =
				    (proto->proto_kpi == kProtoKPI_v1 ?
				    proto->kpi.v1.event :
				    proto->kpi.v2.event);

				if (eventp != NULL) {
					if_proto_ref(proto);
					ifnet_lock_done(ifp);

					eventp(ifp, proto->protocol_family,
					    event);

					ifnet_lock_shared(ifp);
					if_proto_free(proto);
				}
			}
		}
	}
	ifnet_lock_done(ifp);

	/* Pass the event to the interface */
	if (ifp->if_event != NULL)
		ifp->if_event(ifp, event);

	/* Release the io ref count */
	ifnet_decr_iorefcnt(ifp);

done:
	return (kev_post_msg(event));
}
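
/*
 * Illustrative sketch (not part of this file): an interface filter that
 * wants the events delivered above supplies an iff_event callback when
 * it attaches with iflt_attach(); dlil_event_internal() invokes it with
 * the filter lock dropped. "my_event" and the filter name are
 * hypothetical.
 *
 *	#include <net/kpi_interfacefilter.h>
 *
 *	static void
 *	my_event(void *cookie, ifnet_t ifp, protocol_family_t protocol,
 *	    const struct kev_msg *msg)
 *	{
 *		if (msg->kev_subclass == KEV_DL_SUBCLASS &&
 *		    msg->event_code == KEV_DL_LINK_OFF)
 *			printf("%s: link down\n", if_name(ifp));
 *	}
 *
 *	static const struct iff_filter my_filter = {
 *		.iff_cookie = NULL,
 *		.iff_name = "com.example.evtfilter",
 *		.iff_event = my_event,
 *	};
 */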

errno_t
ifnet_event(ifnet_t ifp, struct kern_event_msg *event)
{
	struct kev_msg kev_msg;
	int result = 0;

	if (ifp == NULL || event == NULL)
		return (EINVAL);

	bzero(&kev_msg, sizeof (kev_msg));
	kev_msg.vendor_code = event->vendor_code;
	kev_msg.kev_class = event->kev_class;
	kev_msg.kev_subclass = event->kev_subclass;
	kev_msg.event_code = event->event_code;
	kev_msg.dv[0].data_ptr = &event->event_data[0];
	kev_msg.dv[0].data_length = event->total_size - KEV_MSG_HEADER_SIZE;
	kev_msg.dv[1].data_length = 0;

	result = dlil_event_internal(ifp, &kev_msg);

	return (result);
}
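
/*
 * Illustrative sketch (not part of this file): posting a link event
 * through ifnet_event(). The layout mirrors the copy done above:
 * KEV_MSG_HEADER_SIZE bytes of header followed by the payload that
 * lands in kev_msg.dv[0]. The KEV_* constants are the standard kernel
 * event codes; the local variable names are hypothetical.
 *
 *	char buf[KEV_MSG_HEADER_SIZE + sizeof (struct net_event_data)];
 *	struct kern_event_msg *hdr = (struct kern_event_msg *)(void *)buf;
 *	struct net_event_data *nd =
 *	    (struct net_event_data *)(void *)hdr->event_data;
 *
 *	bzero(buf, sizeof (buf));
 *	hdr->total_size = sizeof (buf);
 *	hdr->vendor_code = KEV_VENDOR_APPLE;
 *	hdr->kev_class = KEV_NETWORK_CLASS;
 *	hdr->kev_subclass = KEV_DL_SUBCLASS;
 *	hdr->event_code = KEV_DL_LINK_ON;
 *	nd->if_family = ifp->if_family;
 *	nd->if_unit = ifp->if_unit;
 *	strlcpy(nd->if_name, ifp->if_name, IFNAMSIZ);
 *	(void) ifnet_event(ifp, hdr);
 */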

#if CONFIG_MACF_NET
#include <netinet/ip6.h>
#include <netinet/ip.h>
static int
dlil_get_socket_type(struct mbuf **mp, int family, int raw)
{
	struct mbuf *m;
	struct ip *ip;
	struct ip6_hdr *ip6;
	int type = SOCK_RAW;

	if (!raw) {
		switch (family) {
		case PF_INET:
			m = m_pullup(*mp, sizeof (struct ip));
			if (m == NULL)
				break;
			*mp = m;
			ip = mtod(m, struct ip *);
			if (ip->ip_p == IPPROTO_TCP)
				type = SOCK_STREAM;
			else if (ip->ip_p == IPPROTO_UDP)
				type = SOCK_DGRAM;
			break;
		case PF_INET6:
			m = m_pullup(*mp, sizeof (struct ip6_hdr));
			if (m == NULL)
				break;
			*mp = m;
			ip6 = mtod(m, struct ip6_hdr *);
			if (ip6->ip6_nxt == IPPROTO_TCP)
				type = SOCK_STREAM;
			else if (ip6->ip6_nxt == IPPROTO_UDP)
				type = SOCK_DGRAM;
			break;
		}
	}

	return (type);
}
#endif

/*
 * This is mostly called from the context of the DLIL input thread;
 * because of that there is no need for atomic operations.
 */
static __inline void
ifp_inc_traffic_class_in(struct ifnet *ifp, struct mbuf *m)
{
	if (!(m->m_flags & M_PKTHDR))
		return;

	switch (m_get_traffic_class(m)) {
	case MBUF_TC_BE:
		ifp->if_tc.ifi_ibepackets++;
		ifp->if_tc.ifi_ibebytes += m->m_pkthdr.len;
		break;
	case MBUF_TC_BK:
		ifp->if_tc.ifi_ibkpackets++;
		ifp->if_tc.ifi_ibkbytes += m->m_pkthdr.len;
		break;
	case MBUF_TC_VI:
		ifp->if_tc.ifi_ivipackets++;
		ifp->if_tc.ifi_ivibytes += m->m_pkthdr.len;
		break;
	case MBUF_TC_VO:
		ifp->if_tc.ifi_ivopackets++;
		ifp->if_tc.ifi_ivobytes += m->m_pkthdr.len;
		break;
	default:
		break;
	}

	if (mbuf_is_traffic_class_privileged(m)) {
		ifp->if_tc.ifi_ipvpackets++;
		ifp->if_tc.ifi_ipvbytes += m->m_pkthdr.len;
	}
}

/*
 * This is called from DLIL output, hence multiple threads could end
 * up modifying the statistics. We trade off accuracy for performance
 * by not using atomic operations here.
 */
static __inline void
ifp_inc_traffic_class_out(struct ifnet *ifp, struct mbuf *m)
{
	if (!(m->m_flags & M_PKTHDR))
		return;

	switch (m_get_traffic_class(m)) {
	case MBUF_TC_BE:
		ifp->if_tc.ifi_obepackets++;
		ifp->if_tc.ifi_obebytes += m->m_pkthdr.len;
		break;
	case MBUF_TC_BK:
		ifp->if_tc.ifi_obkpackets++;
		ifp->if_tc.ifi_obkbytes += m->m_pkthdr.len;
		break;
	case MBUF_TC_VI:
		ifp->if_tc.ifi_ovipackets++;
		ifp->if_tc.ifi_ovibytes += m->m_pkthdr.len;
		break;
	case MBUF_TC_VO:
		ifp->if_tc.ifi_ovopackets++;
		ifp->if_tc.ifi_ovobytes += m->m_pkthdr.len;
		break;
	default:
		break;
	}

	if (mbuf_is_traffic_class_privileged(m)) {
		ifp->if_tc.ifi_opvpackets++;
		ifp->if_tc.ifi_opvbytes += m->m_pkthdr.len;
	}
}
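
/*
 * Illustrative sketch (user-space side, not part of this file): the
 * mbuf traffic class counted above is normally inherited from the
 * sending socket, which an application can set with the
 * SO_TRAFFIC_CLASS socket option; SO_TC_VO below maps to MBUF_TC_VO.
 *
 *	#include <sys/socket.h>
 *
 *	int tc = SO_TC_VO;
 *	if (setsockopt(fd, SOL_SOCKET, SO_TRAFFIC_CLASS,
 *	    &tc, sizeof (tc)) == -1)
 *		perror("setsockopt(SO_TRAFFIC_CLASS)");
 */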

/*
 * dlil_output
 *
 * Caller should have a lock on the protocol domain if the protocol
 * doesn't support finer grained locking. In most cases, the lock
 * will be held from the socket layer and won't be released until
 * we return back to the socket layer.
 *
 * This does mean that we must take a protocol lock before we take
 * an interface lock if we're going to take both. This makes sense
 * because a protocol is likely to interact with an ifp while it
 * is under the protocol lock.
 *
 * An advisory code will be returned if adv is not null. This
 * can be used to provide feedback about interface queues to the
 * application.
 */
errno_t
dlil_output(ifnet_t ifp, protocol_family_t proto_family, mbuf_t packetlist,
    void *route, const struct sockaddr *dest, int raw, struct flowadv *adv)
{
	char *frame_type = NULL;
	char *dst_linkaddr = NULL;
	int retval = 0;
	char frame_type_buffer[MAX_FRAME_TYPE_SIZE * 4];
	char dst_linkaddr_buffer[MAX_LINKADDR * 4];
	struct if_proto *proto = NULL;
	mbuf_t m;
	mbuf_t send_head = NULL;
	mbuf_t *send_tail = &send_head;
	int iorefcnt = 0;
	u_int32_t pre = 0, post = 0;
	u_int32_t fpkts = 0, fbytes = 0;
	int32_t flen = 0;

	KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_START, 0, 0, 0, 0, 0);

	/*
	 * Get an io refcnt if the interface is attached to prevent
	 * ifnet_detach from happening while this operation is in progress.
	 */
	if (!ifnet_is_attached(ifp, 1)) {
		retval = ENXIO;
		goto cleanup;
	}
	iorefcnt = 1;

	/* update the driver's multicast filter, if needed */
	if (ifp->if_updatemcasts > 0 && if_mcasts_update(ifp) == 0)
		ifp->if_updatemcasts = 0;

	frame_type = frame_type_buffer;
	dst_linkaddr = dst_linkaddr_buffer;

	if (raw == 0) {
		ifnet_lock_shared(ifp);
		/* callee holds a proto refcnt upon success */
		proto = find_attached_proto(ifp, proto_family);
		if (proto == NULL) {
			ifnet_lock_done(ifp);
			retval = ENXIO;
			goto cleanup;
		}
		ifnet_lock_done(ifp);
	}

preout_again:
	if (packetlist == NULL)
		goto cleanup;

	m = packetlist;
	packetlist = packetlist->m_nextpkt;
	m->m_nextpkt = NULL;

	if (raw == 0) {
		proto_media_preout preoutp = (proto->proto_kpi == kProtoKPI_v1 ?
		    proto->kpi.v1.pre_output : proto->kpi.v2.pre_output);
		retval = 0;
		if (preoutp != NULL) {
			retval = preoutp(ifp, proto_family, &m, dest, route,
			    frame_type, dst_linkaddr);

			if (retval != 0) {
				if (retval == EJUSTRETURN)
					goto preout_again;
				m_freem(m);
				goto cleanup;
			}
		}
	}

#if CONFIG_MACF_NET
	retval = mac_ifnet_check_transmit(ifp, m, proto_family,
	    dlil_get_socket_type(&m, proto_family, raw));
	if (retval != 0) {
		m_freem(m);
		goto cleanup;
	}
#endif

	do {
#if CONFIG_DTRACE
		if (!raw && proto_family == PF_INET) {
			struct ip *ip = mtod(m, struct ip *);
			DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL,
			    struct ip *, ip, struct ifnet *, ifp,
			    struct ip *, ip, struct ip6_hdr *, NULL);

		} else if (!raw && proto_family == PF_INET6) {
			struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
			DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL,
			    struct ip6_hdr *, ip6, struct ifnet *, ifp,
			    struct ip *, NULL, struct ip6_hdr *, ip6);
		}
#endif /* CONFIG_DTRACE */

		if (raw == 0 && ifp->if_framer != NULL) {
			int rcvif_set = 0;

			/*
			 * If this is a broadcast packet that needs to be
			 * looped back into the system, set the inbound ifp
			 * to that of the outbound ifp. This will allow
			 * us to determine that it is a legitimate packet
			 * for the system. Only set the ifp if it's not
			 * already set, just to be safe.
			 */
			if ((m->m_flags & (M_BCAST | M_LOOP)) &&
			    m->m_pkthdr.rcvif == NULL) {
				m->m_pkthdr.rcvif = ifp;
				rcvif_set = 1;
			}

			retval = ifp->if_framer(ifp, &m, dest, dst_linkaddr,
			    frame_type, &pre, &post);
			if (retval != 0) {
				if (retval != EJUSTRETURN)
					m_freem(m);
				goto next;
			}

			/*
			 * For partial checksum offload, adjust the start
			 * and stuff offsets based on the prepended header.
			 */
			if ((m->m_pkthdr.csum_flags &
			    (CSUM_DATA_VALID | CSUM_PARTIAL)) ==
			    (CSUM_DATA_VALID | CSUM_PARTIAL)) {
				m->m_pkthdr.csum_tx_stuff += pre;
				m->m_pkthdr.csum_tx_start += pre;
			}

			if (hwcksum_dbg != 0 && !(ifp->if_flags & IFF_LOOPBACK))
				dlil_output_cksum_dbg(ifp, m, pre,
				    proto_family);

			/*
			 * Clear the ifp if it was set above, and to be
			 * safe, only if it is still the same as the
			 * outbound ifp we have in context. If it was
			 * looped back, then a copy of it was sent to the
			 * loopback interface with the rcvif set, and we
			 * are clearing the one that will go down to the
			 * layer below.
			 */
			if (rcvif_set && m->m_pkthdr.rcvif == ifp)
				m->m_pkthdr.rcvif = NULL;
		}

		/*
		 * Let interface filters (if any) do their thing ...
		 */
		/* Do not pass VLAN tagged packets to filters PR-3586856 */
		if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) == 0) {
			retval = dlil_interface_filters_output(ifp,
			    &m, proto_family);
			if (retval != 0) {
				if (retval != EJUSTRETURN)
					m_freem(m);
				goto next;
			}
		}
		/*
		 * Strip away M_PROTO1 bit prior to sending packet
		 * to the driver as this field may be used by the driver
		 */
		m->m_flags &= ~M_PROTO1;

		/*
		 * If the underlying interface is not capable of handling a
		 * packet whose data portion spans across physically disjoint
		 * pages, we need to "normalize" the packet so that we pass
		 * down a chain of mbufs where each mbuf points to a span that
		 * resides in the system page boundary. If the packet does
		 * not cross page(s), the following is a no-op.
		 */
		if (!(ifp->if_hwassist & IFNET_MULTIPAGES)) {
			if ((m = m_normalize(m)) == NULL)
				goto next;
		}

		/*
		 * If this is a TSO packet, make sure the interface still
		 * advertises TSO capability.
		 */
		if (TSO_IPV4_NOTOK(ifp, m) || TSO_IPV6_NOTOK(ifp, m)) {
			retval = EMSGSIZE;
			m_freem(m);
			goto cleanup;
		}

		/*
		 * If the packet service class is not background,
		 * update the timestamp to indicate recent activity
		 * on a foreground socket.
		 */
		if (!(m->m_pkthdr.pkt_flags & PKTF_SO_BACKGROUND) &&
		    (m->m_pkthdr.pkt_flags & PKTF_FLOW_ID) &&
		    m->m_pkthdr.pkt_flowsrc == FLOWSRC_INPCB)
			ifp->if_fg_sendts = net_uptime();

		ifp_inc_traffic_class_out(ifp, m);
		pktap_output(ifp, proto_family, m, pre, post);

		/*
		 * Finally, call the driver.
		 */
		if (ifp->if_eflags & IFEF_SENDLIST) {
			if (m->m_pkthdr.pkt_flags & PKTF_FORWARDED) {
				flen += (m_pktlen(m) - (pre + post));
				m->m_pkthdr.pkt_flags &= ~PKTF_FORWARDED;
			}
			*send_tail = m;
			send_tail = &m->m_nextpkt;
		} else {
			if (m->m_pkthdr.pkt_flags & PKTF_FORWARDED) {
				flen = (m_pktlen(m) - (pre + post));
				m->m_pkthdr.pkt_flags &= ~PKTF_FORWARDED;
			} else {
				flen = 0;
			}
			KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START,
			    0, 0, 0, 0, 0);
			retval = (*ifp->if_output)(ifp, m);
			if (retval == EQFULL || retval == EQSUSPENDED) {
				if (adv != NULL && adv->code == FADV_SUCCESS) {
					adv->code = (retval == EQFULL ?
					    FADV_FLOW_CONTROLLED :
					    FADV_SUSPENDED);
				}
				retval = 0;
			}
			if (retval == 0 && flen > 0) {
				fbytes += flen;
				fpkts++;
			}
			if (retval != 0 && dlil_verbose) {
				printf("%s: output error on %s retval = %d\n",
				    __func__, if_name(ifp),
				    retval);
			}
			KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END,
			    0, 0, 0, 0, 0);
		}
		KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0, 0, 0, 0, 0);

next:
		m = packetlist;
		if (m != NULL) {
			packetlist = packetlist->m_nextpkt;
			m->m_nextpkt = NULL;
		}
	} while (m != NULL);

	if (send_head != NULL) {
		VERIFY(ifp->if_eflags & IFEF_SENDLIST);
		KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START,
		    0, 0, 0, 0, 0);
		retval = (*ifp->if_output)(ifp, send_head);
		if (retval == EQFULL || retval == EQSUSPENDED) {
			if (adv != NULL) {
				adv->code = (retval == EQFULL ?
				    FADV_FLOW_CONTROLLED : FADV_SUSPENDED);
			}
			retval = 0;
		}
		if (retval == 0 && flen > 0) {
			fbytes += flen;
			fpkts++;
		}
		if (retval != 0 && dlil_verbose) {
			printf("%s: output error on %s retval = %d\n",
			    __func__, if_name(ifp), retval);
		}
		KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
	}

	KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_END, 0, 0, 0, 0, 0);

cleanup:
	if (fbytes > 0)
		ifp->if_fbytes += fbytes;
	if (fpkts > 0)
		ifp->if_fpackets += fpkts;
	if (proto != NULL)
		if_proto_free(proto);
	if (packetlist) /* if any packets are left, clean up */
		mbuf_freem_list(packetlist);
	if (retval == EJUSTRETURN)
		retval = 0;
	if (iorefcnt == 1)
		ifnet_decr_iorefcnt(ifp);

	return (retval);
}
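
/*
 * Illustrative sketch (not part of this file): how a caller consumes
 * the advisory code filled in above. A transport that passes a struct
 * flowadv initialized to FADV_SUCCESS can detect that the interface
 * queue flow-controlled or suspended the flow and back off until the
 * classq issues a resume advisory.
 *
 *	struct flowadv adv = { FADV_SUCCESS };
 *	errno_t err;
 *
 *	err = dlil_output(ifp, PF_INET, m, NULL, dest, 0, &adv);
 *	if (err == 0 && adv.code != FADV_SUCCESS) {
 *		// queue congested; pause this flow until resumed
 *	}
 */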

errno_t
ifnet_ioctl(ifnet_t ifp, protocol_family_t proto_fam, u_long ioctl_code,
    void *ioctl_arg)
{
	struct ifnet_filter *filter;
	int retval = EOPNOTSUPP;
	int result = 0;

	if (ifp == NULL || ioctl_code == 0)
		return (EINVAL);

	/* Get an io ref count if the interface is attached */
	if (!ifnet_is_attached(ifp, 1))
		return (EOPNOTSUPP);

	/*
	 * Run the interface filters first.
	 * We want to run all filters before calling the protocol,
	 * interface family, or interface.
	 */
	lck_mtx_lock_spin(&ifp->if_flt_lock);
	/* prevent filter list from changing in case we drop the lock */
	if_flt_monitor_busy(ifp);
	TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
		if (filter->filt_ioctl != NULL && (filter->filt_protocol == 0 ||
		    filter->filt_protocol == proto_fam)) {
			lck_mtx_unlock(&ifp->if_flt_lock);

			result = filter->filt_ioctl(filter->filt_cookie, ifp,
			    proto_fam, ioctl_code, ioctl_arg);

			lck_mtx_lock_spin(&ifp->if_flt_lock);

			/* Only update retval if no one has handled the ioctl */
			if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
				if (result == ENOTSUP)
					result = EOPNOTSUPP;
				retval = result;
				if (retval != 0 && retval != EOPNOTSUPP) {
					/* we're done with the filter list */
					if_flt_monitor_unbusy(ifp);
					lck_mtx_unlock(&ifp->if_flt_lock);
					goto cleanup;
				}
			}
		}
	}
	/* we're done with the filter list */
	if_flt_monitor_unbusy(ifp);
	lck_mtx_unlock(&ifp->if_flt_lock);

	/* Allow the protocol to handle the ioctl */
	if (proto_fam != 0) {
		struct if_proto *proto;

		/* callee holds a proto refcnt upon success */
		ifnet_lock_shared(ifp);
		proto = find_attached_proto(ifp, proto_fam);
		ifnet_lock_done(ifp);
		if (proto != NULL) {
			proto_media_ioctl ioctlp =
			    (proto->proto_kpi == kProtoKPI_v1 ?
			    proto->kpi.v1.ioctl : proto->kpi.v2.ioctl);
			result = EOPNOTSUPP;
			if (ioctlp != NULL)
				result = ioctlp(ifp, proto_fam, ioctl_code,
				    ioctl_arg);
			if_proto_free(proto);

			/* Only update retval if no one has handled the ioctl */
			if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
				if (result == ENOTSUP)
					result = EOPNOTSUPP;
				retval = result;
				if (retval && retval != EOPNOTSUPP)
					goto cleanup;
			}
		}
	}

	/* retval is either 0 or EOPNOTSUPP */

	/*
	 * Let the interface handle this ioctl.
	 * If it returns EOPNOTSUPP, ignore that, we may have
	 * already handled this in the protocol or family.
	 */
	if (ifp->if_ioctl)
		result = (*ifp->if_ioctl)(ifp, ioctl_code, ioctl_arg);

	/* Only update retval if no one has handled the ioctl */
	if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
		if (result == ENOTSUP)
			result = EOPNOTSUPP;
		retval = result;
		if (retval && retval != EOPNOTSUPP) {
			goto cleanup;
		}
	}

cleanup:
	if (retval == EJUSTRETURN)
		retval = 0;

	ifnet_decr_iorefcnt(ifp);

	return (retval);
}
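
/*
 * Illustrative sketch (not part of this file): given the merging logic
 * above, a filter's ioctl callback should return EOPNOTSUPP to pass an
 * ioctl through untouched, and EJUSTRETURN to claim it with a zero
 * result. "my_ioctl" is a hypothetical iff_ioctl callback.
 *
 *	static errno_t
 *	my_ioctl(void *cookie, ifnet_t ifp, protocol_family_t protocol,
 *	    unsigned long cmd, void *arg)
 *	{
 *		if (cmd == SIOCSIFMTU)
 *			printf("%s: MTU change requested\n", if_name(ifp));
 *
 *		// EOPNOTSUPP means "not handled"; dlil keeps going
 *		return (EOPNOTSUPP);
 *	}
 */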

__private_extern__ errno_t
dlil_set_bpf_tap(ifnet_t ifp, bpf_tap_mode mode, bpf_packet_func callback)
{
	errno_t error = 0;

	if (ifp->if_set_bpf_tap) {
		/* Get an io reference on the interface if it is attached */
		if (!ifnet_is_attached(ifp, 1))
			return (ENXIO);
		error = ifp->if_set_bpf_tap(ifp, mode, callback);
		ifnet_decr_iorefcnt(ifp);
	}
	return (error);
}

errno_t
dlil_resolve_multi(struct ifnet *ifp, const struct sockaddr *proto_addr,
    struct sockaddr *ll_addr, size_t ll_len)
{
	errno_t result = EOPNOTSUPP;
	struct if_proto *proto;
	const struct sockaddr *verify;
	proto_media_resolve_multi resolvep;

	if (!ifnet_is_attached(ifp, 1))
		return (result);

	bzero(ll_addr, ll_len);

	/* Call the protocol first; callee holds a proto refcnt upon success */
	ifnet_lock_shared(ifp);
	proto = find_attached_proto(ifp, proto_addr->sa_family);
	ifnet_lock_done(ifp);
	if (proto != NULL) {
		resolvep = (proto->proto_kpi == kProtoKPI_v1 ?
		    proto->kpi.v1.resolve_multi : proto->kpi.v2.resolve_multi);
		if (resolvep != NULL)
			result = resolvep(ifp, proto_addr,
			    (struct sockaddr_dl *)(void *)ll_addr, ll_len);
		if_proto_free(proto);
	}

	/* Let the interface verify the multicast address */
	if ((result == EOPNOTSUPP || result == 0) && ifp->if_check_multi) {
		if (result == 0)
			verify = ll_addr;
		else
			verify = proto_addr;
		result = ifp->if_check_multi(ifp, verify);
	}

	ifnet_decr_iorefcnt(ifp);
	return (result);
}
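
/*
 * Illustrative sketch (not part of this file): what a protocol's
 * resolve_multi callback typically computes. For IPv4 over Ethernet,
 * the well-known mapping places the low 23 bits of the group address
 * into the 01:00:5e:00:00:00 prefix; this simplified helper skips the
 * sockaddr_dl bookkeeping a real callback must also fill in.
 *
 *	static void
 *	map_inet_multicast(struct in_addr grp, u_char mac[6])
 *	{
 *		u_int32_t a = ntohl(grp.s_addr);
 *
 *		mac[0] = 0x01;
 *		mac[1] = 0x00;
 *		mac[2] = 0x5e;
 *		mac[3] = (a >> 16) & 0x7f;	// only 23 bits survive
 *		mac[4] = (a >> 8) & 0xff;
 *		mac[5] = a & 0xff;
 *	}
 */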

__private_extern__ errno_t
dlil_send_arp_internal(ifnet_t ifp, u_short arpop,
    const struct sockaddr_dl *sender_hw, const struct sockaddr *sender_proto,
    const struct sockaddr_dl *target_hw, const struct sockaddr *target_proto)
{
	struct if_proto *proto;
	errno_t result = 0;

	/* callee holds a proto refcnt upon success */
	ifnet_lock_shared(ifp);
	proto = find_attached_proto(ifp, target_proto->sa_family);
	ifnet_lock_done(ifp);
	if (proto == NULL) {
		result = ENOTSUP;
	} else {
		proto_media_send_arp arpp;
		arpp = (proto->proto_kpi == kProtoKPI_v1 ?
		    proto->kpi.v1.send_arp : proto->kpi.v2.send_arp);
		if (arpp == NULL) {
			result = ENOTSUP;
		} else {
			switch (arpop) {
			case ARPOP_REQUEST:
				arpstat.txrequests++;
				if (target_hw != NULL)
					arpstat.txurequests++;
				break;
			case ARPOP_REPLY:
				arpstat.txreplies++;
				break;
			}
			result = arpp(ifp, arpop, sender_hw, sender_proto,
			    target_hw, target_proto);
		}
		if_proto_free(proto);
	}

	return (result);
}

struct net_thread_marks { };
static const struct net_thread_marks net_thread_marks_base = { };

__private_extern__ const net_thread_marks_t net_thread_marks_none =
    &net_thread_marks_base;

__private_extern__ net_thread_marks_t
net_thread_marks_push(u_int32_t push)
{
	static const char *const base = (const void *)&net_thread_marks_base;
	u_int32_t pop = 0;

	if (push != 0) {
		struct uthread *uth = get_bsdthread_info(current_thread());

		pop = push & ~uth->uu_network_marks;
		if (pop != 0)
			uth->uu_network_marks |= pop;
	}

	return ((net_thread_marks_t)&base[pop]);
}

__private_extern__ net_thread_marks_t
net_thread_unmarks_push(u_int32_t unpush)
{
	static const char *const base = (const void *)&net_thread_marks_base;
	u_int32_t unpop = 0;

	if (unpush != 0) {
		struct uthread *uth = get_bsdthread_info(current_thread());

		unpop = unpush & uth->uu_network_marks;
		if (unpop != 0)
			uth->uu_network_marks &= ~unpop;
	}

	return ((net_thread_marks_t)&base[unpop]);
}

__private_extern__ void
net_thread_marks_pop(net_thread_marks_t popx)
{
	static const char *const base = (const void *)&net_thread_marks_base;
	ptrdiff_t pop = (caddr_t)popx - (caddr_t)base;

	if (pop != 0) {
		static const ptrdiff_t ones = (ptrdiff_t)(u_int32_t)~0U;
		struct uthread *uth = get_bsdthread_info(current_thread());

		VERIFY((pop & ones) == pop);
		VERIFY((ptrdiff_t)(uth->uu_network_marks & pop) == pop);
		uth->uu_network_marks &= ~pop;
	}
}

__private_extern__ void
net_thread_unmarks_pop(net_thread_marks_t unpopx)
{
	static const char *const base = (const void *)&net_thread_marks_base;
	ptrdiff_t unpop = (caddr_t)unpopx - (caddr_t)base;

	if (unpop != 0) {
		static const ptrdiff_t ones = (ptrdiff_t)(u_int32_t)~0U;
		struct uthread *uth = get_bsdthread_info(current_thread());

		VERIFY((unpop & ones) == unpop);
		VERIFY((ptrdiff_t)(uth->uu_network_marks & unpop) == 0);
		uth->uu_network_marks |= unpop;
	}
}

__private_extern__ u_int32_t
net_thread_is_marked(u_int32_t check)
{
	if (check != 0) {
		struct uthread *uth = get_bsdthread_info(current_thread());
		return (uth->uu_network_marks & check);
	} else {
		return (0);
	}
}

__private_extern__ u_int32_t
net_thread_is_unmarked(u_int32_t check)
{
	if (check != 0) {
		struct uthread *uth = get_bsdthread_info(current_thread());
		return (~uth->uu_network_marks & check);
	} else {
		return (0);
	}
}
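
/*
 * Illustrative sketch (not part of this file): the mark/unmark pairs
 * above encode the bits they newly set as a byte offset from a base
 * pointer, so a balanced pop restores exactly the caller's prior view.
 * The sketch assumes the NET_THREAD_HELD_PF mark bit declared alongside
 * these routines in net/dlil.h.
 *
 *	net_thread_marks_t marks;
 *
 *	marks = net_thread_marks_push(NET_THREAD_HELD_PF);
 *	// ... code that must not re-enter pf on this thread ...
 *	net_thread_marks_pop(marks);
 */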

static __inline__ int
_is_announcement(const struct sockaddr_in *sender_sin,
    const struct sockaddr_in *target_sin)
{
	if (sender_sin == NULL) {
		return (FALSE);
	}
	return (sender_sin->sin_addr.s_addr == target_sin->sin_addr.s_addr);
}

__private_extern__ errno_t
dlil_send_arp(ifnet_t ifp, u_short arpop, const struct sockaddr_dl *sender_hw,
    const struct sockaddr *sender_proto, const struct sockaddr_dl *target_hw,
    const struct sockaddr *target_proto0, u_int32_t rtflags)
{
	errno_t result = 0;
	const struct sockaddr_in *sender_sin;
	const struct sockaddr_in *target_sin;
	struct sockaddr_inarp target_proto_sinarp;
	struct sockaddr *target_proto = (void *)(uintptr_t)target_proto0;

	if (target_proto == NULL || (sender_proto != NULL &&
	    sender_proto->sa_family != target_proto->sa_family))
		return (EINVAL);

	/*
	 * If the target is a (default) router, provide that
	 * information to the send_arp callback routine.
	 */
	if (rtflags & RTF_ROUTER) {
		bcopy(target_proto, &target_proto_sinarp,
		    sizeof (struct sockaddr_in));
		target_proto_sinarp.sin_other |= SIN_ROUTER;
		target_proto = (struct sockaddr *)&target_proto_sinarp;
	}

	/*
	 * If this is an ARP request and the target IP is IPv4LL,
	 * send the request on all interfaces. The exception is
	 * an announcement, which must only appear on the specific
	 * interface.
	 */
	sender_sin = (struct sockaddr_in *)(void *)(uintptr_t)sender_proto;
	target_sin = (struct sockaddr_in *)(void *)(uintptr_t)target_proto;
	if (target_proto->sa_family == AF_INET &&
	    IN_LINKLOCAL(ntohl(target_sin->sin_addr.s_addr)) &&
	    ipv4_ll_arp_aware != 0 && arpop == ARPOP_REQUEST &&
	    !_is_announcement(target_sin, sender_sin)) {
		ifnet_t *ifp_list;
		u_int32_t count;
		u_int32_t ifp_on;

		result = ENOTSUP;

		if (ifnet_list_get(IFNET_FAMILY_ANY, &ifp_list, &count) == 0) {
			for (ifp_on = 0; ifp_on < count; ifp_on++) {
				errno_t new_result;
				ifaddr_t source_hw = NULL;
				ifaddr_t source_ip = NULL;
				struct sockaddr_in source_ip_copy;
				struct ifnet *cur_ifp = ifp_list[ifp_on];

				/*
				 * Only arp on interfaces marked for IPv4LL
				 * ARPing. This may mean that we don't ARP on
				 * the interface the subnet route points to.
				 */
				if (!(cur_ifp->if_eflags & IFEF_ARPLL))
					continue;

				/* Find the source IP address */
				ifnet_lock_shared(cur_ifp);
				source_hw = cur_ifp->if_lladdr;
				TAILQ_FOREACH(source_ip, &cur_ifp->if_addrhead,
				    ifa_link) {
					IFA_LOCK(source_ip);
					if (source_ip->ifa_addr != NULL &&
					    source_ip->ifa_addr->sa_family ==
					    AF_INET) {
						/* Copy the source IP address */
						source_ip_copy =
						    *(struct sockaddr_in *)
						    (void *)source_ip->ifa_addr;
						IFA_UNLOCK(source_ip);
						break;
					}
					IFA_UNLOCK(source_ip);
				}

				/* No IP Source, don't arp */
				if (source_ip == NULL) {
					ifnet_lock_done(cur_ifp);
					continue;
				}

				IFA_ADDREF(source_hw);
				ifnet_lock_done(cur_ifp);

				/* Send the ARP */
				new_result = dlil_send_arp_internal(cur_ifp,
				    arpop, (struct sockaddr_dl *)(void *)
				    source_hw->ifa_addr,
				    (struct sockaddr *)&source_ip_copy, NULL,
				    target_proto);

				IFA_REMREF(source_hw);
				if (result == ENOTSUP) {
					result = new_result;
				}
			}
			ifnet_list_free(ifp_list);
		}
	} else {
		result = dlil_send_arp_internal(ifp, arpop, sender_hw,
		    sender_proto, target_hw, target_proto);
	}

	return (result);
}

/*
 * Caller must hold ifnet head lock.
 */
static int
ifnet_lookup(struct ifnet *ifp)
{
	struct ifnet *_ifp;

	lck_rw_assert(&ifnet_head_lock, LCK_RW_ASSERT_HELD);
	TAILQ_FOREACH(_ifp, &ifnet_head, if_link) {
		if (_ifp == ifp)
			break;
	}
	return (_ifp != NULL);
}

/*
 * Caller has to pass a non-zero refio argument to get an
 * IO reference count. This will prevent ifnet_detach from
 * being called when there are outstanding io reference counts.
 */
int
ifnet_is_attached(struct ifnet *ifp, int refio)
{
	int ret;

	lck_mtx_lock_spin(&ifp->if_ref_lock);
	if ((ret = ((ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING)) ==
	    IFRF_ATTACHED))) {
		if (refio > 0)
			ifp->if_refio++;
	}
	lck_mtx_unlock(&ifp->if_ref_lock);

	return (ret);
}

void
ifnet_decr_iorefcnt(struct ifnet *ifp)
{
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	VERIFY(ifp->if_refio > 0);
	VERIFY((ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING)) != 0);
	ifp->if_refio--;

	/*
	 * If there are no more outstanding io references, wakeup the
	 * ifnet_detach thread if detaching flag is set.
	 */
	if (ifp->if_refio == 0 &&
	    (ifp->if_refflags & IFRF_DETACHING) != 0) {
		wakeup(&(ifp->if_refio));
	}
	lck_mtx_unlock(&ifp->if_ref_lock);
}

static void
dlil_if_trace(struct dlil_ifnet *dl_if, int refhold)
{
	struct dlil_ifnet_dbg *dl_if_dbg = (struct dlil_ifnet_dbg *)dl_if;
	ctrace_t *tr;
	u_int32_t idx;
	u_int16_t *cnt;

	if (!(dl_if->dl_if_flags & DLIF_DEBUG)) {
		panic("%s: dl_if %p has no debug structure", __func__, dl_if);
		/* NOTREACHED */
	}

	if (refhold) {
		cnt = &dl_if_dbg->dldbg_if_refhold_cnt;
		tr = dl_if_dbg->dldbg_if_refhold;
	} else {
		cnt = &dl_if_dbg->dldbg_if_refrele_cnt;
		tr = dl_if_dbg->dldbg_if_refrele;
	}

	idx = atomic_add_16_ov(cnt, 1) % IF_REF_TRACE_HIST_SIZE;
	ctrace_record(&tr[idx]);
}

errno_t
dlil_if_ref(struct ifnet *ifp)
{
	struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;

	if (dl_if == NULL)
		return (EINVAL);

	lck_mtx_lock_spin(&dl_if->dl_if_lock);
	++dl_if->dl_if_refcnt;
	if (dl_if->dl_if_refcnt == 0) {
		panic("%s: wraparound refcnt for ifp=%p", __func__, ifp);
		/* NOTREACHED */
	}
	if (dl_if->dl_if_trace != NULL)
		(*dl_if->dl_if_trace)(dl_if, TRUE);
	lck_mtx_unlock(&dl_if->dl_if_lock);

	return (0);
}

errno_t
dlil_if_free(struct ifnet *ifp)
{
	struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;

	if (dl_if == NULL)
		return (EINVAL);

	lck_mtx_lock_spin(&dl_if->dl_if_lock);
	if (dl_if->dl_if_refcnt == 0) {
		panic("%s: negative refcnt for ifp=%p", __func__, ifp);
		/* NOTREACHED */
	}
	--dl_if->dl_if_refcnt;
	if (dl_if->dl_if_trace != NULL)
		(*dl_if->dl_if_trace)(dl_if, FALSE);
	lck_mtx_unlock(&dl_if->dl_if_lock);

	return (0);
}

static errno_t
dlil_attach_protocol_internal(struct if_proto *proto,
    const struct ifnet_demux_desc *demux_list, u_int32_t demux_count)
{
	struct kev_dl_proto_data ev_pr_data;
	struct ifnet *ifp = proto->ifp;
	int retval = 0;
	u_int32_t hash_value = proto_hash_value(proto->protocol_family);
	struct if_proto *prev_proto;
	struct if_proto *_proto;

	/* callee holds a proto refcnt upon success */
	ifnet_lock_exclusive(ifp);
	_proto = find_attached_proto(ifp, proto->protocol_family);
	if (_proto != NULL) {
		ifnet_lock_done(ifp);
		if_proto_free(_proto);
		return (EEXIST);
	}

	/*
	 * Call family module add_proto routine so it can refine the
	 * demux descriptors as it wishes.
	 */
	retval = ifp->if_add_proto(ifp, proto->protocol_family, demux_list,
	    demux_count);
	if (retval) {
		ifnet_lock_done(ifp);
		return (retval);
	}

	/*
	 * Insert the protocol in the hash
	 */
	prev_proto = SLIST_FIRST(&ifp->if_proto_hash[hash_value]);
	while (prev_proto != NULL && SLIST_NEXT(prev_proto, next_hash) != NULL)
		prev_proto = SLIST_NEXT(prev_proto, next_hash);
	if (prev_proto)
		SLIST_INSERT_AFTER(prev_proto, proto, next_hash);
	else
		SLIST_INSERT_HEAD(&ifp->if_proto_hash[hash_value],
		    proto, next_hash);

	/* hold a proto refcnt for attach */
	if_proto_ref(proto);

	/*
	 * The reserved field carries the number of protocols still attached
	 * (subject to change)
	 */
	ev_pr_data.proto_family = proto->protocol_family;
	ev_pr_data.proto_remaining_count = dlil_ifp_proto_count(ifp);
	ifnet_lock_done(ifp);

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_ATTACHED,
	    (struct net_event_data *)&ev_pr_data,
	    sizeof (struct kev_dl_proto_data));
	return (retval);
}

errno_t
ifnet_attach_protocol(ifnet_t ifp, protocol_family_t protocol,
    const struct ifnet_attach_proto_param *proto_details)
{
	int retval = 0;
	struct if_proto *ifproto = NULL;

	ifnet_head_lock_shared();
	if (ifp == NULL || protocol == 0 || proto_details == NULL) {
		retval = EINVAL;
		goto end;
	}
	/* Check that the interface is in the global list */
	if (!ifnet_lookup(ifp)) {
		retval = ENXIO;
		goto end;
	}

	ifproto = zalloc(dlif_proto_zone);
	if (ifproto == NULL) {
		retval = ENOMEM;
		goto end;
	}
	bzero(ifproto, dlif_proto_size);

	/* refcnt held above during lookup */
	ifproto->ifp = ifp;
	ifproto->protocol_family = protocol;
	ifproto->proto_kpi = kProtoKPI_v1;
	ifproto->kpi.v1.input = proto_details->input;
	ifproto->kpi.v1.pre_output = proto_details->pre_output;
	ifproto->kpi.v1.event = proto_details->event;
	ifproto->kpi.v1.ioctl = proto_details->ioctl;
	ifproto->kpi.v1.detached = proto_details->detached;
	ifproto->kpi.v1.resolve_multi = proto_details->resolve;
	ifproto->kpi.v1.send_arp = proto_details->send_arp;

	retval = dlil_attach_protocol_internal(ifproto,
	    proto_details->demux_list, proto_details->demux_count);

	if (dlil_verbose) {
		printf("%s: attached v1 protocol %d\n", if_name(ifp),
		    protocol);
	}

end:
	if (retval != 0 && retval != EEXIST && ifp != NULL) {
		DLIL_PRINTF("%s: failed to attach v1 protocol %d (err=%d)\n",
		    if_name(ifp), protocol, retval);
	}
	ifnet_head_done();
	if (retval != 0 && ifproto != NULL)
		zfree(dlif_proto_zone, ifproto);
	return (retval);
}
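
/*
 * Illustrative sketch (not part of this file): attaching a v1 protocol
 * to an interface with one ethertype demux descriptor. The family value
 * MY_PROTO_FAMILY, the 0x8123 ethertype and my_proto_input are
 * hypothetical; see struct ifnet_attach_proto_param in
 * net/kpi_interface.h for the remaining fields.
 *
 *	struct ifnet_demux_desc desc;
 *	struct ifnet_attach_proto_param param;
 *	u_int16_t etype = htons(0x8123);
 *	errno_t err;
 *
 *	bzero(&desc, sizeof (desc));
 *	desc.type = DLIL_DESC_ETYPE2;
 *	desc.data = &etype;
 *	desc.datalen = sizeof (etype);
 *
 *	bzero(&param, sizeof (param));
 *	param.demux_list = &desc;
 *	param.demux_count = 1;
 *	param.input = my_proto_input;	// packet input callback
 *
 *	err = ifnet_attach_protocol(ifp, MY_PROTO_FAMILY, &param);
 *	// on teardown:
 *	(void) ifnet_detach_protocol(ifp, MY_PROTO_FAMILY);
 */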

errno_t
ifnet_attach_protocol_v2(ifnet_t ifp, protocol_family_t protocol,
    const struct ifnet_attach_proto_param_v2 *proto_details)
{
	int retval = 0;
	struct if_proto *ifproto = NULL;

	ifnet_head_lock_shared();
	if (ifp == NULL || protocol == 0 || proto_details == NULL) {
		retval = EINVAL;
		goto end;
	}
	/* Check that the interface is in the global list */
	if (!ifnet_lookup(ifp)) {
		retval = ENXIO;
		goto end;
	}

	ifproto = zalloc(dlif_proto_zone);
	if (ifproto == NULL) {
		retval = ENOMEM;
		goto end;
	}
	bzero(ifproto, sizeof (*ifproto));

	/* refcnt held above during lookup */
	ifproto->ifp = ifp;
	ifproto->protocol_family = protocol;
	ifproto->proto_kpi = kProtoKPI_v2;
	ifproto->kpi.v2.input = proto_details->input;
	ifproto->kpi.v2.pre_output = proto_details->pre_output;
	ifproto->kpi.v2.event = proto_details->event;
	ifproto->kpi.v2.ioctl = proto_details->ioctl;
	ifproto->kpi.v2.detached = proto_details->detached;
	ifproto->kpi.v2.resolve_multi = proto_details->resolve;
	ifproto->kpi.v2.send_arp = proto_details->send_arp;

	retval = dlil_attach_protocol_internal(ifproto,
	    proto_details->demux_list, proto_details->demux_count);

	if (dlil_verbose) {
		printf("%s: attached v2 protocol %d\n", if_name(ifp),
		    protocol);
	}

end:
	if (retval != 0 && retval != EEXIST && ifp != NULL) {
		DLIL_PRINTF("%s: failed to attach v2 protocol %d (err=%d)\n",
		    if_name(ifp), protocol, retval);
	}
	ifnet_head_done();
	if (retval != 0 && ifproto != NULL)
		zfree(dlif_proto_zone, ifproto);
	return (retval);
}

errno_t
ifnet_detach_protocol(ifnet_t ifp, protocol_family_t proto_family)
{
	struct if_proto *proto = NULL;
	int retval = 0;

	if (ifp == NULL || proto_family == 0) {
		retval = EINVAL;
		goto end;
	}

	ifnet_lock_exclusive(ifp);
	/* callee holds a proto refcnt upon success */
	proto = find_attached_proto(ifp, proto_family);
	if (proto == NULL) {
		retval = ENXIO;
		ifnet_lock_done(ifp);
		goto end;
	}

	/* call family module del_proto */
	if (ifp->if_del_proto)
		ifp->if_del_proto(ifp, proto->protocol_family);

	SLIST_REMOVE(&ifp->if_proto_hash[proto_hash_value(proto_family)],
	    proto, if_proto, next_hash);

	if (proto->proto_kpi == kProtoKPI_v1) {
		proto->kpi.v1.input = ifproto_media_input_v1;
		proto->kpi.v1.pre_output = ifproto_media_preout;
		proto->kpi.v1.event = ifproto_media_event;
		proto->kpi.v1.ioctl = ifproto_media_ioctl;
		proto->kpi.v1.resolve_multi = ifproto_media_resolve_multi;
		proto->kpi.v1.send_arp = ifproto_media_send_arp;
	} else {
		proto->kpi.v2.input = ifproto_media_input_v2;
		proto->kpi.v2.pre_output = ifproto_media_preout;
		proto->kpi.v2.event = ifproto_media_event;
		proto->kpi.v2.ioctl = ifproto_media_ioctl;
		proto->kpi.v2.resolve_multi = ifproto_media_resolve_multi;
		proto->kpi.v2.send_arp = ifproto_media_send_arp;
	}
	proto->detached = 1;
	ifnet_lock_done(ifp);

	if (dlil_verbose) {
		printf("%s: detached %s protocol %d\n", if_name(ifp),
		    (proto->proto_kpi == kProtoKPI_v1) ?
		    "v1" : "v2", proto_family);
	}

	/* release proto refcnt held during protocol attach */
	if_proto_free(proto);

	/*
	 * Release proto refcnt held during lookup; the rest of
	 * protocol detach steps will happen when the last proto
	 * reference is released.
	 */
	if_proto_free(proto);

end:
	return (retval);
}

static errno_t
ifproto_media_input_v1(struct ifnet *ifp, protocol_family_t protocol,
    struct mbuf *packet, char *header)
{
#pragma unused(ifp, protocol, packet, header)
	return (ENXIO);
}

static errno_t
ifproto_media_input_v2(struct ifnet *ifp, protocol_family_t protocol,
    struct mbuf *packet)
{
#pragma unused(ifp, protocol, packet)
	return (ENXIO);
}

static errno_t
ifproto_media_preout(struct ifnet *ifp, protocol_family_t protocol,
    mbuf_t *packet, const struct sockaddr *dest, void *route, char *frame_type,
    char *link_layer_dest)
{
#pragma unused(ifp, protocol, packet, dest, route, frame_type, link_layer_dest)
	return (ENXIO);
}

static void
ifproto_media_event(struct ifnet *ifp, protocol_family_t protocol,
    const struct kev_msg *event)
{
#pragma unused(ifp, protocol, event)
}

static errno_t
ifproto_media_ioctl(struct ifnet *ifp, protocol_family_t protocol,
    unsigned long command, void *argument)
{
#pragma unused(ifp, protocol, command, argument)
	return (ENXIO);
}

static errno_t
ifproto_media_resolve_multi(ifnet_t ifp, const struct sockaddr *proto_addr,
    struct sockaddr_dl *out_ll, size_t ll_len)
{
#pragma unused(ifp, proto_addr, out_ll, ll_len)
	return (ENXIO);
}

static errno_t
ifproto_media_send_arp(struct ifnet *ifp, u_short arpop,
    const struct sockaddr_dl *sender_hw, const struct sockaddr *sender_proto,
    const struct sockaddr_dl *target_hw, const struct sockaddr *target_proto)
{
#pragma unused(ifp, arpop, sender_hw, sender_proto, target_hw, target_proto)
	return (ENXIO);
}

extern int if_next_index(void);

errno_t
ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr)
{
	struct ifnet *tmp_if;
	struct ifaddr *ifa;
	struct if_data_internal if_data_saved;
	struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
	struct dlil_threading_info *dl_inp;
	u_int32_t sflags = 0;
	int err;

	if (ifp == NULL)
		return (EINVAL);

	/*
	 * Serialize ifnet attach using dlil_ifnet_lock, in order to
	 * prevent the interface from being configured while it is
	 * embryonic, as ifnet_head_lock is dropped and reacquired
	 * below prior to marking the ifnet with IFRF_ATTACHED.
	 */
	dlil_if_lock();
	ifnet_head_lock_exclusive();
	/* Verify we aren't already on the list */
	TAILQ_FOREACH(tmp_if, &ifnet_head, if_link) {
		if (tmp_if == ifp) {
			ifnet_head_done();
			dlil_if_unlock();
			return (EEXIST);
		}
	}

	lck_mtx_lock_spin(&ifp->if_ref_lock);
	if (ifp->if_refflags & IFRF_ATTACHED) {
		panic_plain("%s: flags mismatch (attached set) ifp=%p",
		    __func__, ifp);
		/* NOTREACHED */
	}
	lck_mtx_unlock(&ifp->if_ref_lock);

	ifnet_lock_exclusive(ifp);

	/* Sanity check */
	VERIFY(ifp->if_detaching_link.tqe_next == NULL);
	VERIFY(ifp->if_detaching_link.tqe_prev == NULL);

	if (ll_addr != NULL) {
		if (ifp->if_addrlen == 0) {
			ifp->if_addrlen = ll_addr->sdl_alen;
		} else if (ll_addr->sdl_alen != ifp->if_addrlen) {
			ifnet_lock_done(ifp);
			ifnet_head_done();
			dlil_if_unlock();
			return (EINVAL);
		}
	}

	/*
	 * Allow interfaces without protocol families to attach
	 * only if they have the necessary fields filled out.
	 */
	if (ifp->if_add_proto == NULL || ifp->if_del_proto == NULL) {
		DLIL_PRINTF("%s: Attempt to attach interface without "
		    "family module - %d\n", __func__, ifp->if_family);
		ifnet_lock_done(ifp);
		ifnet_head_done();
		dlil_if_unlock();
		return (ENODEV);
	}

	/* Allocate protocol hash table */
	VERIFY(ifp->if_proto_hash == NULL);
	ifp->if_proto_hash = zalloc(dlif_phash_zone);
	if (ifp->if_proto_hash == NULL) {
		ifnet_lock_done(ifp);
		ifnet_head_done();
		dlil_if_unlock();
		return (ENOBUFS);
	}
	bzero(ifp->if_proto_hash, dlif_phash_size);

	lck_mtx_lock_spin(&ifp->if_flt_lock);
	VERIFY(TAILQ_EMPTY(&ifp->if_flt_head));
	TAILQ_INIT(&ifp->if_flt_head);
	VERIFY(ifp->if_flt_busy == 0);
	VERIFY(ifp->if_flt_waiters == 0);
	lck_mtx_unlock(&ifp->if_flt_lock);

	VERIFY(TAILQ_EMPTY(&ifp->if_prefixhead));
	TAILQ_INIT(&ifp->if_prefixhead);

	if (!(dl_if->dl_if_flags & DLIF_REUSE)) {
		VERIFY(LIST_EMPTY(&ifp->if_multiaddrs));
		LIST_INIT(&ifp->if_multiaddrs);
	}

	VERIFY(ifp->if_allhostsinm == NULL);
	VERIFY(TAILQ_EMPTY(&ifp->if_addrhead));
	TAILQ_INIT(&ifp->if_addrhead);

	if (ifp->if_index == 0) {
		int idx = if_next_index();

		if (idx == -1) {
			ifp->if_index = 0;
			ifnet_lock_done(ifp);
			ifnet_head_done();
			dlil_if_unlock();
			return (ENOBUFS);
		}
		ifp->if_index = idx;
	}
	/* There should not be anything occupying this slot */
	VERIFY(ifindex2ifnet[ifp->if_index] == NULL);

	/* allocate (if needed) and initialize a link address */
	VERIFY(!(dl_if->dl_if_flags & DLIF_REUSE) || ifp->if_lladdr != NULL);
	ifa = dlil_alloc_lladdr(ifp, ll_addr);
	if (ifa == NULL) {
		ifnet_lock_done(ifp);
		ifnet_head_done();
		dlil_if_unlock();
		return (ENOBUFS);
	}

	VERIFY(ifnet_addrs[ifp->if_index - 1] == NULL);
	ifnet_addrs[ifp->if_index - 1] = ifa;

	/* make this address the first on the list */
	IFA_LOCK(ifa);
	/* hold a reference for ifnet_addrs[] */
	IFA_ADDREF_LOCKED(ifa);
	/* if_attach_link_ifa() holds a reference for ifa_link */
	if_attach_link_ifa(ifp, ifa);
	IFA_UNLOCK(ifa);

#if CONFIG_MACF_NET
	mac_ifnet_label_associate(ifp);
#endif

	TAILQ_INSERT_TAIL(&ifnet_head, ifp, if_link);
	ifindex2ifnet[ifp->if_index] = ifp;

	/* Hold a reference to the underlying dlil_ifnet */
	ifnet_reference(ifp);

	/* Clear stats (save and restore other fields that we care about) */
	if_data_saved = ifp->if_data;
	bzero(&ifp->if_data, sizeof (ifp->if_data));
	ifp->if_data.ifi_type = if_data_saved.ifi_type;
	ifp->if_data.ifi_typelen = if_data_saved.ifi_typelen;
	ifp->if_data.ifi_physical = if_data_saved.ifi_physical;
	ifp->if_data.ifi_addrlen = if_data_saved.ifi_addrlen;
	ifp->if_data.ifi_hdrlen = if_data_saved.ifi_hdrlen;
	ifp->if_data.ifi_mtu = if_data_saved.ifi_mtu;
	ifp->if_data.ifi_baudrate = if_data_saved.ifi_baudrate;
	ifp->if_data.ifi_hwassist = if_data_saved.ifi_hwassist;
	ifp->if_data.ifi_tso_v4_mtu = if_data_saved.ifi_tso_v4_mtu;
	ifp->if_data.ifi_tso_v6_mtu = if_data_saved.ifi_tso_v6_mtu;
	ifnet_touch_lastchange(ifp);

	VERIFY(ifp->if_output_sched_model == IFNET_SCHED_MODEL_NORMAL ||
	    ifp->if_output_sched_model == IFNET_SCHED_MODEL_DRIVER_MANAGED);

	/* By default, use SFB and enable flow advisory */
	sflags = PKTSCHEDF_QALG_SFB;
	if (if_flowadv)
		sflags |= PKTSCHEDF_QALG_FLOWCTL;

	/* Initialize transmit queue(s) */
	err = ifclassq_setup(ifp, sflags, (dl_if->dl_if_flags & DLIF_REUSE));
	if (err != 0) {
		panic_plain("%s: ifp=%p couldn't initialize transmit queue; "
		    "err=%d", __func__, ifp, err);
		/* NOTREACHED */
	}

	/* Sanity checks on the input thread storage */
	dl_inp = &dl_if->dl_if_inpstorage;
	bzero(&dl_inp->stats, sizeof (dl_inp->stats));
	VERIFY(dl_inp->input_waiting == 0);
	VERIFY(dl_inp->wtot == 0);
	VERIFY(dl_inp->ifp == NULL);
	VERIFY(qhead(&dl_inp->rcvq_pkts) == NULL && qempty(&dl_inp->rcvq_pkts));
	VERIFY(qlimit(&dl_inp->rcvq_pkts) == 0);
	VERIFY(!dl_inp->net_affinity);
	VERIFY(ifp->if_inp == NULL);
	VERIFY(dl_inp->input_thr == THREAD_NULL);
	VERIFY(dl_inp->wloop_thr == THREAD_NULL);
	VERIFY(dl_inp->poll_thr == THREAD_NULL);
	VERIFY(dl_inp->tag == 0);
	VERIFY(dl_inp->mode == IFNET_MODEL_INPUT_POLL_OFF);
	bzero(&dl_inp->tstats, sizeof (dl_inp->tstats));
	bzero(&dl_inp->pstats, sizeof (dl_inp->pstats));
	bzero(&dl_inp->sstats, sizeof (dl_inp->sstats));
#if IFNET_INPUT_SANITY_CHK
	VERIFY(dl_inp->input_mbuf_cnt == 0);
#endif /* IFNET_INPUT_SANITY_CHK */

	/*
	 * A specific DLIL input thread is created per Ethernet/cellular
	 * interface or for an interface which supports opportunistic
	 * input polling. Pseudo interfaces or other types of interfaces
	 * use the main input thread instead.
	 */
	if ((net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) ||
	    ifp->if_type == IFT_ETHER || ifp->if_type == IFT_CELLULAR) {
		ifp->if_inp = dl_inp;
		err = dlil_create_input_thread(ifp, ifp->if_inp);
		if (err != 0) {
			panic_plain("%s: ifp=%p couldn't get an input thread; "
			    "err=%d", __func__, ifp, err);
			/* NOTREACHED */
		}
	}

	/*
	 * If the driver supports the new transmit model, calculate flow hash
	 * and create a workloop starter thread to invoke the if_start callback
	 * where the packets may be dequeued and transmitted.
	 */
	if (ifp->if_eflags & IFEF_TXSTART) {
		ifp->if_flowhash = ifnet_calc_flowhash(ifp);
		VERIFY(ifp->if_flowhash != 0);

		VERIFY(ifp->if_start != NULL);
		VERIFY(ifp->if_start_thread == THREAD_NULL);

		ifnet_set_start_cycle(ifp, NULL);
		ifp->if_start_active = 0;
		ifp->if_start_req = 0;
		ifp->if_start_flags = 0;
		if ((err = kernel_thread_start(ifnet_start_thread_fn, ifp,
		    &ifp->if_start_thread)) != KERN_SUCCESS) {
			panic_plain("%s: ifp=%p couldn't get a start thread; "
			    "err=%d", __func__, ifp, err);
			/* NOTREACHED */
		}
		ml_thread_policy(ifp->if_start_thread, MACHINE_GROUP,
		    (MACHINE_NETWORK_GROUP|MACHINE_NETWORK_WORKLOOP));
	} else {
		ifp->if_flowhash = 0;
	}

	/*
	 * If the driver supports the new receive model, create a poller
	 * thread to invoke if_input_poll callback where the packets may
	 * be dequeued from the driver and processed for reception.
	 */
	if (ifp->if_eflags & IFEF_RXPOLL) {
		VERIFY(ifp->if_input_poll != NULL);
		VERIFY(ifp->if_input_ctl != NULL);
		VERIFY(ifp->if_poll_thread == THREAD_NULL);

		ifnet_set_poll_cycle(ifp, NULL);
		ifp->if_poll_update = 0;
		ifp->if_poll_active = 0;
		ifp->if_poll_req = 0;
		if ((err = kernel_thread_start(ifnet_poll_thread_fn, ifp,
		    &ifp->if_poll_thread)) != KERN_SUCCESS) {
			panic_plain("%s: ifp=%p couldn't get a poll thread; "
			    "err=%d", __func__, ifp, err);
			/* NOTREACHED */
		}
		ml_thread_policy(ifp->if_poll_thread, MACHINE_GROUP,
		    (MACHINE_NETWORK_GROUP|MACHINE_NETWORK_WORKLOOP));
	}

	VERIFY(ifp->if_desc.ifd_maxlen == IF_DESCSIZE);
	VERIFY(ifp->if_desc.ifd_len == 0);
	VERIFY(ifp->if_desc.ifd_desc != NULL);

	/* Record attach PC stacktrace */
	ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_attach);

	ifp->if_updatemcasts = 0;
	if (!LIST_EMPTY(&ifp->if_multiaddrs)) {
		struct ifmultiaddr *ifma;
		LIST_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
			IFMA_LOCK(ifma);
			if (ifma->ifma_addr->sa_family == AF_LINK ||
			    ifma->ifma_addr->sa_family == AF_UNSPEC)
				ifp->if_updatemcasts++;
			IFMA_UNLOCK(ifma);
		}

		printf("%s: attached with %d suspended link-layer multicast "
		    "membership(s)\n", if_name(ifp),
		    ifp->if_updatemcasts);
	}

	/* Clear logging parameters */
	bzero(&ifp->if_log, sizeof (ifp->if_log));
	ifp->if_fg_sendts = 0;

	VERIFY(ifp->if_delegated.ifp == NULL);
	VERIFY(ifp->if_delegated.type == 0);
	VERIFY(ifp->if_delegated.family == 0);
	VERIFY(ifp->if_delegated.subfamily == 0);

	ifnet_lock_done(ifp);
	ifnet_head_done();

	lck_mtx_lock(&ifp->if_cached_route_lock);
	/* Enable forwarding cached route */
	ifp->if_fwd_cacheok = 1;
	/* Clean up any existing cached routes */
	ROUTE_RELEASE(&ifp->if_fwd_route);
6d2010ae 5124 bzero(&ifp->if_fwd_route, sizeof (ifp->if_fwd_route));
39236c6e 5125 ROUTE_RELEASE(&ifp->if_src_route);
6d2010ae 5126 bzero(&ifp->if_src_route, sizeof (ifp->if_src_route));
39236c6e 5127 ROUTE_RELEASE(&ifp->if_src_route6);
6d2010ae
A
5128 bzero(&ifp->if_src_route6, sizeof (ifp->if_src_route6));
5129 lck_mtx_unlock(&ifp->if_cached_route_lock);
5130
5131 ifnet_llreach_ifattach(ifp, (dl_if->dl_if_flags & DLIF_REUSE));
5132
b0d623f7 5133 /*
6d2010ae
A
5134 * Allocate and attach IGMPv3/MLDv2 interface specific variables
5135 * and trees; do this before the ifnet is marked as attached.
5136 * The ifnet keeps the reference to the info structures even after
5137 * the ifnet is detached, since the network-layer records still
5138 * refer to the info structures even after that. This also
5139 * makes it possible for them to still function after the ifnet
5140 * is recycled or reattached.
b0d623f7 5141 */
6d2010ae
A
5142#if INET
5143 if (IGMP_IFINFO(ifp) == NULL) {
5144 IGMP_IFINFO(ifp) = igmp_domifattach(ifp, M_WAITOK);
5145 VERIFY(IGMP_IFINFO(ifp) != NULL);
5146 } else {
5147 VERIFY(IGMP_IFINFO(ifp)->igi_ifp == ifp);
5148 igmp_domifreattach(IGMP_IFINFO(ifp));
5149 }
5150#endif /* INET */
5151#if INET6
5152 if (MLD_IFINFO(ifp) == NULL) {
5153 MLD_IFINFO(ifp) = mld_domifattach(ifp, M_WAITOK);
5154 VERIFY(MLD_IFINFO(ifp) != NULL);
5155 } else {
5156 VERIFY(MLD_IFINFO(ifp)->mli_ifp == ifp);
5157 mld_domifreattach(MLD_IFINFO(ifp));
5158 }
5159#endif /* INET6 */
b0d623f7 5160
39236c6e
A
5161 VERIFY(ifp->if_data_threshold == 0);
5162
6d2010ae
A
5163 /*
5164 * Finally, mark this ifnet as attached.
5165 */
5166 lck_mtx_lock(rnh_lock);
5167 ifnet_lock_exclusive(ifp);
316670eb
A
5168 /* Initialize Link Quality Metric (loopback [lo0] is always good) */
5169 ifp->if_lqm = (ifp == lo_ifp) ? IFNET_LQM_THRESH_GOOD :
5170 IFNET_LQM_THRESH_UNKNOWN;
6d2010ae
A
5171 lck_mtx_lock_spin(&ifp->if_ref_lock);
5172 ifp->if_refflags = IFRF_ATTACHED;
5173 lck_mtx_unlock(&ifp->if_ref_lock);
d1ecb069 5174 if (net_rtref) {
6d2010ae
A
5175 /* boot-args override; enable idle notification */
5176 (void) ifnet_set_idle_flags_locked(ifp, IFRF_IDLE_NOTIFY,
d1ecb069 5177 IFRF_IDLE_NOTIFY);
6d2010ae
A
5178 } else {
5179 /* apply previous request(s) to set the idle flags, if any */
5180 (void) ifnet_set_idle_flags_locked(ifp, ifp->if_idle_new_flags,
5181 ifp->if_idle_new_flags_mask);
5182
d1ecb069 5183 }
6d2010ae
A
5184 ifnet_lock_done(ifp);
5185 lck_mtx_unlock(rnh_lock);
7ddcb079 5186 dlil_if_unlock();
6d2010ae
A
5187
5188#if PF
5189 /*
5190 * Attach packet filter to this interface, if enabled.
5191 */
5192 pf_ifnet_hook(ifp, 1);
5193#endif /* PF */
d1ecb069 5194
2d21ac55 5195 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_ATTACHED, NULL, 0);
1c79356b 5196
6d2010ae 5197 if (dlil_verbose) {
39236c6e 5198 printf("%s: attached%s\n", if_name(ifp),
6d2010ae
A
5199 (dl_if->dl_if_flags & DLIF_REUSE) ? " (recycled)" : "");
5200 }
5201
5202 return (0);
5203}
5204
5205/*
5206 * Prepare the storage for the first/permanent link address, which must
5207 * must have the same lifetime as the ifnet itself. Although the link
5208 * address gets removed from if_addrhead and ifnet_addrs[] at detach time,
5209 * its location in memory must never change as it may still be referred
5210 * to by some parts of the system afterwards (unfortunate implementation
5211 * artifacts inherited from BSD.)
5212 *
5213 * Caller must hold ifnet lock as writer.
5214 */
5215static struct ifaddr *
5216dlil_alloc_lladdr(struct ifnet *ifp, const struct sockaddr_dl *ll_addr)
5217{
5218 struct ifaddr *ifa, *oifa;
5219 struct sockaddr_dl *asdl, *msdl;
5220 char workbuf[IFNAMSIZ*2];
5221 int namelen, masklen, socksize;
5222 struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
5223
5224 ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_EXCLUSIVE);
5225 VERIFY(ll_addr == NULL || ll_addr->sdl_alen == ifp->if_addrlen);
5226
39236c6e
A
5227 namelen = snprintf(workbuf, sizeof (workbuf), "%s",
5228 if_name(ifp));
6d2010ae
A
5229 masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + namelen;
5230 socksize = masklen + ifp->if_addrlen;
5231#define ROUNDUP(a) (1 + (((a) - 1) | (sizeof (u_int32_t) - 1)))
5232 if ((u_int32_t)socksize < sizeof (struct sockaddr_dl))
5233		socksize = sizeof (struct sockaddr_dl);
5234 socksize = ROUNDUP(socksize);
5235#undef ROUNDUP
5236
5237 ifa = ifp->if_lladdr;
5238 if (socksize > DLIL_SDLMAXLEN ||
5239 (ifa != NULL && ifa != &dl_if->dl_if_lladdr.ifa)) {
5240 /*
5241 * Rare, but in the event that the link address requires
5242 * more storage space than DLIL_SDLMAXLEN, allocate the
5243		 * largest possible storage for address and mask, such
5244 * that we can reuse the same space when if_addrlen grows.
5245 * This same space will be used when if_addrlen shrinks.
5246 */
5247 if (ifa == NULL || ifa == &dl_if->dl_if_lladdr.ifa) {
5248 int ifasize = sizeof (*ifa) + 2 * SOCK_MAXADDRLEN;
5249 ifa = _MALLOC(ifasize, M_IFADDR, M_WAITOK | M_ZERO);
5250 if (ifa == NULL)
5251 return (NULL);
5252 ifa_lock_init(ifa);
5253 /* Don't set IFD_ALLOC, as this is permanent */
5254 ifa->ifa_debug = IFD_LINK;
5255 }
5256 IFA_LOCK(ifa);
5257 /* address and mask sockaddr_dl locations */
5258 asdl = (struct sockaddr_dl *)(ifa + 1);
5259 bzero(asdl, SOCK_MAXADDRLEN);
316670eb
A
5260 msdl = (struct sockaddr_dl *)(void *)
5261 ((char *)asdl + SOCK_MAXADDRLEN);
6d2010ae
A
5262 bzero(msdl, SOCK_MAXADDRLEN);
5263 } else {
5264 VERIFY(ifa == NULL || ifa == &dl_if->dl_if_lladdr.ifa);
5265 /*
5266 * Use the storage areas for address and mask within the
5267 * dlil_ifnet structure. This is the most common case.
5268 */
5269 if (ifa == NULL) {
5270 ifa = &dl_if->dl_if_lladdr.ifa;
5271 ifa_lock_init(ifa);
5272 /* Don't set IFD_ALLOC, as this is permanent */
5273 ifa->ifa_debug = IFD_LINK;
5274 }
5275 IFA_LOCK(ifa);
5276 /* address and mask sockaddr_dl locations */
316670eb 5277 asdl = (struct sockaddr_dl *)(void *)&dl_if->dl_if_lladdr.asdl;
6d2010ae 5278 bzero(asdl, sizeof (dl_if->dl_if_lladdr.asdl));
316670eb 5279 msdl = (struct sockaddr_dl *)(void *)&dl_if->dl_if_lladdr.msdl;
6d2010ae
A
5280 bzero(msdl, sizeof (dl_if->dl_if_lladdr.msdl));
5281 }
5282
5283 /* hold a permanent reference for the ifnet itself */
5284 IFA_ADDREF_LOCKED(ifa);
5285 oifa = ifp->if_lladdr;
5286 ifp->if_lladdr = ifa;
5287
5288 VERIFY(ifa->ifa_debug == IFD_LINK);
5289 ifa->ifa_ifp = ifp;
5290 ifa->ifa_rtrequest = link_rtrequest;
5291 ifa->ifa_addr = (struct sockaddr *)asdl;
5292 asdl->sdl_len = socksize;
5293 asdl->sdl_family = AF_LINK;
5294 bcopy(workbuf, asdl->sdl_data, namelen);
5295 asdl->sdl_nlen = namelen;
5296 asdl->sdl_index = ifp->if_index;
5297 asdl->sdl_type = ifp->if_type;
5298 if (ll_addr != NULL) {
5299 asdl->sdl_alen = ll_addr->sdl_alen;
5300 bcopy(CONST_LLADDR(ll_addr), LLADDR(asdl), asdl->sdl_alen);
5301 } else {
5302 asdl->sdl_alen = 0;
5303 }
5304	ifa->ifa_netmask = (struct sockaddr *)msdl;
5305 msdl->sdl_len = masklen;
5306 while (namelen != 0)
5307 msdl->sdl_data[--namelen] = 0xff;
5308 IFA_UNLOCK(ifa);
5309
5310 if (oifa != NULL)
5311 IFA_REMREF(oifa);
5312
5313 return (ifa);
5314}
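/*
 * Worked example of the sizing math above (illustrative sketch, not
 * part of dlil.c): for a hypothetical Ethernet interface "en0" with
 * if_addrlen = 6, and assuming the usual BSD sockaddr_dl layout with
 * an 8-byte fixed header ahead of sdl_data[]:
 *
 *	namelen  = strlen("en0")                        = 3
 *	masklen  = offsetof(..., sdl_data[0]) + namelen = 8 + 3 = 11
 *	socksize = masklen + if_addrlen                 = 11 + 6 = 17
 *
 * Since 17 is smaller than sizeof (struct sockaddr_dl), socksize is
 * raised to that and ROUNDUP() then pads it to a 4-byte boundary.
 * The netmask is just namelen bytes of 0xff written backwards into
 * sdl_data[], which is what the while loop above does.
 */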
5315
5316static void
5317if_purgeaddrs(struct ifnet *ifp)
5318{
5319#if INET
5320 in_purgeaddrs(ifp);
5321#endif /* INET */
5322#if INET6
5323 in6_purgeaddrs(ifp);
5324#endif /* INET6 */
1c79356b
A
5325}
5326
2d21ac55 5327errno_t
6d2010ae 5328ifnet_detach(ifnet_t ifp)
1c79356b 5329{
39236c6e
A
5330 struct ifnet *delegated_ifp;
5331
6d2010ae
A
5332 if (ifp == NULL)
5333 return (EINVAL);
5334
6d2010ae 5335 lck_mtx_lock(rnh_lock);
316670eb 5336 ifnet_head_lock_exclusive();
91447636 5337 ifnet_lock_exclusive(ifp);
6d2010ae
A
5338
5339 /*
5340 * Check to see if this interface has previously triggered
5341 * aggressive protocol draining; if so, decrement the global
5342 * refcnt and clear PR_AGGDRAIN on the route domain if
5343 * there are no more of such an interface around.
5344 */
5345 (void) ifnet_set_idle_flags_locked(ifp, 0, ~0);
5346
5347 lck_mtx_lock_spin(&ifp->if_ref_lock);
5348 if (!(ifp->if_refflags & IFRF_ATTACHED)) {
5349 lck_mtx_unlock(&ifp->if_ref_lock);
5350 ifnet_lock_done(ifp);
6d2010ae 5351 ifnet_head_done();
13f56ec4 5352 lck_mtx_unlock(rnh_lock);
6d2010ae
A
5353 return (EINVAL);
5354 } else if (ifp->if_refflags & IFRF_DETACHING) {
91447636 5355 /* Interface has already been detached */
6d2010ae 5356 lck_mtx_unlock(&ifp->if_ref_lock);
91447636 5357 ifnet_lock_done(ifp);
6d2010ae 5358 ifnet_head_done();
13f56ec4 5359 lck_mtx_unlock(rnh_lock);
6d2010ae 5360 return (ENXIO);
55e303ae 5361 }
6d2010ae
A
5362 /* Indicate this interface is being detached */
5363 ifp->if_refflags &= ~IFRF_ATTACHED;
5364 ifp->if_refflags |= IFRF_DETACHING;
5365 lck_mtx_unlock(&ifp->if_ref_lock);
5366
5367 if (dlil_verbose)
39236c6e 5368 printf("%s: detaching\n", if_name(ifp));
6d2010ae 5369
91447636 5370 /*
6d2010ae
A
5371 * Remove ifnet from the ifnet_head, ifindex2ifnet[]; it will
5372 * no longer be visible during lookups from this point.
91447636 5373 */
6d2010ae
A
5374 VERIFY(ifindex2ifnet[ifp->if_index] == ifp);
5375 TAILQ_REMOVE(&ifnet_head, ifp, if_link);
5376 ifp->if_link.tqe_next = NULL;
5377 ifp->if_link.tqe_prev = NULL;
5378 ifindex2ifnet[ifp->if_index] = NULL;
5379
5380 /* Record detach PC stacktrace */
5381 ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_detach);
5382
39236c6e
A
5383 /* Clear logging parameters */
5384 bzero(&ifp->if_log, sizeof (ifp->if_log));
5385
5386 /* Clear delegated interface info (reference released below) */
5387 delegated_ifp = ifp->if_delegated.ifp;
5388 bzero(&ifp->if_delegated, sizeof (ifp->if_delegated));
5389
91447636 5390 ifnet_lock_done(ifp);
6d2010ae 5391 ifnet_head_done();
13f56ec4 5392 lck_mtx_unlock(rnh_lock);
6d2010ae 5393
39236c6e
A
5394 /* Release reference held on the delegated interface */
5395 if (delegated_ifp != NULL)
5396 ifnet_release(delegated_ifp);
5397
316670eb
A
5398 /* Reset Link Quality Metric (unless loopback [lo0]) */
5399 if (ifp != lo_ifp)
5400 if_lqm_update(ifp, IFNET_LQM_THRESH_OFF);
5401
5402 /* Reset TCP local statistics */
5403 if (ifp->if_tcp_stat != NULL)
5404 bzero(ifp->if_tcp_stat, sizeof(*ifp->if_tcp_stat));
5405
5406 /* Reset UDP local statistics */
5407 if (ifp->if_udp_stat != NULL)
5408 bzero(ifp->if_udp_stat, sizeof(*ifp->if_udp_stat));
5409
2d21ac55
A
5410 /* Let BPF know we're detaching */
5411 bpfdetach(ifp);
6d2010ae
A
5412
5413 /* Mark the interface as DOWN */
5414 if_down(ifp);
5415
316670eb
A
5416 /* Drain send queue */
5417 ifclassq_teardown(ifp);
5418
6d2010ae
A
5419 /* Disable forwarding cached route */
5420 lck_mtx_lock(&ifp->if_cached_route_lock);
5421 ifp->if_fwd_cacheok = 0;
5422 lck_mtx_unlock(&ifp->if_cached_route_lock);
5423
39236c6e 5424 ifp->if_data_threshold = 0;
d1ecb069 5425 /*
6d2010ae
A
5426 * Drain any deferred IGMPv3/MLDv2 query responses, but keep the
5427 * references to the info structures and leave them attached to
5428 * this ifnet.
d1ecb069 5429 */
6d2010ae
A
5430#if INET
5431 igmp_domifdetach(ifp);
5432#endif /* INET */
5433#if INET6
5434 mld_domifdetach(ifp);
5435#endif /* INET6 */
5436
5437 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHING, NULL, 0);
5438
5439 /* Let worker thread take care of the rest, to avoid reentrancy */
7ddcb079 5440 dlil_if_lock();
6d2010ae 5441 ifnet_detaching_enqueue(ifp);
7ddcb079 5442 dlil_if_unlock();
6d2010ae
A
5443
5444 return (0);
5445}
5446
5447static void
5448ifnet_detaching_enqueue(struct ifnet *ifp)
5449{
7ddcb079 5450 dlil_if_lock_assert();
6d2010ae
A
5451
5452 ++ifnet_detaching_cnt;
5453 VERIFY(ifnet_detaching_cnt != 0);
5454 TAILQ_INSERT_TAIL(&ifnet_detaching_head, ifp, if_detaching_link);
5455 wakeup((caddr_t)&ifnet_delayed_run);
5456}
5457
5458static struct ifnet *
5459ifnet_detaching_dequeue(void)
5460{
5461 struct ifnet *ifp;
5462
7ddcb079 5463 dlil_if_lock_assert();
6d2010ae
A
5464
5465 ifp = TAILQ_FIRST(&ifnet_detaching_head);
5466 VERIFY(ifnet_detaching_cnt != 0 || ifp == NULL);
5467 if (ifp != NULL) {
5468 VERIFY(ifnet_detaching_cnt != 0);
5469 --ifnet_detaching_cnt;
5470 TAILQ_REMOVE(&ifnet_detaching_head, ifp, if_detaching_link);
5471 ifp->if_detaching_link.tqe_next = NULL;
5472 ifp->if_detaching_link.tqe_prev = NULL;
5473 }
5474 return (ifp);
5475}
5476
316670eb
A
5477static int
5478ifnet_detacher_thread_cont(int err)
6d2010ae 5479{
316670eb 5480#pragma unused(err)
6d2010ae
A
5481 struct ifnet *ifp;
5482
5483 for (;;) {
316670eb 5484 dlil_if_lock_assert();
6d2010ae 5485 while (ifnet_detaching_cnt == 0) {
316670eb
A
5486 (void) msleep0(&ifnet_delayed_run, &dlil_ifnet_lock,
5487 (PZERO - 1), "ifnet_detacher_cont", 0,
5488 ifnet_detacher_thread_cont);
5489 /* NOTREACHED */
6d2010ae
A
5490 }
5491
5492 VERIFY(TAILQ_FIRST(&ifnet_detaching_head) != NULL);
5493
5494 /* Take care of detaching ifnet */
5495 ifp = ifnet_detaching_dequeue();
316670eb
A
5496 if (ifp != NULL) {
5497 dlil_if_unlock();
6d2010ae 5498 ifnet_detach_final(ifp);
316670eb
A
5499 dlil_if_lock();
5500 }
55e303ae 5501 }
316670eb
A
5502 /* NOTREACHED */
5503 return (0);
5504}
5505
5506static void
5507ifnet_detacher_thread_func(void *v, wait_result_t w)
5508{
5509#pragma unused(v, w)
5510 dlil_if_lock();
5511 (void) msleep0(&ifnet_delayed_run, &dlil_ifnet_lock,
5512 (PZERO - 1), "ifnet_detacher", 0, ifnet_detacher_thread_cont);
5513 /*
5514 * msleep0() shouldn't have returned as PCATCH was not set;
5515 * therefore assert in this case.
5516 */
5517 dlil_if_unlock();
5518 VERIFY(0);
6d2010ae 5519}
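/*
 * Illustrative sketch of the continuation idiom above (not part of
 * dlil.c): when msleep0() is given a continuation function, it never
 * returns to its caller; the thread's kernel stack is discarded while
 * it sleeps and, on wakeup, the continuation runs from its first
 * statement.  That is why ifnet_detacher_thread_cont() re-enters its
 * for (;;) loop from the top and re-derives everything from globals
 * instead of locals.  A minimal analogue, with hypothetical names:
 *
 *	static int
 *	worker_cont(int err)
 *	{
 *		for (;;) {
 *			while (work_queue_empty()) {
 *				(void) msleep0(&work_chan, &work_lock,
 *				    (PZERO - 1), "worker", 0, worker_cont);
 *			}
 *			do_one_work_item();
 *		}
 *	}
 *
 * The msleep0() call above never returns; after the wakeup, control
 * reappears at the top of worker_cont().
 */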
b0d623f7 5520
6d2010ae
A
5521static void
5522ifnet_detach_final(struct ifnet *ifp)
5523{
5524 struct ifnet_filter *filter, *filter_next;
5525 struct ifnet_filter_head fhead;
316670eb 5526 struct dlil_threading_info *inp;
6d2010ae
A
5527 struct ifaddr *ifa;
5528 ifnet_detached_func if_free;
5529 int i;
5530
5531 lck_mtx_lock(&ifp->if_ref_lock);
5532 if (!(ifp->if_refflags & IFRF_DETACHING)) {
5533 panic("%s: flags mismatch (detaching not set) ifp=%p",
5534 __func__, ifp);
5535 /* NOTREACHED */
5536 }
5537
316670eb
A
5538 /*
5539 * Wait until the existing IO references get released
5540 * before we proceed with ifnet_detach. This is not a
5541 * common case, so block without using a continuation.
b0d623f7 5542 */
6d2010ae 5543 while (ifp->if_refio > 0) {
39236c6e
A
5544 printf("%s: Waiting for IO references on %s interface "
5545 "to be released\n", __func__, if_name(ifp));
6d2010ae
A
5546 (void) msleep(&(ifp->if_refio), &ifp->if_ref_lock,
5547 (PZERO - 1), "ifnet_ioref_wait", NULL);
5548 }
5549 lck_mtx_unlock(&ifp->if_ref_lock);
5550
5551 /* Detach interface filters */
5552 lck_mtx_lock(&ifp->if_flt_lock);
5553 if_flt_monitor_enter(ifp);
b0d623f7 5554
6d2010ae 5555 lck_mtx_assert(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
91447636
A
5556 fhead = ifp->if_flt_head;
5557 TAILQ_INIT(&ifp->if_flt_head);
2d21ac55 5558
6d2010ae
A
5559 for (filter = TAILQ_FIRST(&fhead); filter; filter = filter_next) {
5560 filter_next = TAILQ_NEXT(filter, filt_next);
5561 lck_mtx_unlock(&ifp->if_flt_lock);
5562
5563 dlil_detach_filter_internal(filter, 1);
5564 lck_mtx_lock(&ifp->if_flt_lock);
5565 }
5566 if_flt_monitor_leave(ifp);
5567 lck_mtx_unlock(&ifp->if_flt_lock);
5568
5569 /* Tell upper layers to drop their network addresses */
5570 if_purgeaddrs(ifp);
5571
5572 ifnet_lock_exclusive(ifp);
5573
5574	/* Unplumb all protocols */
5575 for (i = 0; i < PROTO_HASH_SLOTS; i++) {
5576 struct if_proto *proto;
5577
5578 proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
5579 while (proto != NULL) {
5580 protocol_family_t family = proto->protocol_family;
5581 ifnet_lock_done(ifp);
5582 proto_unplumb(family, ifp);
5583 ifnet_lock_exclusive(ifp);
5584 proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
5585 }
5586 /* There should not be any protocols left */
5587 VERIFY(SLIST_EMPTY(&ifp->if_proto_hash[i]));
5588 }
5589 zfree(dlif_phash_zone, ifp->if_proto_hash);
5590 ifp->if_proto_hash = NULL;
5591
5592 /* Detach (permanent) link address from if_addrhead */
5593 ifa = TAILQ_FIRST(&ifp->if_addrhead);
5594 VERIFY(ifnet_addrs[ifp->if_index - 1] == ifa);
5595 IFA_LOCK(ifa);
5596 if_detach_link_ifa(ifp, ifa);
5597 IFA_UNLOCK(ifa);
5598
5599 /* Remove (permanent) link address from ifnet_addrs[] */
5600 IFA_REMREF(ifa);
5601 ifnet_addrs[ifp->if_index - 1] = NULL;
5602
5603 /* This interface should not be on {ifnet_head,detaching} */
5604 VERIFY(ifp->if_link.tqe_next == NULL);
5605 VERIFY(ifp->if_link.tqe_prev == NULL);
5606 VERIFY(ifp->if_detaching_link.tqe_next == NULL);
5607 VERIFY(ifp->if_detaching_link.tqe_prev == NULL);
5608
5609 /* Prefix list should be empty by now */
5610 VERIFY(TAILQ_EMPTY(&ifp->if_prefixhead));
5611
5612 /* The slot should have been emptied */
5613 VERIFY(ifindex2ifnet[ifp->if_index] == NULL);
5614
5615 /* There should not be any addresses left */
5616 VERIFY(TAILQ_EMPTY(&ifp->if_addrhead));
1c79356b 5617
316670eb
A
5618 /*
5619 * Signal the starter thread to terminate itself.
5620 */
5621 if (ifp->if_start_thread != THREAD_NULL) {
5622 lck_mtx_lock_spin(&ifp->if_start_lock);
39236c6e 5623 ifp->if_start_flags = 0;
316670eb
A
5624 ifp->if_start_thread = THREAD_NULL;
5625 wakeup_one((caddr_t)&ifp->if_start_thread);
5626 lck_mtx_unlock(&ifp->if_start_lock);
5627 }
5628
5629 /*
5630 * Signal the poller thread to terminate itself.
5631 */
5632 if (ifp->if_poll_thread != THREAD_NULL) {
5633 lck_mtx_lock_spin(&ifp->if_poll_lock);
5634 ifp->if_poll_thread = THREAD_NULL;
5635 wakeup_one((caddr_t)&ifp->if_poll_thread);
5636 lck_mtx_unlock(&ifp->if_poll_lock);
5637 }
5638
2d21ac55
A
5639 /*
5640 * If thread affinity was set for the workloop thread, we will need
5641 * to tear down the affinity and release the extra reference count
316670eb
A
5642 * taken at attach time. Does not apply to lo0 or other interfaces
5643 * without dedicated input threads.
2d21ac55 5644 */
316670eb
A
5645 if ((inp = ifp->if_inp) != NULL) {
5646 VERIFY(inp != dlil_main_input_thread);
5647
5648 if (inp->net_affinity) {
5649 struct thread *tp, *wtp, *ptp;
5650
5651 lck_mtx_lock_spin(&inp->input_lck);
5652 wtp = inp->wloop_thr;
5653 inp->wloop_thr = THREAD_NULL;
5654 ptp = inp->poll_thr;
5655 inp->poll_thr = THREAD_NULL;
5656 tp = inp->input_thr; /* don't nullify now */
5657 inp->tag = 0;
5658 inp->net_affinity = FALSE;
5659 lck_mtx_unlock(&inp->input_lck);
5660
5661 /* Tear down poll thread affinity */
5662 if (ptp != NULL) {
5663 VERIFY(ifp->if_eflags & IFEF_RXPOLL);
5664 (void) dlil_affinity_set(ptp,
5665 THREAD_AFFINITY_TAG_NULL);
5666 thread_deallocate(ptp);
6d2010ae 5667 }
2d21ac55 5668
2d21ac55 5669 /* Tear down workloop thread affinity */
316670eb
A
5670 if (wtp != NULL) {
5671 (void) dlil_affinity_set(wtp,
2d21ac55 5672 THREAD_AFFINITY_TAG_NULL);
316670eb 5673 thread_deallocate(wtp);
2d21ac55 5674 }
1c79356b 5675
316670eb 5676 /* Tear down DLIL input thread affinity */
2d21ac55
A
5677 (void) dlil_affinity_set(tp, THREAD_AFFINITY_TAG_NULL);
5678 thread_deallocate(tp);
9bccf70c 5679 }
1c79356b 5680
316670eb
A
5681 /* disassociate ifp DLIL input thread */
5682 ifp->if_inp = NULL;
6d2010ae 5683
316670eb
A
5684 lck_mtx_lock_spin(&inp->input_lck);
5685 inp->input_waiting |= DLIL_INPUT_TERMINATE;
5686 if (!(inp->input_waiting & DLIL_INPUT_RUNNING)) {
5687 wakeup_one((caddr_t)&inp->input_waiting);
91447636 5688 }
316670eb 5689 lck_mtx_unlock(&inp->input_lck);
55e303ae 5690 }
6d2010ae
A
5691
5692 /* The driver might unload, so point these to ourselves */
5693 if_free = ifp->if_free;
5694 ifp->if_output = ifp_if_output;
316670eb
A
5695 ifp->if_pre_enqueue = ifp_if_output;
5696 ifp->if_start = ifp_if_start;
5697 ifp->if_output_ctl = ifp_if_ctl;
5698 ifp->if_input_poll = ifp_if_input_poll;
5699 ifp->if_input_ctl = ifp_if_ctl;
6d2010ae
A
5700 ifp->if_ioctl = ifp_if_ioctl;
5701 ifp->if_set_bpf_tap = ifp_if_set_bpf_tap;
5702 ifp->if_free = ifp_if_free;
5703 ifp->if_demux = ifp_if_demux;
5704 ifp->if_event = ifp_if_event;
39236c6e
A
5705 ifp->if_framer_legacy = ifp_if_framer;
5706 ifp->if_framer = ifp_if_framer_extended;
6d2010ae
A
5707 ifp->if_add_proto = ifp_if_add_proto;
5708 ifp->if_del_proto = ifp_if_del_proto;
5709 ifp->if_check_multi = ifp_if_check_multi;
5710
316670eb
A
5711 /* wipe out interface description */
5712 VERIFY(ifp->if_desc.ifd_maxlen == IF_DESCSIZE);
5713 ifp->if_desc.ifd_len = 0;
5714 VERIFY(ifp->if_desc.ifd_desc != NULL);
5715 bzero(ifp->if_desc.ifd_desc, IF_DESCSIZE);
5716
39236c6e
A
5717 /* there shouldn't be any delegation by now */
5718 VERIFY(ifp->if_delegated.ifp == NULL);
5719 VERIFY(ifp->if_delegated.type == 0);
5720 VERIFY(ifp->if_delegated.family == 0);
5721 VERIFY(ifp->if_delegated.subfamily == 0);
5722
6d2010ae
A
5723 ifnet_lock_done(ifp);
5724
5725#if PF
5726 /*
5727 * Detach this interface from packet filter, if enabled.
5728 */
5729 pf_ifnet_hook(ifp, 0);
5730#endif /* PF */
5731
5732 /* Filter list should be empty */
5733 lck_mtx_lock_spin(&ifp->if_flt_lock);
5734 VERIFY(TAILQ_EMPTY(&ifp->if_flt_head));
5735 VERIFY(ifp->if_flt_busy == 0);
5736 VERIFY(ifp->if_flt_waiters == 0);
5737 lck_mtx_unlock(&ifp->if_flt_lock);
5738
316670eb
A
5739 /* Last chance to drain send queue */
5740 if_qflush(ifp, 0);
5741
6d2010ae
A
5742 /* Last chance to cleanup any cached route */
5743 lck_mtx_lock(&ifp->if_cached_route_lock);
5744 VERIFY(!ifp->if_fwd_cacheok);
39236c6e 5745 ROUTE_RELEASE(&ifp->if_fwd_route);
6d2010ae 5746 bzero(&ifp->if_fwd_route, sizeof (ifp->if_fwd_route));
39236c6e 5747 ROUTE_RELEASE(&ifp->if_src_route);
6d2010ae 5748 bzero(&ifp->if_src_route, sizeof (ifp->if_src_route));
39236c6e 5749 ROUTE_RELEASE(&ifp->if_src_route6);
6d2010ae
A
5750 bzero(&ifp->if_src_route6, sizeof (ifp->if_src_route6));
5751 lck_mtx_unlock(&ifp->if_cached_route_lock);
5752
39236c6e
A
5753 VERIFY(ifp->if_data_threshold == 0);
5754
6d2010ae
A
5755 ifnet_llreach_ifdetach(ifp);
5756
5757 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHED, NULL, 0);
5758
5759 if (if_free != NULL)
5760 if_free(ifp);
5761
5762 /*
5763 * Finally, mark this ifnet as detached.
5764 */
5765 lck_mtx_lock_spin(&ifp->if_ref_lock);
5766 if (!(ifp->if_refflags & IFRF_DETACHING)) {
5767 panic("%s: flags mismatch (detaching not set) ifp=%p",
5768 __func__, ifp);
5769 /* NOTREACHED */
55e303ae 5770 }
6d2010ae
A
5771 ifp->if_refflags &= ~IFRF_DETACHING;
5772 lck_mtx_unlock(&ifp->if_ref_lock);
5773
5774 if (dlil_verbose)
39236c6e 5775 printf("%s: detached\n", if_name(ifp));
6d2010ae
A
5776
5777 /* Release reference held during ifnet attach */
5778 ifnet_release(ifp);
1c79356b 5779}
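/*
 * Illustrative sketch of the stub-swap idiom used above (not part of
 * dlil.c): once the driver may unload, every callback pointer left in
 * the ifnet must reference code that stays resident, so any straggler
 * racing with the detach hits a harmless no-op instead of unmapped
 * text.  A generic analogue, with hypothetical names:
 *
 *	struct dev {
 *		int	(*d_op)(struct dev *);
 *	};
 *
 *	static int
 *	dev_op_dead(struct dev *d)
 *	{
 *		return (ENXIO);
 *	}
 *
 *	static void
 *	dev_detach(struct dev *d)
 *	{
 *		d->d_op = dev_op_dead;
 *	}
 *
 * Note how if_free is saved into a local before the swap above, since
 * it is the one driver callback that must still be invoked afterwards.
 */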
9bccf70c 5780
91447636 5781static errno_t
6d2010ae 5782ifp_if_output(struct ifnet *ifp, struct mbuf *m)
9bccf70c 5783{
6d2010ae
A
5784#pragma unused(ifp)
5785 m_freem(m);
5786 return (0);
9bccf70c
A
5787}
5788
316670eb
A
5789static void
5790ifp_if_start(struct ifnet *ifp)
5791{
5792 ifnet_purge(ifp);
5793}
5794
5795static void
5796ifp_if_input_poll(struct ifnet *ifp, u_int32_t flags, u_int32_t max_cnt,
5797 struct mbuf **m_head, struct mbuf **m_tail, u_int32_t *cnt, u_int32_t *len)
5798{
5799#pragma unused(ifp, flags, max_cnt)
5800 if (m_head != NULL)
5801 *m_head = NULL;
5802 if (m_tail != NULL)
5803 *m_tail = NULL;
5804 if (cnt != NULL)
5805 *cnt = 0;
5806 if (len != NULL)
5807 *len = 0;
5808}
5809
5810static errno_t
5811ifp_if_ctl(struct ifnet *ifp, ifnet_ctl_cmd_t cmd, u_int32_t arglen, void *arg)
5812{
5813#pragma unused(ifp, cmd, arglen, arg)
5814 return (EOPNOTSUPP);
5815}
5816
6d2010ae
A
5817static errno_t
5818ifp_if_demux(struct ifnet *ifp, struct mbuf *m, char *fh, protocol_family_t *pf)
9bccf70c 5819{
6d2010ae
A
5820#pragma unused(ifp, fh, pf)
5821 m_freem(m);
5822 return (EJUSTRETURN);
9bccf70c
A
5823}
5824
6d2010ae
A
5825static errno_t
5826ifp_if_add_proto(struct ifnet *ifp, protocol_family_t pf,
5827 const struct ifnet_demux_desc *da, u_int32_t dc)
9bccf70c 5828{
6d2010ae
A
5829#pragma unused(ifp, pf, da, dc)
5830 return (EINVAL);
9bccf70c
A
5831}
5832
91447636 5833static errno_t
6d2010ae 5834ifp_if_del_proto(struct ifnet *ifp, protocol_family_t pf)
9bccf70c 5835{
6d2010ae
A
5836#pragma unused(ifp, pf)
5837 return (EINVAL);
5838}
5839
5840static errno_t
5841ifp_if_check_multi(struct ifnet *ifp, const struct sockaddr *sa)
5842{
5843#pragma unused(ifp, sa)
5844 return (EOPNOTSUPP);
5845}
5846
39236c6e
A
5847static errno_t
5848ifp_if_framer(struct ifnet *ifp, struct mbuf **m,
5849 const struct sockaddr *sa, const char *ll, const char *t)
6d2010ae
A
5850{
5851#pragma unused(ifp, m, sa, ll, t)
39236c6e
A
5852 return (ifp_if_framer_extended(ifp, m, sa, ll, t, NULL, NULL));
5853}
5854
5855static errno_t
5856ifp_if_framer_extended(struct ifnet *ifp, struct mbuf **m,
5857 const struct sockaddr *sa, const char *ll, const char *t,
5858 u_int32_t *pre, u_int32_t *post)
5859{
5860#pragma unused(ifp, sa, ll, t)
6d2010ae
A
5861 m_freem(*m);
5862 *m = NULL;
39236c6e
A
5863
5864 if (pre != NULL)
5865 *pre = 0;
5866 if (post != NULL)
5867 *post = 0;
5868
6d2010ae
A
5869 return (EJUSTRETURN);
5870}
5871
316670eb 5872errno_t
6d2010ae
A
5873ifp_if_ioctl(struct ifnet *ifp, unsigned long cmd, void *arg)
5874{
5875#pragma unused(ifp, cmd, arg)
5876 return (EOPNOTSUPP);
5877}
5878
5879static errno_t
5880ifp_if_set_bpf_tap(struct ifnet *ifp, bpf_tap_mode tm, bpf_packet_func f)
5881{
5882#pragma unused(ifp, tm, f)
5883 /* XXX not sure what to do here */
5884 return (0);
5885}
5886
5887static void
5888ifp_if_free(struct ifnet *ifp)
5889{
5890#pragma unused(ifp)
5891}
5892
5893static void
5894ifp_if_event(struct ifnet *ifp, const struct kev_msg *e)
5895{
5896#pragma unused(ifp, e)
9bccf70c
A
5897}
5898
2d21ac55 5899__private_extern__
6d2010ae
A
5900int dlil_if_acquire(u_int32_t family, const void *uniqueid,
5901 size_t uniqueid_len, struct ifnet **ifp)
5902{
5903 struct ifnet *ifp1 = NULL;
5904 struct dlil_ifnet *dlifp1 = NULL;
5905 void *buf, *base, **pbuf;
5906 int ret = 0;
5907
7ddcb079 5908 dlil_if_lock();
6d2010ae
A
5909 TAILQ_FOREACH(dlifp1, &dlil_ifnet_head, dl_if_link) {
5910 ifp1 = (struct ifnet *)dlifp1;
5911
5912 if (ifp1->if_family != family)
5913 continue;
5914
5915 lck_mtx_lock(&dlifp1->dl_if_lock);
5916 /* same uniqueid and same len or no unique id specified */
5917 if ((uniqueid_len == dlifp1->dl_if_uniqueid_len) &&
5918 !bcmp(uniqueid, dlifp1->dl_if_uniqueid, uniqueid_len)) {
5919 /* check for matching interface in use */
5920 if (dlifp1->dl_if_flags & DLIF_INUSE) {
5921 if (uniqueid_len) {
5922 ret = EBUSY;
5923 lck_mtx_unlock(&dlifp1->dl_if_lock);
9bccf70c 5924 goto end;
6d2010ae
A
5925 }
5926 } else {
5927 dlifp1->dl_if_flags |= (DLIF_INUSE|DLIF_REUSE);
5928 lck_mtx_unlock(&dlifp1->dl_if_lock);
5929 *ifp = ifp1;
5930 goto end;
5931 }
5932 }
5933 lck_mtx_unlock(&dlifp1->dl_if_lock);
5934 }
5935
5936 /* no interface found, allocate a new one */
5937 buf = zalloc(dlif_zone);
5938 if (buf == NULL) {
5939 ret = ENOMEM;
5940 goto end;
5941 }
5942 bzero(buf, dlif_bufsize);
5943
5944 /* Get the 64-bit aligned base address for this object */
5945 base = (void *)P2ROUNDUP((intptr_t)buf + sizeof (u_int64_t),
5946 sizeof (u_int64_t));
5947 VERIFY(((intptr_t)base + dlif_size) <= ((intptr_t)buf + dlif_bufsize));
5948
5949 /*
5950 * Wind back a pointer size from the aligned base and
5951 * save the original address so we can free it later.
5952 */
5953 pbuf = (void **)((intptr_t)base - sizeof (void *));
5954 *pbuf = buf;
5955 dlifp1 = base;
5956
5957 if (uniqueid_len) {
5958 MALLOC(dlifp1->dl_if_uniqueid, void *, uniqueid_len,
5959 M_NKE, M_WAITOK);
5960 if (dlifp1->dl_if_uniqueid == NULL) {
5961 zfree(dlif_zone, dlifp1);
5962 ret = ENOMEM;
5963 goto end;
5964 }
5965 bcopy(uniqueid, dlifp1->dl_if_uniqueid, uniqueid_len);
5966 dlifp1->dl_if_uniqueid_len = uniqueid_len;
5967 }
5968
5969 ifp1 = (struct ifnet *)dlifp1;
5970 dlifp1->dl_if_flags = DLIF_INUSE;
5971 if (ifnet_debug) {
5972 dlifp1->dl_if_flags |= DLIF_DEBUG;
5973 dlifp1->dl_if_trace = dlil_if_trace;
5974 }
5975 ifp1->if_name = dlifp1->dl_if_namestorage;
39236c6e 5976 ifp1->if_xname = dlifp1->dl_if_xnamestorage;
316670eb
A
5977
5978 /* initialize interface description */
5979 ifp1->if_desc.ifd_maxlen = IF_DESCSIZE;
5980 ifp1->if_desc.ifd_len = 0;
5981 ifp1->if_desc.ifd_desc = dlifp1->dl_if_descstorage;
5982
2d21ac55 5983#if CONFIG_MACF_NET
6d2010ae 5984 mac_ifnet_label_init(ifp1);
2d21ac55 5985#endif
9bccf70c 5986
316670eb
A
5987 if ((ret = dlil_alloc_local_stats(ifp1)) != 0) {
5988 DLIL_PRINTF("%s: failed to allocate if local stats, "
5989 "error: %d\n", __func__, ret);
5990 /* This probably shouldn't be fatal */
5991 ret = 0;
5992 }
5993
6d2010ae
A
5994 lck_mtx_init(&dlifp1->dl_if_lock, ifnet_lock_group, ifnet_lock_attr);
5995 lck_rw_init(&ifp1->if_lock, ifnet_lock_group, ifnet_lock_attr);
5996 lck_mtx_init(&ifp1->if_ref_lock, ifnet_lock_group, ifnet_lock_attr);
5997 lck_mtx_init(&ifp1->if_flt_lock, ifnet_lock_group, ifnet_lock_attr);
6d2010ae
A
5998 lck_mtx_init(&ifp1->if_addrconfig_lock, ifnet_lock_group,
5999 ifnet_lock_attr);
6000 lck_rw_init(&ifp1->if_llreach_lock, ifnet_lock_group, ifnet_lock_attr);
39236c6e
A
6001#if INET6
6002 lck_rw_init(&ifp1->if_inet6data_lock, ifnet_lock_group, ifnet_lock_attr);
6003 ifp1->if_inet6data = NULL;
6004#endif
6d2010ae 6005
316670eb
A
6006 /* for send data paths */
6007 lck_mtx_init(&ifp1->if_start_lock, ifnet_snd_lock_group,
6008 ifnet_lock_attr);
6009 lck_mtx_init(&ifp1->if_cached_route_lock, ifnet_snd_lock_group,
6010 ifnet_lock_attr);
6011 lck_mtx_init(&ifp1->if_snd.ifcq_lock, ifnet_snd_lock_group,
6012 ifnet_lock_attr);
6013
6014 /* for receive data paths */
6015 lck_mtx_init(&ifp1->if_poll_lock, ifnet_rcv_lock_group,
6016 ifnet_lock_attr);
6017
6d2010ae
A
6018 TAILQ_INSERT_TAIL(&dlil_ifnet_head, dlifp1, dl_if_link);
6019
6020 *ifp = ifp1;
9bccf70c
A
6021
6022end:
7ddcb079 6023 dlil_if_unlock();
9bccf70c 6024
6d2010ae
A
6025 VERIFY(dlifp1 == NULL || (IS_P2ALIGNED(dlifp1, sizeof (u_int64_t)) &&
6026 IS_P2ALIGNED(&ifp1->if_data, sizeof (u_int64_t))));
6027
6028 return (ret);
9bccf70c
A
6029}
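/*
 * Illustrative sketch of the alignment trick above (not part of
 * dlil.c): the dlil_ifnet is carved out of an oversized zone element
 * at the next 64-bit boundary, and the element's true start address is
 * stashed in the pointer-sized slot just below the aligned base so the
 * free path can recover it.  A user-space analogue, with hypothetical
 * names:
 *
 *	void *
 *	alloc_aligned8(size_t size)
 *	{
 *		void *buf, *base, **pbuf;
 *
 *		buf = calloc(1, size + sizeof (uint64_t) + sizeof (void *));
 *		if (buf == NULL)
 *			return (NULL);
 *		base = (void *)(((intptr_t)buf + sizeof (void *) +
 *		    (sizeof (uint64_t) - 1)) &
 *		    ~(intptr_t)(sizeof (uint64_t) - 1));
 *		pbuf = (void **)((intptr_t)base - sizeof (void *));
 *		*pbuf = buf;
 *		return (base);
 *	}
 *
 *	void
 *	free_aligned8(void *base)
 *	{
 *		free(*(void **)((intptr_t)base - sizeof (void *)));
 *	}
 */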
6030
2d21ac55 6031__private_extern__ void
6d2010ae
A
6032dlil_if_release(ifnet_t ifp)
6033{
6034 struct dlil_ifnet *dlifp = (struct dlil_ifnet *)ifp;
6035
6036 ifnet_lock_exclusive(ifp);
6037 lck_mtx_lock(&dlifp->dl_if_lock);
6038 dlifp->dl_if_flags &= ~DLIF_INUSE;
6039 strncpy(dlifp->dl_if_namestorage, ifp->if_name, IFNAMSIZ);
6040 ifp->if_name = dlifp->dl_if_namestorage;
39236c6e
A
6041 /* Reset external name (name + unit) */
6042 ifp->if_xname = dlifp->dl_if_xnamestorage;
6043 snprintf(__DECONST(char *, ifp->if_xname), IFXNAMSIZ,
6044 "%s?", ifp->if_name);
6d2010ae 6045 lck_mtx_unlock(&dlifp->dl_if_lock);
2d21ac55 6046#if CONFIG_MACF_NET
6d2010ae
A
6047 /*
6048 * We can either recycle the MAC label here or in dlil_if_acquire().
6049 * It seems logical to do it here but this means that anything that
6050 * still has a handle on ifp will now see it as unlabeled.
6051 * Since the interface is "dead" that may be OK. Revisit later.
6052 */
6053 mac_ifnet_label_recycle(ifp);
2d21ac55 6054#endif
6d2010ae 6055 ifnet_lock_done(ifp);
9bccf70c 6056}
4a3eedf9 6057
7ddcb079
A
6058__private_extern__ void
6059dlil_if_lock(void)
6060{
6061 lck_mtx_lock(&dlil_ifnet_lock);
6062}
6063
6064__private_extern__ void
6065dlil_if_unlock(void)
6066{
6067 lck_mtx_unlock(&dlil_ifnet_lock);
6068}
6069
6070__private_extern__ void
6071dlil_if_lock_assert(void)
6072{
6073 lck_mtx_assert(&dlil_ifnet_lock, LCK_MTX_ASSERT_OWNED);
6074}
6075
4a3eedf9
A
6076__private_extern__ void
6077dlil_proto_unplumb_all(struct ifnet *ifp)
6078{
6079 /*
39236c6e
A
6080 * if_proto_hash[0-2] are for PF_INET, PF_INET6 and PF_VLAN, where
6081 * each bucket contains exactly one entry; PF_VLAN does not need an
6082 * explicit unplumb.
4a3eedf9 6083 *
39236c6e 6084 * if_proto_hash[3] is for other protocols; we expect anything
4a3eedf9
A
6085 * in this bucket to respond to the DETACHING event (which would
6086 * have happened by now) and do the unplumb then.
6087 */
6088 (void) proto_unplumb(PF_INET, ifp);
6089#if INET6
6090 (void) proto_unplumb(PF_INET6, ifp);
6091#endif /* INET6 */
4a3eedf9 6092}
6d2010ae
A
6093
6094static void
6095ifp_src_route_copyout(struct ifnet *ifp, struct route *dst)
6096{
6097 lck_mtx_lock_spin(&ifp->if_cached_route_lock);
6098 lck_mtx_convert_spin(&ifp->if_cached_route_lock);
6099
6100 route_copyout(dst, &ifp->if_src_route, sizeof (*dst));
6101
6102 lck_mtx_unlock(&ifp->if_cached_route_lock);
6103}
6104
6105static void
6106ifp_src_route_copyin(struct ifnet *ifp, struct route *src)
6107{
6108 lck_mtx_lock_spin(&ifp->if_cached_route_lock);
6109 lck_mtx_convert_spin(&ifp->if_cached_route_lock);
6110
6111 if (ifp->if_fwd_cacheok) {
6112 route_copyin(src, &ifp->if_src_route, sizeof (*src));
6113 } else {
39236c6e 6114 ROUTE_RELEASE(src);
6d2010ae
A
6115 }
6116 lck_mtx_unlock(&ifp->if_cached_route_lock);
6117}
6118
6119#if INET6
6120static void
6121ifp_src_route6_copyout(struct ifnet *ifp, struct route_in6 *dst)
6122{
6123 lck_mtx_lock_spin(&ifp->if_cached_route_lock);
6124 lck_mtx_convert_spin(&ifp->if_cached_route_lock);
6125
6126 route_copyout((struct route *)dst, (struct route *)&ifp->if_src_route6,
6127 sizeof (*dst));
6128
6129 lck_mtx_unlock(&ifp->if_cached_route_lock);
6130}
6131
6132static void
6133ifp_src_route6_copyin(struct ifnet *ifp, struct route_in6 *src)
6134{
6135 lck_mtx_lock_spin(&ifp->if_cached_route_lock);
6136 lck_mtx_convert_spin(&ifp->if_cached_route_lock);
6137
6138 if (ifp->if_fwd_cacheok) {
6139 route_copyin((struct route *)src,
6140 (struct route *)&ifp->if_src_route6, sizeof (*src));
6141 } else {
39236c6e 6142 ROUTE_RELEASE(src);
6d2010ae
A
6143 }
6144 lck_mtx_unlock(&ifp->if_cached_route_lock);
6145}
6146#endif /* INET6 */
6147
6148struct rtentry *
6149ifnet_cached_rtlookup_inet(struct ifnet *ifp, struct in_addr src_ip)
6150{
6151 struct route src_rt;
316670eb
A
6152 struct sockaddr_in *dst;
6153
6154 dst = (struct sockaddr_in *)(void *)(&src_rt.ro_dst);
6d2010ae
A
6155
6156 ifp_src_route_copyout(ifp, &src_rt);
6157
39236c6e
A
6158 if (ROUTE_UNUSABLE(&src_rt) || src_ip.s_addr != dst->sin_addr.s_addr) {
6159 ROUTE_RELEASE(&src_rt);
6160 if (dst->sin_family != AF_INET) {
6d2010ae
A
6161 bzero(&src_rt.ro_dst, sizeof (src_rt.ro_dst));
6162 dst->sin_len = sizeof (src_rt.ro_dst);
6163 dst->sin_family = AF_INET;
6164 }
6165 dst->sin_addr = src_ip;
6166
6167 if (src_rt.ro_rt == NULL) {
6168 src_rt.ro_rt = rtalloc1_scoped((struct sockaddr *)dst,
6169 0, 0, ifp->if_index);
6170
6171 if (src_rt.ro_rt != NULL) {
6172 /* retain a ref, copyin consumes one */
6173 struct rtentry *rte = src_rt.ro_rt;
6174 RT_ADDREF(rte);
6175 ifp_src_route_copyin(ifp, &src_rt);
6176 src_rt.ro_rt = rte;
6177 }
6178 }
6179 }
6180
6181 return (src_rt.ro_rt);
6182}
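/*
 * Illustrative note on the reference dance above (not part of dlil.c):
 * ifp_src_route_copyin() consumes one rtentry reference when it parks
 * the route in the per-ifnet cache, so the lookup path takes an extra
 * RT_ADDREF() first in order to hand one reference to the cache while
 * still returning a live rtentry to the caller.  In outline, with the
 * counts as annotations:
 *
 *	rt = rtalloc1_scoped(...);		refcnt 1 (ours)
 *	RT_ADDREF(rt);				refcnt 2
 *	ifp_src_route_copyin(ifp, &src_rt);	cache consumes one; refcnt 1
 *	return (rt);				caller drops it when done
 */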
6183
6184#if INET6
6185struct rtentry*
6186ifnet_cached_rtlookup_inet6(struct ifnet *ifp, struct in6_addr *src_ip6)
6187{
6188 struct route_in6 src_rt;
6189
6190 ifp_src_route6_copyout(ifp, &src_rt);
6191
39236c6e
A
6192 if (ROUTE_UNUSABLE(&src_rt) ||
6193 !IN6_ARE_ADDR_EQUAL(src_ip6, &src_rt.ro_dst.sin6_addr)) {
6194 ROUTE_RELEASE(&src_rt);
6195 if (src_rt.ro_dst.sin6_family != AF_INET6) {
6d2010ae
A
6196 bzero(&src_rt.ro_dst, sizeof (src_rt.ro_dst));
6197 src_rt.ro_dst.sin6_len = sizeof (src_rt.ro_dst);
6198 src_rt.ro_dst.sin6_family = AF_INET6;
6199 }
6200 src_rt.ro_dst.sin6_scope_id = in6_addr2scopeid(ifp, src_ip6);
316670eb
A
6201 bcopy(src_ip6, &src_rt.ro_dst.sin6_addr,
6202 sizeof (src_rt.ro_dst.sin6_addr));
6d2010ae
A
6203
6204 if (src_rt.ro_rt == NULL) {
6205 src_rt.ro_rt = rtalloc1_scoped(
6206 (struct sockaddr *)&src_rt.ro_dst, 0, 0,
6207 ifp->if_index);
6208
6209 if (src_rt.ro_rt != NULL) {
6210 /* retain a ref, copyin consumes one */
6211 struct rtentry *rte = src_rt.ro_rt;
6212 RT_ADDREF(rte);
6213 ifp_src_route6_copyin(ifp, &src_rt);
6214 src_rt.ro_rt = rte;
6215 }
6216 }
6217 }
6218
6219 return (src_rt.ro_rt);
6220}
6221#endif /* INET6 */
316670eb
A
6222
6223void
6224if_lqm_update(struct ifnet *ifp, int lqm)
6225{
6226 struct kev_dl_link_quality_metric_data ev_lqm_data;
6227
6228 VERIFY(lqm >= IFNET_LQM_MIN && lqm <= IFNET_LQM_MAX);
6229
6230 /* Normalize to edge */
6231 if (lqm > IFNET_LQM_THRESH_UNKNOWN && lqm <= IFNET_LQM_THRESH_POOR)
6232 lqm = IFNET_LQM_THRESH_POOR;
6233 else if (lqm > IFNET_LQM_THRESH_POOR && lqm <= IFNET_LQM_THRESH_GOOD)
6234 lqm = IFNET_LQM_THRESH_GOOD;
6235
6236 ifnet_lock_exclusive(ifp);
6237 if (lqm == ifp->if_lqm) {
6238 ifnet_lock_done(ifp);
6239 return; /* nothing to update */
6240 }
6241 ifp->if_lqm = lqm;
6242 ifnet_lock_done(ifp);
6243
6244 bzero(&ev_lqm_data, sizeof (ev_lqm_data));
6245 ev_lqm_data.link_quality_metric = lqm;
6246
6247 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_LINK_QUALITY_METRIC_CHANGED,
6248 (struct net_event_data *)&ev_lqm_data, sizeof (ev_lqm_data));
6249}
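/*
 * Illustrative note (not part of dlil.c): if_lqm_update() snaps the
 * raw metric to a threshold edge before comparing it to the cached
 * value, so KEV_DL_LINK_QUALITY_METRIC_CHANGED fires only when the
 * quality bucket changes, not on every raw-metric wiggle.  For
 * example, any value in (IFNET_LQM_THRESH_UNKNOWN,
 * IFNET_LQM_THRESH_POOR] is reported as IFNET_LQM_THRESH_POOR.
 */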
6250
6251/* for uuid.c */
6252int
6253uuid_get_ethernet(u_int8_t *node)
6254{
6255 struct ifnet *ifp;
6256 struct sockaddr_dl *sdl;
6257
6258 ifnet_head_lock_shared();
6259 TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
6260 ifnet_lock_shared(ifp);
6261 IFA_LOCK_SPIN(ifp->if_lladdr);
6262 sdl = (struct sockaddr_dl *)(void *)ifp->if_lladdr->ifa_addr;
6263 if (sdl->sdl_type == IFT_ETHER) {
6264 memcpy(node, LLADDR(sdl), ETHER_ADDR_LEN);
6265 IFA_UNLOCK(ifp->if_lladdr);
6266 ifnet_lock_done(ifp);
6267 ifnet_head_done();
6268 return (0);
6269 }
6270 IFA_UNLOCK(ifp->if_lladdr);
6271 ifnet_lock_done(ifp);
6272 }
6273 ifnet_head_done();
6274
6275 return (-1);
6276}
6277
6278static int
6279sysctl_rxpoll SYSCTL_HANDLER_ARGS
6280{
6281#pragma unused(arg1, arg2)
39236c6e
A
6282 uint32_t i;
6283 int err;
316670eb
A
6284
6285 i = if_rxpoll;
6286
6287 err = sysctl_handle_int(oidp, &i, 0, req);
6288 if (err != 0 || req->newptr == USER_ADDR_NULL)
6289 return (err);
6290
6291 if (net_rxpoll == 0)
6292 return (ENXIO);
6293
6294 if_rxpoll = i;
6295 return (err);
6296}
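/*
 * Illustrative sketch of the handler idiom shared by the sysctl
 * functions in this block (not part of dlil.c): snapshot the tunable
 * into a local, let sysctl_handle_int()/sysctl_handle_quad() do the
 * user copy, return early on pure reads (req->newptr ==
 * USER_ADDR_NULL), then clamp and publish.  With a hypothetical
 * tunable my_tunable:
 *
 *	static int
 *	sysctl_my_tunable SYSCTL_HANDLER_ARGS
 *	{
 *	#pragma unused(arg1, arg2)
 *		uint32_t i;
 *		int err;
 *
 *		i = my_tunable;
 *		err = sysctl_handle_int(oidp, &i, 0, req);
 *		if (err != 0 || req->newptr == USER_ADDR_NULL)
 *			return (err);
 *		if (i < MY_TUNABLE_MIN)
 *			i = MY_TUNABLE_MIN;
 *		my_tunable = i;
 *		return (err);
 *	}
 */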
6297
6298static int
39236c6e 6299sysctl_rxpoll_mode_holdtime SYSCTL_HANDLER_ARGS
316670eb
A
6300{
6301#pragma unused(arg1, arg2)
39236c6e
A
6302 uint64_t q;
6303 int err;
316670eb 6304
39236c6e 6305 q = if_rxpoll_mode_holdtime;
316670eb 6306
39236c6e 6307 err = sysctl_handle_quad(oidp, &q, 0, req);
316670eb
A
6308 if (err != 0 || req->newptr == USER_ADDR_NULL)
6309 return (err);
6310
39236c6e
A
6311 if (q < IF_RXPOLL_MODE_HOLDTIME_MIN)
6312 q = IF_RXPOLL_MODE_HOLDTIME_MIN;
6313
6314 if_rxpoll_mode_holdtime = q;
316670eb 6315
316670eb
A
6316 return (err);
6317}
6318
6319static int
39236c6e 6320sysctl_rxpoll_sample_holdtime SYSCTL_HANDLER_ARGS
316670eb
A
6321{
6322#pragma unused(arg1, arg2)
39236c6e
A
6323 uint64_t q;
6324 int err;
316670eb 6325
39236c6e 6326 q = if_rxpoll_sample_holdtime;
316670eb 6327
39236c6e 6328 err = sysctl_handle_quad(oidp, &q, 0, req);
316670eb
A
6329 if (err != 0 || req->newptr == USER_ADDR_NULL)
6330 return (err);
6331
39236c6e
A
6332 if (q < IF_RXPOLL_SAMPLETIME_MIN)
6333 q = IF_RXPOLL_SAMPLETIME_MIN;
6334
6335 if_rxpoll_sample_holdtime = q;
316670eb 6336
316670eb
A
6337 return (err);
6338}
6339
39236c6e
A
6340static int
6341sysctl_rxpoll_interval_time SYSCTL_HANDLER_ARGS
316670eb 6342{
39236c6e
A
6343#pragma unused(arg1, arg2)
6344 uint64_t q;
6345 int err;
316670eb 6346
39236c6e 6347 q = if_rxpoll_interval_time;
316670eb 6348
39236c6e
A
6349 err = sysctl_handle_quad(oidp, &q, 0, req);
6350 if (err != 0 || req->newptr == USER_ADDR_NULL)
6351 return (err);
6352
6353 if (q < IF_RXPOLL_INTERVALTIME_MIN)
6354 q = IF_RXPOLL_INTERVALTIME_MIN;
316670eb 6355
39236c6e 6356 if_rxpoll_interval_time = q;
316670eb 6357
39236c6e 6358 return (err);
316670eb
A
6359}
6360
39236c6e
A
6361static int
6362sysctl_rxpoll_wlowat SYSCTL_HANDLER_ARGS
316670eb 6363{
39236c6e
A
6364#pragma unused(arg1, arg2)
6365 uint32_t i;
6366 int err;
316670eb 6367
39236c6e 6368 i = if_rxpoll_wlowat;
316670eb 6369
39236c6e
A
6370 err = sysctl_handle_int(oidp, &i, 0, req);
6371 if (err != 0 || req->newptr == USER_ADDR_NULL)
6372 return (err);
316670eb 6373
39236c6e
A
6374 if (i == 0 || i >= if_rxpoll_whiwat)
6375 return (EINVAL);
6376
6377 if_rxpoll_wlowat = i;
6378 return (err);
316670eb
A
6379}
6380
39236c6e
A
6381static int
6382sysctl_rxpoll_whiwat SYSCTL_HANDLER_ARGS
316670eb 6383{
39236c6e
A
6384#pragma unused(arg1, arg2)
6385 uint32_t i;
6386 int err;
316670eb 6387
39236c6e 6388 i = if_rxpoll_whiwat;
316670eb 6389
39236c6e
A
6390 err = sysctl_handle_int(oidp, &i, 0, req);
6391 if (err != 0 || req->newptr == USER_ADDR_NULL)
6392 return (err);
316670eb 6393
39236c6e
A
6394 if (i <= if_rxpoll_wlowat)
6395 return (EINVAL);
6396
6397 if_rxpoll_whiwat = i;
6398 return (err);
316670eb
A
6399}
6400
6401static int
39236c6e 6402sysctl_sndq_maxlen SYSCTL_HANDLER_ARGS
316670eb 6403{
39236c6e
A
6404#pragma unused(arg1, arg2)
6405 int i, err;
316670eb 6406
39236c6e 6407 i = if_sndq_maxlen;
316670eb 6408
39236c6e
A
6409 err = sysctl_handle_int(oidp, &i, 0, req);
6410 if (err != 0 || req->newptr == USER_ADDR_NULL)
6411 return (err);
316670eb 6412
39236c6e
A
6413 if (i < IF_SNDQ_MINLEN)
6414 i = IF_SNDQ_MINLEN;
316670eb 6415
39236c6e
A
6416 if_sndq_maxlen = i;
6417 return (err);
316670eb
A
6418}
6419
39236c6e
A
6420static int
6421sysctl_rcvq_maxlen SYSCTL_HANDLER_ARGS
316670eb 6422{
39236c6e
A
6423#pragma unused(arg1, arg2)
6424 int i, err;
6425
6426 i = if_rcvq_maxlen;
6427
6428 err = sysctl_handle_int(oidp, &i, 0, req);
6429 if (err != 0 || req->newptr == USER_ADDR_NULL)
6430 return (err);
6431
6432 if (i < IF_RCVQ_MINLEN)
6433 i = IF_RCVQ_MINLEN;
6434
6435 if_rcvq_maxlen = i;
6436 return (err);
316670eb
A
6437}
6438
6439void
6440dlil_node_present(struct ifnet *ifp, struct sockaddr *sa,
6441 int32_t rssi, int lqm, int npm, u_int8_t srvinfo[48])
6442{
6443 struct kev_dl_node_presence kev;
6444 struct sockaddr_dl *sdl;
6445 struct sockaddr_in6 *sin6;
6446
6447 VERIFY(ifp);
6448 VERIFY(sa);
6449 VERIFY(sa->sa_family == AF_LINK || sa->sa_family == AF_INET6);
6450
6451 bzero(&kev, sizeof (kev));
6452 sin6 = &kev.sin6_node_address;
6453 sdl = &kev.sdl_node_address;
6454 nd6_alt_node_addr_decompose(ifp, sa, sdl, sin6);
6455 kev.rssi = rssi;
6456 kev.link_quality_metric = lqm;
6457 kev.node_proximity_metric = npm;
6458 bcopy(srvinfo, kev.node_service_info, sizeof (kev.node_service_info));
6459
6460 nd6_alt_node_present(ifp, sin6, sdl, rssi, lqm, npm);
6461 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_PRESENCE,
6462 &kev.link_data, sizeof (kev));
6463}
6464
6465void
6466dlil_node_absent(struct ifnet *ifp, struct sockaddr *sa)
6467{
6468 struct kev_dl_node_absence kev;
6469 struct sockaddr_in6 *sin6;
6470 struct sockaddr_dl *sdl;
6471
6472 VERIFY(ifp);
6473 VERIFY(sa);
6474 VERIFY(sa->sa_family == AF_LINK || sa->sa_family == AF_INET6);
6475
6476 bzero(&kev, sizeof (kev));
6477 sin6 = &kev.sin6_node_address;
6478 sdl = &kev.sdl_node_address;
6479 nd6_alt_node_addr_decompose(ifp, sa, sdl, sin6);
6480
6481 nd6_alt_node_absent(ifp, sin6);
6482 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_ABSENCE,
6483 &kev.link_data, sizeof (kev));
6484}
6485
39236c6e
A
6486const void *
6487dlil_ifaddr_bytes(const struct sockaddr_dl *sdl, size_t *sizep,
6488 kauth_cred_t *credp)
6489{
6490 const u_int8_t *bytes;
6491 size_t size;
6492
6493 bytes = CONST_LLADDR(sdl);
6494 size = sdl->sdl_alen;
6495
6496#if CONFIG_MACF
6497 if (dlil_lladdr_ckreq) {
6498 switch (sdl->sdl_type) {
6499 case IFT_ETHER:
6500 case IFT_BRIDGE:
6501 case IFT_IEEE1394:
6502 case IFT_IEEE8023ADLAG:
6503 case IFT_L2VLAN:
6504 break;
6505 default:
6506 credp = NULL;
6507 break;
6508 };
6509
6510 if (credp && mac_system_check_info(*credp, "net.link.addr")) {
6511 static const u_int8_t unspec[FIREWIRE_EUI64_LEN] = {
6512 [0] = 2
6513 };
6514
6515 switch (sdl->sdl_type) {
6516 case IFT_ETHER:
6517 case IFT_BRIDGE:
6518 case IFT_IEEE8023ADLAG:
6519 case IFT_L2VLAN:
6520 VERIFY(size == ETHER_ADDR_LEN);
6521 bytes = unspec;
6522 break;
6523 case IFT_IEEE1394:
6524 VERIFY(size == FIREWIRE_EUI64_LEN);
6525 bytes = unspec;
6526 break;
6527 default:
6528 VERIFY(FALSE);
6529 break;
6530 };
6531 }
6532 }
6533#else
6534#pragma unused(credp)
6535#endif
6536
6537 if (sizep != NULL) *sizep = size;
6538 return (bytes);
6539}
6540
6541void
6542dlil_report_issues(struct ifnet *ifp, u_int8_t modid[DLIL_MODIDLEN],
6543 u_int8_t info[DLIL_MODARGLEN])
6544{
6545 struct kev_dl_issues kev;
6546 struct timeval tv;
6547
6548 VERIFY(ifp != NULL);
6549 VERIFY(modid != NULL);
6550 _CASSERT(sizeof (kev.modid) == DLIL_MODIDLEN);
6551 _CASSERT(sizeof (kev.info) == DLIL_MODARGLEN);
6552
6553	bzero(&kev, sizeof (kev));
6554
6555 microtime(&tv);
6556 kev.timestamp = tv.tv_sec;
6557 bcopy(modid, &kev.modid, DLIL_MODIDLEN);
6558 if (info != NULL)
6559 bcopy(info, &kev.info, DLIL_MODARGLEN);
6560
6561 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_ISSUES,
6562 &kev.link_data, sizeof (kev));
6563}
6564
316670eb
A
6565errno_t
6566ifnet_getset_opportunistic(ifnet_t ifp, u_long cmd, struct ifreq *ifr,
6567 struct proc *p)
6568{
6569 u_int32_t level = IFNET_THROTTLE_OFF;
6570 errno_t result = 0;
6571
6572 VERIFY(cmd == SIOCSIFOPPORTUNISTIC || cmd == SIOCGIFOPPORTUNISTIC);
6573
6574 if (cmd == SIOCSIFOPPORTUNISTIC) {
6575 /*
6576 * XXX: Use priv_check_cred() instead of root check?
6577 */
6578 if ((result = proc_suser(p)) != 0)
6579 return (result);
6580
6581 if (ifr->ifr_opportunistic.ifo_flags ==
6582 IFRIFOF_BLOCK_OPPORTUNISTIC)
6583 level = IFNET_THROTTLE_OPPORTUNISTIC;
6584 else if (ifr->ifr_opportunistic.ifo_flags == 0)
6585 level = IFNET_THROTTLE_OFF;
6586 else
6587 result = EINVAL;
6588
6589 if (result == 0)
6590 result = ifnet_set_throttle(ifp, level);
6591 } else if ((result = ifnet_get_throttle(ifp, &level)) == 0) {
6592 ifr->ifr_opportunistic.ifo_flags = 0;
6593 if (level == IFNET_THROTTLE_OPPORTUNISTIC) {
6594 ifr->ifr_opportunistic.ifo_flags |=
6595 IFRIFOF_BLOCK_OPPORTUNISTIC;
6596 }
6597 }
6598
6599 /*
6600 * Return the count of current opportunistic connections
6601 * over the interface.
6602 */
6603 if (result == 0) {
6604 uint32_t flags = 0;
6605 flags |= (cmd == SIOCSIFOPPORTUNISTIC) ?
6606 INPCB_OPPORTUNISTIC_SETCMD : 0;
6607 flags |= (level == IFNET_THROTTLE_OPPORTUNISTIC) ?
6608 INPCB_OPPORTUNISTIC_THROTTLEON : 0;
6609 ifr->ifr_opportunistic.ifo_inuse =
6610 udp_count_opportunistic(ifp->if_index, flags) +
6611 tcp_count_opportunistic(ifp->if_index, flags);
6612 }
6613
6614 if (result == EALREADY)
6615 result = 0;
6616
6617 return (result);
6618}
6619
6620int
6621ifnet_get_throttle(struct ifnet *ifp, u_int32_t *level)
6622{
6623 struct ifclassq *ifq;
6624 int err = 0;
6625
6626 if (!(ifp->if_eflags & IFEF_TXSTART))
6627 return (ENXIO);
6628
6629 *level = IFNET_THROTTLE_OFF;
6630
6631 ifq = &ifp->if_snd;
6632 IFCQ_LOCK(ifq);
6633 /* Throttling works only for IFCQ, not ALTQ instances */
6634 if (IFCQ_IS_ENABLED(ifq))
6635 IFCQ_GET_THROTTLE(ifq, *level, err);
6636 IFCQ_UNLOCK(ifq);
6637
6638 return (err);
6639}
6640
6641int
6642ifnet_set_throttle(struct ifnet *ifp, u_int32_t level)
6643{
6644 struct ifclassq *ifq;
6645 int err = 0;
6646
6647 if (!(ifp->if_eflags & IFEF_TXSTART))
6648 return (ENXIO);
6649
39236c6e
A
6650 ifq = &ifp->if_snd;
6651
316670eb
A
6652 switch (level) {
6653 case IFNET_THROTTLE_OFF:
6654 case IFNET_THROTTLE_OPPORTUNISTIC:
6655#if PF_ALTQ
6656 /* Throttling works only for IFCQ, not ALTQ instances */
6657 if (ALTQ_IS_ENABLED(IFCQ_ALTQ(ifq)))
6658 return (ENXIO);
6659#endif /* PF_ALTQ */
6660 break;
6661 default:
6662 return (EINVAL);
6663 }
6664
316670eb
A
6665 IFCQ_LOCK(ifq);
6666 if (IFCQ_IS_ENABLED(ifq))
6667 IFCQ_SET_THROTTLE(ifq, level, err);
6668 IFCQ_UNLOCK(ifq);
6669
6670 if (err == 0) {
39236c6e
A
6671 printf("%s: throttling level set to %d\n", if_name(ifp),
6672 level);
316670eb
A
6673 if (level == IFNET_THROTTLE_OFF)
6674 ifnet_start(ifp);
6675 }
6676
6677 return (err);
6678}
39236c6e
A
6679
6680errno_t
6681ifnet_getset_log(ifnet_t ifp, u_long cmd, struct ifreq *ifr,
6682 struct proc *p)
6683{
6684#pragma unused(p)
6685 errno_t result = 0;
6686 uint32_t flags;
6687 int level, category, subcategory;
6688
6689 VERIFY(cmd == SIOCSIFLOG || cmd == SIOCGIFLOG);
6690
6691 if (cmd == SIOCSIFLOG) {
6692 if ((result = priv_check_cred(kauth_cred_get(),
6693 PRIV_NET_INTERFACE_CONTROL, 0)) != 0)
6694 return (result);
6695
6696 level = ifr->ifr_log.ifl_level;
6697 if (level < IFNET_LOG_MIN || level > IFNET_LOG_MAX)
6698 result = EINVAL;
6699
6700 flags = ifr->ifr_log.ifl_flags;
6701 if ((flags &= IFNET_LOGF_MASK) == 0)
6702 result = EINVAL;
6703
6704 category = ifr->ifr_log.ifl_category;
6705 subcategory = ifr->ifr_log.ifl_subcategory;
6706
6707 if (result == 0)
6708 result = ifnet_set_log(ifp, level, flags,
6709 category, subcategory);
6710 } else {
6711 result = ifnet_get_log(ifp, &level, &flags, &category,
6712 &subcategory);
6713 if (result == 0) {
6714 ifr->ifr_log.ifl_level = level;
6715 ifr->ifr_log.ifl_flags = flags;
6716 ifr->ifr_log.ifl_category = category;
6717 ifr->ifr_log.ifl_subcategory = subcategory;
6718 }
6719 }
6720
6721 return (result);
6722}
6723
6724int
6725ifnet_set_log(struct ifnet *ifp, int32_t level, uint32_t flags,
6726 int32_t category, int32_t subcategory)
6727{
6728 int err = 0;
6729
6730 VERIFY(level >= IFNET_LOG_MIN && level <= IFNET_LOG_MAX);
6731 VERIFY(flags & IFNET_LOGF_MASK);
6732
6733 /*
6734 * The logging level applies to all facilities; make sure to
6735 * update them all with the most current level.
6736 */
6737 flags |= ifp->if_log.flags;
6738
6739 if (ifp->if_output_ctl != NULL) {
6740 struct ifnet_log_params l;
6741
6742 bzero(&l, sizeof (l));
6743 l.level = level;
6744 l.flags = flags;
6745 l.flags &= ~IFNET_LOGF_DLIL;
6746 l.category = category;
6747 l.subcategory = subcategory;
6748
6749 /* Send this request to lower layers */
6750 if (l.flags != 0) {
6751 err = ifp->if_output_ctl(ifp, IFNET_CTL_SET_LOG,
6752 sizeof (l), &l);
6753 }
6754 } else if ((flags & ~IFNET_LOGF_DLIL) && ifp->if_output_ctl == NULL) {
6755 /*
6756 * If targeted to the lower layers without an output
6757 * control callback registered on the interface, just
6758 * silently ignore facilities other than ours.
6759 */
6760 flags &= IFNET_LOGF_DLIL;
6761		if (flags == 0 && !(ifp->if_log.flags & IFNET_LOGF_DLIL))
6762 level = 0;
6763 }
6764
6765 if (err == 0) {
6766 if ((ifp->if_log.level = level) == IFNET_LOG_DEFAULT)
6767 ifp->if_log.flags = 0;
6768 else
6769 ifp->if_log.flags |= flags;
6770
6771 log(LOG_INFO, "%s: logging level set to %d flags=%b "
6772 "arg=%b, category=%d subcategory=%d\n", if_name(ifp),
6773 ifp->if_log.level, ifp->if_log.flags,
6774 IFNET_LOGF_BITS, flags, IFNET_LOGF_BITS,
6775 category, subcategory);
6776 }
6777
6778 return (err);
6779}
6780
6781int
6782ifnet_get_log(struct ifnet *ifp, int32_t *level, uint32_t *flags,
6783 int32_t *category, int32_t *subcategory)
6784{
6785 if (level != NULL)
6786 *level = ifp->if_log.level;
6787 if (flags != NULL)
6788 *flags = ifp->if_log.flags;
6789 if (category != NULL)
6790 *category = ifp->if_log.category;
6791 if (subcategory != NULL)
6792 *subcategory = ifp->if_log.subcategory;
6793
6794 return (0);
6795}
6796
6797int
6798ifnet_notify_address(struct ifnet *ifp, int af)
6799{
6800 struct ifnet_notify_address_params na;
6801
6802#if PF
6803 (void) pf_ifaddr_hook(ifp);
6804#endif /* PF */
6805
6806 if (ifp->if_output_ctl == NULL)
6807 return (EOPNOTSUPP);
6808
6809 bzero(&na, sizeof (na));
6810 na.address_family = af;
6811
6812 return (ifp->if_output_ctl(ifp, IFNET_CTL_NOTIFY_ADDRESS,
6813 sizeof (na), &na));
6814}
6815
6816errno_t
6817ifnet_flowid(struct ifnet *ifp, uint32_t *flowid)
6818{
6819 if (ifp == NULL || flowid == NULL) {
6820 return (EINVAL);
6821 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
6822 !(ifp->if_refflags & IFRF_ATTACHED)) {
6823 return (ENXIO);
6824 }
6825
6826 *flowid = ifp->if_flowhash;
6827
6828 return (0);
6829}
6830
6831errno_t
6832ifnet_disable_output(struct ifnet *ifp)
6833{
6834 int err;
6835
6836 if (ifp == NULL) {
6837 return (EINVAL);
6838 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
6839 !(ifp->if_refflags & IFRF_ATTACHED)) {
6840 return (ENXIO);
6841 }
6842
6843 if ((err = ifnet_fc_add(ifp)) == 0) {
6844 lck_mtx_lock_spin(&ifp->if_start_lock);
6845 ifp->if_start_flags |= IFSF_FLOW_CONTROLLED;
6846 lck_mtx_unlock(&ifp->if_start_lock);
6847 }
6848 return (err);
6849}
6850
6851errno_t
6852ifnet_enable_output(struct ifnet *ifp)
6853{
6854 if (ifp == NULL) {
6855 return (EINVAL);
6856 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
6857 !(ifp->if_refflags & IFRF_ATTACHED)) {
6858 return (ENXIO);
6859 }
6860
6861 ifnet_start_common(ifp, 1);
6862 return (0);
6863}
6864
6865void
6866ifnet_flowadv(uint32_t flowhash)
6867{
6868 struct ifnet_fc_entry *ifce;
6869 struct ifnet *ifp;
6870
6871 ifce = ifnet_fc_get(flowhash);
6872 if (ifce == NULL)
6873 return;
6874
6875 VERIFY(ifce->ifce_ifp != NULL);
6876 ifp = ifce->ifce_ifp;
6877
6878 /* flow hash gets recalculated per attach, so check */
6879 if (ifnet_is_attached(ifp, 1)) {
6880 if (ifp->if_flowhash == flowhash)
6881 (void) ifnet_enable_output(ifp);
6882 ifnet_decr_iorefcnt(ifp);
6883 }
6884 ifnet_fc_entry_free(ifce);
6885}
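/*
 * Illustrative end-to-end sequence for the flow-advisory machinery in
 * this block (not part of dlil.c): a driver whose hardware queue fills
 * calls ifnet_disable_output(), which inserts the ifnet into
 * ifnet_fc_tree keyed by if_flowhash and sets IFSF_FLOW_CONTROLLED.
 * When the queue drains, the advisory comes back carrying that same
 * flowhash; ifnet_flowadv() looks the entry up, confirms the interface
 * is still attached with an unchanged hash (the hash is recomputed on
 * every attach), and re-enables output via ifnet_enable_output().
 */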
6886
6887/*
6888 * Function to compare ifnet_fc_entries in ifnet flow control tree
6889 */
6890static inline int
6891ifce_cmp(const struct ifnet_fc_entry *fc1, const struct ifnet_fc_entry *fc2)
6892{
6893 return (fc1->ifce_flowhash - fc2->ifce_flowhash);
6894}
6895
6896static int
6897ifnet_fc_add(struct ifnet *ifp)
6898{
6899 struct ifnet_fc_entry keyfc, *ifce;
6900 uint32_t flowhash;
6901
6902 VERIFY(ifp != NULL && (ifp->if_eflags & IFEF_TXSTART));
6903 VERIFY(ifp->if_flowhash != 0);
6904 flowhash = ifp->if_flowhash;
6905
6906 bzero(&keyfc, sizeof (keyfc));
6907 keyfc.ifce_flowhash = flowhash;
6908
6909 lck_mtx_lock_spin(&ifnet_fc_lock);
6910 ifce = RB_FIND(ifnet_fc_tree, &ifnet_fc_tree, &keyfc);
6911 if (ifce != NULL && ifce->ifce_ifp == ifp) {
6912 /* Entry is already in ifnet_fc_tree, return */
6913 lck_mtx_unlock(&ifnet_fc_lock);
6914 return (0);
6915 }
6916
6917 if (ifce != NULL) {
6918 /*
6919 * There is a different fc entry with the same flow hash
6920 * but different ifp pointer. There can be a collision
6921 * on flow hash but the probability is low. Let's just
6922 * avoid adding a second one when there is a collision.
6923 */
6924 lck_mtx_unlock(&ifnet_fc_lock);
6925 return (EAGAIN);
6926 }
6927
6928 /* become regular mutex */
6929 lck_mtx_convert_spin(&ifnet_fc_lock);
6930
6931 ifce = zalloc_noblock(ifnet_fc_zone);
6932 if (ifce == NULL) {
6933 /* memory allocation failed */
6934 lck_mtx_unlock(&ifnet_fc_lock);
6935 return (ENOMEM);
6936 }
6937 bzero(ifce, ifnet_fc_zone_size);
6938
6939 ifce->ifce_flowhash = flowhash;
6940 ifce->ifce_ifp = ifp;
6941
6942 RB_INSERT(ifnet_fc_tree, &ifnet_fc_tree, ifce);
6943 lck_mtx_unlock(&ifnet_fc_lock);
6944 return (0);
6945}
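
/*
 * Locking note (descriptive): ifnet_fc_add() above and ifnet_fc_get()
 * below take ifnet_fc_lock in its spin variant for the short lookup
 * path, and convert it to a full mutex only on the slower paths (such
 * as the zone allocation above). A minimal sketch of that pattern,
 * assuming a lck_mtx_t `lock':
 *
 *	lck_mtx_lock_spin(&lock);
 *	if (fast_path_done) {
 *		lck_mtx_unlock(&lock);
 *		return;
 *	}
 *	lck_mtx_convert_spin(&lock);	// now held as a regular mutex
 *	slower_path_work();
 *	lck_mtx_unlock(&lock);
 */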
6946
6947static struct ifnet_fc_entry *
6948ifnet_fc_get(uint32_t flowhash)
6949{
6950 struct ifnet_fc_entry keyfc, *ifce;
6951 struct ifnet *ifp;
6952
6953 bzero(&keyfc, sizeof (keyfc));
6954 keyfc.ifce_flowhash = flowhash;
6955
6956 lck_mtx_lock_spin(&ifnet_fc_lock);
6957 ifce = RB_FIND(ifnet_fc_tree, &ifnet_fc_tree, &keyfc);
6958 if (ifce == NULL) {
6959 /* Entry is not present in ifnet_fc_tree, return */
6960 lck_mtx_unlock(&ifnet_fc_lock);
6961 return (NULL);
6962 }
6963
6964 RB_REMOVE(ifnet_fc_tree, &ifnet_fc_tree, ifce);
6965
6966 VERIFY(ifce->ifce_ifp != NULL);
6967 ifp = ifce->ifce_ifp;
6968
6969 /* become regular mutex */
6970 lck_mtx_convert_spin(&ifnet_fc_lock);
6971
6972 if (!ifnet_is_attached(ifp, 0)) {
6973 /*
6974 * This ifp is not attached or in the process of being
6975 * detached; just don't process it.
6976 */
6977 ifnet_fc_entry_free(ifce);
6978 ifce = NULL;
6979 }
6980 lck_mtx_unlock(&ifnet_fc_lock);
6981
6982 return (ifce);
6983}
6984
6985static void
6986ifnet_fc_entry_free(struct ifnet_fc_entry *ifce)
6987{
6988 zfree(ifnet_fc_zone, ifce);
6989}
6990
6991static uint32_t
6992ifnet_calc_flowhash(struct ifnet *ifp)
6993{
6994 struct ifnet_flowhash_key fh __attribute__((aligned(8)));
6995 uint32_t flowhash = 0;
6996
6997 if (ifnet_flowhash_seed == 0)
6998 ifnet_flowhash_seed = RandomULong();
6999
7000 bzero(&fh, sizeof (fh));
7001
7002 (void) snprintf(fh.ifk_name, sizeof (fh.ifk_name), "%s", ifp->if_name);
7003 fh.ifk_unit = ifp->if_unit;
7004 fh.ifk_flags = ifp->if_flags;
7005 fh.ifk_eflags = ifp->if_eflags;
7006 fh.ifk_capabilities = ifp->if_capabilities;
7007 fh.ifk_capenable = ifp->if_capenable;
7008 fh.ifk_output_sched_model = ifp->if_output_sched_model;
7009 fh.ifk_rand1 = RandomULong();
7010 fh.ifk_rand2 = RandomULong();
7011
7012try_again:
7013 flowhash = net_flowhash(&fh, sizeof (fh), ifnet_flowhash_seed);
7014 if (flowhash == 0) {
7015 /* try to get a non-zero flowhash */
7016 ifnet_flowhash_seed = RandomULong();
7017 goto try_again;
7018 }
7019
7020 return (flowhash);
7021}
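
/*
 * Note: a zero hash is reserved to mean "no flow hash" (see the
 * VERIFY in ifnet_fc_add() above), hence the reseed-and-retry loop.
 * The same pattern in isolation, assuming a hash32() routine:
 *
 *	uint32_t h = hash32(key, keylen, seed);
 *	while (h == 0) {
 *		seed = RandomULong();	// reseed and retry
 *		h = hash32(key, keylen, seed);
 *	}
 */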
7022
7023static void
7024dlil_output_cksum_dbg(struct ifnet *ifp, struct mbuf *m, uint32_t hoff,
7025 protocol_family_t pf)
7026{
7027#pragma unused(ifp)
7028 uint32_t did_sw;
7029
7030 if (!(hwcksum_dbg_mode & HWCKSUM_DBG_FINALIZE_FORCED) ||
7031 (m->m_pkthdr.csum_flags & (CSUM_TSO_IPV4|CSUM_TSO_IPV6)))
7032 return;
7033
7034 switch (pf) {
7035 case PF_INET:
7036 did_sw = in_finalize_cksum(m, hoff, m->m_pkthdr.csum_flags);
7037 if (did_sw & CSUM_DELAY_IP)
7038 hwcksum_dbg_finalized_hdr++;
7039 if (did_sw & CSUM_DELAY_DATA)
7040 hwcksum_dbg_finalized_data++;
7041 break;
7042#if INET6
7043 case PF_INET6:
7044 /*
7045 * Checksum offload should not have been enabled when
7046 * extension headers exist; that also means that we
7047 * cannot force-finalize packets with extension headers.
7048 * Tell the callee to skip such cases by passing
7049 * optlen as -1.
7050 */
7051 did_sw = in6_finalize_cksum(m, hoff, -1, -1,
7052 m->m_pkthdr.csum_flags);
7053 if (did_sw & CSUM_DELAY_IPV6_DATA)
7054 hwcksum_dbg_finalized_data++;
7055 break;
7056#endif /* INET6 */
7057 default:
7058 return;
7059 }
7060}
7061
7062static void
7063dlil_input_cksum_dbg(struct ifnet *ifp, struct mbuf *m, char *frame_header,
7064 protocol_family_t pf)
7065{
7066 uint16_t sum;
7067 uint32_t hlen;
7068
7069 if (frame_header == NULL ||
7070 frame_header < (char *)mbuf_datastart(m) ||
7071 frame_header > (char *)m->m_data) {
7072 printf("%s: frame header pointer 0x%llx out of range "
7073 "[0x%llx,0x%llx] for mbuf 0x%llx\n", if_name(ifp),
7074 (uint64_t)VM_KERNEL_ADDRPERM(frame_header),
7075 (uint64_t)VM_KERNEL_ADDRPERM(mbuf_datastart(m)),
7076 (uint64_t)VM_KERNEL_ADDRPERM(m->m_data),
7077 (uint64_t)VM_KERNEL_ADDRPERM(m));
7078 return;
7079 }
7080 hlen = (m->m_data - frame_header);
7081
7082 switch (pf) {
7083 case PF_INET:
7084#if INET6
7085 case PF_INET6:
7086#endif /* INET6 */
7087 break;
7088 default:
7089 return;
7090 }
7091
7092 /*
7093 * Force partial checksum offload; useful to simulate cases
7094 * where the hardware does not support partial checksum offload,
7095 * in order to validate correctness throughout the layers above.
7096 */
7097 if (hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED) {
7098 uint32_t foff = hwcksum_dbg_partial_rxoff_forced;
7099
7100 if (foff > (uint32_t)m->m_pkthdr.len)
7101 return;
7102
7103 m->m_pkthdr.csum_flags &= ~CSUM_RX_FLAGS;
7104
7105 /* Compute 16-bit 1's complement sum from forced offset */
7106 sum = m_sum16(m, foff, (m->m_pkthdr.len - foff));
7107
7108 m->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PARTIAL);
7109 m->m_pkthdr.csum_rx_val = sum;
7110 m->m_pkthdr.csum_rx_start = (foff + hlen);
7111
7112 hwcksum_dbg_partial_forced++;
7113 hwcksum_dbg_partial_forced_bytes += m->m_pkthdr.len;
7114 }
7115
7116 /*
7117 * Partial checksum offload verification (and adjustment);
7118 * useful to validate and test cases where the hardware
7119 * supports partial checksum offload.
7120 */
7121 if ((m->m_pkthdr.csum_flags &
7122 (CSUM_DATA_VALID | CSUM_PARTIAL | CSUM_PSEUDO_HDR)) ==
7123 (CSUM_DATA_VALID | CSUM_PARTIAL)) {
7124 uint32_t rxoff;
7125
7126 /* Start offset must begin after frame header */
7127 rxoff = m->m_pkthdr.csum_rx_start;
7128 if (hlen > rxoff) {
7129 hwcksum_dbg_bad_rxoff++;
7130 if (dlil_verbose) {
7131 printf("%s: partial cksum start offset %d "
7132 "is less than frame header length %d for "
7133 "mbuf 0x%llx\n", if_name(ifp), rxoff, hlen,
7134 (uint64_t)VM_KERNEL_ADDRPERM(m));
7135 }
7136 return;
7137 }
7138 rxoff -= hlen;
7139
7140 if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED)) {
7141 /*
7142 * Compute the expected 16-bit 1's complement sum;
7143 * skip this if we've already computed it above
7144 * when partial checksum offload is forced.
7145 */
7146 sum = m_sum16(m, rxoff, (m->m_pkthdr.len - rxoff));
7147
7148 /* Hardware or driver is buggy */
7149 if (sum != m->m_pkthdr.csum_rx_val) {
7150 hwcksum_dbg_bad_cksum++;
7151 if (dlil_verbose) {
7152 printf("%s: bad partial cksum value "
7153 "0x%x (expected 0x%x) for mbuf "
7154 "0x%llx [rx_start %d]\n",
7155 if_name(ifp),
7156 m->m_pkthdr.csum_rx_val, sum,
7157 (uint64_t)VM_KERNEL_ADDRPERM(m),
7158 m->m_pkthdr.csum_rx_start);
7159 }
7160 return;
7161 }
7162 }
7163 hwcksum_dbg_verified++;
7164
7165 /*
7166 * This code allows us to emulate various hardware
7167 * implementations that perform the 16-bit 1's complement
7168 * sum beginning at various start offset values.
7169 */
7170 if (hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_RXOFF_ADJ) {
7171 uint32_t aoff = hwcksum_dbg_partial_rxoff_adj;
7172
7173 if (aoff == rxoff || aoff > (uint32_t)m->m_pkthdr.len)
7174 return;
7175
7176 sum = m_adj_sum16(m, rxoff, aoff, sum);
7177
7178 m->m_pkthdr.csum_rx_val = sum;
7179 m->m_pkthdr.csum_rx_start = (aoff + hlen);
7180
7181 hwcksum_dbg_adjusted++;
7182 }
7183 }
7184}
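
/*
 * For reference, a minimal flat-buffer sketch of the 16-bit 1's
 * complement sum that m_sum16() computes over an mbuf chain (byte
 * ordering details of the real routine may differ); m_adj_sum16()
 * then conceptually subtracts the sum of the bytes skipped when the
 * start offset moves forward:
 *
 *	static uint16_t
 *	sum16_sketch(const uint8_t *buf, int len)
 *	{
 *		uint32_t sum = 0;
 *
 *		while (len > 1) {		// 16-bit words
 *			sum += (buf[0] << 8) | buf[1];
 *			buf += 2;
 *			len -= 2;
 *		}
 *		if (len == 1)			// odd trailing byte
 *			sum += (uint32_t)buf[0] << 8;
 *		while (sum >> 16)		// fold carries back in
 *			sum = (sum & 0xffff) + (sum >> 16);
 *		return ((uint16_t)sum);
 *	}
 */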
7185
7186static int
7187sysctl_hwcksum_dbg_mode SYSCTL_HANDLER_ARGS
7188{
7189#pragma unused(arg1, arg2)
7190 u_int32_t i;
7191 int err;
7192
7193 i = hwcksum_dbg_mode;
7194
7195 err = sysctl_handle_int(oidp, &i, 0, req);
7196 if (err != 0 || req->newptr == USER_ADDR_NULL)
7197 return (err);
7198
7199 if (hwcksum_dbg == 0)
7200 return (ENODEV);
7201
7202 if ((i & ~HWCKSUM_DBG_MASK) != 0)
7203 return (EINVAL);
7204
7205 hwcksum_dbg_mode = (i & HWCKSUM_DBG_MASK);
7206
7207 return (err);
7208}
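
/*
 * Userland sketch (hypothetical; assumes this handler is registered
 * under net.link.generic.system like other debug knobs in this file):
 *
 *	#include <stdio.h>
 *	#include <stdint.h>
 *	#include <sys/sysctl.h>
 *
 *	int
 *	main(void)
 *	{
 *		uint32_t mode = 0;
 *		size_t len = sizeof (mode);
 *
 *		if (sysctlbyname("net.link.generic.system.hwcksum_dbg_mode",
 *		    &mode, &len, NULL, 0) == -1)
 *			perror("sysctlbyname");
 *		else
 *			printf("hwcksum_dbg_mode=0x%x\n", mode);
 *		return (0);
 *	}
 */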
7209
7210static int
7211sysctl_hwcksum_dbg_partial_rxoff_forced SYSCTL_HANDLER_ARGS
7212{
7213#pragma unused(arg1, arg2)
7214 u_int32_t i;
7215 int err;
7216
7217 i = hwcksum_dbg_partial_rxoff_forced;
7218
7219 err = sysctl_handle_int(oidp, &i, 0, req);
7220 if (err != 0 || req->newptr == USER_ADDR_NULL)
7221 return (err);
7222
7223 if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED))
7224 return (ENODEV);
7225
7226 hwcksum_dbg_partial_rxoff_forced = i;
7227
7228 return (err);
7229}
7230
7231static int
7232sysctl_hwcksum_dbg_partial_rxoff_adj SYSCTL_HANDLER_ARGS
7233{
7234#pragma unused(arg1, arg2)
7235 u_int32_t i;
7236 int err;
7237
7238 i = hwcksum_dbg_partial_rxoff_adj;
7239
7240 err = sysctl_handle_int(oidp, &i, 0, req);
7241 if (err != 0 || req->newptr == USER_ADDR_NULL)
7242 return (err);
7243
7244 if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_RXOFF_ADJ))
7245 return (ENODEV);
7246
7247 hwcksum_dbg_partial_rxoff_adj = i;
7248
7249 return (err);
7250}
7251
7252#if DEBUG
7253/* Blob for sum16 verification */
7254static uint8_t sumdata[] = {
7255 0x1f, 0x8b, 0x08, 0x08, 0x4c, 0xe5, 0x9a, 0x4f, 0x00, 0x03,
7256 0x5f, 0x00, 0x5d, 0x91, 0x41, 0x4e, 0xc4, 0x30, 0x0c, 0x45,
7257 0xf7, 0x9c, 0xc2, 0x07, 0x18, 0xf5, 0x0e, 0xb0, 0xe2, 0x00,
7258 0x48, 0x88, 0xa5, 0xdb, 0xba, 0x49, 0x34, 0x69, 0xdc, 0x71,
7259 0x92, 0xa9, 0xc2, 0x8a, 0x6b, 0x70, 0x3d, 0x4e, 0x82, 0x93,
7260 0xb4, 0x08, 0xd8, 0xc5, 0xb1, 0xfd, 0xff, 0xb3, 0xfd, 0x4c,
7261 0x42, 0x5f, 0x1f, 0x9f, 0x11, 0x12, 0x43, 0xb2, 0x04, 0x93,
7262 0xe0, 0x7b, 0x01, 0x0e, 0x14, 0x07, 0x78, 0xd1, 0x78, 0x75,
7263 0x71, 0x71, 0xe9, 0x08, 0x84, 0x46, 0xf2, 0xc7, 0x3b, 0x09,
7264 0xe7, 0xd1, 0xd3, 0x8a, 0x57, 0x92, 0x33, 0xcd, 0x39, 0xcc,
7265 0xb0, 0x91, 0x89, 0xe0, 0x42, 0x53, 0x8b, 0xb7, 0x8c, 0x42,
7266 0x60, 0xd9, 0x9f, 0x7a, 0x55, 0x19, 0x76, 0xcb, 0x10, 0x49,
7267 0x35, 0xac, 0x0b, 0x5a, 0x3c, 0xbb, 0x65, 0x51, 0x8c, 0x90,
7268 0x7c, 0x69, 0x45, 0x45, 0x81, 0xb4, 0x2b, 0x70, 0x82, 0x85,
7269 0x55, 0x91, 0x17, 0x90, 0xdc, 0x14, 0x1e, 0x35, 0x52, 0xdd,
7270 0x02, 0x16, 0xef, 0xb5, 0x40, 0x89, 0xe2, 0x46, 0x53, 0xad,
7271 0x93, 0x6e, 0x98, 0x30, 0xe5, 0x08, 0xb7, 0xcc, 0x03, 0xbc,
7272 0x71, 0x86, 0x09, 0x43, 0x0d, 0x52, 0xf5, 0xa2, 0xf5, 0xa2,
7273 0x56, 0x11, 0x8d, 0xa8, 0xf5, 0xee, 0x92, 0x3d, 0xfe, 0x8c,
7274 0x67, 0x71, 0x8b, 0x0e, 0x2d, 0x70, 0x77, 0xbe, 0xbe, 0xea,
7275 0xbf, 0x9a, 0x8d, 0x9c, 0x53, 0x53, 0xe5, 0xe0, 0x4b, 0x87,
7276 0x85, 0xd2, 0x45, 0x95, 0x30, 0xc1, 0xcc, 0xe0, 0x74, 0x54,
7277 0x13, 0x58, 0xe8, 0xe8, 0x79, 0xa2, 0x09, 0x73, 0xa4, 0x0e,
7278 0x39, 0x59, 0x0c, 0xe6, 0x9c, 0xb2, 0x4f, 0x06, 0x5b, 0x8e,
7279 0xcd, 0x17, 0x6c, 0x5e, 0x95, 0x4d, 0x70, 0xa2, 0x0a, 0xbf,
7280 0xa3, 0xcc, 0x03, 0xbc, 0x5a, 0xe7, 0x75, 0x06, 0x5e, 0x75,
7281 0xef, 0x58, 0x8e, 0x15, 0xd1, 0x0a, 0x18, 0xff, 0xdd, 0xe6,
7282 0x02, 0x3b, 0xb5, 0xb4, 0xa1, 0xe0, 0x72, 0xfc, 0xe3, 0xab,
7283 0x07, 0xe0, 0x4d, 0x65, 0xea, 0x92, 0xeb, 0xf2, 0x7b, 0x17,
7284 0x05, 0xce, 0xc6, 0xf6, 0x2b, 0xbb, 0x70, 0x3d, 0x00, 0x95,
7285 0xe0, 0x07, 0x52, 0x3b, 0x58, 0xfc, 0x7c, 0x69, 0x4d, 0xe9,
7286 0xf7, 0xa9, 0x66, 0x1e, 0x1e, 0xbe, 0x01, 0x69, 0x98, 0xfe,
7287 0xc8, 0x28, 0x02, 0x00, 0x00
7288};
7289
7290/* Precomputed 16-bit 1's complement sums for various spans of the above data */
7291static struct {
7292 int len;
7293 uint16_t sum;
7294} sumtbl[] = {
7295 { 11, 0xcb6d },
7296 { 20, 0x20dd },
7297 { 27, 0xbabd },
7298 { 32, 0xf3e8 },
7299 { 37, 0x197d },
7300 { 43, 0x9eae },
7301 { 64, 0x4678 },
7302 { 127, 0x9399 },
7303 { 256, 0xd147 },
7304 { 325, 0x0358 }
7305};
7306#define SUMTBL_MAX ((int)sizeof (sumtbl) / (int)sizeof (sumtbl[0]))
7307
7308static void
7309dlil_verify_sum16(void)
7310{
7311 struct mbuf *m;
7312 uint8_t *buf;
7313 int n;
7314
7315 /* Make sure test data plus extra room for alignment fits in cluster */
7316 _CASSERT((sizeof (sumdata) + (sizeof (uint64_t) * 2)) <= MCLBYTES);
7317
7318 m = m_getcl(M_WAITOK, MT_DATA, M_PKTHDR);
7319 MH_ALIGN(m, sizeof (uint32_t)); /* 32-bit starting alignment */
7320 buf = mtod(m, uint8_t *); /* base address */
7321
7322 for (n = 0; n < SUMTBL_MAX; n++) {
7323 uint16_t len = sumtbl[n].len;
7324 int i;
7325
7326 /* Verify for all possible alignments */
7327 for (i = 0; i < (int)sizeof (uint64_t); i++) {
7328 uint16_t sum;
7329 uint8_t *c;
7330
7331 /* Copy over test data to mbuf */
7332 VERIFY(len <= sizeof (sumdata));
7333 c = buf + i;
7334 bcopy(sumdata, c, len);
7335
7336 /* Zero-offset test (align by data pointer) */
7337 m->m_data = (caddr_t)c;
7338 m->m_len = len;
7339 sum = m_sum16(m, 0, len);
7340
7341 /* Something is horribly broken; stop now */
7342 if (sum != sumtbl[n].sum) {
7343 panic("%s: broken m_sum16 for len=%d align=%d "
7344 "sum=0x%04x [expected=0x%04x]\n", __func__,
7345 len, i, sum, sumtbl[n].sum);
7346 /* NOTREACHED */
7347 }
7348
7349 /* Alignment test by offset (fixed data pointer) */
7350 m->m_data = (caddr_t)buf;
7351 m->m_len = i + len;
7352 sum = m_sum16(m, i, len);
7353
7354 /* Something is horribly broken; stop now */
7355 if (sum != sumtbl[n].sum) {
7356 panic("%s: broken m_sum16 for len=%d offset=%d "
7357 "sum=0x%04x [expected=0x%04x]\n", __func__,
7358 len, i, sum, sumtbl[n].sum);
7359 /* NOTREACHED */
7360 }
7361#if INET
7362 /* Simple sum16 contiguous buffer test by alignment */
7363 sum = b_sum16(c, len);
7364
7365 /* Something is horribly broken; stop now */
7366 if (sum != sumtbl[n].sum) {
7367 panic("%s: broken b_sum16 for len=%d align=%d "
7368 "sum=0x%04x [expected=0x%04x]\n", __func__,
7369 len, i, sum, sumtbl[n].sum);
7370 /* NOTREACHED */
7371 }
7372#endif /* INET */
7373 }
7374 }
7375 m_freem(m);
7376
7377 printf("DLIL: SUM16 self-tests PASSED\n");
7378}
7379#endif /* DEBUG */
7380
7381#define CASE_STRINGIFY(x) case x: return #x
7382
7383__private_extern__ const char *
7384dlil_kev_dl_code_str(u_int32_t event_code)
7385{
7386 switch (event_code) {
7387 CASE_STRINGIFY(KEV_DL_SIFFLAGS);
7388 CASE_STRINGIFY(KEV_DL_SIFMETRICS);
7389 CASE_STRINGIFY(KEV_DL_SIFMTU);
7390 CASE_STRINGIFY(KEV_DL_SIFPHYS);
7391 CASE_STRINGIFY(KEV_DL_SIFMEDIA);
7392 CASE_STRINGIFY(KEV_DL_SIFGENERIC);
7393 CASE_STRINGIFY(KEV_DL_ADDMULTI);
7394 CASE_STRINGIFY(KEV_DL_DELMULTI);
7395 CASE_STRINGIFY(KEV_DL_IF_ATTACHED);
7396 CASE_STRINGIFY(KEV_DL_IF_DETACHING);
7397 CASE_STRINGIFY(KEV_DL_IF_DETACHED);
7398 CASE_STRINGIFY(KEV_DL_LINK_OFF);
7399 CASE_STRINGIFY(KEV_DL_LINK_ON);
7400 CASE_STRINGIFY(KEV_DL_PROTO_ATTACHED);
7401 CASE_STRINGIFY(KEV_DL_PROTO_DETACHED);
7402 CASE_STRINGIFY(KEV_DL_LINK_ADDRESS_CHANGED);
7403 CASE_STRINGIFY(KEV_DL_WAKEFLAGS_CHANGED);
7404 CASE_STRINGIFY(KEV_DL_IF_IDLE_ROUTE_REFCNT);
7405 CASE_STRINGIFY(KEV_DL_IFCAP_CHANGED);
7406 CASE_STRINGIFY(KEV_DL_LINK_QUALITY_METRIC_CHANGED);
7407 CASE_STRINGIFY(KEV_DL_NODE_PRESENCE);
7408 CASE_STRINGIFY(KEV_DL_NODE_ABSENCE);
7409 CASE_STRINGIFY(KEV_DL_MASTER_ELECTED);
7410 CASE_STRINGIFY(KEV_DL_ISSUES);
7411 CASE_STRINGIFY(KEV_DL_IFDELEGATE_CHANGED);
7412 default:
7413 break;
7414 }
7415 return ("");
7416}
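
/*
 * Usage sketch: event logging paths can translate a KEV_DL_* code into
 * a readable name, e.g.:
 *
 *	log(LOG_DEBUG, "%s: event %s\n", if_name(ifp),
 *	    dlil_kev_dl_code_str(event_code));
 */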