]> git.saurik.com Git - apple/xnu.git/blame - bsd/net/dlil.c
xnu-2050.18.24.tar.gz
[apple/xnu.git] / bsd / net / dlil.c
CommitLineData
1c79356b 1/*
7ddcb079 2 * Copyright (c) 1999-2012 Apple Inc. All rights reserved.
5d5c5d0d 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
1c79356b 5 *
2d21ac55
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
8f6c56a5 14 *
2d21ac55
A
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5
A
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55
A
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
8f6c56a5 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
1c79356b 27 */
2d21ac55
A
28/*
29 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
30 * support for mandatory and extensible security protections. This notice
31 * is included in support of clause 2.2 (b) of the Apple Public License,
32 * Version 2.0.
33 */
1c79356b 34
1c79356b
A
35#include <sys/param.h>
36#include <sys/systm.h>
37#include <sys/kernel.h>
38#include <sys/malloc.h>
39#include <sys/mbuf.h>
40#include <sys/socket.h>
91447636
A
41#include <sys/domain.h>
42#include <sys/user.h>
2d21ac55 43#include <sys/random.h>
316670eb 44#include <sys/socketvar.h>
1c79356b
A
45#include <net/if_dl.h>
46#include <net/if.h>
91447636 47#include <net/route.h>
1c79356b
A
48#include <net/if_var.h>
49#include <net/dlil.h>
91447636 50#include <net/if_arp.h>
316670eb 51#include <net/iptap.h>
1c79356b
A
52#include <sys/kern_event.h>
53#include <sys/kdebug.h>
6d2010ae 54#include <sys/mcache.h>
1c79356b 55
91447636 56#include <kern/assert.h>
1c79356b 57#include <kern/task.h>
9bccf70c
A
58#include <kern/thread.h>
59#include <kern/sched_prim.h>
91447636 60#include <kern/locks.h>
6d2010ae 61#include <kern/zalloc.h>
2d21ac55 62#include <net/kpi_protocol.h>
9bccf70c 63
1c79356b 64#include <net/if_types.h>
6d2010ae 65#include <net/if_llreach.h>
91447636 66#include <net/kpi_interfacefilter.h>
316670eb
A
67#include <net/classq/classq.h>
68#include <net/classq/classq_sfb.h>
91447636 69
6d2010ae
A
70#if INET
71#include <netinet/in_var.h>
72#include <netinet/igmp_var.h>
316670eb
A
73#include <netinet/ip_var.h>
74#include <netinet/tcp.h>
75#include <netinet/tcp_var.h>
76#include <netinet/udp.h>
77#include <netinet/udp_var.h>
78#include <netinet/if_ether.h>
79#include <netinet/in_pcb.h>
6d2010ae
A
80#endif /* INET */
81
82#if INET6
83#include <netinet6/in6_var.h>
84#include <netinet6/nd6.h>
85#include <netinet6/mld6_var.h>
86#endif /* INET6 */
87
88#if NETAT
89#include <netat/at_var.h>
90#endif /* NETAT */
91
91447636 92#include <libkern/OSAtomic.h>
1c79356b 93
d52fe63f 94#include <machine/machine_routines.h>
1c79356b 95
2d21ac55 96#include <mach/thread_act.h>
6d2010ae 97#include <mach/sdt.h>
2d21ac55
A
98
99#if CONFIG_MACF_NET
100#include <security/mac_framework.h>
101#endif /* CONFIG_MACF_NET */
102
b0d623f7
A
103#if PF
104#include <net/pfvar.h>
105#endif /* PF */
316670eb
A
106#if PF_ALTQ
107#include <net/altq/altq.h>
108#endif /* PF_ALTQ */
109#include <net/pktsched/pktsched.h>
b0d623f7 110
6d2010ae
A
/* Debug codes for the DLIL layer, all built with DLILDBG_CODE(). */
#define	DBG_LAYER_BEG		DLILDBG_CODE(DBG_DLIL_STATIC, 0)
#define	DBG_LAYER_END		DLILDBG_CODE(DBG_DLIL_STATIC, 2)
#define	DBG_FNC_DLIL_INPUT	DLILDBG_CODE(DBG_DLIL_STATIC, (1 << 8))
#define	DBG_FNC_DLIL_OUTPUT	DLILDBG_CODE(DBG_DLIL_STATIC, (2 << 8))
#define	DBG_FNC_DLIL_IFOUT	DLILDBG_CODE(DBG_DLIL_STATIC, (3 << 8))

#define	MAX_FRAME_TYPE_SIZE	4	/* LONGWORDS */
#define	MAX_LINKADDR		4	/* LONGWORDS */
#define	M_NKE			M_IFADDR

/* DLIL_PRINTF currently maps to printf; flip the condition for kprintf. */
#if 1
#define	DLIL_PRINTF	printf
#else
#define	DLIL_PRINTF	kprintf
#endif
127
6d2010ae
A
/*
 * Compile-time assertions that field `f' lies at an offset which is a
 * multiple of sizeof (u_int64_t), either within struct if_data_internal
 * or within the if_data member of struct ifnet.
 */
#define	IF_DATA_REQUIRE_ALIGNED_64(f)					\
	_CASSERT((offsetof(struct if_data_internal, f) %		\
	    sizeof (u_int64_t)) == 0)

#define	IFNET_IF_DATA_REQUIRE_ALIGNED_64(f)				\
	_CASSERT((offsetof(struct ifnet, if_data.f) %			\
	    sizeof (u_int64_t)) == 0)
133
/* Values stored in if_proto.proto_kpi; they select the kpi.v1/kpi.v2 arm. */
enum {
	kProtoKPI_v1	= 1,
	kProtoKPI_v2	= 2
};
138
6d2010ae
A
139/*
140 * List of if_proto structures in if_proto_hash[] is protected by
141 * the ifnet lock. The rest of the fields are initialized at protocol
142 * attach time and never change, thus no lock required as long as
143 * a reference to it is valid, via if_proto_ref().
144 */
91447636 145struct if_proto {
6d2010ae
A
146 SLIST_ENTRY(if_proto) next_hash;
147 u_int32_t refcount;
148 u_int32_t detached;
149 struct ifnet *ifp;
91447636 150 protocol_family_t protocol_family;
6d2010ae 151 int proto_kpi;
91447636 152 union {
91447636 153 struct {
6d2010ae
A
154 proto_media_input input;
155 proto_media_preout pre_output;
156 proto_media_event event;
157 proto_media_ioctl ioctl;
91447636
A
158 proto_media_detached detached;
159 proto_media_resolve_multi resolve_multi;
160 proto_media_send_arp send_arp;
161 } v1;
2d21ac55
A
162 struct {
163 proto_media_input_v2 input;
6d2010ae
A
164 proto_media_preout pre_output;
165 proto_media_event event;
166 proto_media_ioctl ioctl;
2d21ac55
A
167 proto_media_detached detached;
168 proto_media_resolve_multi resolve_multi;
169 proto_media_send_arp send_arp;
170 } v2;
91447636 171 } kpi;
1c79356b
A
172};
173
91447636
A
174SLIST_HEAD(proto_hash_entry, if_proto);
175
6d2010ae
A
176#define DLIL_SDLMAXLEN 64
177#define DLIL_SDLDATALEN \
178 (DLIL_SDLMAXLEN - offsetof(struct sockaddr_dl, sdl_data[0]))
1c79356b 179
9bccf70c 180struct dlil_ifnet {
6d2010ae
A
181 struct ifnet dl_if; /* public ifnet */
182 /*
316670eb 183 * DLIL private fields, protected by dl_if_lock
6d2010ae
A
184 */
185 decl_lck_mtx_data(, dl_if_lock);
186 TAILQ_ENTRY(dlil_ifnet) dl_if_link; /* dlil_ifnet link */
187 u_int32_t dl_if_flags; /* flags (below) */
188 u_int32_t dl_if_refcnt; /* refcnt */
189 void (*dl_if_trace)(struct dlil_ifnet *, int); /* ref trace callback */
190 void *dl_if_uniqueid; /* unique interface id */
191 size_t dl_if_uniqueid_len; /* length of the unique id */
192 char dl_if_namestorage[IFNAMSIZ]; /* interface name storage */
193 struct {
194 struct ifaddr ifa; /* lladdr ifa */
195 u_int8_t asdl[DLIL_SDLMAXLEN]; /* addr storage */
196 u_int8_t msdl[DLIL_SDLMAXLEN]; /* mask storage */
197 } dl_if_lladdr;
316670eb
A
198 u_int8_t dl_if_descstorage[IF_DESCSIZE]; /* desc storage */
199 struct dlil_threading_info dl_if_inpstorage; /* input thread storage */
6d2010ae
A
200 ctrace_t dl_if_attach; /* attach PC stacktrace */
201 ctrace_t dl_if_detach; /* detach PC stacktrace */
202};
203
204/* Values for dl_if_flags (private to DLIL) */
205#define DLIF_INUSE 0x1 /* DLIL ifnet recycler, ifnet in use */
206#define DLIF_REUSE 0x2 /* DLIL ifnet recycles, ifnet is not new */
207#define DLIF_DEBUG 0x4 /* has debugging info */
208
209#define IF_REF_TRACE_HIST_SIZE 8 /* size of ref trace history */
210
211/* For gdb */
212__private_extern__ unsigned int if_ref_trace_hist_size = IF_REF_TRACE_HIST_SIZE;
213
214struct dlil_ifnet_dbg {
215 struct dlil_ifnet dldbg_dlif; /* dlil_ifnet */
216 u_int16_t dldbg_if_refhold_cnt; /* # ifnet references */
217 u_int16_t dldbg_if_refrele_cnt; /* # ifnet releases */
218 /*
219 * Circular lists of ifnet_{reference,release} callers.
220 */
221 ctrace_t dldbg_if_refhold[IF_REF_TRACE_HIST_SIZE];
222 ctrace_t dldbg_if_refrele[IF_REF_TRACE_HIST_SIZE];
1c79356b
A
223};
224
6d2010ae
A
/*
 * Convert between a dlil_ifnet and the public ifnet embedded in it.
 * The argument is parenthesized so that expressions such as `p + 1'
 * or `cond ? a : b' expand correctly.
 */
#define	DLIL_TO_IFP(s)	(&(s)->dl_if)
#define	IFP_TO_DLIL(s)	((struct dlil_ifnet *)(s))
227
91447636
A
228struct ifnet_filter {
229 TAILQ_ENTRY(ifnet_filter) filt_next;
6d2010ae
A
230 u_int32_t filt_skip;
231 ifnet_t filt_ifp;
232 const char *filt_name;
233 void *filt_cookie;
234 protocol_family_t filt_protocol;
235 iff_input_func filt_input;
236 iff_output_func filt_output;
237 iff_event_func filt_event;
238 iff_ioctl_func filt_ioctl;
239 iff_detached_func filt_detached;
1c79356b
A
240};
241
2d21ac55 242struct proto_input_entry;
55e303ae 243
91447636 244static TAILQ_HEAD(, dlil_ifnet) dlil_ifnet_head;
91447636 245static lck_grp_t *dlil_lock_group;
6d2010ae 246lck_grp_t *ifnet_lock_group;
91447636 247static lck_grp_t *ifnet_head_lock_group;
316670eb
A
248static lck_grp_t *ifnet_snd_lock_group;
249static lck_grp_t *ifnet_rcv_lock_group;
6d2010ae 250lck_attr_t *ifnet_lock_attr;
7ddcb079
A
251decl_lck_rw_data(static, ifnet_head_lock);
252decl_lck_mtx_data(static, dlil_ifnet_lock);
b0d623f7 253u_int32_t dlil_filter_count = 0;
91447636 254extern u_int32_t ipv4_ll_arp_aware;
1c79356b 255
316670eb
A
256struct sfb_fc_list ifnet_fclist;
257decl_lck_mtx_data(static, ifnet_fclist_lock);
258
259static unsigned int ifnet_fcezone_size; /* size of ifnet_fce */
260static struct zone *ifnet_fcezone; /* zone for ifnet_fce */
261
262#define IFNET_FCEZONE_MAX 32 /* maximum elements in zone */
263#define IFNET_FCEZONE_NAME "ifnet_fcezone" /* zone name */
264
265static void ifnet_fc_thread_func(void *, wait_result_t);
266static void ifnet_fc_init(void);
267
6d2010ae
A
268#if DEBUG
269static unsigned int ifnet_debug = 1; /* debugging (enabled) */
270#else
271static unsigned int ifnet_debug; /* debugging (disabled) */
272#endif /* !DEBUG */
273static unsigned int dlif_size; /* size of dlil_ifnet to allocate */
274static unsigned int dlif_bufsize; /* size of dlif_size + headroom */
275static struct zone *dlif_zone; /* zone for dlil_ifnet */
276
277#define DLIF_ZONE_MAX 64 /* maximum elements in zone */
278#define DLIF_ZONE_NAME "ifnet" /* zone name */
279
280static unsigned int dlif_filt_size; /* size of ifnet_filter */
281static struct zone *dlif_filt_zone; /* zone for ifnet_filter */
282
283#define DLIF_FILT_ZONE_MAX 8 /* maximum elements in zone */
284#define DLIF_FILT_ZONE_NAME "ifnet_filter" /* zone name */
285
6d2010ae
A
286static unsigned int dlif_phash_size; /* size of ifnet proto hash table */
287static struct zone *dlif_phash_zone; /* zone for ifnet proto hash table */
288
289#define DLIF_PHASH_ZONE_MAX DLIF_ZONE_MAX /* maximum elements in zone */
290#define DLIF_PHASH_ZONE_NAME "ifnet_proto_hash" /* zone name */
291
292static unsigned int dlif_proto_size; /* size of if_proto */
293static struct zone *dlif_proto_zone; /* zone for if_proto */
294
295#define DLIF_PROTO_ZONE_MAX (DLIF_ZONE_MAX*2) /* maximum elements in zone */
296#define DLIF_PROTO_ZONE_NAME "ifnet_proto" /* zone name */
297
316670eb
A
298static unsigned int dlif_tcpstat_size; /* size of tcpstat_local to allocate */
299static unsigned int dlif_tcpstat_bufsize; /* size of dlif_tcpstat_size + headroom */
300static struct zone *dlif_tcpstat_zone; /* zone for tcpstat_local */
301
302#define DLIF_TCPSTAT_ZONE_MAX 1 /* maximum elements in zone */
303#define DLIF_TCPSTAT_ZONE_NAME "ifnet_tcpstat" /* zone name */
304
305static unsigned int dlif_udpstat_size; /* size of udpstat_local to allocate */
306static unsigned int dlif_udpstat_bufsize; /* size of dlif_udpstat_size + headroom */
307static struct zone *dlif_udpstat_zone; /* zone for udpstat_local */
308
309#define DLIF_UDPSTAT_ZONE_MAX 1 /* maximum elements in zone */
310#define DLIF_UDPSTAT_ZONE_NAME "ifnet_udpstat" /* zone name */
311
d1ecb069
A
312/*
313 * Updating this variable should be done by first acquiring the global
314 * radix node head (rnh_lock), in tandem with settting/clearing the
315 * PR_AGGDRAIN for routedomain.
316 */
317u_int32_t ifnet_aggressive_drainers;
318static u_int32_t net_rtref;
d1ecb069 319
316670eb
A
320static struct dlil_main_threading_info dlil_main_input_thread_info;
321__private_extern__ struct dlil_threading_info *dlil_main_input_thread =
322 (struct dlil_threading_info *)&dlil_main_input_thread_info;
2d21ac55 323
91447636 324static int dlil_event_internal(struct ifnet *ifp, struct kev_msg *msg);
91447636 325static int dlil_detach_filter_internal(interface_filter_t filter, int detached);
6d2010ae
A
326static void dlil_if_trace(struct dlil_ifnet *, int);
327static void if_proto_ref(struct if_proto *);
328static void if_proto_free(struct if_proto *);
329static struct if_proto *find_attached_proto(struct ifnet *, u_int32_t);
330static int dlil_ifp_proto_count(struct ifnet *);
331static void if_flt_monitor_busy(struct ifnet *);
332static void if_flt_monitor_unbusy(struct ifnet *);
333static void if_flt_monitor_enter(struct ifnet *);
334static void if_flt_monitor_leave(struct ifnet *);
335static int dlil_interface_filters_input(struct ifnet *, struct mbuf **,
336 char **, protocol_family_t);
337static int dlil_interface_filters_output(struct ifnet *, struct mbuf **,
338 protocol_family_t);
339static struct ifaddr *dlil_alloc_lladdr(struct ifnet *,
340 const struct sockaddr_dl *);
341static int ifnet_lookup(struct ifnet *);
342static void if_purgeaddrs(struct ifnet *);
343
344static errno_t ifproto_media_input_v1(struct ifnet *, protocol_family_t,
345 struct mbuf *, char *);
346static errno_t ifproto_media_input_v2(struct ifnet *, protocol_family_t,
347 struct mbuf *);
348static errno_t ifproto_media_preout(struct ifnet *, protocol_family_t,
349 mbuf_t *, const struct sockaddr *, void *, char *, char *);
350static void ifproto_media_event(struct ifnet *, protocol_family_t,
351 const struct kev_msg *);
352static errno_t ifproto_media_ioctl(struct ifnet *, protocol_family_t,
353 unsigned long, void *);
354static errno_t ifproto_media_resolve_multi(ifnet_t, const struct sockaddr *,
355 struct sockaddr_dl *, size_t);
356static errno_t ifproto_media_send_arp(struct ifnet *, u_short,
357 const struct sockaddr_dl *, const struct sockaddr *,
358 const struct sockaddr_dl *, const struct sockaddr *);
359
360static errno_t ifp_if_output(struct ifnet *, struct mbuf *);
316670eb
A
361static void ifp_if_start(struct ifnet *);
362static void ifp_if_input_poll(struct ifnet *, u_int32_t, u_int32_t,
363 struct mbuf **, struct mbuf **, u_int32_t *, u_int32_t *);
364static errno_t ifp_if_ctl(struct ifnet *, ifnet_ctl_cmd_t, u_int32_t, void *);
6d2010ae
A
365static errno_t ifp_if_demux(struct ifnet *, struct mbuf *, char *,
366 protocol_family_t *);
367static errno_t ifp_if_add_proto(struct ifnet *, protocol_family_t,
368 const struct ifnet_demux_desc *, u_int32_t);
369static errno_t ifp_if_del_proto(struct ifnet *, protocol_family_t);
370static errno_t ifp_if_check_multi(struct ifnet *, const struct sockaddr *);
371static errno_t ifp_if_framer(struct ifnet *, struct mbuf **,
316670eb
A
372 const struct sockaddr *, const char *, const char *
373#if CONFIG_EMBEDDED
374 ,
375 u_int32_t *, u_int32_t *
376#endif /* CONFIG_EMBEDDED */
377 );
6d2010ae
A
378static errno_t ifp_if_set_bpf_tap(struct ifnet *, bpf_tap_mode, bpf_packet_func);
379static void ifp_if_free(struct ifnet *);
380static void ifp_if_event(struct ifnet *, const struct kev_msg *);
316670eb
A
381static __inline void ifp_inc_traffic_class_in(struct ifnet *, struct mbuf *);
382static __inline void ifp_inc_traffic_class_out(struct ifnet *, struct mbuf *);
6d2010ae 383
316670eb
A
384static void dlil_main_input_thread_func(void *, wait_result_t);
385static void dlil_input_thread_func(void *, wait_result_t);
386static void dlil_rxpoll_input_thread_func(void *, wait_result_t);
387static void dlil_rxpoll_calc_limits(struct dlil_threading_info *);
6d2010ae 388static int dlil_create_input_thread(ifnet_t, struct dlil_threading_info *);
316670eb
A
389static void dlil_terminate_input_thread(struct dlil_threading_info *);
390static void dlil_input_stats_add(const struct ifnet_stat_increment_param *,
391 struct dlil_threading_info *, boolean_t);
392static void dlil_input_stats_sync(struct ifnet *, struct dlil_threading_info *);
393static void dlil_input_packet_list_common(struct ifnet *, struct mbuf *,
394 u_int32_t, ifnet_model_t, boolean_t);
395static errno_t ifnet_input_common(struct ifnet *, struct mbuf *, struct mbuf *,
396 const struct ifnet_stat_increment_param *, boolean_t, boolean_t);
397
398static void ifnet_detacher_thread_func(void *, wait_result_t);
399static int ifnet_detacher_thread_cont(int);
6d2010ae
A
400static void ifnet_detach_final(struct ifnet *);
401static void ifnet_detaching_enqueue(struct ifnet *);
402static struct ifnet *ifnet_detaching_dequeue(void);
403
316670eb
A
404static void ifnet_start_thread_fn(void *, wait_result_t);
405static void ifnet_poll_thread_fn(void *, wait_result_t);
406static void ifnet_poll(struct ifnet *);
407
6d2010ae
A
408static void ifp_src_route_copyout(struct ifnet *, struct route *);
409static void ifp_src_route_copyin(struct ifnet *, struct route *);
410#if INET6
411static void ifp_src_route6_copyout(struct ifnet *, struct route_in6 *);
412static void ifp_src_route6_copyin(struct ifnet *, struct route_in6 *);
413#endif /* INET6 */
414
316670eb
A
415static int sysctl_rxpoll SYSCTL_HANDLER_ARGS;
416static int sysctl_sndq_maxlen SYSCTL_HANDLER_ARGS;
417static int sysctl_rcvq_maxlen SYSCTL_HANDLER_ARGS;
418
6d2010ae
A
419/* The following are protected by dlil_ifnet_lock */
420static TAILQ_HEAD(, ifnet) ifnet_detaching_head;
421static u_int32_t ifnet_detaching_cnt;
422static void *ifnet_delayed_run; /* wait channel for detaching thread */
423
424extern void bpfdetach(struct ifnet*);
425extern void proto_input_run(void);
91447636 426
316670eb
A
427extern uint32_t udp_count_opportunistic(unsigned int ifindex,
428 u_int32_t flags);
429extern uint32_t tcp_count_opportunistic(unsigned int ifindex,
430 u_int32_t flags);
431
6d2010ae 432__private_extern__ void link_rtrequest(int, struct rtentry *, struct sockaddr *);
91447636 433
b0d623f7 434#if DEBUG
6d2010ae 435static int dlil_verbose = 1;
b0d623f7 436#else
6d2010ae 437static int dlil_verbose = 0;
b0d623f7 438#endif /* DEBUG */
6d2010ae 439#if IFNET_INPUT_SANITY_CHK
6d2010ae 440/* sanity checking of input packet lists received */
316670eb
A
441static u_int32_t dlil_input_sanity_check = 0;
442#endif /* IFNET_INPUT_SANITY_CHK */
443/* rate limit debug messages */
444struct timespec dlil_dbgrate = { 1, 0 };
b0d623f7 445
6d2010ae 446SYSCTL_DECL(_net_link_generic_system);
91447636 447
316670eb
A
448SYSCTL_INT(_net_link_generic_system, OID_AUTO, dlil_verbose,
449 CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_verbose, 0, "Log DLIL error messages");
450
451#define IF_SNDQ_MINLEN 32
452u_int32_t if_sndq_maxlen = IFQ_MAXLEN;
453SYSCTL_PROC(_net_link_generic_system, OID_AUTO, sndq_maxlen,
454 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_sndq_maxlen, IFQ_MAXLEN,
455 sysctl_sndq_maxlen, "I", "Default transmit queue max length");
456
457#define IF_RCVQ_MINLEN 32
458#define IF_RCVQ_MAXLEN 256
459u_int32_t if_rcvq_maxlen = IF_RCVQ_MAXLEN;
460SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rcvq_maxlen,
461 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rcvq_maxlen, IFQ_MAXLEN,
462 sysctl_rcvq_maxlen, "I", "Default receive queue max length");
463
464#define IF_RXPOLL_DECAY 2 /* ilog2 of EWMA decay rate (4) */
465static u_int32_t if_rxpoll_decay = IF_RXPOLL_DECAY;
466SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_decay,
467 CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_decay, IF_RXPOLL_DECAY,
468 "ilog2 of EWMA decay rate of avg inbound packets");
469
470#define IF_RXPOLL_MODE_HOLDTIME (1000ULL * 1000 * 1000) /* 1 sec */
471static u_int64_t if_rxpoll_mode_holdtime = IF_RXPOLL_MODE_HOLDTIME;
472SYSCTL_QUAD(_net_link_generic_system, OID_AUTO, rxpoll_freeze_time,
473 CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_mode_holdtime,
474 "input poll mode freeze time");
475
476#define IF_RXPOLL_SAMPLETIME (10ULL * 1000 * 1000) /* 10 ms */
477static u_int64_t if_rxpoll_sample_holdtime = IF_RXPOLL_SAMPLETIME;
478SYSCTL_QUAD(_net_link_generic_system, OID_AUTO, rxpoll_sample_time,
479 CTLFLAG_RD | CTLFLAG_LOCKED, &if_rxpoll_sample_holdtime,
480 "input poll sampling time");
481
482#define IF_RXPOLL_INTERVAL_TIME (1ULL * 1000 * 1000) /* 1 ms */
483static u_int64_t if_rxpoll_interval_time = IF_RXPOLL_INTERVAL_TIME;
484SYSCTL_QUAD(_net_link_generic_system, OID_AUTO, rxpoll_interval_time,
485 CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_interval_time,
486 "input poll interval (time)");
487
488#define IF_RXPOLL_INTERVAL_PKTS 0 /* 0 (disabled) */
489static u_int32_t if_rxpoll_interval_pkts = IF_RXPOLL_INTERVAL_PKTS;
490SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_interval_pkts,
491 CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_interval_pkts,
492 IF_RXPOLL_INTERVAL_PKTS, "input poll interval (packets)");
493
494#define IF_RXPOLL_WLOWAT 5
495static u_int32_t if_rxpoll_wlowat = IF_RXPOLL_WLOWAT;
496SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_wakeups_lowat,
497 CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_wlowat, IF_RXPOLL_WLOWAT,
498 "input poll wakeup low watermark");
499
500#define IF_RXPOLL_WHIWAT 100
501static u_int32_t if_rxpoll_whiwat = IF_RXPOLL_WHIWAT;
502SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_wakeups_hiwat,
503 CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_whiwat, IF_RXPOLL_WHIWAT,
504 "input poll wakeup high watermark");
505
506static u_int32_t if_rxpoll_max = 0; /* 0 (automatic) */
507SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_max,
508 CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_max, 0,
509 "max packets per poll call");
510
511static u_int32_t if_rxpoll = 1;
512SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll,
513 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll, 0,
514 sysctl_rxpoll, "I", "enable opportunistic input polling");
515
516u_int32_t if_bw_smoothing_val = 3;
517SYSCTL_UINT(_net_link_generic_system, OID_AUTO, if_bw_smoothing_val,
518 CTLFLAG_RW | CTLFLAG_LOCKED, &if_bw_smoothing_val, 0, "");
519
520u_int32_t if_bw_measure_size = 10;
521SYSCTL_INT(_net_link_generic_system, OID_AUTO, if_bw_measure_size,
522 CTLFLAG_RW | CTLFLAG_LOCKED, &if_bw_measure_size, 0, "");
523
524static u_int32_t cur_dlil_input_threads = 0;
525SYSCTL_UINT(_net_link_generic_system, OID_AUTO, dlil_input_threads,
526 CTLFLAG_RD | CTLFLAG_LOCKED, &cur_dlil_input_threads , 0,
527 "Current number of DLIL input threads");
91447636 528
6d2010ae 529#if IFNET_INPUT_SANITY_CHK
316670eb
A
530SYSCTL_UINT(_net_link_generic_system, OID_AUTO, dlil_input_sanity_check,
531 CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_input_sanity_check , 0,
6d2010ae 532 "Turn on sanity checking in DLIL input");
316670eb 533#endif /* IFNET_INPUT_SANITY_CHK */
1c79356b 534
316670eb
A
535static u_int32_t if_flowadv = 1;
536SYSCTL_UINT(_net_link_generic_system, OID_AUTO, flow_advisory,
537 CTLFLAG_RW | CTLFLAG_LOCKED, &if_flowadv, 1,
538 "enable flow-advisory mechanism");
539
540unsigned int net_rxpoll = 1;
6d2010ae
A
541unsigned int net_affinity = 1;
542static kern_return_t dlil_affinity_set(struct thread *, u_int32_t);
1c79356b 543
b36670ce
A
544extern u_int32_t inject_buckets;
545
2d21ac55
A
546static lck_grp_attr_t *dlil_grp_attributes = NULL;
547static lck_attr_t *dlil_lck_attributes = NULL;
91447636 548
91447636
A
549#define PROTO_HASH_SLOTS 0x5
550
316670eb
A
/*
 * Panic unless mbuf `m' is a packet-header mbuf whose receive interface
 * is set and, for any interface other than lo_ifp, equals `ifp'.
 *
 * `ifp' is parenthesized so that a non-trivial expression passed as the
 * argument cannot re-associate with the comparison operators.  The
 * brace-block form is kept as is because callers are not visible here.
 */
#define	DLIL_INPUT_CHECK(m, ifp) {					\
	struct ifnet *_rcvif = mbuf_pkthdr_rcvif(m);			\
	if (_rcvif == NULL || ((ifp) != lo_ifp && _rcvif != (ifp)) ||	\
	    !(mbuf_flags(m) & MBUF_PKTHDR)) {				\
		panic_plain("%s: invalid mbuf %p\n", __func__, m);	\
		/* NOTREACHED */					\
	}								\
}
559
/*
 * Fold sample `new' into the running average `old' (an lvalue).
 * A zero average is simply replaced by the sample; otherwise the result
 * is ((old << decay) - old + new) >> decay, computed in 32 bits.
 */
#define	DLIL_EWMA(old, new, decay) do {					\
	u_int32_t _prev = (old);					\
	u_int32_t _next;						\
	if (_prev > 0)							\
		_next = (((_prev << (decay)) - _prev) + (new)) >> (decay); \
	else								\
		_next = (new);						\
	(old) = _next;							\
} while (0)
568
#define	MBPS	(1ULL * 1000 * 1000)
#define	GBPS	(MBPS * 1000)

/* Poll watermarks associated with a given downlink speed. */
struct rxpoll_time_tbl {
	u_int64_t	speed;		/* downlink speed */
	u_int32_t	plowat;		/* packets low watermark */
	u_int32_t	phiwat;		/* packets high watermark */
	u_int32_t	blowat;		/* bytes low watermark */
	u_int32_t	bhiwat;		/* bytes high watermark */
};

/* Ordered by increasing speed; the all-zero row terminates the table. */
static struct rxpoll_time_tbl rxpoll_tbl[] = {
	/* speed	plowat	phiwat	blowat		bhiwat */
	{ 10 * MBPS,	2,	8,	(1 * 1024),	(6 * 1024)	},
	{ 100 * MBPS,	10,	40,	(4 * 1024),	(64 * 1024)	},
	{ 1 * GBPS,	10,	40,	(4 * 1024),	(64 * 1024)	},
	{ 10 * GBPS,	10,	40,	(4 * 1024),	(64 * 1024)	},
	{ 100 * GBPS,	10,	40,	(4 * 1024),	(64 * 1024)	},
	{ 0,		0,	0,	0,		0		}
};
588
1c79356b
A
589/*
590 * Internal functions.
591 */
592
91447636 593static int
b0d623f7 594proto_hash_value(u_int32_t protocol_family)
91447636 595{
4a3eedf9
A
596 /*
597 * dlil_proto_unplumb_all() depends on the mapping between
598 * the hash bucket index and the protocol family defined
599 * here; future changes must be applied there as well.
600 */
91447636
A
601 switch(protocol_family) {
602 case PF_INET:
6d2010ae 603 return (0);
91447636 604 case PF_INET6:
6d2010ae 605 return (1);
91447636 606 case PF_APPLETALK:
6d2010ae 607 return (2);
91447636 608 case PF_VLAN:
6d2010ae
A
609 return (3);
610 case PF_UNSPEC:
91447636 611 default:
6d2010ae 612 return (4);
91447636
A
613 }
614}
615
6d2010ae
A
616/*
617 * Caller must already be holding ifnet lock.
618 */
619static struct if_proto *
b0d623f7 620find_attached_proto(struct ifnet *ifp, u_int32_t protocol_family)
1c79356b 621{
91447636 622 struct if_proto *proto = NULL;
b0d623f7 623 u_int32_t i = proto_hash_value(protocol_family);
6d2010ae
A
624
625 ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);
626
627 if (ifp->if_proto_hash != NULL)
91447636 628 proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
6d2010ae
A
629
630 while (proto != NULL && proto->protocol_family != protocol_family)
91447636 631 proto = SLIST_NEXT(proto, next_hash);
6d2010ae
A
632
633 if (proto != NULL)
634 if_proto_ref(proto);
635
636 return (proto);
1c79356b
A
637}
638
91447636
A
639static void
640if_proto_ref(struct if_proto *proto)
1c79356b 641{
6d2010ae 642 atomic_add_32(&proto->refcount, 1);
1c79356b
A
643}
644
6d2010ae
A
645extern void if_rtproto_del(struct ifnet *ifp, int protocol);
646
91447636
A
647static void
648if_proto_free(struct if_proto *proto)
0b4e3aa0 649{
6d2010ae
A
650 u_int32_t oldval;
651 struct ifnet *ifp = proto->ifp;
652 u_int32_t proto_family = proto->protocol_family;
653 struct kev_dl_proto_data ev_pr_data;
654
655 oldval = atomic_add_32_ov(&proto->refcount, -1);
656 if (oldval > 1)
657 return;
658
659 /* No more reference on this, protocol must have been detached */
660 VERIFY(proto->detached);
661
662 if (proto->proto_kpi == kProtoKPI_v1) {
663 if (proto->kpi.v1.detached)
664 proto->kpi.v1.detached(ifp, proto->protocol_family);
665 }
666 if (proto->proto_kpi == kProtoKPI_v2) {
667 if (proto->kpi.v2.detached)
668 proto->kpi.v2.detached(ifp, proto->protocol_family);
91447636 669 }
6d2010ae
A
670
671 /*
672 * Cleanup routes that may still be in the routing table for that
673 * interface/protocol pair.
674 */
675 if_rtproto_del(ifp, proto_family);
676
677 /*
678 * The reserved field carries the number of protocol still attached
679 * (subject to change)
680 */
681 ifnet_lock_shared(ifp);
682 ev_pr_data.proto_family = proto_family;
683 ev_pr_data.proto_remaining_count = dlil_ifp_proto_count(ifp);
684 ifnet_lock_done(ifp);
685
686 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_DETACHED,
687 (struct net_event_data *)&ev_pr_data,
688 sizeof(struct kev_dl_proto_data));
689
690 zfree(dlif_proto_zone, proto);
0b4e3aa0
A
691}
692
91447636 693__private_extern__ void
6d2010ae 694ifnet_lock_assert(struct ifnet *ifp, ifnet_lock_assert_t what)
1c79356b 695{
6d2010ae
A
696 unsigned int type = 0;
697 int ass = 1;
698
699 switch (what) {
700 case IFNET_LCK_ASSERT_EXCLUSIVE:
701 type = LCK_RW_ASSERT_EXCLUSIVE;
702 break;
703
704 case IFNET_LCK_ASSERT_SHARED:
705 type = LCK_RW_ASSERT_SHARED;
706 break;
707
708 case IFNET_LCK_ASSERT_OWNED:
709 type = LCK_RW_ASSERT_HELD;
710 break;
711
712 case IFNET_LCK_ASSERT_NOTOWNED:
713 /* nothing to do here for RW lock; bypass assert */
714 ass = 0;
715 break;
716
717 default:
718 panic("bad ifnet assert type: %d", what);
719 /* NOTREACHED */
720 }
721 if (ass)
722 lck_rw_assert(&ifp->if_lock, type);
1c79356b
A
723}
724
91447636 725__private_extern__ void
6d2010ae 726ifnet_lock_shared(struct ifnet *ifp)
1c79356b 727{
6d2010ae 728 lck_rw_lock_shared(&ifp->if_lock);
1c79356b
A
729}
730
91447636 731__private_extern__ void
6d2010ae 732ifnet_lock_exclusive(struct ifnet *ifp)
0b4e3aa0 733{
6d2010ae 734 lck_rw_lock_exclusive(&ifp->if_lock);
0b4e3aa0
A
735}
736
91447636 737__private_extern__ void
6d2010ae 738ifnet_lock_done(struct ifnet *ifp)
1c79356b 739{
6d2010ae 740 lck_rw_done(&ifp->if_lock);
1c79356b
A
741}
742
91447636 743__private_extern__ void
2d21ac55 744ifnet_head_lock_shared(void)
1c79356b 745{
6d2010ae 746 lck_rw_lock_shared(&ifnet_head_lock);
1c79356b
A
747}
748
91447636 749__private_extern__ void
2d21ac55 750ifnet_head_lock_exclusive(void)
91447636 751{
6d2010ae 752 lck_rw_lock_exclusive(&ifnet_head_lock);
91447636 753}
1c79356b 754
91447636 755__private_extern__ void
2d21ac55 756ifnet_head_done(void)
1c79356b 757{
6d2010ae 758 lck_rw_done(&ifnet_head_lock);
91447636 759}
1c79356b 760
6d2010ae
A
761/*
762 * Caller must already be holding ifnet lock.
763 */
764static int
765dlil_ifp_proto_count(struct ifnet * ifp)
91447636 766{
6d2010ae
A
767 int i, count = 0;
768
769 ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);
770
771 if (ifp->if_proto_hash == NULL)
772 goto done;
773
774 for (i = 0; i < PROTO_HASH_SLOTS; i++) {
775 struct if_proto *proto;
776 SLIST_FOREACH(proto, &ifp->if_proto_hash[i], next_hash) {
777 count++;
91447636
A
778 }
779 }
6d2010ae
A
780done:
781 return (count);
91447636 782}
1c79356b 783
91447636 784__private_extern__ void
6d2010ae
A
785dlil_post_msg(struct ifnet *ifp, u_int32_t event_subclass,
786 u_int32_t event_code, struct net_event_data *event_data,
787 u_int32_t event_data_len)
91447636 788{
6d2010ae
A
789 struct net_event_data ev_data;
790 struct kev_msg ev_msg;
791
792 bzero(&ev_msg, sizeof (ev_msg));
793 bzero(&ev_data, sizeof (ev_data));
794 /*
2d21ac55 795 * a net event always starts with a net_event_data structure
91447636
A
796 * but the caller can generate a simple net event or
797 * provide a longer event structure to post
798 */
6d2010ae
A
799 ev_msg.vendor_code = KEV_VENDOR_APPLE;
800 ev_msg.kev_class = KEV_NETWORK_CLASS;
801 ev_msg.kev_subclass = event_subclass;
802 ev_msg.event_code = event_code;
803
804 if (event_data == NULL) {
91447636
A
805 event_data = &ev_data;
806 event_data_len = sizeof(struct net_event_data);
807 }
6d2010ae 808
91447636
A
809 strncpy(&event_data->if_name[0], ifp->if_name, IFNAMSIZ);
810 event_data->if_family = ifp->if_family;
b0d623f7 811 event_data->if_unit = (u_int32_t) ifp->if_unit;
6d2010ae 812
91447636 813 ev_msg.dv[0].data_length = event_data_len;
6d2010ae 814 ev_msg.dv[0].data_ptr = event_data;
91447636 815 ev_msg.dv[1].data_length = 0;
6d2010ae 816
91447636 817 dlil_event_internal(ifp, &ev_msg);
1c79356b
A
818}
819
316670eb
A
820__private_extern__ int
821dlil_alloc_local_stats(struct ifnet *ifp)
822{
823 int ret = EINVAL;
824 void *buf, *base, **pbuf;
825
826 if (ifp == NULL)
827 goto end;
828
829 if (ifp->if_tcp_stat == NULL && ifp->if_udp_stat == NULL) {
830 /* allocate tcpstat_local structure */
831 buf = zalloc(dlif_tcpstat_zone);
832 if (buf == NULL) {
833 ret = ENOMEM;
834 goto end;
835 }
836 bzero(buf, dlif_tcpstat_bufsize);
837
838 /* Get the 64-bit aligned base address for this object */
839 base = (void *)P2ROUNDUP((intptr_t)buf + sizeof (u_int64_t),
840 sizeof (u_int64_t));
841 VERIFY(((intptr_t)base + dlif_tcpstat_size) <=
842 ((intptr_t)buf + dlif_tcpstat_bufsize));
843
844 /*
845 * Wind back a pointer size from the aligned base and
846 * save the original address so we can free it later.
847 */
848 pbuf = (void **)((intptr_t)base - sizeof (void *));
849 *pbuf = buf;
850 ifp->if_tcp_stat = base;
851
852 /* allocate udpstat_local structure */
853 buf = zalloc(dlif_udpstat_zone);
854 if (buf == NULL) {
855 ret = ENOMEM;
856 goto end;
857 }
858 bzero(buf, dlif_udpstat_bufsize);
859
860 /* Get the 64-bit aligned base address for this object */
861 base = (void *)P2ROUNDUP((intptr_t)buf + sizeof (u_int64_t),
862 sizeof (u_int64_t));
863 VERIFY(((intptr_t)base + dlif_udpstat_size) <=
864 ((intptr_t)buf + dlif_udpstat_bufsize));
865
866 /*
867 * Wind back a pointer size from the aligned base and
868 * save the original address so we can free it later.
869 */
870 pbuf = (void **)((intptr_t)base - sizeof (void *));
871 *pbuf = buf;
872 ifp->if_udp_stat = base;
873
874 VERIFY(IS_P2ALIGNED(ifp->if_tcp_stat, sizeof (u_int64_t)) &&
875 IS_P2ALIGNED(ifp->if_udp_stat, sizeof (u_int64_t)));
876
877 ret = 0;
878 }
879
880end:
881 if (ret != 0) {
882 if (ifp->if_tcp_stat != NULL) {
883 pbuf = (void **)
884 ((intptr_t)ifp->if_tcp_stat - sizeof (void *));
885 zfree(dlif_tcpstat_zone, *pbuf);
886 ifp->if_tcp_stat = NULL;
887 }
888 if (ifp->if_udp_stat != NULL) {
889 pbuf = (void **)
890 ((intptr_t)ifp->if_udp_stat - sizeof (void *));
891 zfree(dlif_udpstat_zone, *pbuf);
892 ifp->if_udp_stat = NULL;
893 }
894 }
895
896 return (ret);
897}
898
6d2010ae 899static int
316670eb 900dlil_create_input_thread(ifnet_t ifp, struct dlil_threading_info *inp)
2d21ac55 901{
316670eb
A
902 thread_continue_t func;
903 u_int32_t limit;
2d21ac55
A
904 int error;
905
316670eb
A
906 /* NULL ifp indicates the main input thread, called at dlil_init time */
907 if (ifp == NULL) {
908 func = dlil_main_input_thread_func;
909 VERIFY(inp == dlil_main_input_thread);
910 (void) strlcat(inp->input_name,
911 "main_input", DLIL_THREADNAME_LEN);
912 } else if (net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) {
913 func = dlil_rxpoll_input_thread_func;
914 VERIFY(inp != dlil_main_input_thread);
915 (void) snprintf(inp->input_name, DLIL_THREADNAME_LEN,
916 "%s%d_input_poll", ifp->if_name, ifp->if_unit);
6d2010ae 917 } else {
316670eb
A
918 func = dlil_input_thread_func;
919 VERIFY(inp != dlil_main_input_thread);
920 (void) snprintf(inp->input_name, DLIL_THREADNAME_LEN,
921 "%s%d_input", ifp->if_name, ifp->if_unit);
6d2010ae 922 }
316670eb 923 VERIFY(inp->input_thr == THREAD_NULL);
2d21ac55 924
316670eb
A
925 inp->lck_grp = lck_grp_alloc_init(inp->input_name, dlil_grp_attributes);
926 lck_mtx_init(&inp->input_lck, inp->lck_grp, dlil_lck_attributes);
927
928 inp->mode = IFNET_MODEL_INPUT_POLL_OFF;
929 inp->ifp = ifp; /* NULL for main input thread */
930
931 net_timerclear(&inp->mode_holdtime);
932 net_timerclear(&inp->mode_lasttime);
933 net_timerclear(&inp->sample_holdtime);
934 net_timerclear(&inp->sample_lasttime);
935 net_timerclear(&inp->dbg_lasttime);
936
937 /*
938 * For interfaces that support opportunistic polling, set the
939 * low and high watermarks for outstanding inbound packets/bytes.
940 * Also define freeze times for transitioning between modes
941 * and updating the average.
942 */
943 if (ifp != NULL && net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) {
944 limit = MAX(if_rcvq_maxlen, IF_RCVQ_MINLEN);
945 dlil_rxpoll_calc_limits(inp);
946 } else {
947 limit = (u_int32_t)-1;
948 }
949
950 _qinit(&inp->rcvq_pkts, Q_DROPTAIL, limit);
951 if (inp == dlil_main_input_thread) {
952 struct dlil_main_threading_info *inpm =
953 (struct dlil_main_threading_info *)inp;
954 _qinit(&inpm->lo_rcvq_pkts, Q_DROPTAIL, limit);
955 }
2d21ac55 956
316670eb
A
957 error = kernel_thread_start(func, inp, &inp->input_thr);
958 if (error == KERN_SUCCESS) {
959 ml_thread_policy(inp->input_thr, MACHINE_GROUP,
6d2010ae 960 (MACHINE_NETWORK_GROUP|MACHINE_NETWORK_NETISR));
2d21ac55 961 /*
316670eb
A
962 * We create an affinity set so that the matching workloop
963 * thread or the starter thread (for loopback) can be
964 * scheduled on the same processor set as the input thread.
2d21ac55 965 */
316670eb
A
966 if (net_affinity) {
967 struct thread *tp = inp->input_thr;
2d21ac55
A
968 u_int32_t tag;
969 /*
970 * Randomize to reduce the probability
971 * of affinity tag namespace collision.
972 */
973 read_random(&tag, sizeof (tag));
974 if (dlil_affinity_set(tp, tag) == KERN_SUCCESS) {
975 thread_reference(tp);
316670eb
A
976 inp->tag = tag;
977 inp->net_affinity = TRUE;
2d21ac55
A
978 }
979 }
316670eb
A
980 } else if (inp == dlil_main_input_thread) {
981 panic_plain("%s: couldn't create main input thread", __func__);
982 /* NOTREACHED */
2d21ac55 983 } else {
316670eb
A
984 panic_plain("%s: couldn't create %s%d input thread", __func__,
985 ifp->if_name, ifp->if_unit);
6d2010ae 986 /* NOTREACHED */
2d21ac55 987 }
b0d623f7 988 OSAddAtomic(1, &cur_dlil_input_threads);
316670eb 989
6d2010ae 990 return (error);
2d21ac55
A
991}
992
316670eb
A
993static void
994dlil_terminate_input_thread(struct dlil_threading_info *inp)
995{
996 struct ifnet *ifp;
997
998 VERIFY(current_thread() == inp->input_thr);
999 VERIFY(inp != dlil_main_input_thread);
1000
1001 OSAddAtomic(-1, &cur_dlil_input_threads);
1002
1003 lck_mtx_destroy(&inp->input_lck, inp->lck_grp);
1004 lck_grp_free(inp->lck_grp);
1005
1006 inp->input_waiting = 0;
1007 inp->wtot = 0;
1008 bzero(inp->input_name, sizeof (inp->input_name));
1009 ifp = inp->ifp;
1010 inp->ifp = NULL;
1011 VERIFY(qhead(&inp->rcvq_pkts) == NULL && qempty(&inp->rcvq_pkts));
1012 qlimit(&inp->rcvq_pkts) = 0;
1013 bzero(&inp->stats, sizeof (inp->stats));
1014
1015 VERIFY(!inp->net_affinity);
1016 inp->input_thr = THREAD_NULL;
1017 VERIFY(inp->wloop_thr == THREAD_NULL);
1018 VERIFY(inp->poll_thr == THREAD_NULL);
1019 VERIFY(inp->tag == 0);
1020
1021 inp->mode = IFNET_MODEL_INPUT_POLL_OFF;
1022 bzero(&inp->tstats, sizeof (inp->tstats));
1023 bzero(&inp->pstats, sizeof (inp->pstats));
1024 bzero(&inp->sstats, sizeof (inp->sstats));
1025
1026 net_timerclear(&inp->mode_holdtime);
1027 net_timerclear(&inp->mode_lasttime);
1028 net_timerclear(&inp->sample_holdtime);
1029 net_timerclear(&inp->sample_lasttime);
1030 net_timerclear(&inp->dbg_lasttime);
1031
1032#if IFNET_INPUT_SANITY_CHK
1033 inp->input_mbuf_cnt = 0;
1034#endif /* IFNET_INPUT_SANITY_CHK */
1035
1036 if (dlil_verbose) {
1037 printf("%s%d: input thread terminated\n",
1038 ifp->if_name, ifp->if_unit);
1039 }
1040
1041 /* for the extra refcnt from kernel_thread_start() */
1042 thread_deallocate(current_thread());
1043
1044 /* this is the end */
1045 thread_terminate(current_thread());
1046 /* NOTREACHED */
1047}
1048
2d21ac55
A
1049static kern_return_t
1050dlil_affinity_set(struct thread *tp, u_int32_t tag)
1051{
1052 thread_affinity_policy_data_t policy;
1053
1054 bzero(&policy, sizeof (policy));
1055 policy.affinity_tag = tag;
1056 return (thread_policy_set(tp, THREAD_AFFINITY_POLICY,
1057 (thread_policy_t)&policy, THREAD_AFFINITY_POLICY_COUNT));
1058}
1059
91447636
A
1060void
1061dlil_init(void)
1062{
6d2010ae
A
1063 thread_t thread = THREAD_NULL;
1064
1065 /*
1066 * The following fields must be 64-bit aligned for atomic operations.
1067 */
1068 IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets);
1069 IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors)
1070 IF_DATA_REQUIRE_ALIGNED_64(ifi_opackets);
1071 IF_DATA_REQUIRE_ALIGNED_64(ifi_oerrors);
1072 IF_DATA_REQUIRE_ALIGNED_64(ifi_collisions);
1073 IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes);
1074 IF_DATA_REQUIRE_ALIGNED_64(ifi_obytes);
1075 IF_DATA_REQUIRE_ALIGNED_64(ifi_imcasts);
1076 IF_DATA_REQUIRE_ALIGNED_64(ifi_omcasts);
1077 IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops);
1078 IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto);
316670eb 1079 IF_DATA_REQUIRE_ALIGNED_64(ifi_alignerrs);
6d2010ae
A
1080
1081 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets);
1082 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors)
1083 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_opackets);
1084 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_oerrors);
1085 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_collisions);
1086 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes);
1087 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_obytes);
1088 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_imcasts);
1089 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_omcasts);
1090 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops);
1091 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto);
316670eb 1092 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_alignerrs);
6d2010ae
A
1093
1094 /*
1095 * These IF_HWASSIST_ flags must be equal to their IFNET_* counterparts.
1096 */
1097 _CASSERT(IF_HWASSIST_CSUM_IP == IFNET_CSUM_IP);
1098 _CASSERT(IF_HWASSIST_CSUM_TCP == IFNET_CSUM_TCP);
1099 _CASSERT(IF_HWASSIST_CSUM_UDP == IFNET_CSUM_UDP);
1100 _CASSERT(IF_HWASSIST_CSUM_IP_FRAGS == IFNET_CSUM_FRAGMENT);
1101 _CASSERT(IF_HWASSIST_CSUM_FRAGMENT == IFNET_IP_FRAGMENT);
1102 _CASSERT(IF_HWASSIST_CSUM_TCP_SUM16 == IFNET_CSUM_SUM16);
1103 _CASSERT(IF_HWASSIST_VLAN_TAGGING == IFNET_VLAN_TAGGING);
1104 _CASSERT(IF_HWASSIST_VLAN_MTU == IFNET_VLAN_MTU);
1105 _CASSERT(IF_HWASSIST_TSO_V4 == IFNET_TSO_IPV4);
1106 _CASSERT(IF_HWASSIST_TSO_V6 == IFNET_TSO_IPV6);
1107
1108 /*
1109 * Make sure we have at least IF_LLREACH_MAXLEN in the llreach info.
1110 */
1111 _CASSERT(IF_LLREACH_MAXLEN <= IF_LLREACHINFO_ADDRLEN);
316670eb 1112 _CASSERT(IFNET_LLREACHINFO_ADDRLEN == IF_LLREACHINFO_ADDRLEN);
6d2010ae
A
1113
1114 PE_parse_boot_argn("net_affinity", &net_affinity,
1115 sizeof (net_affinity));
b0d623f7 1116
316670eb
A
1117 PE_parse_boot_argn("net_rxpoll", &net_rxpoll, sizeof (net_rxpoll));
1118
d1ecb069 1119 PE_parse_boot_argn("net_rtref", &net_rtref, sizeof (net_rtref));
6d2010ae
A
1120
1121 PE_parse_boot_argn("ifnet_debug", &ifnet_debug, sizeof (ifnet_debug));
1122
1123 dlif_size = (ifnet_debug == 0) ? sizeof (struct dlil_ifnet) :
1124 sizeof (struct dlil_ifnet_dbg);
1125 /* Enforce 64-bit alignment for dlil_ifnet structure */
1126 dlif_bufsize = dlif_size + sizeof (void *) + sizeof (u_int64_t);
1127 dlif_bufsize = P2ROUNDUP(dlif_bufsize, sizeof (u_int64_t));
1128 dlif_zone = zinit(dlif_bufsize, DLIF_ZONE_MAX * dlif_bufsize,
1129 0, DLIF_ZONE_NAME);
1130 if (dlif_zone == NULL) {
316670eb
A
1131 panic_plain("%s: failed allocating %s", __func__,
1132 DLIF_ZONE_NAME);
6d2010ae
A
1133 /* NOTREACHED */
1134 }
1135 zone_change(dlif_zone, Z_EXPAND, TRUE);
1136 zone_change(dlif_zone, Z_CALLERACCT, FALSE);
1137
1138 dlif_filt_size = sizeof (struct ifnet_filter);
1139 dlif_filt_zone = zinit(dlif_filt_size,
1140 DLIF_FILT_ZONE_MAX * dlif_filt_size, 0, DLIF_FILT_ZONE_NAME);
1141 if (dlif_filt_zone == NULL) {
316670eb 1142 panic_plain("%s: failed allocating %s", __func__,
6d2010ae
A
1143 DLIF_FILT_ZONE_NAME);
1144 /* NOTREACHED */
1145 }
1146 zone_change(dlif_filt_zone, Z_EXPAND, TRUE);
1147 zone_change(dlif_filt_zone, Z_CALLERACCT, FALSE);
1148
6d2010ae
A
1149 dlif_phash_size = sizeof (struct proto_hash_entry) * PROTO_HASH_SLOTS;
1150 dlif_phash_zone = zinit(dlif_phash_size,
1151 DLIF_PHASH_ZONE_MAX * dlif_phash_size, 0, DLIF_PHASH_ZONE_NAME);
1152 if (dlif_phash_zone == NULL) {
316670eb 1153 panic_plain("%s: failed allocating %s", __func__,
6d2010ae
A
1154 DLIF_PHASH_ZONE_NAME);
1155 /* NOTREACHED */
1156 }
1157 zone_change(dlif_phash_zone, Z_EXPAND, TRUE);
1158 zone_change(dlif_phash_zone, Z_CALLERACCT, FALSE);
1159
1160 dlif_proto_size = sizeof (struct if_proto);
1161 dlif_proto_zone = zinit(dlif_proto_size,
1162 DLIF_PROTO_ZONE_MAX * dlif_proto_size, 0, DLIF_PROTO_ZONE_NAME);
1163 if (dlif_proto_zone == NULL) {
316670eb 1164 panic_plain("%s: failed allocating %s", __func__,
6d2010ae
A
1165 DLIF_PROTO_ZONE_NAME);
1166 /* NOTREACHED */
1167 }
1168 zone_change(dlif_proto_zone, Z_EXPAND, TRUE);
1169 zone_change(dlif_proto_zone, Z_CALLERACCT, FALSE);
1170
316670eb
A
1171 dlif_tcpstat_size = sizeof (struct tcpstat_local);
1172 /* Enforce 64-bit alignment for tcpstat_local structure */
1173 dlif_tcpstat_bufsize =
1174 dlif_tcpstat_size + sizeof (void *) + sizeof (u_int64_t);
1175 dlif_tcpstat_bufsize =
1176 P2ROUNDUP(dlif_tcpstat_bufsize, sizeof (u_int64_t));
1177 dlif_tcpstat_zone = zinit(dlif_tcpstat_bufsize,
1178 DLIF_TCPSTAT_ZONE_MAX * dlif_tcpstat_bufsize, 0,
1179 DLIF_TCPSTAT_ZONE_NAME);
1180 if (dlif_tcpstat_zone == NULL) {
1181 panic_plain("%s: failed allocating %s", __func__,
1182 DLIF_TCPSTAT_ZONE_NAME);
1183 /* NOTREACHED */
1184 }
1185 zone_change(dlif_tcpstat_zone, Z_EXPAND, TRUE);
1186 zone_change(dlif_tcpstat_zone, Z_CALLERACCT, FALSE);
1187
1188 dlif_udpstat_size = sizeof (struct udpstat_local);
1189 /* Enforce 64-bit alignment for udpstat_local structure */
1190 dlif_udpstat_bufsize =
1191 dlif_udpstat_size + sizeof (void *) + sizeof (u_int64_t);
1192 dlif_udpstat_bufsize =
1193 P2ROUNDUP(dlif_udpstat_bufsize, sizeof (u_int64_t));
1194 dlif_udpstat_zone = zinit(dlif_udpstat_bufsize,
1195 DLIF_TCPSTAT_ZONE_MAX * dlif_udpstat_bufsize, 0,
1196 DLIF_UDPSTAT_ZONE_NAME);
1197 if (dlif_udpstat_zone == NULL) {
1198 panic_plain("%s: failed allocating %s", __func__,
1199 DLIF_UDPSTAT_ZONE_NAME);
1200 /* NOTREACHED */
1201 }
1202 zone_change(dlif_udpstat_zone, Z_EXPAND, TRUE);
1203 zone_change(dlif_udpstat_zone, Z_CALLERACCT, FALSE);
1204
6d2010ae 1205 ifnet_llreach_init();
d1ecb069 1206
91447636 1207 TAILQ_INIT(&dlil_ifnet_head);
91447636 1208 TAILQ_INIT(&ifnet_head);
6d2010ae
A
1209 TAILQ_INIT(&ifnet_detaching_head);
1210
91447636 1211 /* Setup the lock groups we will use */
2d21ac55 1212 dlil_grp_attributes = lck_grp_attr_alloc_init();
91447636 1213
316670eb 1214 dlil_lock_group = lck_grp_alloc_init("DLIL internal locks",
6d2010ae
A
1215 dlil_grp_attributes);
1216 ifnet_lock_group = lck_grp_alloc_init("ifnet locks",
1217 dlil_grp_attributes);
1218 ifnet_head_lock_group = lck_grp_alloc_init("ifnet head lock",
1219 dlil_grp_attributes);
316670eb
A
1220 ifnet_rcv_lock_group = lck_grp_alloc_init("ifnet rcv locks",
1221 dlil_grp_attributes);
1222 ifnet_snd_lock_group = lck_grp_alloc_init("ifnet snd locks",
6d2010ae
A
1223 dlil_grp_attributes);
1224
91447636 1225 /* Setup the lock attributes we will use */
2d21ac55 1226 dlil_lck_attributes = lck_attr_alloc_init();
6d2010ae 1227
91447636 1228 ifnet_lock_attr = lck_attr_alloc_init();
6d2010ae
A
1229
1230 lck_rw_init(&ifnet_head_lock, ifnet_head_lock_group,
1231 dlil_lck_attributes);
1232 lck_mtx_init(&dlil_ifnet_lock, dlil_lock_group, dlil_lck_attributes);
1233
316670eb
A
1234 ifnet_fc_init();
1235
2d21ac55
A
1236 lck_attr_free(dlil_lck_attributes);
1237 dlil_lck_attributes = NULL;
6d2010ae
A
1238
1239 ifa_init();
91447636 1240 /*
316670eb
A
1241 * Create and start up the main DLIL input thread and the interface
1242 * detacher threads once everything is initialized.
91447636 1243 */
316670eb 1244 dlil_create_input_thread(NULL, dlil_main_input_thread);
2d21ac55 1245
316670eb
A
1246 if (kernel_thread_start(ifnet_detacher_thread_func,
1247 NULL, &thread) != KERN_SUCCESS) {
1248 panic_plain("%s: couldn't create detacher thread", __func__);
6d2010ae
A
1249 /* NOTREACHED */
1250 }
b0d623f7 1251 thread_deallocate(thread);
6d2010ae 1252
b0d623f7
A
1253#if PF
1254 /* Initialize the packet filter */
1255 pfinit();
1256#endif /* PF */
316670eb
A
1257
1258 /* Initialize queue algorithms */
1259 classq_init();
1260
1261 /* Initialize packet schedulers */
1262 pktsched_init();
91447636 1263}
1c79356b 1264
6d2010ae
A
1265static void
1266if_flt_monitor_busy(struct ifnet *ifp)
1267{
1268 lck_mtx_assert(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
1269
1270 ++ifp->if_flt_busy;
1271 VERIFY(ifp->if_flt_busy != 0);
1272}
1273
/*
 * Counterpart of if_flt_monitor_busy(); simply leaves the monitor.
 */
static void
if_flt_monitor_unbusy(struct ifnet *ifp)
{
	/* Dropping a busy reference is the same as leaving the monitor */
	if_flt_monitor_leave(ifp);
}
1279
1280static void
1281if_flt_monitor_enter(struct ifnet *ifp)
1282{
1283 lck_mtx_assert(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
1284
1285 while (ifp->if_flt_busy) {
1286 ++ifp->if_flt_waiters;
1287 (void) msleep(&ifp->if_flt_head, &ifp->if_flt_lock,
1288 (PZERO - 1), "if_flt_monitor", NULL);
1289 }
1290 if_flt_monitor_busy(ifp);
1291}
1292
1293static void
1294if_flt_monitor_leave(struct ifnet *ifp)
1295{
1296 lck_mtx_assert(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
1297
1298 VERIFY(ifp->if_flt_busy != 0);
1299 --ifp->if_flt_busy;
1300
1301 if (ifp->if_flt_busy == 0 && ifp->if_flt_waiters > 0) {
1302 ifp->if_flt_waiters = 0;
1303 wakeup(&ifp->if_flt_head);
1304 }
1305}
1306
2d21ac55 1307__private_extern__ int
6d2010ae
A
1308dlil_attach_filter(struct ifnet *ifp, const struct iff_filter *if_filter,
1309 interface_filter_t *filter_ref)
1310{
1311 int retval = 0;
1312 struct ifnet_filter *filter = NULL;
9bccf70c 1313
6d2010ae
A
1314 ifnet_head_lock_shared();
1315 /* Check that the interface is in the global list */
1316 if (!ifnet_lookup(ifp)) {
1317 retval = ENXIO;
1318 goto done;
1319 }
1320
1321 filter = zalloc(dlif_filt_zone);
1322 if (filter == NULL) {
1323 retval = ENOMEM;
1324 goto done;
1325 }
1326 bzero(filter, dlif_filt_size);
1327
1328 /* refcnt held above during lookup */
91447636
A
1329 filter->filt_ifp = ifp;
1330 filter->filt_cookie = if_filter->iff_cookie;
1331 filter->filt_name = if_filter->iff_name;
1332 filter->filt_protocol = if_filter->iff_protocol;
1333 filter->filt_input = if_filter->iff_input;
1334 filter->filt_output = if_filter->iff_output;
1335 filter->filt_event = if_filter->iff_event;
1336 filter->filt_ioctl = if_filter->iff_ioctl;
1337 filter->filt_detached = if_filter->iff_detached;
6d2010ae
A
1338
1339 lck_mtx_lock(&ifp->if_flt_lock);
1340 if_flt_monitor_enter(ifp);
1341
1342 lck_mtx_assert(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
91447636 1343 TAILQ_INSERT_TAIL(&ifp->if_flt_head, filter, filt_next);
6d2010ae
A
1344
1345 if_flt_monitor_leave(ifp);
1346 lck_mtx_unlock(&ifp->if_flt_lock);
1347
91447636 1348 *filter_ref = filter;
b0d623f7
A
1349
1350 /*
1351 * Bump filter count and route_generation ID to let TCP
1352 * know it shouldn't do TSO on this connection
1353 */
1354 OSAddAtomic(1, &dlil_filter_count);
1355 if (use_routegenid)
1356 routegenid_update();
1357
6d2010ae
A
1358 if (dlil_verbose) {
1359 printf("%s%d: %s filter attached\n", ifp->if_name,
1360 ifp->if_unit, if_filter->iff_name);
1361 }
1362done:
1363 ifnet_head_done();
1364 if (retval != 0 && ifp != NULL) {
1365 DLIL_PRINTF("%s%d: failed to attach %s (err=%d)\n",
1366 ifp->if_name, ifp->if_unit, if_filter->iff_name, retval);
1367 }
1368 if (retval != 0 && filter != NULL)
1369 zfree(dlif_filt_zone, filter);
1370
1371 return (retval);
1c79356b
A
1372}
1373
91447636 1374static int
6d2010ae 1375dlil_detach_filter_internal(interface_filter_t filter, int detached)
1c79356b 1376{
91447636 1377 int retval = 0;
6d2010ae 1378
3a60a9f5 1379 if (detached == 0) {
6d2010ae
A
1380 ifnet_t ifp = NULL;
1381
3a60a9f5
A
1382 ifnet_head_lock_shared();
1383 TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
6d2010ae
A
1384 interface_filter_t entry = NULL;
1385
1386 lck_mtx_lock(&ifp->if_flt_lock);
3a60a9f5 1387 TAILQ_FOREACH(entry, &ifp->if_flt_head, filt_next) {
6d2010ae
A
1388 if (entry != filter || entry->filt_skip)
1389 continue;
1390 /*
1391 * We've found a match; since it's possible
1392 * that the thread gets blocked in the monitor,
1393 * we do the lock dance. Interface should
1394 * not be detached since we still have a use
1395 * count held during filter attach.
1396 */
1397 entry->filt_skip = 1; /* skip input/output */
1398 lck_mtx_unlock(&ifp->if_flt_lock);
1399 ifnet_head_done();
1400
1401 lck_mtx_lock(&ifp->if_flt_lock);
1402 if_flt_monitor_enter(ifp);
1403 lck_mtx_assert(&ifp->if_flt_lock,
1404 LCK_MTX_ASSERT_OWNED);
1405
1406 /* Remove the filter from the list */
1407 TAILQ_REMOVE(&ifp->if_flt_head, filter,
1408 filt_next);
1409
1410 if_flt_monitor_leave(ifp);
1411 lck_mtx_unlock(&ifp->if_flt_lock);
1412 if (dlil_verbose) {
1413 printf("%s%d: %s filter detached\n",
1414 ifp->if_name, ifp->if_unit,
1415 filter->filt_name);
1416 }
1417 goto destroy;
3a60a9f5 1418 }
6d2010ae 1419 lck_mtx_unlock(&ifp->if_flt_lock);
3a60a9f5
A
1420 }
1421 ifnet_head_done();
6d2010ae
A
1422
1423 /* filter parameter is not a valid filter ref */
1424 retval = EINVAL;
1425 goto done;
3a60a9f5 1426 }
6d2010ae
A
1427
1428 if (dlil_verbose)
1429 printf("%s filter detached\n", filter->filt_name);
1430
1431destroy:
1432
1433 /* Call the detached function if there is one */
91447636
A
1434 if (filter->filt_detached)
1435 filter->filt_detached(filter->filt_cookie, filter->filt_ifp);
9bccf70c 1436
3a60a9f5 1437 /* Free the filter */
6d2010ae
A
1438 zfree(dlif_filt_zone, filter);
1439
b0d623f7
A
1440 /*
1441 * Decrease filter count and route_generation ID to let TCP
1442 * know it should reevalute doing TSO or not
1443 */
1444 OSAddAtomic(-1, &dlil_filter_count);
1445 if (use_routegenid)
1446 routegenid_update();
1447
6d2010ae
A
1448done:
1449 if (retval != 0) {
1450 DLIL_PRINTF("failed to detach %s filter (err=%d)\n",
1451 filter->filt_name, retval);
1452 }
1453 return (retval);
1c79356b
A
1454}
1455
2d21ac55 1456__private_extern__ void
91447636
A
1457dlil_detach_filter(interface_filter_t filter)
1458{
3a60a9f5
A
1459 if (filter == NULL)
1460 return;
91447636
A
1461 dlil_detach_filter_internal(filter, 0);
1462}
1c79356b 1463
316670eb
A
1464/*
1465 * Main input thread:
1466 *
1467 * a) handles all inbound packets for lo0
1468 * b) handles all inbound packets for interfaces with no dedicated
1469 * input thread (e.g. anything but Ethernet/PDP or those that support
1470 * opportunistic polling.)
1471 * c) protocol registrations
1472 * d) packet injections
1473 */
91447636 1474static void
316670eb 1475dlil_main_input_thread_func(void *v, wait_result_t w)
91447636 1476{
316670eb
A
1477#pragma unused(w)
1478 struct dlil_main_threading_info *inpm = v;
1479 struct dlil_threading_info *inp = v;
1480
1481 VERIFY(inp == dlil_main_input_thread);
1482 VERIFY(inp->ifp == NULL);
1483 VERIFY(inp->mode == IFNET_MODEL_INPUT_POLL_OFF);
1484
91447636 1485 while (1) {
2d21ac55 1486 struct mbuf *m = NULL, *m_loop = NULL;
316670eb
A
1487 u_int32_t m_cnt, m_cnt_loop;
1488 boolean_t proto_req;
6d2010ae 1489
316670eb 1490 lck_mtx_lock_spin(&inp->input_lck);
6d2010ae 1491
2d21ac55 1492 /* Wait until there is work to be done */
316670eb
A
1493 while (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) {
1494 inp->input_waiting &= ~DLIL_INPUT_RUNNING;
1495 (void) msleep(&inp->input_waiting, &inp->input_lck,
1496 (PZERO - 1) | PSPIN, inp->input_name, NULL);
2d21ac55
A
1497 }
1498
316670eb
A
1499 inp->input_waiting |= DLIL_INPUT_RUNNING;
1500 inp->input_waiting &= ~DLIL_INPUT_WAITING;
2d21ac55 1501
316670eb
A
1502 /* Main input thread cannot be terminated */
1503 VERIFY(!(inp->input_waiting & DLIL_INPUT_TERMINATE));
2d21ac55 1504
316670eb
A
1505 proto_req = (inp->input_waiting &
1506 (DLIL_PROTO_WAITING | DLIL_PROTO_REGISTER));
6d2010ae 1507
316670eb
A
1508 /* Packets for non-dedicated interfaces other than lo0 */
1509 m_cnt = qlen(&inp->rcvq_pkts);
1510 m = _getq_all(&inp->rcvq_pkts);
6d2010ae 1511
316670eb
A
1512 /* Packets exclusive for lo0 */
1513 m_cnt_loop = qlen(&inpm->lo_rcvq_pkts);
1514 m_loop = _getq_all(&inpm->lo_rcvq_pkts);
6d2010ae 1515
316670eb 1516 inp->wtot = 0;
6d2010ae 1517
316670eb 1518 lck_mtx_unlock(&inp->input_lck);
6d2010ae 1519
316670eb
A
1520 /*
1521 * NOTE warning %%% attention !!!!
1522 * We should think about putting some thread starvation
1523 * safeguards if we deal with long chains of packets.
1524 */
1525 if (m_loop != NULL)
1526 dlil_input_packet_list_extended(lo_ifp, m_loop,
1527 m_cnt_loop, inp->mode);
6d2010ae 1528
316670eb
A
1529 if (m != NULL)
1530 dlil_input_packet_list_extended(NULL, m,
1531 m_cnt, inp->mode);
1532
1533 if (proto_req)
1534 proto_input_run();
1535 }
1536
1537 /* NOTREACHED */
1538 VERIFY(0); /* we should never get here */
1539}
1540
1541/*
1542 * Input thread for interfaces with legacy input model.
1543 */
1544static void
1545dlil_input_thread_func(void *v, wait_result_t w)
1546{
1547#pragma unused(w)
1548 struct dlil_threading_info *inp = v;
1549 struct ifnet *ifp = inp->ifp;
1550
1551 VERIFY(inp != dlil_main_input_thread);
1552 VERIFY(ifp != NULL);
1553 VERIFY(!(ifp->if_eflags & IFEF_RXPOLL) || !net_rxpoll);
1554 VERIFY(inp->mode == IFNET_MODEL_INPUT_POLL_OFF);
2d21ac55 1555
316670eb
A
1556 while (1) {
1557 struct mbuf *m = NULL;
1558 u_int32_t m_cnt;
1559
1560 lck_mtx_lock_spin(&inp->input_lck);
2d21ac55 1561
316670eb
A
1562 /* Wait until there is work to be done */
1563 while (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) {
1564 inp->input_waiting &= ~DLIL_INPUT_RUNNING;
1565 (void) msleep(&inp->input_waiting, &inp->input_lck,
1566 (PZERO - 1) | PSPIN, inp->input_name, NULL);
2d21ac55
A
1567 }
1568
316670eb
A
1569 inp->input_waiting |= DLIL_INPUT_RUNNING;
1570 inp->input_waiting &= ~DLIL_INPUT_WAITING;
6d2010ae 1571
316670eb
A
1572 /*
1573 * Protocol registration and injection must always use
1574 * the main input thread; in theory the latter can utilize
1575 * the corresponding input thread where the packet arrived
1576 * on, but that requires our knowing the interface in advance
1577 * (and the benefits might not worth the trouble.)
1578 */
1579 VERIFY(!(inp->input_waiting &
1580 (DLIL_PROTO_WAITING|DLIL_PROTO_REGISTER)));
6d2010ae 1581
316670eb
A
1582 /* Packets for this interface */
1583 m_cnt = qlen(&inp->rcvq_pkts);
1584 m = _getq_all(&inp->rcvq_pkts);
6d2010ae 1585
316670eb
A
1586 if (inp->input_waiting & DLIL_INPUT_TERMINATE) {
1587 lck_mtx_unlock(&inp->input_lck);
1588
1589 /* Free up pending packets */
1590 if (m != NULL)
1591 mbuf_freem_list(m);
1592
1593 dlil_terminate_input_thread(inp);
1594 /* NOTREACHED */
1595 return;
2d21ac55
A
1596 }
1597
316670eb
A
1598 inp->wtot = 0;
1599
1600 dlil_input_stats_sync(ifp, inp);
1601
1602 lck_mtx_unlock(&inp->input_lck);
2d21ac55 1603
91447636
A
1604 /*
1605 * NOTE warning %%% attention !!!!
6d2010ae
A
1606 * We should think about putting some thread starvation
1607 * safeguards if we deal with long chains of packets.
91447636 1608 */
6d2010ae 1609 if (m != NULL)
316670eb
A
1610 dlil_input_packet_list_extended(NULL, m,
1611 m_cnt, inp->mode);
2d21ac55 1612 }
316670eb
A
1613
1614 /* NOTREACHED */
1615 VERIFY(0); /* we should never get here */
2d21ac55
A
1616}
1617
316670eb
A
1618/*
1619 * Input thread for interfaces with opportunistic polling input model.
1620 */
/*
 * v is the interface's dlil_threading_info; w is unused.
 *
 * Each pass of the loop, with input_lck held:
 *   - recomputes the polling limits if ifp->if_poll_update is set;
 *   - sleeps until a flag other than DLIL_INPUT_RUNNING is set in
 *     input_waiting or the receive queue is non-empty;
 *   - on DLIL_INPUT_TERMINATE, flushes the queue and terminates;
 *   - dequeues all pending packets and, when if_rxpoll is set and a
 *     sample hold time is configured, folds their count/size into the
 *     sampling statistics and decides whether to change polling mode.
 * After dropping the lock it asks the driver to switch modes if one was
 * requested, then hands the dequeued packets to
 * dlil_input_packet_list_extended().
 */
1621static void
1622dlil_rxpoll_input_thread_func(void *v, wait_result_t w)
2d21ac55 1623{
316670eb
A
1624#pragma unused(w)
1625 struct dlil_threading_info *inp = v;
1626 struct ifnet *ifp = inp->ifp;
1627 struct timespec ts;
2d21ac55 1628
316670eb
A
1629 VERIFY(inp != dlil_main_input_thread);
1630 VERIFY(ifp != NULL && (ifp->if_eflags & IFEF_RXPOLL));
2d21ac55 1631
2d21ac55 1632 while (1) {
316670eb
A
1633 struct mbuf *m = NULL;
1634 u_int32_t m_cnt, m_size, poll_req = 0;
1635 ifnet_model_t mode;
1636 struct timespec now, delta;
6d2010ae 1637
316670eb 1638 lck_mtx_lock_spin(&inp->input_lck);
6d2010ae 1639
316670eb
A
1640 /* Link parameters changed? */
1641 if (ifp->if_poll_update != 0) {
1642 ifp->if_poll_update = 0;
1643 dlil_rxpoll_calc_limits(inp);
91447636 1644 }
1c79356b 1645
316670eb
A
1646 /* Current operating mode */
1647 mode = inp->mode;
1c79356b 1648
316670eb
A
1649 /* Wait until there is work to be done */
1650 while (!(inp->input_waiting & ~DLIL_INPUT_RUNNING) &&
1651 qempty(&inp->rcvq_pkts)) {
1652 inp->input_waiting &= ~DLIL_INPUT_RUNNING;
1653 (void) msleep(&inp->input_waiting, &inp->input_lck,
1654 (PZERO - 1) | PSPIN, inp->input_name, NULL);
1655 }
2d21ac55 1656
316670eb
A
1657 inp->input_waiting |= DLIL_INPUT_RUNNING;
1658 inp->input_waiting &= ~DLIL_INPUT_WAITING;
2d21ac55
A
1659
1660 /*
316670eb
A
1661 * Protocol registration and injection must always use
1662 * the main input thread; in theory the latter can utilize
1663 * the corresponding input thread where the packet arrived
1664 * on, but that requires our knowing the interface in advance
1665 * (and the benefits might not worth the trouble.)
2d21ac55 1666 */
316670eb
A
1667 VERIFY(!(inp->input_waiting &
1668 (DLIL_PROTO_WAITING|DLIL_PROTO_REGISTER)));
2d21ac55 1669
316670eb
A
1670 if (inp->input_waiting & DLIL_INPUT_TERMINATE) {
1671 /* Free up pending packets */
1672 _flushq(&inp->rcvq_pkts);
1673 lck_mtx_unlock(&inp->input_lck);
2d21ac55 1674
316670eb
A
1675 dlil_terminate_input_thread(inp);
1676 /* NOTREACHED */
1677 return;
2d21ac55 1678 }
2d21ac55 1679
316670eb
A
1680 /* Total count of all packets */
1681 m_cnt = qlen(&inp->rcvq_pkts);
1682
1683 /* Total bytes of all packets */
1684 m_size = qsize(&inp->rcvq_pkts);
1685
1686 /* Packets for this interface */
1687 m = _getq_all(&inp->rcvq_pkts);
1688 VERIFY(m != NULL || m_cnt == 0);
1689
	/* First pass after the timers were cleared: start sampling now */
1690 nanouptime(&now);
1691 if (!net_timerisset(&inp->sample_lasttime))
1692 *(&inp->sample_lasttime) = *(&now);
1693
1694 net_timersub(&now, &inp->sample_lasttime, &delta);
1695 if (if_rxpoll && net_timerisset(&inp->sample_holdtime)) {
1696 u_int32_t ptot, btot;
1697
1698 /* Accumulate statistics for current sampling */
1699 PKTCNTR_ADD(&inp->sstats, m_cnt, m_size);
1700
	/* Sample hold time not yet elapsed: keep accumulating */
1701 if (net_timercmp(&delta, &inp->sample_holdtime, <))
1702 goto skip;
1703
1704 *(&inp->sample_lasttime) = *(&now);
1705
1706 /* Calculate min/max of inbound bytes */
1707 btot = (u_int32_t)inp->sstats.bytes;
1708 if (inp->rxpoll_bmin == 0 || inp->rxpoll_bmin > btot)
1709 inp->rxpoll_bmin = btot;
1710 if (btot > inp->rxpoll_bmax)
1711 inp->rxpoll_bmax = btot;
1712
1713 /* Calculate EWMA of inbound bytes */
1714 DLIL_EWMA(inp->rxpoll_bavg, btot, if_rxpoll_decay);
1715
1716 /* Calculate min/max of inbound packets */
1717 ptot = (u_int32_t)inp->sstats.packets;
1718 if (inp->rxpoll_pmin == 0 || inp->rxpoll_pmin > ptot)
1719 inp->rxpoll_pmin = ptot;
1720 if (ptot > inp->rxpoll_pmax)
1721 inp->rxpoll_pmax = ptot;
1722
1723 /* Calculate EWMA of inbound packets */
1724 DLIL_EWMA(inp->rxpoll_pavg, ptot, if_rxpoll_decay);
1725
1726 /* Reset sampling statistics */
1727 PKTCNTR_CLEAR(&inp->sstats);
1728
1729 /* Calculate EWMA of wakeup requests */
1730 DLIL_EWMA(inp->rxpoll_wavg, inp->wtot, if_rxpoll_decay);
1731 inp->wtot = 0;
1732
	/* Verbose logging is rate-limited by dlil_dbgrate */
1733 if (dlil_verbose) {
1734 if (!net_timerisset(&inp->dbg_lasttime))
1735 *(&inp->dbg_lasttime) = *(&now);
1736 net_timersub(&now, &inp->dbg_lasttime, &delta);
1737 if (net_timercmp(&delta, &dlil_dbgrate, >=)) {
1738 *(&inp->dbg_lasttime) = *(&now);
1739 printf("%s%d: [%s] pkts avg %d max %d "
1740 "limits [%d/%d], wreq avg %d "
1741 "limits [%d/%d], bytes avg %d "
1742 "limits [%d/%d]\n", ifp->if_name,
1743 ifp->if_unit, (inp->mode ==
1744 IFNET_MODEL_INPUT_POLL_ON) ?
1745 "ON" : "OFF", inp->rxpoll_pavg,
1746 inp->rxpoll_pmax,
1747 inp->rxpoll_plowat,
1748 inp->rxpoll_phiwat,
1749 inp->rxpoll_wavg,
1750 inp->rxpoll_wlowat,
1751 inp->rxpoll_whiwat,
1752 inp->rxpoll_bavg,
1753 inp->rxpoll_blowat,
1754 inp->rxpoll_bhiwat);
1755 }
1756 }
2d21ac55 1757
316670eb
A
1758 /* Perform mode transition, if necessary */
1759 if (!net_timerisset(&inp->mode_lasttime))
1760 *(&inp->mode_lasttime) = *(&now);
1761
	/* Mode hold time not yet elapsed: stay in the current mode */
1762 net_timersub(&now, &inp->mode_lasttime, &delta);
1763 if (net_timercmp(&delta, &inp->mode_holdtime, <))
1764 goto skip;
1765
	/*
	 * Polling goes OFF only when packet, byte and wakeup averages
	 * are all at or below their low watermarks; it goes ON when the
	 * packet average and either the byte or wakeup average reach
	 * their high watermarks.
	 */
1766 if (inp->rxpoll_pavg <= inp->rxpoll_plowat &&
1767 inp->rxpoll_bavg <= inp->rxpoll_blowat &&
1768 inp->rxpoll_wavg <= inp->rxpoll_wlowat &&
1769 inp->mode != IFNET_MODEL_INPUT_POLL_OFF) {
1770 mode = IFNET_MODEL_INPUT_POLL_OFF;
1771 } else if (inp->rxpoll_pavg >= inp->rxpoll_phiwat &&
1772 (inp->rxpoll_bavg >= inp->rxpoll_bhiwat ||
1773 inp->rxpoll_wavg >= inp->rxpoll_whiwat) &&
1774 inp->mode != IFNET_MODEL_INPUT_POLL_ON) {
1775 mode = IFNET_MODEL_INPUT_POLL_ON;
1776 }
6d2010ae 1777
316670eb
A
	/* Record the change; the driver downcall happens after unlock */
1778 if (mode != inp->mode) {
1779 inp->mode = mode;
1780 *(&inp->mode_lasttime) = *(&now);
1781 poll_req++;
1782 }
1783 }
1784skip:
1785 dlil_input_stats_sync(ifp, inp);
6d2010ae 1786
316670eb 1787 lck_mtx_unlock(&inp->input_lck);
6d2010ae 1788
316670eb
A
1789 /*
1790 * If there's a mode change and interface is still attached,
1791 * perform a downcall to the driver for the new mode. Also
1792 * hold an IO refcnt on the interface to prevent it from
1793 * being detached (will be released below.)
1794 */
1795 if (poll_req != 0 && ifnet_is_attached(ifp, 1)) {
1796 struct ifnet_model_params p = { mode, { 0 } };
1797 errno_t err;
1798
1799 if (dlil_verbose) {
1800 printf("%s%d: polling is now %s, "
1801 "pkts avg %d max %d limits [%d/%d], "
1802 "wreq avg %d limits [%d/%d], "
1803 "bytes avg %d limits [%d/%d]\n",
1804 ifp->if_name, ifp->if_unit,
1805 (mode == IFNET_MODEL_INPUT_POLL_ON) ?
1806 "ON" : "OFF", inp->rxpoll_pavg,
1807 inp->rxpoll_pmax, inp->rxpoll_plowat,
1808 inp->rxpoll_phiwat, inp->rxpoll_wavg,
1809 inp->rxpoll_wlowat, inp->rxpoll_whiwat,
1810 inp->rxpoll_bavg, inp->rxpoll_blowat,
1811 inp->rxpoll_bhiwat);
1812 }
2d21ac55 1813
316670eb
A
	/* A driver error is logged and counted, but not otherwise acted on */
1814 if ((err = ((*ifp->if_input_ctl)(ifp,
1815 IFNET_CTL_SET_INPUT_MODEL, sizeof (p), &p))) != 0) {
1816 printf("%s%d: error setting polling mode "
1817 "to %s (%d)\n", ifp->if_name, ifp->if_unit,
1818 (mode == IFNET_MODEL_INPUT_POLL_ON) ?
1819 "ON" : "OFF", err);
1820 }
1c79356b 1821
316670eb
A
1822 switch (mode) {
1823 case IFNET_MODEL_INPUT_POLL_OFF:
1824 ifnet_set_poll_cycle(ifp, NULL);
1825 inp->rxpoll_offreq++;
1826 if (err != 0)
1827 inp->rxpoll_offerr++;
1828 break;
2d21ac55 1829
316670eb
A
1830 case IFNET_MODEL_INPUT_POLL_ON:
1831 net_nsectimer(&if_rxpoll_interval_time, &ts);
1832 ifnet_set_poll_cycle(ifp, &ts);
1833 ifnet_poll(ifp);
1834 inp->rxpoll_onreq++;
1835 if (err != 0)
1836 inp->rxpoll_onerr++;
1837 break;
1838
1839 default:
1840 VERIFY(0);
1841 /* NOTREACHED */
1842 }
1843
1844 /* Release the IO refcnt */
1845 ifnet_decr_iorefcnt(ifp);
1846 }
1847
1848 /*
1849 * NOTE warning %%% attention !!!!
1850 * We should think about putting some thread starvation
1851 * safeguards if we deal with long chains of packets.
1852 */
1853 if (m != NULL)
1854 dlil_input_packet_list_extended(NULL, m, m_cnt, mode);
1855 }
1856
1857 /* NOTREACHED */
1858 VERIFY(0); /* we should never get here */
1859}
1860
1861static void
1862dlil_rxpoll_calc_limits(struct dlil_threading_info *inp)
1863{
1864 struct ifnet *ifp = inp->ifp;
1865 u_int64_t sample_holdtime, inbw;
1866
1867 VERIFY(inp != dlil_main_input_thread);
1868 VERIFY(ifp != NULL && (ifp->if_eflags & IFEF_RXPOLL));
1869
1870 if ((inbw = ifnet_input_linkrate(ifp)) == 0) {
1871 sample_holdtime = 0; /* polling is disabled */
1872 inp->rxpoll_wlowat = inp->rxpoll_plowat =
1873 inp->rxpoll_blowat = 0;
1874 inp->rxpoll_whiwat = inp->rxpoll_phiwat =
1875 inp->rxpoll_bhiwat = (u_int32_t)-1;
1876 } else {
1877 unsigned int n, i;
1878
1879 n = 0;
1880 for (i = 0; rxpoll_tbl[i].speed != 0; i++) {
1881 if (inbw < rxpoll_tbl[i].speed)
1882 break;
1883 n = i;
1884 }
1885 sample_holdtime = if_rxpoll_sample_holdtime;
1886 inp->rxpoll_wlowat = if_rxpoll_wlowat;
1887 inp->rxpoll_whiwat = if_rxpoll_whiwat;
1888 inp->rxpoll_plowat = rxpoll_tbl[n].plowat;
1889 inp->rxpoll_phiwat = rxpoll_tbl[n].phiwat;
1890 inp->rxpoll_blowat = rxpoll_tbl[n].blowat;
1891 inp->rxpoll_bhiwat = rxpoll_tbl[n].bhiwat;
1892 }
1893
1894 net_nsectimer(&if_rxpoll_mode_holdtime, &inp->mode_holdtime);
1895 net_nsectimer(&sample_holdtime, &inp->sample_holdtime);
1896
1897 if (dlil_verbose) {
1898 printf("%s%d: speed %llu bps, sample per %llu nsec, "
1899 "pkt limits [%d/%d], wreq limits [%d/%d], "
1900 "bytes limits [%d/%d]\n", ifp->if_name, ifp->if_unit,
1901 inbw, sample_holdtime, inp->rxpoll_plowat,
1902 inp->rxpoll_phiwat, inp->rxpoll_wlowat, inp->rxpoll_whiwat,
1903 inp->rxpoll_blowat, inp->rxpoll_bhiwat);
1904 }
1905}
1906
1907errno_t
1908ifnet_input(struct ifnet *ifp, struct mbuf *m_head,
1909 const struct ifnet_stat_increment_param *s)
1910{
1911 return (ifnet_input_common(ifp, m_head, NULL, s, FALSE, FALSE));
1912}
1913
1914errno_t
1915ifnet_input_extended(struct ifnet *ifp, struct mbuf *m_head,
1916 struct mbuf *m_tail, const struct ifnet_stat_increment_param *s)
1917{
1918 return (ifnet_input_common(ifp, m_head, m_tail, s, TRUE, FALSE));
1919}
1920
1921static errno_t
1922ifnet_input_common(struct ifnet *ifp, struct mbuf *m_head, struct mbuf *m_tail,
1923 const struct ifnet_stat_increment_param *s, boolean_t ext, boolean_t poll)
1924{
1925 struct thread *tp = current_thread();
1926 struct mbuf *last;
1927 struct dlil_threading_info *inp;
1928 u_int32_t m_cnt = 0, m_size = 0;
1929
1930 /*
1931 * Drop the packet(s) if the parameters are invalid, or if the
1932 * interface is no longer attached; else hold an IO refcnt to
1933 * prevent it from being detached (will be released below.)
1934 */
1935 if (ifp == NULL || m_head == NULL || (s == NULL && ext) ||
1936 (ifp != lo_ifp && !ifnet_is_attached(ifp, 1))) {
1937 if (m_head != NULL)
1938 mbuf_freem_list(m_head);
1939 return (EINVAL);
1940 }
1941
1942 VERIFY(m_tail == NULL || ext);
1943 VERIFY(s != NULL || !ext);
1944
1945 if (m_tail == NULL) {
1946 last = m_head;
1947 while (1) {
1948#if IFNET_INPUT_SANITY_CHK
1949 if (dlil_input_sanity_check != 0)
1950 DLIL_INPUT_CHECK(last, ifp);
1951#endif /* IFNET_INPUT_SANITY_CHK */
1952 m_cnt++;
1953 m_size += m_length(last);
1954 if (mbuf_nextpkt(last) == NULL)
1955 break;
1956 last = mbuf_nextpkt(last);
1957 }
1958 m_tail = last;
1959 } else {
1960#if IFNET_INPUT_SANITY_CHK
1961 if (dlil_input_sanity_check != 0) {
1962 last = m_head;
1963 while (1) {
1964 DLIL_INPUT_CHECK(last, ifp);
1965 m_cnt++;
1966 m_size += m_length(last);
1967 if (mbuf_nextpkt(last) == NULL)
1968 break;
1969 last = mbuf_nextpkt(last);
1970 }
1971 } else {
1972 m_cnt = s->packets_in;
1973 m_size = s->bytes_in;
1974 last = m_tail;
1975 }
1976#else
1977 m_cnt = s->packets_in;
1978 m_size = s->bytes_in;
1979 last = m_tail;
1980#endif /* IFNET_INPUT_SANITY_CHK */
1981 }
1982
1983 if (last != m_tail) {
1984 panic_plain("%s: invalid input packet chain for %s%d, "
1985 "tail mbuf %p instead of %p\n", __func__, ifp->if_name,
1986 ifp->if_unit, m_tail, last);
1987 }
1988
1989 /*
1990 * Assert packet count only for the extended variant, for backwards
1991 * compatibility, since this came directly from the device driver.
1992 * Relax this assertion for input bytes, as the driver may have
1993 * included the link-layer headers in the computation; hence
1994 * m_size is just an approximation.
1995 */
1996 if (ext && s->packets_in != m_cnt) {
1997 panic_plain("%s: input packet count mismatch for %s%d, "
1998 "%d instead of %d\n", __func__, ifp->if_name,
1999 ifp->if_unit, s->packets_in, m_cnt);
2000 }
2001
2002 if ((inp = ifp->if_inp) == NULL)
2003 inp = dlil_main_input_thread;
2004
2005 /*
2006 * If there is a matching DLIL input thread associated with an
2007 * affinity set, associate this thread with the same set. We
2008 * will only do this once.
2009 */
2010 lck_mtx_lock_spin(&inp->input_lck);
2011 if (inp != dlil_main_input_thread && inp->net_affinity &&
2012 ((!poll && inp->wloop_thr == THREAD_NULL) ||
2013 (poll && inp->poll_thr == THREAD_NULL))) {
2014 u_int32_t tag = inp->tag;
2015
2016 if (poll) {
2017 VERIFY(inp->poll_thr == THREAD_NULL);
2018 inp->poll_thr = tp;
2019 } else {
2020 VERIFY(inp->wloop_thr == THREAD_NULL);
2021 inp->wloop_thr = tp;
2022 }
2023 lck_mtx_unlock(&inp->input_lck);
2024
2025 /* Associate the current thread with the new affinity tag */
2026 (void) dlil_affinity_set(tp, tag);
2027
2028 /*
2029 * Take a reference on the current thread; during detach,
2030 * we will need to refer to it in order ot tear down its
2031 * affinity.
2032 */
2033 thread_reference(tp);
2034 lck_mtx_lock_spin(&inp->input_lck);
2035 }
2036
2037 /*
2038 * Because of loopbacked multicast we cannot stuff the ifp in
2039 * the rcvif of the packet header: loopback (lo0) packets use a
2040 * dedicated list so that we can later associate them with lo_ifp
2041 * on their way up the stack. Packets for other interfaces without
2042 * dedicated input threads go to the regular list.
2043 */
2044 if (inp == dlil_main_input_thread && ifp == lo_ifp) {
2045 struct dlil_main_threading_info *inpm =
2046 (struct dlil_main_threading_info *)inp;
2047 _addq_multi(&inpm->lo_rcvq_pkts, m_head, m_tail, m_cnt, m_size);
2048 } else {
2049 _addq_multi(&inp->rcvq_pkts, m_head, m_tail, m_cnt, m_size);
2050 }
2051
2052#if IFNET_INPUT_SANITY_CHK
2053 if (dlil_input_sanity_check != 0) {
2054 u_int32_t count;
2055 struct mbuf *m0;
2056
2057 for (m0 = m_head, count = 0; m0; m0 = mbuf_nextpkt(m0))
2058 count++;
2059
2060 if (count != m_cnt) {
2061 panic_plain("%s%d: invalid packet count %d "
2062 "(expected %d)\n", ifp->if_name, ifp->if_unit,
2063 count, m_cnt);
2064 /* NOTREACHED */
2065 }
2066
2067 inp->input_mbuf_cnt += m_cnt;
2068 }
2069#endif /* IFNET_INPUT_SANITY_CHK */
2070
2071 if (s != NULL) {
2072 dlil_input_stats_add(s, inp, poll);
2073 /*
2074 * If we're using the main input thread, synchronize the
2075 * stats now since we have the interface context. All
2076 * other cases involving dedicated input threads will
2077 * have their stats synchronized there.
2078 */
2079 if (inp == dlil_main_input_thread)
2080 dlil_input_stats_sync(ifp, inp);
2081 }
2082
2083 inp->input_waiting |= DLIL_INPUT_WAITING;
2084 if (!(inp->input_waiting & DLIL_INPUT_RUNNING)) {
2085 inp->wtot++;
2086 wakeup_one((caddr_t)&inp->input_waiting);
2087 }
2088 lck_mtx_unlock(&inp->input_lck);
2089
2090 if (ifp != lo_ifp) {
2091 /* Release the IO refcnt */
2092 ifnet_decr_iorefcnt(ifp);
2093 }
2094
2095 return (0);
2096}
2097
2098void
2099ifnet_start(struct ifnet *ifp)
2100{
2101 /*
2102 * If the starter thread is inactive, signal it to do work.
2103 */
2104 lck_mtx_lock_spin(&ifp->if_start_lock);
2105 ifp->if_start_req++;
2106 if (!ifp->if_start_active && ifp->if_start_thread != THREAD_NULL) {
2107 wakeup_one((caddr_t)&ifp->if_start_thread);
2108 }
2109 lck_mtx_unlock(&ifp->if_start_lock);
2110}
2111
2112static void
2113ifnet_start_thread_fn(void *v, wait_result_t w)
2114{
2115#pragma unused(w)
2116 struct ifnet *ifp = v;
2117 char ifname[IFNAMSIZ + 1];
2118 struct timespec *ts = NULL;
2119 struct ifclassq *ifq = &ifp->if_snd;
2120
2121 /*
2122 * Treat the dedicated starter thread for lo0 as equivalent to
2123 * the driver workloop thread; if net_affinity is enabled for
2124 * the main input thread, associate this starter thread to it
2125 * by binding them with the same affinity tag. This is done
2126 * only once (as we only have one lo_ifp which never goes away.)
2127 */
2128 if (ifp == lo_ifp) {
2129 struct dlil_threading_info *inp = dlil_main_input_thread;
2130 struct thread *tp = current_thread();
2131
2132 lck_mtx_lock(&inp->input_lck);
2133 if (inp->net_affinity) {
2134 u_int32_t tag = inp->tag;
2135
2136 VERIFY(inp->wloop_thr == THREAD_NULL);
2137 VERIFY(inp->poll_thr == THREAD_NULL);
2138 inp->wloop_thr = tp;
2139 lck_mtx_unlock(&inp->input_lck);
2140
2141 /* Associate this thread with the affinity tag */
2142 (void) dlil_affinity_set(tp, tag);
2143 } else {
2144 lck_mtx_unlock(&inp->input_lck);
2145 }
2146 }
2147
2148 snprintf(ifname, sizeof (ifname), "%s%d_starter",
2149 ifp->if_name, ifp->if_unit);
2150
2151 lck_mtx_lock_spin(&ifp->if_start_lock);
2152
2153 for (;;) {
2154 (void) msleep(&ifp->if_start_thread, &ifp->if_start_lock,
2155 (PZERO - 1) | PSPIN, ifname, ts);
2156
2157 /* interface is detached? */
2158 if (ifp->if_start_thread == THREAD_NULL) {
2159 ifnet_set_start_cycle(ifp, NULL);
2160 lck_mtx_unlock(&ifp->if_start_lock);
2161 ifnet_purge(ifp);
2162
2163 if (dlil_verbose) {
2164 printf("%s%d: starter thread terminated\n",
2165 ifp->if_name, ifp->if_unit);
2166 }
2167
2168 /* for the extra refcnt from kernel_thread_start() */
2169 thread_deallocate(current_thread());
2170 /* this is the end */
2171 thread_terminate(current_thread());
2172 /* NOTREACHED */
2173 return;
2174 }
2175
2176 ifp->if_start_active = 1;
2177 for (;;) {
2178 u_int32_t req = ifp->if_start_req;
2179
2180 lck_mtx_unlock(&ifp->if_start_lock);
2181 /* invoke the driver's start routine */
2182 ((*ifp->if_start)(ifp));
2183 lck_mtx_lock_spin(&ifp->if_start_lock);
2184
2185 /* if there's no pending request, we're done */
2186 if (req == ifp->if_start_req)
2187 break;
2188 }
2189 ifp->if_start_req = 0;
2190 ifp->if_start_active = 0;
2191 /*
2192 * Wakeup N ns from now if rate-controlled by TBR, and if
2193 * there are still packets in the send queue which haven't
2194 * been dequeued so far; else sleep indefinitely (ts = NULL)
2195 * until ifnet_start() is called again.
2196 */
2197 ts = ((IFCQ_TBR_IS_ENABLED(ifq) && !IFCQ_IS_EMPTY(ifq)) ?
2198 &ifp->if_start_cycle : NULL);
2199
2200 if (ts != NULL && ts->tv_sec == 0 && ts->tv_nsec == 0)
2201 ts = NULL;
2202 }
2203
2204 /* NOTREACHED */
2205 lck_mtx_unlock(&ifp->if_start_lock);
2206 VERIFY(0); /* we should never get here */
2207}
2208
2209void
2210ifnet_set_start_cycle(struct ifnet *ifp, struct timespec *ts)
2211{
2212 if (ts == NULL)
2213 bzero(&ifp->if_start_cycle, sizeof (ifp->if_start_cycle));
2214 else
2215 *(&ifp->if_start_cycle) = *ts;
2216
2217 if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose)
2218 printf("%s%d: restart interval set to %lu nsec\n",
2219 ifp->if_name, ifp->if_unit, ts->tv_nsec);
2220}
2221
2222static void
2223ifnet_poll(struct ifnet *ifp)
2224{
2225 /*
2226 * If the poller thread is inactive, signal it to do work.
2227 */
2228 lck_mtx_lock_spin(&ifp->if_poll_lock);
2229 ifp->if_poll_req++;
2230 if (!ifp->if_poll_active && ifp->if_poll_thread != THREAD_NULL) {
2231 wakeup_one((caddr_t)&ifp->if_poll_thread);
2232 }
2233 lck_mtx_unlock(&ifp->if_poll_lock);
2234}
2235
2236static void
2237ifnet_poll_thread_fn(void *v, wait_result_t w)
2238{
2239#pragma unused(w)
2240 struct dlil_threading_info *inp;
2241 struct ifnet *ifp = v;
2242 char ifname[IFNAMSIZ + 1];
2243 struct timespec *ts = NULL;
2244 struct ifnet_stat_increment_param s;
2245
2246 snprintf(ifname, sizeof (ifname), "%s%d_poller",
2247 ifp->if_name, ifp->if_unit);
2248 bzero(&s, sizeof (s));
2249
2250 lck_mtx_lock_spin(&ifp->if_poll_lock);
2251
2252 inp = ifp->if_inp;
2253 VERIFY(inp != NULL);
2254
2255 for (;;) {
2256 if (ifp->if_poll_thread != THREAD_NULL) {
2257 (void) msleep(&ifp->if_poll_thread, &ifp->if_poll_lock,
2258 (PZERO - 1) | PSPIN, ifname, ts);
2259 }
2260
2261 /* interface is detached (maybe while asleep)? */
2262 if (ifp->if_poll_thread == THREAD_NULL) {
2263 ifnet_set_poll_cycle(ifp, NULL);
2264 lck_mtx_unlock(&ifp->if_poll_lock);
2265
2266 if (dlil_verbose) {
2267 printf("%s%d: poller thread terminated\n",
2268 ifp->if_name, ifp->if_unit);
2269 }
2270
2271 /* for the extra refcnt from kernel_thread_start() */
2272 thread_deallocate(current_thread());
2273 /* this is the end */
2274 thread_terminate(current_thread());
2275 /* NOTREACHED */
2276 return;
2277 }
2278
2279 ifp->if_poll_active = 1;
2280 for (;;) {
2281 struct mbuf *m_head, *m_tail;
2282 u_int32_t m_lim, m_cnt, m_totlen;
2283 u_int16_t req = ifp->if_poll_req;
2284
2285 lck_mtx_unlock(&ifp->if_poll_lock);
2286
2287 /*
2288 * If no longer attached, there's nothing to do;
2289 * else hold an IO refcnt to prevent the interface
2290 * from being detached (will be released below.)
2291 */
2292 if (!ifnet_is_attached(ifp, 1))
2293 break;
2294
2295 m_lim = (if_rxpoll_max != 0) ? if_rxpoll_max :
2296 MAX((qlimit(&inp->rcvq_pkts)),
2297 (inp->rxpoll_phiwat << 2));
2298
2299 if (dlil_verbose > 1) {
2300 printf("%s%d: polling up to %d pkts, "
2301 "pkts avg %d max %d, wreq avg %d, "
2302 "bytes avg %d\n",
2303 ifp->if_name, ifp->if_unit, m_lim,
2304 inp->rxpoll_pavg, inp->rxpoll_pmax,
2305 inp->rxpoll_wavg, inp->rxpoll_bavg);
2306 }
2307
2308 /* invoke the driver's input poll routine */
2309 ((*ifp->if_input_poll)(ifp, 0, m_lim, &m_head, &m_tail,
2310 &m_cnt, &m_totlen));
2311
2312 if (m_head != NULL) {
2313 VERIFY(m_tail != NULL && m_cnt > 0);
2314
2315 if (dlil_verbose > 1) {
2316 printf("%s%d: polled %d pkts, "
2317 "pkts avg %d max %d, wreq avg %d, "
2318 "bytes avg %d\n",
2319 ifp->if_name, ifp->if_unit, m_cnt,
2320 inp->rxpoll_pavg, inp->rxpoll_pmax,
2321 inp->rxpoll_wavg, inp->rxpoll_bavg);
2322 }
2323
2324 /* stats are required for extended variant */
2325 s.packets_in = m_cnt;
2326 s.bytes_in = m_totlen;
2327
2328 (void) ifnet_input_common(ifp, m_head, m_tail,
2329 &s, TRUE, TRUE);
2330 } else if (dlil_verbose > 1) {
2331 printf("%s%d: no packets, pkts avg %d max %d, "
2332 "wreq avg %d, bytes avg %d\n", ifp->if_name,
2333 ifp->if_unit, inp->rxpoll_pavg,
2334 inp->rxpoll_pmax, inp->rxpoll_wavg,
2335 inp->rxpoll_bavg);
2336 }
2337
2338 /* Release the io ref count */
2339 ifnet_decr_iorefcnt(ifp);
2340
2341 lck_mtx_lock_spin(&ifp->if_poll_lock);
2342
2343 /* if there's no pending request, we're done */
2344 if (req == ifp->if_poll_req)
2345 break;
2346 }
2347 ifp->if_poll_req = 0;
2348 ifp->if_poll_active = 0;
2349
2350 /*
2351 * Wakeup N ns from now, else sleep indefinitely (ts = NULL)
2352 * until ifnet_poll() is called again.
2353 */
2354 ts = &ifp->if_poll_cycle;
2355 if (ts->tv_sec == 0 && ts->tv_nsec == 0)
2356 ts = NULL;
2357 }
2358
2359 /* NOTREACHED */
2360 lck_mtx_unlock(&ifp->if_poll_lock);
2361 VERIFY(0); /* we should never get here */
2362}
2363
2364void
2365ifnet_set_poll_cycle(struct ifnet *ifp, struct timespec *ts)
2366{
2367 if (ts == NULL)
2368 bzero(&ifp->if_poll_cycle, sizeof (ifp->if_poll_cycle));
2369 else
2370 *(&ifp->if_poll_cycle) = *ts;
2371
2372 if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose)
2373 printf("%s%d: poll interval set to %lu nsec\n",
2374 ifp->if_name, ifp->if_unit, ts->tv_nsec);
2375}
2376
2377void
2378ifnet_purge(struct ifnet *ifp)
2379{
2380 if (ifp != NULL && (ifp->if_eflags & IFEF_TXSTART))
2381 if_qflush(ifp, 0);
2382}
2383
2384void
2385ifnet_update_sndq(struct ifclassq *ifq, cqev_t ev)
2386{
2387 IFCQ_LOCK_ASSERT_HELD(ifq);
2388
2389 if (!(IFCQ_IS_READY(ifq)))
2390 return;
2391
2392 if (IFCQ_TBR_IS_ENABLED(ifq)) {
2393 struct tb_profile tb = { ifq->ifcq_tbr.tbr_rate_raw,
2394 ifq->ifcq_tbr.tbr_percent, 0 };
2395 (void) ifclassq_tbr_set(ifq, &tb, FALSE);
2396 }
2397
2398 ifclassq_update(ifq, ev);
2399}
2400
2401void
2402ifnet_update_rcv(struct ifnet *ifp, cqev_t ev)
2403{
2404 switch (ev) {
2405 case CLASSQ_EV_LINK_SPEED:
2406 if (net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL))
2407 ifp->if_poll_update++;
2408 break;
2409
2410 default:
2411 break;
2412 }
2413}
2414
2415errno_t
2416ifnet_set_output_sched_model(struct ifnet *ifp, u_int32_t model)
2417{
2418 struct ifclassq *ifq;
2419 u_int32_t omodel;
2420 errno_t err;
2421
2422 if (ifp == NULL || (model != IFNET_SCHED_MODEL_DRIVER_MANAGED &&
2423 model != IFNET_SCHED_MODEL_NORMAL))
2424 return (EINVAL);
2425 else if (!(ifp->if_eflags & IFEF_TXSTART))
2426 return (ENXIO);
2427
2428 ifq = &ifp->if_snd;
2429 IFCQ_LOCK(ifq);
2430 omodel = ifp->if_output_sched_model;
2431 ifp->if_output_sched_model = model;
2432 if ((err = ifclassq_pktsched_setup(ifq)) != 0)
2433 ifp->if_output_sched_model = omodel;
2434 IFCQ_UNLOCK(ifq);
2435
2436 return (err);
2437}
2438
2439errno_t
2440ifnet_set_sndq_maxlen(struct ifnet *ifp, u_int32_t maxqlen)
2441{
2442 if (ifp == NULL)
2443 return (EINVAL);
2444 else if (!(ifp->if_eflags & IFEF_TXSTART))
2445 return (ENXIO);
2446
2447 ifclassq_set_maxlen(&ifp->if_snd, maxqlen);
2448
2449 return (0);
2450}
2451
2452errno_t
2453ifnet_get_sndq_maxlen(struct ifnet *ifp, u_int32_t *maxqlen)
2454{
2455 if (ifp == NULL || maxqlen == NULL)
2456 return (EINVAL);
2457 else if (!(ifp->if_eflags & IFEF_TXSTART))
2458 return (ENXIO);
2459
2460 *maxqlen = ifclassq_get_maxlen(&ifp->if_snd);
2461
2462 return (0);
2463}
2464
2465errno_t
2466ifnet_get_sndq_len(struct ifnet *ifp, u_int32_t *qlen)
2467{
2468 if (ifp == NULL || qlen == NULL)
2469 return (EINVAL);
2470 else if (!(ifp->if_eflags & IFEF_TXSTART))
2471 return (ENXIO);
2472
2473 *qlen = ifclassq_get_len(&ifp->if_snd);
2474
2475 return (0);
2476}
2477
2478errno_t
2479ifnet_set_rcvq_maxlen(struct ifnet *ifp, u_int32_t maxqlen)
2480{
2481 struct dlil_threading_info *inp;
2482
2483 if (ifp == NULL)
2484 return (EINVAL);
2485 else if (!(ifp->if_eflags & IFEF_RXPOLL) || ifp->if_inp == NULL)
2486 return (ENXIO);
2487
2488 if (maxqlen == 0)
2489 maxqlen = if_rcvq_maxlen;
2490 else if (maxqlen < IF_RCVQ_MINLEN)
2491 maxqlen = IF_RCVQ_MINLEN;
2492
2493 inp = ifp->if_inp;
2494 lck_mtx_lock(&inp->input_lck);
2495 qlimit(&inp->rcvq_pkts) = maxqlen;
2496 lck_mtx_unlock(&inp->input_lck);
2497
2498 return (0);
2499}
2500
2501errno_t
2502ifnet_get_rcvq_maxlen(struct ifnet *ifp, u_int32_t *maxqlen)
2503{
2504 struct dlil_threading_info *inp;
2505
2506 if (ifp == NULL || maxqlen == NULL)
2507 return (EINVAL);
2508 else if (!(ifp->if_eflags & IFEF_RXPOLL) || ifp->if_inp == NULL)
2509 return (ENXIO);
2510
2511 inp = ifp->if_inp;
2512 lck_mtx_lock(&inp->input_lck);
2513 *maxqlen = qlimit(&inp->rcvq_pkts);
2514 lck_mtx_unlock(&inp->input_lck);
2515 return (0);
2516}
2517
2518errno_t
2519ifnet_enqueue(struct ifnet *ifp, struct mbuf *m)
2520{
2521 int error;
2522
2523 if (ifp == NULL || m == NULL || !(m->m_flags & M_PKTHDR) ||
2524 m->m_nextpkt != NULL) {
2525 if (m != NULL)
2526 m_freem_list(m);
2527 return (EINVAL);
2528 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
2529 !(ifp->if_refflags & IFRF_ATTACHED)) {
2530 /* flag tested without lock for performance */
2531 m_freem(m);
2532 return (ENXIO);
2533 } else if (!(ifp->if_flags & IFF_UP)) {
2534 m_freem(m);
2535 return (ENETDOWN);
2536
2537 }
2538
2539 /* enqueue the packet */
2540 error = ifclassq_enqueue(&ifp->if_snd, m);
2541
2542 /*
2543 * Tell the driver to start dequeueing; do this even when the queue
2544 * for the packet is suspended (EQSUSPENDED), as the driver could still
2545 * be dequeueing from other unsuspended queues.
2546 */
2547 if (error == 0 || error == EQFULL || error == EQSUSPENDED)
2548 ifnet_start(ifp);
2549
2550 return (error);
2551}
2552
2553errno_t
2554ifnet_dequeue(struct ifnet *ifp, struct mbuf **mp)
2555{
2556 if (ifp == NULL || mp == NULL)
2557 return (EINVAL);
2558 else if (!(ifp->if_eflags & IFEF_TXSTART) ||
2559 (ifp->if_output_sched_model != IFNET_SCHED_MODEL_NORMAL))
2560 return (ENXIO);
2561
2562 return (ifclassq_dequeue(&ifp->if_snd, 1, mp, NULL, NULL, NULL));
2563}
2564
2565errno_t
2566ifnet_dequeue_service_class(struct ifnet *ifp, mbuf_svc_class_t sc,
2567 struct mbuf **mp)
2568{
2569 if (ifp == NULL || mp == NULL || !MBUF_VALID_SC(sc))
2570 return (EINVAL);
2571 else if (!(ifp->if_eflags & IFEF_TXSTART) ||
2572 (ifp->if_output_sched_model != IFNET_SCHED_MODEL_DRIVER_MANAGED))
2573 return (ENXIO);
2574
2575 return (ifclassq_dequeue_sc(&ifp->if_snd, sc, 1, mp, NULL, NULL, NULL));
2576}
2577
2578errno_t
2579ifnet_dequeue_multi(struct ifnet *ifp, u_int32_t limit, struct mbuf **head,
2580 struct mbuf **tail, u_int32_t *cnt, u_int32_t *len)
2581{
2582 if (ifp == NULL || head == NULL || limit < 1)
2583 return (EINVAL);
2584 else if (!(ifp->if_eflags & IFEF_TXSTART) ||
2585 (ifp->if_output_sched_model != IFNET_SCHED_MODEL_NORMAL))
2586 return (ENXIO);
2587
2588 return (ifclassq_dequeue(&ifp->if_snd, limit, head, tail, cnt, len));
2589}
2590
2591errno_t
2592ifnet_dequeue_service_class_multi(struct ifnet *ifp, mbuf_svc_class_t sc,
2593 u_int32_t limit, struct mbuf **head, struct mbuf **tail, u_int32_t *cnt,
2594 u_int32_t *len)
2595{
2596
2597 if (ifp == NULL || head == NULL || limit < 1 || !MBUF_VALID_SC(sc))
2598 return (EINVAL);
2599 else if (!(ifp->if_eflags & IFEF_TXSTART) ||
2600 (ifp->if_output_sched_model != IFNET_SCHED_MODEL_DRIVER_MANAGED))
2601 return (ENXIO);
2602
2603 return (ifclassq_dequeue_sc(&ifp->if_snd, sc, limit, head,
2604 tail, cnt, len));
2605}
2606
2607static int
2608dlil_interface_filters_input(struct ifnet *ifp, struct mbuf **m_p,
2609 char **frame_header_p, protocol_family_t protocol_family)
2610{
2611 struct ifnet_filter *filter;
2612
2613 /*
2614 * Pass the inbound packet to the interface filters
6d2010ae
A
2615 */
2616 lck_mtx_lock_spin(&ifp->if_flt_lock);
2617 /* prevent filter list from changing in case we drop the lock */
2618 if_flt_monitor_busy(ifp);
2d21ac55
A
2619 TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
2620 int result;
2621
6d2010ae
A
2622 if (!filter->filt_skip && filter->filt_input != NULL &&
2623 (filter->filt_protocol == 0 ||
2624 filter->filt_protocol == protocol_family)) {
2625 lck_mtx_unlock(&ifp->if_flt_lock);
2626
2d21ac55 2627 result = (*filter->filt_input)(filter->filt_cookie,
6d2010ae
A
2628 ifp, protocol_family, m_p, frame_header_p);
2629
2630 lck_mtx_lock_spin(&ifp->if_flt_lock);
2d21ac55 2631 if (result != 0) {
6d2010ae
A
2632 /* we're done with the filter list */
2633 if_flt_monitor_unbusy(ifp);
2634 lck_mtx_unlock(&ifp->if_flt_lock);
2d21ac55
A
2635 return (result);
2636 }
2637 }
2638 }
6d2010ae
A
2639 /* we're done with the filter list */
2640 if_flt_monitor_unbusy(ifp);
2641 lck_mtx_unlock(&ifp->if_flt_lock);
b7266188
A
2642
2643 /*
6d2010ae 2644 * Strip away M_PROTO1 bit prior to sending packet up the stack as
b7266188
A
2645 * it is meant to be local to a subsystem -- if_bridge for M_PROTO1
2646 */
2647 if (*m_p != NULL)
2648 (*m_p)->m_flags &= ~M_PROTO1;
2649
2d21ac55 2650 return (0);
1c79356b
A
2651}
2652
6d2010ae
A
2653static int
2654dlil_interface_filters_output(struct ifnet *ifp, struct mbuf **m_p,
2655 protocol_family_t protocol_family)
2656{
2657 struct ifnet_filter *filter;
2658
2659 /*
2660 * Pass the outbound packet to the interface filters
2661 */
2662 lck_mtx_lock_spin(&ifp->if_flt_lock);
2663 /* prevent filter list from changing in case we drop the lock */
2664 if_flt_monitor_busy(ifp);
2665 TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
2666 int result;
2667
2668 if (!filter->filt_skip && filter->filt_output != NULL &&
2669 (filter->filt_protocol == 0 ||
2670 filter->filt_protocol == protocol_family)) {
2671 lck_mtx_unlock(&ifp->if_flt_lock);
2672
2673 result = filter->filt_output(filter->filt_cookie, ifp,
2674 protocol_family, m_p);
2675
2676 lck_mtx_lock_spin(&ifp->if_flt_lock);
2677 if (result != 0) {
2678 /* we're done with the filter list */
2679 if_flt_monitor_unbusy(ifp);
2680 lck_mtx_unlock(&ifp->if_flt_lock);
2681 return (result);
2682 }
2683 }
2684 }
2685 /* we're done with the filter list */
2686 if_flt_monitor_unbusy(ifp);
2687 lck_mtx_unlock(&ifp->if_flt_lock);
2688
2689 return (0);
2690}
2691
2d21ac55
A
2692static void
2693dlil_ifproto_input(struct if_proto * ifproto, mbuf_t m)
1c79356b 2694{
2d21ac55 2695 int error;
1c79356b 2696
2d21ac55
A
2697 if (ifproto->proto_kpi == kProtoKPI_v1) {
2698 /* Version 1 protocols get one packet at a time */
2699 while (m != NULL) {
2700 char * frame_header;
2701 mbuf_t next_packet;
6d2010ae 2702
2d21ac55
A
2703 next_packet = m->m_nextpkt;
2704 m->m_nextpkt = NULL;
2705 frame_header = m->m_pkthdr.header;
2706 m->m_pkthdr.header = NULL;
6d2010ae
A
2707 error = (*ifproto->kpi.v1.input)(ifproto->ifp,
2708 ifproto->protocol_family, m, frame_header);
2d21ac55
A
2709 if (error != 0 && error != EJUSTRETURN)
2710 m_freem(m);
2711 m = next_packet;
2712 }
6d2010ae 2713 } else if (ifproto->proto_kpi == kProtoKPI_v2) {
2d21ac55
A
2714 /* Version 2 protocols support packet lists */
2715 error = (*ifproto->kpi.v2.input)(ifproto->ifp,
6d2010ae 2716 ifproto->protocol_family, m);
2d21ac55
A
2717 if (error != 0 && error != EJUSTRETURN)
2718 m_freem_list(m);
91447636 2719 }
2d21ac55
A
2720 return;
2721}
1c79356b 2722
316670eb
A
2723static void
2724dlil_input_stats_add(const struct ifnet_stat_increment_param *s,
2725 struct dlil_threading_info *inp, boolean_t poll)
2726{
2727 struct ifnet_stat_increment_param *d = &inp->stats;
2728
2729 if (s->packets_in != 0)
2730 d->packets_in += s->packets_in;
2731 if (s->bytes_in != 0)
2732 d->bytes_in += s->bytes_in;
2733 if (s->errors_in != 0)
2734 d->errors_in += s->errors_in;
2735
2736 if (s->packets_out != 0)
2737 d->packets_out += s->packets_out;
2738 if (s->bytes_out != 0)
2739 d->bytes_out += s->bytes_out;
2740 if (s->errors_out != 0)
2741 d->errors_out += s->errors_out;
2742
2743 if (s->collisions != 0)
2744 d->collisions += s->collisions;
2745 if (s->dropped != 0)
2746 d->dropped += s->dropped;
2747
2748 if (poll)
2749 PKTCNTR_ADD(&inp->tstats, s->packets_in, s->bytes_in);
2750}
2751
2752static void
2753dlil_input_stats_sync(struct ifnet *ifp, struct dlil_threading_info *inp)
2754{
2755 struct ifnet_stat_increment_param *s = &inp->stats;
2756
2757 /*
2758 * Use of atomic operations is unavoidable here because
2759 * these stats may also be incremented elsewhere via KPIs.
2760 */
2761 if (s->packets_in != 0) {
2762 atomic_add_64(&ifp->if_data.ifi_ipackets, s->packets_in);
2763 s->packets_in = 0;
2764 }
2765 if (s->bytes_in != 0) {
2766 atomic_add_64(&ifp->if_data.ifi_ibytes, s->bytes_in);
2767 s->bytes_in = 0;
2768 }
2769 if (s->errors_in != 0) {
2770 atomic_add_64(&ifp->if_data.ifi_ierrors, s->errors_in);
2771 s->errors_in = 0;
2772 }
2773
2774 if (s->packets_out != 0) {
2775 atomic_add_64(&ifp->if_data.ifi_opackets, s->packets_out);
2776 s->packets_out = 0;
2777 }
2778 if (s->bytes_out != 0) {
2779 atomic_add_64(&ifp->if_data.ifi_obytes, s->bytes_out);
2780 s->bytes_out = 0;
2781 }
2782 if (s->errors_out != 0) {
2783 atomic_add_64(&ifp->if_data.ifi_oerrors, s->errors_out);
2784 s->errors_out = 0;
2785 }
2786
2787 if (s->collisions != 0) {
2788 atomic_add_64(&ifp->if_data.ifi_collisions, s->collisions);
2789 s->collisions = 0;
2790 }
2791 if (s->dropped != 0) {
2792 atomic_add_64(&ifp->if_data.ifi_iqdrops, s->dropped);
2793 s->dropped = 0;
2794 }
2795
2796 /*
2797 * No need for atomic operations as they are modified here
2798 * only from within the DLIL input thread context.
2799 */
2800 if (inp->tstats.packets != 0) {
2801 inp->pstats.ifi_poll_packets += inp->tstats.packets;
2802 inp->tstats.packets = 0;
2803 }
2804 if (inp->tstats.bytes != 0) {
2805 inp->pstats.ifi_poll_bytes += inp->tstats.bytes;
2806 inp->tstats.bytes = 0;
2807 }
2808}
2809
2810__private_extern__ void
2811dlil_input_packet_list(struct ifnet *ifp, struct mbuf *m)
2812{
2813 return (dlil_input_packet_list_common(ifp, m, 0,
2814 IFNET_MODEL_INPUT_POLL_OFF, FALSE));
2815}
2816
2d21ac55 2817__private_extern__ void
316670eb
A
2818dlil_input_packet_list_extended(struct ifnet *ifp, struct mbuf *m,
2819 u_int32_t cnt, ifnet_model_t mode)
2820{
2821 return (dlil_input_packet_list_common(ifp, m, cnt, mode, TRUE));
2822}
2823
2824static void
2825dlil_input_packet_list_common(struct ifnet *ifp_param, struct mbuf *m,
2826 u_int32_t cnt, ifnet_model_t mode, boolean_t ext)
2d21ac55
A
2827{
2828 int error = 0;
2d21ac55
A
2829 protocol_family_t protocol_family;
2830 mbuf_t next_packet;
2831 ifnet_t ifp = ifp_param;
2832 char * frame_header;
2833 struct if_proto * last_ifproto = NULL;
2834 mbuf_t pkt_first = NULL;
2835 mbuf_t * pkt_next = NULL;
316670eb 2836 u_int32_t poll_thresh = 0, poll_ival = 0;
2d21ac55
A
2837
2838 KERNEL_DEBUG(DBG_FNC_DLIL_INPUT | DBG_FUNC_START,0,0,0,0,0);
2839
316670eb
A
2840 if (ext && mode == IFNET_MODEL_INPUT_POLL_ON && cnt > 1 &&
2841 (poll_ival = if_rxpoll_interval_pkts) > 0)
2842 poll_thresh = cnt;
6d2010ae 2843
2d21ac55 2844 while (m != NULL) {
6d2010ae
A
2845 struct if_proto *ifproto = NULL;
2846 int iorefcnt = 0;
2d21ac55 2847
2d21ac55
A
2848 if (ifp_param == NULL)
2849 ifp = m->m_pkthdr.rcvif;
6d2010ae 2850
316670eb
A
2851 if ((ifp->if_eflags & IFEF_RXPOLL) && poll_thresh != 0 &&
2852 poll_ival > 0 && (--poll_thresh % poll_ival) == 0)
2853 ifnet_poll(ifp);
2854
6d2010ae 2855 /* Check if this mbuf looks valid */
316670eb 2856 MBUF_INPUT_CHECK(m, ifp);
6d2010ae
A
2857
2858 next_packet = m->m_nextpkt;
2859 m->m_nextpkt = NULL;
2d21ac55
A
2860 frame_header = m->m_pkthdr.header;
2861 m->m_pkthdr.header = NULL;
2862
316670eb
A
2863 /*
2864 * Get an IO reference count if the interface is not
2865 * loopback (lo0) and it is attached; lo0 never goes
2866 * away, so optimize for that.
6d2010ae
A
2867 */
2868 if (ifp != lo_ifp) {
2869 if (!ifnet_is_attached(ifp, 1)) {
2870 m_freem(m);
2871 goto next;
2872 }
2873 iorefcnt = 1;
2d21ac55 2874 }
d41d1dae 2875
316670eb 2876 ifp_inc_traffic_class_in(ifp, m);
d41d1dae 2877
2d21ac55 2878 /* find which protocol family this packet is for */
6d2010ae 2879 ifnet_lock_shared(ifp);
2d21ac55 2880 error = (*ifp->if_demux)(ifp, m, frame_header,
6d2010ae
A
2881 &protocol_family);
2882 ifnet_lock_done(ifp);
2d21ac55 2883 if (error != 0) {
6d2010ae 2884 if (error == EJUSTRETURN)
2d21ac55 2885 goto next;
2d21ac55
A
2886 protocol_family = 0;
2887 }
6d2010ae 2888
316670eb
A
2889#if CONFIG_EMBEDDED
2890 iptap_ipf_input(ifp, protocol_family, m, frame_header);
2891#endif /* CONFIG_EMBEDDED */
2892
2d21ac55 2893 if (m->m_flags & (M_BCAST|M_MCAST))
6d2010ae 2894 atomic_add_64(&ifp->if_imcasts, 1);
1c79356b 2895
2d21ac55
A
2896 /* run interface filters, exclude VLAN packets PR-3586856 */
2897 if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) == 0) {
6d2010ae
A
2898 error = dlil_interface_filters_input(ifp, &m,
2899 &frame_header, protocol_family);
2900 if (error != 0) {
2901 if (error != EJUSTRETURN)
2d21ac55 2902 m_freem(m);
2d21ac55 2903 goto next;
91447636
A
2904 }
2905 }
2d21ac55 2906 if (error != 0 || ((m->m_flags & M_PROMISC) != 0) ) {
91447636 2907 m_freem(m);
2d21ac55 2908 goto next;
91447636 2909 }
6d2010ae 2910
2d21ac55
A
2911 /* Lookup the protocol attachment to this interface */
2912 if (protocol_family == 0) {
2913 ifproto = NULL;
6d2010ae
A
2914 } else if (last_ifproto != NULL && last_ifproto->ifp == ifp &&
2915 (last_ifproto->protocol_family == protocol_family)) {
2916 VERIFY(ifproto == NULL);
2d21ac55 2917 ifproto = last_ifproto;
6d2010ae
A
2918 if_proto_ref(last_ifproto);
2919 } else {
2920 VERIFY(ifproto == NULL);
2921 ifnet_lock_shared(ifp);
2922 /* callee holds a proto refcnt upon success */
2d21ac55 2923 ifproto = find_attached_proto(ifp, protocol_family);
6d2010ae 2924 ifnet_lock_done(ifp);
2d21ac55
A
2925 }
2926 if (ifproto == NULL) {
2927 /* no protocol for this packet, discard */
2928 m_freem(m);
2929 goto next;
2930 }
2931 if (ifproto != last_ifproto) {
2d21ac55
A
2932 if (last_ifproto != NULL) {
2933 /* pass up the list for the previous protocol */
2d21ac55
A
2934 dlil_ifproto_input(last_ifproto, pkt_first);
2935 pkt_first = NULL;
2936 if_proto_free(last_ifproto);
2d21ac55
A
2937 }
2938 last_ifproto = ifproto;
6d2010ae 2939 if_proto_ref(ifproto);
2d21ac55
A
2940 }
2941 /* extend the list */
2942 m->m_pkthdr.header = frame_header;
2943 if (pkt_first == NULL) {
2944 pkt_first = m;
2945 } else {
2946 *pkt_next = m;
2947 }
2948 pkt_next = &m->m_nextpkt;
1c79356b 2949
6d2010ae 2950next:
2d21ac55
A
2951 if (next_packet == NULL && last_ifproto != NULL) {
2952 /* pass up the last list of packets */
2d21ac55
A
2953 dlil_ifproto_input(last_ifproto, pkt_first);
2954 if_proto_free(last_ifproto);
6d2010ae
A
2955 last_ifproto = NULL;
2956 }
2957 if (ifproto != NULL) {
2958 if_proto_free(ifproto);
2959 ifproto = NULL;
2d21ac55 2960 }
316670eb 2961
2d21ac55 2962 m = next_packet;
1c79356b 2963
6d2010ae
A
2964 /* update the driver's multicast filter, if needed */
2965 if (ifp->if_updatemcasts > 0 && if_mcasts_update(ifp) == 0)
2966 ifp->if_updatemcasts = 0;
2967 if (iorefcnt == 1)
2968 ifnet_decr_iorefcnt(ifp);
91447636 2969 }
6d2010ae 2970
91447636 2971 KERNEL_DEBUG(DBG_FNC_DLIL_INPUT | DBG_FUNC_END,0,0,0,0,0);
1c79356b
A
2972}
2973
6d2010ae
A
2974errno_t
2975if_mcasts_update(struct ifnet *ifp)
2976{
2977 errno_t err;
2978
2979 err = ifnet_ioctl(ifp, 0, SIOCADDMULTI, NULL);
2980 if (err == EAFNOSUPPORT)
2981 err = 0;
2982 printf("%s%d: %s %d suspended link-layer multicast membership(s) "
2983 "(err=%d)\n", ifp->if_name, ifp->if_unit,
2984 (err == 0 ? "successfully restored" : "failed to restore"),
2985 ifp->if_updatemcasts, err);
2986
2987 /* just return success */
2988 return (0);
2989}
2990
91447636
A
2991static int
2992dlil_event_internal(struct ifnet *ifp, struct kev_msg *event)
1c79356b 2993{
91447636 2994 struct ifnet_filter *filter;
6d2010ae
A
2995
2996 /* Get an io ref count if the interface is attached */
2997 if (!ifnet_is_attached(ifp, 1))
2998 goto done;
2999
3000 /*
3001 * Pass the event to the interface filters
3002 */
3003 lck_mtx_lock_spin(&ifp->if_flt_lock);
3004 /* prevent filter list from changing in case we drop the lock */
3005 if_flt_monitor_busy(ifp);
3006 TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
3007 if (filter->filt_event != NULL) {
3008 lck_mtx_unlock(&ifp->if_flt_lock);
3009
3010 filter->filt_event(filter->filt_cookie, ifp,
3011 filter->filt_protocol, event);
3012
3013 lck_mtx_lock_spin(&ifp->if_flt_lock);
91447636 3014 }
6d2010ae
A
3015 }
3016 /* we're done with the filter list */
3017 if_flt_monitor_unbusy(ifp);
3018 lck_mtx_unlock(&ifp->if_flt_lock);
3019
3020 ifnet_lock_shared(ifp);
3021 if (ifp->if_proto_hash != NULL) {
3022 int i;
3023
3024 for (i = 0; i < PROTO_HASH_SLOTS; i++) {
3025 struct if_proto *proto;
3026
3027 SLIST_FOREACH(proto, &ifp->if_proto_hash[i],
3028 next_hash) {
3029 proto_media_event eventp =
3030 (proto->proto_kpi == kProtoKPI_v1 ?
3031 proto->kpi.v1.event :
3032 proto->kpi.v2.event);
3033
3034 if (eventp != NULL) {
3035 if_proto_ref(proto);
3036 ifnet_lock_done(ifp);
3037
3038 eventp(ifp, proto->protocol_family,
3039 event);
3040
3041 ifnet_lock_shared(ifp);
3042 if_proto_free(proto);
91447636
A
3043 }
3044 }
3045 }
91447636 3046 }
6d2010ae
A
3047 ifnet_lock_done(ifp);
3048
3049 /* Pass the event to the interface */
3050 if (ifp->if_event != NULL)
3051 ifp->if_event(ifp, event);
3052
3053 /* Release the io ref count */
3054 ifnet_decr_iorefcnt(ifp);
3055
3056done:
3057 return (kev_post_msg(event));
1c79356b
A
3058}
3059
2d21ac55 3060errno_t
6d2010ae 3061ifnet_event(ifnet_t ifp, struct kern_event_msg *event)
1c79356b 3062{
91447636 3063 struct kev_msg kev_msg;
2d21ac55
A
3064 int result = 0;
3065
6d2010ae
A
3066 if (ifp == NULL || event == NULL)
3067 return (EINVAL);
1c79356b 3068
6d2010ae 3069 bzero(&kev_msg, sizeof (kev_msg));
91447636
A
3070 kev_msg.vendor_code = event->vendor_code;
3071 kev_msg.kev_class = event->kev_class;
3072 kev_msg.kev_subclass = event->kev_subclass;
3073 kev_msg.event_code = event->event_code;
3074 kev_msg.dv[0].data_ptr = &event->event_data[0];
3075 kev_msg.dv[0].data_length = event->total_size - KEV_MSG_HEADER_SIZE;
3076 kev_msg.dv[1].data_length = 0;
6d2010ae 3077
91447636 3078 result = dlil_event_internal(ifp, &kev_msg);
1c79356b 3079
6d2010ae 3080 return (result);
91447636 3081}
1c79356b 3082
2d21ac55
A
3083#if CONFIG_MACF_NET
3084#include <netinet/ip6.h>
3085#include <netinet/ip.h>
6d2010ae
A
3086static int
3087dlil_get_socket_type(struct mbuf **mp, int family, int raw)
2d21ac55
A
3088{
3089 struct mbuf *m;
3090 struct ip *ip;
3091 struct ip6_hdr *ip6;
3092 int type = SOCK_RAW;
3093
3094 if (!raw) {
3095 switch (family) {
3096 case PF_INET:
3097 m = m_pullup(*mp, sizeof(struct ip));
3098 if (m == NULL)
3099 break;
3100 *mp = m;
3101 ip = mtod(m, struct ip *);
3102 if (ip->ip_p == IPPROTO_TCP)
3103 type = SOCK_STREAM;
3104 else if (ip->ip_p == IPPROTO_UDP)
3105 type = SOCK_DGRAM;
3106 break;
3107 case PF_INET6:
3108 m = m_pullup(*mp, sizeof(struct ip6_hdr));
3109 if (m == NULL)
3110 break;
3111 *mp = m;
3112 ip6 = mtod(m, struct ip6_hdr *);
3113 if (ip6->ip6_nxt == IPPROTO_TCP)
3114 type = SOCK_STREAM;
3115 else if (ip6->ip6_nxt == IPPROTO_UDP)
3116 type = SOCK_DGRAM;
3117 break;
3118 }
3119 }
3120
3121 return (type);
3122}
3123#endif
3124
316670eb
A
3125/*
3126 * This is mostly called from the context of the DLIL input thread;
3127 * because of that there is no need for atomic operations.
3128 */
3129static __inline void
3130ifp_inc_traffic_class_in(struct ifnet *ifp, struct mbuf *m)
d41d1dae 3131{
d41d1dae
A
3132 if (!(m->m_flags & M_PKTHDR))
3133 return;
3134
316670eb
A
3135 switch (m_get_traffic_class(m)) {
3136 case MBUF_TC_BE:
3137 ifp->if_tc.ifi_ibepackets++;
3138 ifp->if_tc.ifi_ibebytes += m->m_pkthdr.len;
3139 break;
3140 case MBUF_TC_BK:
3141 ifp->if_tc.ifi_ibkpackets++;
3142 ifp->if_tc.ifi_ibkbytes += m->m_pkthdr.len;
3143 break;
3144 case MBUF_TC_VI:
3145 ifp->if_tc.ifi_ivipackets++;
3146 ifp->if_tc.ifi_ivibytes += m->m_pkthdr.len;
3147 break;
3148 case MBUF_TC_VO:
3149 ifp->if_tc.ifi_ivopackets++;
3150 ifp->if_tc.ifi_ivobytes += m->m_pkthdr.len;
3151 break;
3152 default:
3153 break;
3154 }
3155
3156 if (mbuf_is_traffic_class_privileged(m)) {
3157 ifp->if_tc.ifi_ipvpackets++;
3158 ifp->if_tc.ifi_ipvbytes += m->m_pkthdr.len;
3159 }
3160}
3161
3162/*
3163 * This is called from DLIL output, hence multiple threads could end
3164 * up modifying the statistics. We trade off acccuracy for performance
3165 * by not using atomic operations here.
3166 */
3167static __inline void
3168ifp_inc_traffic_class_out(struct ifnet *ifp, struct mbuf *m)
3169{
3170 if (!(m->m_flags & M_PKTHDR))
3171 return;
3172
3173 switch (m_get_traffic_class(m)) {
3174 case MBUF_TC_BE:
3175 ifp->if_tc.ifi_obepackets++;
3176 ifp->if_tc.ifi_obebytes += m->m_pkthdr.len;
3177 break;
3178 case MBUF_TC_BK:
3179 ifp->if_tc.ifi_obkpackets++;
3180 ifp->if_tc.ifi_obkbytes += m->m_pkthdr.len;
3181 break;
3182 case MBUF_TC_VI:
3183 ifp->if_tc.ifi_ovipackets++;
3184 ifp->if_tc.ifi_ovibytes += m->m_pkthdr.len;
3185 break;
3186 case MBUF_TC_VO:
3187 ifp->if_tc.ifi_ovopackets++;
3188 ifp->if_tc.ifi_ovobytes += m->m_pkthdr.len;
3189 break;
3190 default:
3191 break;
3192 }
3193
3194 if (mbuf_is_traffic_class_privileged(m)) {
3195 ifp->if_tc.ifi_opvpackets++;
3196 ifp->if_tc.ifi_opvbytes += m->m_pkthdr.len;
d41d1dae 3197 }
1c79356b
A
3198}
3199
1c79356b 3200/*
91447636
A
3201 * dlil_output
3202 *
3203 * Caller should have a lock on the protocol domain if the protocol
3204 * doesn't support finer grained locking. In most cases, the lock
3205 * will be held from the socket layer and won't be released until
3206 * we return back to the socket layer.
3207 *
3208 * This does mean that we must take a protocol lock before we take
3209 * an interface lock if we're going to take both. This makes sense
3210 * because a protocol is likely to interact with an ifp while it
3211 * is under the protocol lock.
316670eb
A
3212 *
3213 * An advisory code will be returned if adv is not null. This
3214 * can be used to provide feedback about interface queues to the
3215 * application.
1c79356b 3216 */
6d2010ae
A
3217errno_t
3218dlil_output(ifnet_t ifp, protocol_family_t proto_family, mbuf_t packetlist,
316670eb 3219 void *route, const struct sockaddr *dest, int raw, struct flowadv *adv)
6d2010ae
A
3220{
3221 char *frame_type = NULL;
3222 char *dst_linkaddr = NULL;
3223 int retval = 0;
3224 char frame_type_buffer[MAX_FRAME_TYPE_SIZE * 4];
3225 char dst_linkaddr_buffer[MAX_LINKADDR * 4];
3226 struct if_proto *proto = NULL;
2d21ac55
A
3227 mbuf_t m;
3228 mbuf_t send_head = NULL;
3229 mbuf_t *send_tail = &send_head;
6d2010ae 3230 int iorefcnt = 0;
316670eb
A
3231#if CONFIG_EMBEDDED
3232 u_int32_t pre = 0, post = 0;
3233#endif /* CONFIG_EMBEDDED */
6d2010ae 3234
91447636 3235 KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_START,0,0,0,0,0);
6d2010ae
A
3236
3237 /* Get an io refcnt if the interface is attached to prevent ifnet_detach
3238 * from happening while this operation is in progress */
3239 if (!ifnet_is_attached(ifp, 1)) {
3240 retval = ENXIO;
3241 goto cleanup;
3242 }
3243 iorefcnt = 1;
3244
3245 /* update the driver's multicast filter, if needed */
3246 if (ifp->if_updatemcasts > 0 && if_mcasts_update(ifp) == 0)
3247 ifp->if_updatemcasts = 0;
3248
3249 frame_type = frame_type_buffer;
3250 dst_linkaddr = dst_linkaddr_buffer;
3251
91447636 3252 if (raw == 0) {
6d2010ae
A
3253 ifnet_lock_shared(ifp);
3254 /* callee holds a proto refcnt upon success */
91447636
A
3255 proto = find_attached_proto(ifp, proto_family);
3256 if (proto == NULL) {
6d2010ae 3257 ifnet_lock_done(ifp);
91447636
A
3258 retval = ENXIO;
3259 goto cleanup;
3260 }
6d2010ae 3261 ifnet_lock_done(ifp);
2d21ac55 3262 }
6d2010ae 3263
2d21ac55
A
3264preout_again:
3265 if (packetlist == NULL)
3266 goto cleanup;
6d2010ae 3267
2d21ac55
A
3268 m = packetlist;
3269 packetlist = packetlist->m_nextpkt;
3270 m->m_nextpkt = NULL;
6d2010ae 3271
2d21ac55 3272 if (raw == 0) {
6d2010ae
A
3273 proto_media_preout preoutp = (proto->proto_kpi == kProtoKPI_v1 ?
3274 proto->kpi.v1.pre_output : proto->kpi.v2.pre_output);
91447636 3275 retval = 0;
6d2010ae
A
3276 if (preoutp != NULL) {
3277 retval = preoutp(ifp, proto_family, &m, dest, route,
3278 frame_type, dst_linkaddr);
3279
3280 if (retval != 0) {
3281 if (retval == EJUSTRETURN)
3282 goto preout_again;
3283 m_freem(m);
3284 goto cleanup;
91447636 3285 }
1c79356b 3286 }
1c79356b 3287 }
2d21ac55
A
3288
3289#if CONFIG_MACF_NET
3290 retval = mac_ifnet_check_transmit(ifp, m, proto_family,
3291 dlil_get_socket_type(&m, proto_family, raw));
3292 if (retval) {
3293 m_freem(m);
3294 goto cleanup;
3295 }
3296#endif
3297
3298 do {
6d2010ae 3299#if CONFIG_DTRACE
316670eb 3300 if (!raw && proto_family == PF_INET) {
6d2010ae
A
3301 struct ip *ip = mtod(m, struct ip*);
3302 DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL,
3303 struct ip *, ip, struct ifnet *, ifp,
3304 struct ip *, ip, struct ip6_hdr *, NULL);
3305
316670eb 3306 } else if (!raw && proto_family == PF_INET6) {
6d2010ae
A
3307 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr*);
3308 DTRACE_IP6(send, struct mbuf*, m, struct inpcb *, NULL,
3309 struct ip6_hdr *, ip6, struct ifnet*, ifp,
3310 struct ip*, NULL, struct ip6_hdr *, ip6);
3311 }
3312#endif /* CONFIG_DTRACE */
3313
2d21ac55 3314 if (raw == 0 && ifp->if_framer) {
7e4a7d39
A
3315 int rcvif_set = 0;
3316
3317 /*
3318 * If this is a broadcast packet that needs to be
3319 * looped back into the system, set the inbound ifp
3320 * to that of the outbound ifp. This will allow
3321 * us to determine that it is a legitimate packet
3322 * for the system. Only set the ifp if it's not
3323 * already set, just to be safe.
3324 */
3325 if ((m->m_flags & (M_BCAST | M_LOOP)) &&
3326 m->m_pkthdr.rcvif == NULL) {
3327 m->m_pkthdr.rcvif = ifp;
3328 rcvif_set = 1;
3329 }
3330
6d2010ae 3331 retval = ifp->if_framer(ifp, &m, dest, dst_linkaddr,
316670eb
A
3332 frame_type
3333#if CONFIG_EMBEDDED
3334 ,
3335 &pre, &post
3336#endif /* CONFIG_EMBEDDED */
3337 );
2d21ac55 3338 if (retval) {
6d2010ae 3339 if (retval != EJUSTRETURN)
2d21ac55 3340 m_freem(m);
2d21ac55 3341 goto next;
91447636 3342 }
7e4a7d39
A
3343
3344 /*
3345 * Clear the ifp if it was set above, and to be
3346 * safe, only if it is still the same as the
3347 * outbound ifp we have in context. If it was
3348 * looped back, then a copy of it was sent to the
3349 * loopback interface with the rcvif set, and we
3350 * are clearing the one that will go down to the
3351 * layer below.
3352 */
3353 if (rcvif_set && m->m_pkthdr.rcvif == ifp)
3354 m->m_pkthdr.rcvif = NULL;
91447636 3355 }
6d2010ae
A
3356
3357 /*
2d21ac55
A
3358 * Let interface filters (if any) do their thing ...
3359 */
3360 /* Do not pass VLAN tagged packets to filters PR-3586856 */
3361 if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) == 0) {
6d2010ae
A
3362 retval = dlil_interface_filters_output(ifp,
3363 &m, proto_family);
3364 if (retval != 0) {
3365 if (retval != EJUSTRETURN)
3366 m_freem(m);
3367 goto next;
1c79356b 3368 }
1c79356b 3369 }
b7266188
A
3370 /*
3371 * Strip away M_PROTO1 bit prior to sending packet to the driver
3372 * as this field may be used by the driver
3373 */
3374 m->m_flags &= ~M_PROTO1;
3375
2d21ac55
A
3376 /*
3377 * If the underlying interface is not capable of handling a
3378 * packet whose data portion spans across physically disjoint
3379 * pages, we need to "normalize" the packet so that we pass
3380 * down a chain of mbufs where each mbuf points to a span that
3381 * resides in the system page boundary. If the packet does
3382 * not cross page(s), the following is a no-op.
3383 */
3384 if (!(ifp->if_hwassist & IFNET_MULTIPAGES)) {
3385 if ((m = m_normalize(m)) == NULL)
3386 goto next;
3387 }
3388
6d2010ae
A
3389 /*
3390 * If this is a TSO packet, make sure the interface still
3391 * advertise TSO capability.
b0d623f7
A
3392 */
3393
6d2010ae
A
3394 if ((m->m_pkthdr.csum_flags & CSUM_TSO_IPV4) &&
3395 !(ifp->if_hwassist & IFNET_TSO_IPV4)) {
3396 retval = EMSGSIZE;
3397 m_freem(m);
3398 goto cleanup;
b0d623f7
A
3399 }
3400
6d2010ae
A
3401 if ((m->m_pkthdr.csum_flags & CSUM_TSO_IPV6) &&
3402 !(ifp->if_hwassist & IFNET_TSO_IPV6)) {
3403 retval = EMSGSIZE;
3404 m_freem(m);
3405 goto cleanup;
b0d623f7 3406 }
6d2010ae 3407
2d21ac55
A
3408 /*
3409 * Finally, call the driver.
3410 */
2d21ac55
A
3411 if ((ifp->if_eflags & IFEF_SENDLIST) != 0) {
3412 *send_tail = m;
3413 send_tail = &m->m_nextpkt;
6d2010ae 3414 } else {
316670eb
A
3415#if CONFIG_EMBEDDED
3416 iptap_ipf_output(ifp, proto_family, (struct mbuf *)m,
3417 pre, post);
3418#endif /* CONFIG_EMBEDDED */
3419 ifp_inc_traffic_class_out(ifp, m);
6d2010ae
A
3420 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START,
3421 0,0,0,0,0);
316670eb
A
3422 retval = (*ifp->if_output)(ifp, m);
3423 if (retval == EQFULL || retval == EQSUSPENDED) {
3424 if (adv != NULL && adv->code == FADV_SUCCESS) {
3425 adv->code = (retval == EQFULL ?
3426 FADV_FLOW_CONTROLLED :
3427 FADV_SUSPENDED);
3428 }
3429 retval = 0;
3430 }
b0d623f7 3431 if (retval && dlil_verbose) {
6d2010ae
A
3432 printf("%s: output error on %s%d retval = %d\n",
3433 __func__, ifp->if_name, ifp->if_unit,
3434 retval);
2d21ac55 3435 }
6d2010ae
A
3436 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END,
3437 0,0,0,0,0);
2d21ac55
A
3438 }
3439 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0,0,0,0,0);
3440
3441next:
3442 m = packetlist;
3443 if (m) {
3444 packetlist = packetlist->m_nextpkt;
3445 m->m_nextpkt = NULL;
3446 }
3447 } while (m);
3448
3449 if (send_head) {
316670eb
A
3450#if CONFIG_EMBEDDED
3451 iptap_ipf_output(ifp, proto_family, (struct mbuf *)send_head,
3452 pre, post);
3453#endif /* CONFIG_EMBEDDED */
3454 ifp_inc_traffic_class_out(ifp, send_head);
d41d1dae 3455
6d2010ae 3456 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START, 0,0,0,0,0);
316670eb
A
3457 retval = (*ifp->if_output)(ifp, send_head);
3458 if (retval == EQFULL || retval == EQSUSPENDED) {
3459 if (adv != NULL) {
3460 adv->code = (retval == EQFULL ?
3461 FADV_FLOW_CONTROLLED : FADV_SUSPENDED);
3462 }
3463 retval = 0;
3464 }
b0d623f7 3465 if (retval && dlil_verbose) {
6d2010ae
A
3466 printf("%s: output error on %s%d retval = %d\n",
3467 __func__, ifp->if_name, ifp->if_unit, retval);
2d21ac55
A
3468 }
3469 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0,0,0,0,0);
1c79356b 3470 }
6d2010ae 3471
91447636 3472 KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_END,0,0,0,0,0);
1c79356b 3473
91447636 3474cleanup:
6d2010ae
A
3475 if (proto != NULL)
3476 if_proto_free(proto);
3477 if (packetlist) /* if any packets are left, clean up */
2d21ac55 3478 mbuf_freem_list(packetlist);
91447636
A
3479 if (retval == EJUSTRETURN)
3480 retval = 0;
6d2010ae
A
3481 if (iorefcnt == 1)
3482 ifnet_decr_iorefcnt(ifp);
3483
3484 return (retval);
1c79356b
A
3485}
3486
2d21ac55 3487errno_t
6d2010ae
A
3488ifnet_ioctl(ifnet_t ifp, protocol_family_t proto_fam, u_long ioctl_code,
3489 void *ioctl_arg)
3490{
3491 struct ifnet_filter *filter;
3492 int retval = EOPNOTSUPP;
3493 int result = 0;
3494
2d21ac55 3495 if (ifp == NULL || ioctl_code == 0)
6d2010ae
A
3496 return (EINVAL);
3497
3498 /* Get an io ref count if the interface is attached */
3499 if (!ifnet_is_attached(ifp, 1))
3500 return (EOPNOTSUPP);
3501
91447636
A
3502 /* Run the interface filters first.
3503 * We want to run all filters before calling the protocol,
3504 * interface family, or interface.
3505 */
6d2010ae
A
3506 lck_mtx_lock_spin(&ifp->if_flt_lock);
3507 /* prevent filter list from changing in case we drop the lock */
3508 if_flt_monitor_busy(ifp);
91447636 3509 TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
6d2010ae
A
3510 if (filter->filt_ioctl != NULL && (filter->filt_protocol == 0 ||
3511 filter->filt_protocol == proto_fam)) {
3512 lck_mtx_unlock(&ifp->if_flt_lock);
3513
3514 result = filter->filt_ioctl(filter->filt_cookie, ifp,
3515 proto_fam, ioctl_code, ioctl_arg);
3516
3517 lck_mtx_lock_spin(&ifp->if_flt_lock);
3518
91447636
A
3519 /* Only update retval if no one has handled the ioctl */
3520 if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
3521 if (result == ENOTSUP)
3522 result = EOPNOTSUPP;
3523 retval = result;
6d2010ae
A
3524 if (retval != 0 && retval != EOPNOTSUPP) {
3525 /* we're done with the filter list */
3526 if_flt_monitor_unbusy(ifp);
3527 lck_mtx_unlock(&ifp->if_flt_lock);
91447636
A
3528 goto cleanup;
3529 }
3530 }
3531 }
3532 }
6d2010ae
A
3533 /* we're done with the filter list */
3534 if_flt_monitor_unbusy(ifp);
3535 lck_mtx_unlock(&ifp->if_flt_lock);
3536
91447636 3537 /* Allow the protocol to handle the ioctl */
6d2010ae
A
3538 if (proto_fam != 0) {
3539 struct if_proto *proto;
3540
3541 /* callee holds a proto refcnt upon success */
3542 ifnet_lock_shared(ifp);
3543 proto = find_attached_proto(ifp, proto_fam);
3544 ifnet_lock_done(ifp);
3545 if (proto != NULL) {
3546 proto_media_ioctl ioctlp =
3547 (proto->proto_kpi == kProtoKPI_v1 ?
3548 proto->kpi.v1.ioctl : proto->kpi.v2.ioctl);
91447636 3549 result = EOPNOTSUPP;
6d2010ae
A
3550 if (ioctlp != NULL)
3551 result = ioctlp(ifp, proto_fam, ioctl_code,
3552 ioctl_arg);
3553 if_proto_free(proto);
3554
91447636
A
3555 /* Only update retval if no one has handled the ioctl */
3556 if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
3557 if (result == ENOTSUP)
3558 result = EOPNOTSUPP;
3559 retval = result;
6d2010ae 3560 if (retval && retval != EOPNOTSUPP)
91447636 3561 goto cleanup;
91447636
A
3562 }
3563 }
3564 }
6d2010ae 3565
91447636 3566 /* retval is either 0 or EOPNOTSUPP */
6d2010ae 3567
91447636
A
3568 /*
3569 * Let the interface handle this ioctl.
3570 * If it returns EOPNOTSUPP, ignore that, we may have
3571 * already handled this in the protocol or family.
3572 */
6d2010ae 3573 if (ifp->if_ioctl)
91447636 3574 result = (*ifp->if_ioctl)(ifp, ioctl_code, ioctl_arg);
6d2010ae 3575
91447636
A
3576 /* Only update retval if no one has handled the ioctl */
3577 if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
3578 if (result == ENOTSUP)
3579 result = EOPNOTSUPP;
3580 retval = result;
3581 if (retval && retval != EOPNOTSUPP) {
3582 goto cleanup;
3583 }
3584 }
1c79356b 3585
6d2010ae 3586cleanup:
91447636
A
3587 if (retval == EJUSTRETURN)
3588 retval = 0;
6d2010ae
A
3589
3590 ifnet_decr_iorefcnt(ifp);
3591
3592 return (retval);
91447636 3593}
1c79356b 3594
91447636 3595__private_extern__ errno_t
6d2010ae 3596dlil_set_bpf_tap(ifnet_t ifp, bpf_tap_mode mode, bpf_packet_func callback)
91447636
A
3597{
3598 errno_t error = 0;
6d2010ae
A
3599
3600
3601 if (ifp->if_set_bpf_tap) {
3602 /* Get an io reference on the interface if it is attached */
3603 if (!ifnet_is_attached(ifp, 1))
3604 return ENXIO;
91447636 3605 error = ifp->if_set_bpf_tap(ifp, mode, callback);
6d2010ae
A
3606 ifnet_decr_iorefcnt(ifp);
3607 }
3608 return (error);
1c79356b
A
3609}
3610
2d21ac55 3611errno_t
6d2010ae
A
3612dlil_resolve_multi(struct ifnet *ifp, const struct sockaddr *proto_addr,
3613 struct sockaddr *ll_addr, size_t ll_len)
1c79356b 3614{
91447636
A
3615 errno_t result = EOPNOTSUPP;
3616 struct if_proto *proto;
3617 const struct sockaddr *verify;
2d21ac55 3618 proto_media_resolve_multi resolvep;
6d2010ae
A
3619
3620 if (!ifnet_is_attached(ifp, 1))
3621 return result;
3622
91447636 3623 bzero(ll_addr, ll_len);
6d2010ae
A
3624
3625 /* Call the protocol first; callee holds a proto refcnt upon success */
3626 ifnet_lock_shared(ifp);
91447636 3627 proto = find_attached_proto(ifp, proto_addr->sa_family);
6d2010ae 3628 ifnet_lock_done(ifp);
2d21ac55 3629 if (proto != NULL) {
6d2010ae
A
3630 resolvep = (proto->proto_kpi == kProtoKPI_v1 ?
3631 proto->kpi.v1.resolve_multi : proto->kpi.v2.resolve_multi);
2d21ac55 3632 if (resolvep != NULL)
6d2010ae 3633 result = resolvep(ifp, proto_addr,
316670eb 3634 (struct sockaddr_dl*)(void *)ll_addr, ll_len);
6d2010ae 3635 if_proto_free(proto);
91447636 3636 }
6d2010ae 3637
91447636
A
3638 /* Let the interface verify the multicast address */
3639 if ((result == EOPNOTSUPP || result == 0) && ifp->if_check_multi) {
3640 if (result == 0)
3641 verify = ll_addr;
3642 else
3643 verify = proto_addr;
3644 result = ifp->if_check_multi(ifp, verify);
3645 }
6d2010ae
A
3646
3647 ifnet_decr_iorefcnt(ifp);
3648 return (result);
91447636 3649}
1c79356b 3650
91447636 3651__private_extern__ errno_t
6d2010ae
A
3652dlil_send_arp_internal(ifnet_t ifp, u_short arpop,
3653 const struct sockaddr_dl* sender_hw, const struct sockaddr* sender_proto,
3654 const struct sockaddr_dl* target_hw, const struct sockaddr* target_proto)
91447636
A
3655{
3656 struct if_proto *proto;
3657 errno_t result = 0;
6d2010ae
A
3658
3659 /* callee holds a proto refcnt upon success */
3660 ifnet_lock_shared(ifp);
91447636 3661 proto = find_attached_proto(ifp, target_proto->sa_family);
6d2010ae 3662 ifnet_lock_done(ifp);
2d21ac55 3663 if (proto == NULL) {
91447636 3664 result = ENOTSUP;
6d2010ae 3665 } else {
2d21ac55 3666 proto_media_send_arp arpp;
6d2010ae
A
3667 arpp = (proto->proto_kpi == kProtoKPI_v1 ?
3668 proto->kpi.v1.send_arp : proto->kpi.v2.send_arp);
2d21ac55
A
3669 if (arpp == NULL)
3670 result = ENOTSUP;
3671 else
6d2010ae
A
3672 result = arpp(ifp, arpop, sender_hw, sender_proto,
3673 target_hw, target_proto);
3674 if_proto_free(proto);
91447636 3675 }
6d2010ae
A
3676
3677 return (result);
91447636 3678}
1c79356b 3679
316670eb
A
3680__private_extern__ errno_t
3681net_thread_check_lock(u_int32_t flag)
3682{
3683 struct uthread *uth = get_bsdthread_info(current_thread());
3684 return ((uth->uu_network_lock_held & flag) == flag);
3685}
3686
3687__private_extern__ void
3688net_thread_set_lock(u_int32_t flag)
3689{
3690 struct uthread *uth = get_bsdthread_info(current_thread());
3691
3692 VERIFY((uth->uu_network_lock_held & flag) != flag);
3693 uth->uu_network_lock_held |= flag;
3694}
3695
3696__private_extern__ void
3697net_thread_unset_lock(u_int32_t flag)
3698{
3699 struct uthread *uth = get_bsdthread_info(current_thread());
3700
3701 VERIFY((uth->uu_network_lock_held & flag) == flag);
3702 uth->uu_network_lock_held &= (~flag);
3703}
3704
2d21ac55
A
/*
 * Return non-zero when the sender and target IPv4 addresses are the
 * same, i.e. the ARP packet announces the sender's own address.
 *
 * Either pointer may be NULL, in which case the packet is not an
 * announcement and 0 is returned.  Both are checked so that the result
 * does not depend on which argument a caller happens to leave unset.
 */
static __inline__ int
_is_announcement(const struct sockaddr_in * sender_sin,
    const struct sockaddr_in * target_sin)
{
	if (sender_sin == NULL || target_sin == NULL) {
		return (0);
	}
	return (sender_sin->sin_addr.s_addr == target_sin->sin_addr.s_addr);
}
3714
91447636 3715__private_extern__ errno_t
6d2010ae
A
3716dlil_send_arp(ifnet_t ifp, u_short arpop, const struct sockaddr_dl* sender_hw,
3717 const struct sockaddr* sender_proto, const struct sockaddr_dl* target_hw,
316670eb 3718 const struct sockaddr* target_proto0, u_int32_t rtflags)
91447636
A
3719{
3720 errno_t result = 0;
2d21ac55
A
3721 const struct sockaddr_in * sender_sin;
3722 const struct sockaddr_in * target_sin;
316670eb
A
3723 struct sockaddr_inarp target_proto_sinarp;
3724 struct sockaddr *target_proto = (void *)(uintptr_t)target_proto0;
6d2010ae
A
3725
3726 if (target_proto == NULL || (sender_proto != NULL &&
3727 sender_proto->sa_family != target_proto->sa_family))
3728 return (EINVAL);
3729
316670eb
A
3730 /*
3731 * If the target is a (default) router, provide that
3732 * information to the send_arp callback routine.
3733 */
3734 if (rtflags & RTF_ROUTER) {
3735 bcopy(target_proto, &target_proto_sinarp,
3736 sizeof (struct sockaddr_in));
3737 target_proto_sinarp.sin_other |= SIN_ROUTER;
3738 target_proto = (struct sockaddr *)&target_proto_sinarp;
3739 }
3740
91447636
A
3741 /*
3742 * If this is an ARP request and the target IP is IPv4LL,
2d21ac55
A
3743 * send the request on all interfaces. The exception is
3744 * an announcement, which must only appear on the specific
3745 * interface.
91447636 3746 */
316670eb
A
3747 sender_sin = (struct sockaddr_in *)(void *)(uintptr_t)sender_proto;
3748 target_sin = (struct sockaddr_in *)(void *)(uintptr_t)target_proto;
6d2010ae
A
3749 if (target_proto->sa_family == AF_INET &&
3750 IN_LINKLOCAL(ntohl(target_sin->sin_addr.s_addr)) &&
3751 ipv4_ll_arp_aware != 0 && arpop == ARPOP_REQUEST &&
3752 !_is_announcement(target_sin, sender_sin)) {
91447636
A
3753 ifnet_t *ifp_list;
3754 u_int32_t count;
3755 u_int32_t ifp_on;
6d2010ae 3756
91447636
A
3757 result = ENOTSUP;
3758
3759 if (ifnet_list_get(IFNET_FAMILY_ANY, &ifp_list, &count) == 0) {
3760 for (ifp_on = 0; ifp_on < count; ifp_on++) {
6d2010ae
A
3761 errno_t new_result;
3762 ifaddr_t source_hw = NULL;
3763 ifaddr_t source_ip = NULL;
3764 struct sockaddr_in source_ip_copy;
3765 struct ifnet *cur_ifp = ifp_list[ifp_on];
3766
91447636 3767 /*
6d2010ae
A
3768 * Only arp on interfaces marked for IPv4LL
3769 * ARPing. This may mean that we don't ARP on
3770 * the interface the subnet route points to.
91447636 3771 */
6d2010ae 3772 if (!(cur_ifp->if_eflags & IFEF_ARPLL))
91447636 3773 continue;
b0d623f7 3774
91447636 3775 /* Find the source IP address */
6d2010ae
A
3776 ifnet_lock_shared(cur_ifp);
3777 source_hw = cur_ifp->if_lladdr;
3778 TAILQ_FOREACH(source_ip, &cur_ifp->if_addrhead,
3779 ifa_link) {
3780 IFA_LOCK(source_ip);
3781 if (source_ip->ifa_addr != NULL &&
3782 source_ip->ifa_addr->sa_family ==
3783 AF_INET) {
3784 /* Copy the source IP address */
3785 source_ip_copy =
3786 *(struct sockaddr_in *)
316670eb 3787 (void *)source_ip->ifa_addr;
6d2010ae 3788 IFA_UNLOCK(source_ip);
91447636
A
3789 break;
3790 }
6d2010ae 3791 IFA_UNLOCK(source_ip);
91447636 3792 }
6d2010ae 3793
91447636
A
3794 /* No IP Source, don't arp */
3795 if (source_ip == NULL) {
6d2010ae 3796 ifnet_lock_done(cur_ifp);
91447636
A
3797 continue;
3798 }
6d2010ae
A
3799
3800 IFA_ADDREF(source_hw);
3801 ifnet_lock_done(cur_ifp);
3802
91447636 3803 /* Send the ARP */
6d2010ae 3804 new_result = dlil_send_arp_internal(cur_ifp,
316670eb
A
3805 arpop, (struct sockaddr_dl *)(void *)
3806 source_hw->ifa_addr,
6d2010ae
A
3807 (struct sockaddr *)&source_ip_copy, NULL,
3808 target_proto);
b0d623f7 3809
6d2010ae 3810 IFA_REMREF(source_hw);
91447636
A
3811 if (result == ENOTSUP) {
3812 result = new_result;
3813 }
3814 }
6d2010ae 3815 ifnet_list_free(ifp_list);
91447636 3816 }
6d2010ae
A
3817 } else {
3818 result = dlil_send_arp_internal(ifp, arpop, sender_hw,
3819 sender_proto, target_hw, target_proto);
91447636 3820 }
6d2010ae
A
3821
3822 return (result);
91447636 3823}
1c79356b 3824
6d2010ae
A
3825/*
3826 * Caller must hold ifnet head lock.
3827 */
3828static int
3829ifnet_lookup(struct ifnet *ifp)
91447636 3830{
6d2010ae
A
3831 struct ifnet *_ifp;
3832
3833 lck_rw_assert(&ifnet_head_lock, LCK_RW_ASSERT_HELD);
3834 TAILQ_FOREACH(_ifp, &ifnet_head, if_link) {
3835 if (_ifp == ifp)
91447636 3836 break;
6d2010ae
A
3837 }
3838 return (_ifp != NULL);
91447636 3839}
6d2010ae
A
3840/*
3841 * Caller has to pass a non-zero refio argument to get a
3842 * IO reference count. This will prevent ifnet_detach from
3843 * being called when there are outstanding io reference counts.
91447636 3844 */
6d2010ae
A
3845int
3846ifnet_is_attached(struct ifnet *ifp, int refio)
3847{
3848 int ret;
3849
3850 lck_mtx_lock_spin(&ifp->if_ref_lock);
3851 if ((ret = ((ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING)) ==
3852 IFRF_ATTACHED))) {
3853 if (refio > 0)
3854 ifp->if_refio++;
3855 }
3856 lck_mtx_unlock(&ifp->if_ref_lock);
3857
3858 return (ret);
3859}
3860
3861void
3862ifnet_decr_iorefcnt(struct ifnet *ifp)
3863{
3864 lck_mtx_lock_spin(&ifp->if_ref_lock);
3865 VERIFY(ifp->if_refio > 0);
3866 VERIFY((ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING)) != 0);
3867 ifp->if_refio--;
3868
3869 /* if there are no more outstanding io references, wakeup the
3870 * ifnet_detach thread if detaching flag is set.
3871 */
3872 if (ifp->if_refio == 0 &&
3873 (ifp->if_refflags & IFRF_DETACHING) != 0) {
6d2010ae 3874 wakeup(&(ifp->if_refio));
91447636 3875 }
6d2010ae
A
3876 lck_mtx_unlock(&ifp->if_ref_lock);
3877}
b0d623f7 3878
6d2010ae
A
3879static void
3880dlil_if_trace(struct dlil_ifnet *dl_if, int refhold)
3881{
3882 struct dlil_ifnet_dbg *dl_if_dbg = (struct dlil_ifnet_dbg *)dl_if;
3883 ctrace_t *tr;
3884 u_int32_t idx;
3885 u_int16_t *cnt;
1c79356b 3886
6d2010ae
A
3887 if (!(dl_if->dl_if_flags & DLIF_DEBUG)) {
3888 panic("%s: dl_if %p has no debug structure", __func__, dl_if);
3889 /* NOTREACHED */
3890 }
3891
3892 if (refhold) {
3893 cnt = &dl_if_dbg->dldbg_if_refhold_cnt;
3894 tr = dl_if_dbg->dldbg_if_refhold;
3895 } else {
3896 cnt = &dl_if_dbg->dldbg_if_refrele_cnt;
3897 tr = dl_if_dbg->dldbg_if_refrele;
3898 }
3899
3900 idx = atomic_add_16_ov(cnt, 1) % IF_REF_TRACE_HIST_SIZE;
3901 ctrace_record(&tr[idx]);
91447636 3902}
1c79356b 3903
6d2010ae
A
3904errno_t
3905dlil_if_ref(struct ifnet *ifp)
3906{
3907 struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
3908
3909 if (dl_if == NULL)
3910 return (EINVAL);
3911
3912 lck_mtx_lock_spin(&dl_if->dl_if_lock);
3913 ++dl_if->dl_if_refcnt;
3914 if (dl_if->dl_if_refcnt == 0) {
3915 panic("%s: wraparound refcnt for ifp=%p", __func__, ifp);
3916 /* NOTREACHED */
3917 }
3918 if (dl_if->dl_if_trace != NULL)
3919 (*dl_if->dl_if_trace)(dl_if, TRUE);
3920 lck_mtx_unlock(&dl_if->dl_if_lock);
3921
3922 return (0);
91447636 3923}
1c79356b 3924
6d2010ae
A
3925errno_t
3926dlil_if_free(struct ifnet *ifp)
3927{
3928 struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
3929
3930 if (dl_if == NULL)
3931 return (EINVAL);
3932
3933 lck_mtx_lock_spin(&dl_if->dl_if_lock);
3934 if (dl_if->dl_if_refcnt == 0) {
3935 panic("%s: negative refcnt for ifp=%p", __func__, ifp);
3936 /* NOTREACHED */
3937 }
3938 --dl_if->dl_if_refcnt;
3939 if (dl_if->dl_if_trace != NULL)
3940 (*dl_if->dl_if_trace)(dl_if, FALSE);
3941 lck_mtx_unlock(&dl_if->dl_if_lock);
3942
3943 return (0);
3944}
1c79356b 3945
2d21ac55 3946static errno_t
6d2010ae
A
3947dlil_attach_protocol_internal(struct if_proto *proto,
3948 const struct ifnet_demux_desc *demux_list, u_int32_t demux_count)
91447636 3949{
6d2010ae 3950 struct kev_dl_proto_data ev_pr_data;
91447636
A
3951 struct ifnet *ifp = proto->ifp;
3952 int retval = 0;
b0d623f7 3953 u_int32_t hash_value = proto_hash_value(proto->protocol_family);
6d2010ae
A
3954 struct if_proto *prev_proto;
3955 struct if_proto *_proto;
3956
3957 /* callee holds a proto refcnt upon success */
3958 ifnet_lock_exclusive(ifp);
3959 _proto = find_attached_proto(ifp, proto->protocol_family);
3960 if (_proto != NULL) {
91447636 3961 ifnet_lock_done(ifp);
6d2010ae
A
3962 if_proto_free(_proto);
3963 return (EEXIST);
91447636 3964 }
6d2010ae 3965
91447636
A
3966 /*
3967 * Call family module add_proto routine so it can refine the
3968 * demux descriptors as it wishes.
3969 */
6d2010ae
A
3970 retval = ifp->if_add_proto(ifp, proto->protocol_family, demux_list,
3971 demux_count);
91447636 3972 if (retval) {
6d2010ae
A
3973 ifnet_lock_done(ifp);
3974 return (retval);
91447636 3975 }
6d2010ae 3976
91447636
A
3977 /*
3978 * Insert the protocol in the hash
3979 */
6d2010ae
A
3980 prev_proto = SLIST_FIRST(&ifp->if_proto_hash[hash_value]);
3981 while (prev_proto != NULL && SLIST_NEXT(prev_proto, next_hash) != NULL)
3982 prev_proto = SLIST_NEXT(prev_proto, next_hash);
3983 if (prev_proto)
3984 SLIST_INSERT_AFTER(prev_proto, proto, next_hash);
3985 else
3986 SLIST_INSERT_HEAD(&ifp->if_proto_hash[hash_value],
3987 proto, next_hash);
3988
3989 /* hold a proto refcnt for attach */
3990 if_proto_ref(proto);
1c79356b 3991
91447636 3992 /*
6d2010ae
A
3993 * The reserved field carries the number of protocol still attached
3994 * (subject to change)
91447636 3995 */
91447636
A
3996 ev_pr_data.proto_family = proto->protocol_family;
3997 ev_pr_data.proto_remaining_count = dlil_ifp_proto_count(ifp);
6d2010ae
A
3998 ifnet_lock_done(ifp);
3999
4000 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_ATTACHED,
4001 (struct net_event_data *)&ev_pr_data,
4002 sizeof (struct kev_dl_proto_data));
4003 return (retval);
91447636 4004}
0b4e3aa0 4005
2d21ac55
A
4006errno_t
4007ifnet_attach_protocol(ifnet_t ifp, protocol_family_t protocol,
6d2010ae 4008 const struct ifnet_attach_proto_param *proto_details)
91447636
A
4009{
4010 int retval = 0;
4011 struct if_proto *ifproto = NULL;
6d2010ae
A
4012
4013 ifnet_head_lock_shared();
4014 if (ifp == NULL || protocol == 0 || proto_details == NULL) {
4015 retval = EINVAL;
4016 goto end;
4017 }
4018 /* Check that the interface is in the global list */
4019 if (!ifnet_lookup(ifp)) {
4020 retval = ENXIO;
4021 goto end;
4022 }
4023
4024 ifproto = zalloc(dlif_proto_zone);
4025 if (ifproto == NULL) {
91447636
A
4026 retval = ENOMEM;
4027 goto end;
4028 }
6d2010ae
A
4029 bzero(ifproto, dlif_proto_size);
4030
4031 /* refcnt held above during lookup */
91447636
A
4032 ifproto->ifp = ifp;
4033 ifproto->protocol_family = protocol;
4034 ifproto->proto_kpi = kProtoKPI_v1;
4035 ifproto->kpi.v1.input = proto_details->input;
4036 ifproto->kpi.v1.pre_output = proto_details->pre_output;
4037 ifproto->kpi.v1.event = proto_details->event;
4038 ifproto->kpi.v1.ioctl = proto_details->ioctl;
4039 ifproto->kpi.v1.detached = proto_details->detached;
4040 ifproto->kpi.v1.resolve_multi = proto_details->resolve;
4041 ifproto->kpi.v1.send_arp = proto_details->send_arp;
6d2010ae 4042
2d21ac55 4043 retval = dlil_attach_protocol_internal(ifproto,
6d2010ae
A
4044 proto_details->demux_list, proto_details->demux_count);
4045
4046 if (dlil_verbose) {
4047 printf("%s%d: attached v1 protocol %d\n", ifp->if_name,
4048 ifp->if_unit, protocol);
4049 }
4050
9bccf70c 4051end:
6d2010ae
A
4052 if (retval != 0 && retval != EEXIST && ifp != NULL) {
4053 DLIL_PRINTF("%s%d: failed to attach v1 protocol %d (err=%d)\n",
4054 ifp->if_name, ifp->if_unit, protocol, retval);
4055 }
4056 ifnet_head_done();
4057 if (retval != 0 && ifproto != NULL)
4058 zfree(dlif_proto_zone, ifproto);
4059 return (retval);
1c79356b
A
4060}
4061
2d21ac55
A
4062errno_t
4063ifnet_attach_protocol_v2(ifnet_t ifp, protocol_family_t protocol,
6d2010ae 4064 const struct ifnet_attach_proto_param_v2 *proto_details)
91447636 4065{
2d21ac55 4066 int retval = 0;
91447636 4067 struct if_proto *ifproto = NULL;
6d2010ae
A
4068
4069 ifnet_head_lock_shared();
4070 if (ifp == NULL || protocol == 0 || proto_details == NULL) {
4071 retval = EINVAL;
4072 goto end;
4073 }
4074 /* Check that the interface is in the global list */
4075 if (!ifnet_lookup(ifp)) {
4076 retval = ENXIO;
4077 goto end;
4078 }
4079
4080 ifproto = zalloc(dlif_proto_zone);
4081 if (ifproto == NULL) {
91447636
A
4082 retval = ENOMEM;
4083 goto end;
4084 }
2d21ac55 4085 bzero(ifproto, sizeof(*ifproto));
6d2010ae
A
4086
4087 /* refcnt held above during lookup */
2d21ac55
A
4088 ifproto->ifp = ifp;
4089 ifproto->protocol_family = protocol;
4090 ifproto->proto_kpi = kProtoKPI_v2;
4091 ifproto->kpi.v2.input = proto_details->input;
4092 ifproto->kpi.v2.pre_output = proto_details->pre_output;
4093 ifproto->kpi.v2.event = proto_details->event;
4094 ifproto->kpi.v2.ioctl = proto_details->ioctl;
4095 ifproto->kpi.v2.detached = proto_details->detached;
4096 ifproto->kpi.v2.resolve_multi = proto_details->resolve;
4097 ifproto->kpi.v2.send_arp = proto_details->send_arp;
1c79356b 4098
6d2010ae
A
4099 retval = dlil_attach_protocol_internal(ifproto,
4100 proto_details->demux_list, proto_details->demux_count);
1c79356b 4101
6d2010ae
A
4102 if (dlil_verbose) {
4103 printf("%s%d: attached v2 protocol %d\n", ifp->if_name,
4104 ifp->if_unit, protocol);
91447636 4105 }
6d2010ae
A
4106
4107end:
4108 if (retval != 0 && retval != EEXIST && ifp != NULL) {
4109 DLIL_PRINTF("%s%d: failed to attach v2 protocol %d (err=%d)\n",
4110 ifp->if_name, ifp->if_unit, protocol, retval);
2d21ac55 4111 }
6d2010ae
A
4112 ifnet_head_done();
4113 if (retval != 0 && ifproto != NULL)
4114 zfree(dlif_proto_zone, ifproto);
4115 return (retval);
91447636 4116}
1c79356b 4117
2d21ac55
A
4118errno_t
4119ifnet_detach_protocol(ifnet_t ifp, protocol_family_t proto_family)
91447636
A
4120{
4121 struct if_proto *proto = NULL;
4122 int retval = 0;
6d2010ae
A
4123
4124 if (ifp == NULL || proto_family == 0) {
4125 retval = EINVAL;
91447636
A
4126 goto end;
4127 }
6d2010ae
A
4128
4129 ifnet_lock_exclusive(ifp);
4130 /* callee holds a proto refcnt upon success */
91447636 4131 proto = find_attached_proto(ifp, proto_family);
91447636
A
4132 if (proto == NULL) {
4133 retval = ENXIO;
6d2010ae 4134 ifnet_lock_done(ifp);
91447636
A
4135 goto end;
4136 }
6d2010ae
A
4137
4138 /* call family module del_proto */
91447636
A
4139 if (ifp->if_del_proto)
4140 ifp->if_del_proto(ifp, proto->protocol_family);
1c79356b 4141
6d2010ae
A
4142 SLIST_REMOVE(&ifp->if_proto_hash[proto_hash_value(proto_family)],
4143 proto, if_proto, next_hash);
4144
4145 if (proto->proto_kpi == kProtoKPI_v1) {
4146 proto->kpi.v1.input = ifproto_media_input_v1;
4147 proto->kpi.v1.pre_output= ifproto_media_preout;
4148 proto->kpi.v1.event = ifproto_media_event;
4149 proto->kpi.v1.ioctl = ifproto_media_ioctl;
4150 proto->kpi.v1.resolve_multi = ifproto_media_resolve_multi;
4151 proto->kpi.v1.send_arp = ifproto_media_send_arp;
4152 } else {
4153 proto->kpi.v2.input = ifproto_media_input_v2;
4154 proto->kpi.v2.pre_output = ifproto_media_preout;
4155 proto->kpi.v2.event = ifproto_media_event;
4156 proto->kpi.v2.ioctl = ifproto_media_ioctl;
4157 proto->kpi.v2.resolve_multi = ifproto_media_resolve_multi;
4158 proto->kpi.v2.send_arp = ifproto_media_send_arp;
4159 }
4160 proto->detached = 1;
4161 ifnet_lock_done(ifp);
4162
4163 if (dlil_verbose) {
4164 printf("%s%d: detached %s protocol %d\n", ifp->if_name,
4165 ifp->if_unit, (proto->proto_kpi == kProtoKPI_v1) ?
4166 "v1" : "v2", proto_family);
4167 }
4168
4169 /* release proto refcnt held during protocol attach */
4170 if_proto_free(proto);
91447636
A
4171
4172 /*
6d2010ae
A
4173 * Release proto refcnt held during lookup; the rest of
4174 * protocol detach steps will happen when the last proto
4175 * reference is released.
91447636 4176 */
6d2010ae
A
4177 if_proto_free(proto);
4178
91447636 4179end:
6d2010ae 4180 return (retval);
91447636 4181}
1c79356b 4182
6d2010ae
A
4183
4184static errno_t
4185ifproto_media_input_v1(struct ifnet *ifp, protocol_family_t protocol,
4186 struct mbuf *packet, char *header)
91447636 4187{
6d2010ae
A
4188#pragma unused(ifp, protocol, packet, header)
4189 return (ENXIO);
4190}
4191
4192static errno_t
4193ifproto_media_input_v2(struct ifnet *ifp, protocol_family_t protocol,
4194 struct mbuf *packet)
4195{
4196#pragma unused(ifp, protocol, packet)
4197 return (ENXIO);
4198
4199}
4200
4201static errno_t
4202ifproto_media_preout(struct ifnet *ifp, protocol_family_t protocol,
4203 mbuf_t *packet, const struct sockaddr *dest, void *route, char *frame_type,
4204 char *link_layer_dest)
4205{
4206#pragma unused(ifp, protocol, packet, dest, route, frame_type, link_layer_dest)
4207 return (ENXIO);
9bccf70c 4208
91447636 4209}
9bccf70c 4210
91447636 4211static void
6d2010ae
A
4212ifproto_media_event(struct ifnet *ifp, protocol_family_t protocol,
4213 const struct kev_msg *event)
4214{
4215#pragma unused(ifp, protocol, event)
4216}
4217
4218static errno_t
4219ifproto_media_ioctl(struct ifnet *ifp, protocol_family_t protocol,
4220 unsigned long command, void *argument)
4221{
4222#pragma unused(ifp, protocol, command, argument)
4223 return (ENXIO);
4224}
4225
4226static errno_t
4227ifproto_media_resolve_multi(ifnet_t ifp, const struct sockaddr *proto_addr,
4228 struct sockaddr_dl *out_ll, size_t ll_len)
4229{
4230#pragma unused(ifp, proto_addr, out_ll, ll_len)
4231 return (ENXIO);
4232}
4233
4234static errno_t
4235ifproto_media_send_arp(struct ifnet *ifp, u_short arpop,
4236 const struct sockaddr_dl *sender_hw, const struct sockaddr *sender_proto,
4237 const struct sockaddr_dl *target_hw, const struct sockaddr *target_proto)
4238{
4239#pragma unused(ifp, arpop, sender_hw, sender_proto, target_hw, target_proto)
4240 return (ENXIO);
91447636 4241}
9bccf70c 4242
91447636
A
4243extern int if_next_index(void);
4244
2d21ac55 4245errno_t
6d2010ae 4246ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr)
91447636 4247{
91447636 4248 struct ifnet *tmp_if;
6d2010ae
A
4249 struct ifaddr *ifa;
4250 struct if_data_internal if_data_saved;
4251 struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
316670eb
A
4252 struct dlil_threading_info *dl_inp;
4253 u_int32_t sflags = 0;
4254 int err;
1c79356b 4255
6d2010ae
A
4256 if (ifp == NULL)
4257 return (EINVAL);
4258
7ddcb079
A
4259 /*
4260 * Serialize ifnet attach using dlil_ifnet_lock, in order to
4261 * prevent the interface from being configured while it is
4262 * embryonic, as ifnet_head_lock is dropped and reacquired
4263 * below prior to marking the ifnet with IFRF_ATTACHED.
4264 */
4265 dlil_if_lock();
6d2010ae 4266 ifnet_head_lock_exclusive();
91447636
A
4267 /* Verify we aren't already on the list */
4268 TAILQ_FOREACH(tmp_if, &ifnet_head, if_link) {
4269 if (tmp_if == ifp) {
4270 ifnet_head_done();
7ddcb079 4271 dlil_if_unlock();
6d2010ae 4272 return (EEXIST);
91447636
A
4273 }
4274 }
0b4e3aa0 4275
6d2010ae
A
4276 lck_mtx_lock_spin(&ifp->if_ref_lock);
4277 if (ifp->if_refflags & IFRF_ATTACHED) {
316670eb 4278 panic_plain("%s: flags mismatch (attached set) ifp=%p",
6d2010ae
A
4279 __func__, ifp);
4280 /* NOTREACHED */
91447636 4281 }
6d2010ae 4282 lck_mtx_unlock(&ifp->if_ref_lock);
1c79356b 4283
6d2010ae 4284 ifnet_lock_exclusive(ifp);
b0d623f7 4285
6d2010ae
A
4286 /* Sanity check */
4287 VERIFY(ifp->if_detaching_link.tqe_next == NULL);
4288 VERIFY(ifp->if_detaching_link.tqe_prev == NULL);
4289
4290 if (ll_addr != NULL) {
4291 if (ifp->if_addrlen == 0) {
4292 ifp->if_addrlen = ll_addr->sdl_alen;
4293 } else if (ll_addr->sdl_alen != ifp->if_addrlen) {
4294 ifnet_lock_done(ifp);
4295 ifnet_head_done();
7ddcb079 4296 dlil_if_unlock();
6d2010ae 4297 return (EINVAL);
b0d623f7
A
4298 }
4299 }
4300
91447636 4301 /*
b0d623f7 4302 * Allow interfaces without protocol families to attach
91447636
A
4303 * only if they have the necessary fields filled out.
4304 */
6d2010ae
A
4305 if (ifp->if_add_proto == NULL || ifp->if_del_proto == NULL) {
4306 DLIL_PRINTF("%s: Attempt to attach interface without "
4307 "family module - %d\n", __func__, ifp->if_family);
4308 ifnet_lock_done(ifp);
4309 ifnet_head_done();
7ddcb079 4310 dlil_if_unlock();
6d2010ae 4311 return (ENODEV);
1c79356b
A
4312 }
4313
6d2010ae
A
4314 /* Allocate protocol hash table */
4315 VERIFY(ifp->if_proto_hash == NULL);
4316 ifp->if_proto_hash = zalloc(dlif_phash_zone);
4317 if (ifp->if_proto_hash == NULL) {
4318 ifnet_lock_done(ifp);
4319 ifnet_head_done();
7ddcb079 4320 dlil_if_unlock();
6d2010ae
A
4321 return (ENOBUFS);
4322 }
4323 bzero(ifp->if_proto_hash, dlif_phash_size);
91447636 4324
6d2010ae
A
4325 lck_mtx_lock_spin(&ifp->if_flt_lock);
4326 VERIFY(TAILQ_EMPTY(&ifp->if_flt_head));
91447636 4327 TAILQ_INIT(&ifp->if_flt_head);
6d2010ae
A
4328 VERIFY(ifp->if_flt_busy == 0);
4329 VERIFY(ifp->if_flt_waiters == 0);
4330 lck_mtx_unlock(&ifp->if_flt_lock);
4331
4332 VERIFY(TAILQ_EMPTY(&ifp->if_prefixhead));
4333 TAILQ_INIT(&ifp->if_prefixhead);
4334
4335 if (!(dl_if->dl_if_flags & DLIF_REUSE)) {
4336 VERIFY(LIST_EMPTY(&ifp->if_multiaddrs));
91447636 4337 LIST_INIT(&ifp->if_multiaddrs);
6d2010ae 4338 }
1c79356b 4339
6d2010ae
A
4340 VERIFY(ifp->if_allhostsinm == NULL);
4341 VERIFY(TAILQ_EMPTY(&ifp->if_addrhead));
4342 TAILQ_INIT(&ifp->if_addrhead);
4343
6d2010ae
A
4344 if (ifp->if_index == 0) {
4345 int idx = if_next_index();
4346
4347 if (idx == -1) {
4348 ifp->if_index = 0;
4349 ifnet_lock_done(ifp);
4350 ifnet_head_done();
7ddcb079 4351 dlil_if_unlock();
6d2010ae 4352 return (ENOBUFS);
1c79356b 4353 }
6d2010ae
A
4354 ifp->if_index = idx;
4355 }
4356 /* There should not be anything occupying this slot */
4357 VERIFY(ifindex2ifnet[ifp->if_index] == NULL);
4358
4359 /* allocate (if needed) and initialize a link address */
4360 VERIFY(!(dl_if->dl_if_flags & DLIF_REUSE) || ifp->if_lladdr != NULL);
4361 ifa = dlil_alloc_lladdr(ifp, ll_addr);
4362 if (ifa == NULL) {
4363 ifnet_lock_done(ifp);
4364 ifnet_head_done();
7ddcb079 4365 dlil_if_unlock();
6d2010ae
A
4366 return (ENOBUFS);
4367 }
4368
4369 VERIFY(ifnet_addrs[ifp->if_index - 1] == NULL);
4370 ifnet_addrs[ifp->if_index - 1] = ifa;
4371
4372 /* make this address the first on the list */
4373 IFA_LOCK(ifa);
4374 /* hold a reference for ifnet_addrs[] */
4375 IFA_ADDREF_LOCKED(ifa);
4376 /* if_attach_link_ifa() holds a reference for ifa_link */
4377 if_attach_link_ifa(ifp, ifa);
4378 IFA_UNLOCK(ifa);
4379
2d21ac55 4380#if CONFIG_MACF_NET
6d2010ae 4381 mac_ifnet_label_associate(ifp);
2d21ac55 4382#endif
2d21ac55 4383
6d2010ae
A
4384 TAILQ_INSERT_TAIL(&ifnet_head, ifp, if_link);
4385 ifindex2ifnet[ifp->if_index] = ifp;
2d21ac55 4386
6d2010ae
A
4387 /* Hold a reference to the underlying dlil_ifnet */
4388 ifnet_reference(ifp);
4389
316670eb
A
4390 /* Clear stats (save and restore other fields that we care) */
4391 if_data_saved = ifp->if_data;
4392 bzero(&ifp->if_data, sizeof (ifp->if_data));
4393 ifp->if_data.ifi_type = if_data_saved.ifi_type;
4394 ifp->if_data.ifi_typelen = if_data_saved.ifi_typelen;
4395 ifp->if_data.ifi_physical = if_data_saved.ifi_physical;
4396 ifp->if_data.ifi_addrlen = if_data_saved.ifi_addrlen;
4397 ifp->if_data.ifi_hdrlen = if_data_saved.ifi_hdrlen;
4398 ifp->if_data.ifi_mtu = if_data_saved.ifi_mtu;
4399 ifp->if_data.ifi_baudrate = if_data_saved.ifi_baudrate;
4400 ifp->if_data.ifi_hwassist = if_data_saved.ifi_hwassist;
4401 ifp->if_data.ifi_tso_v4_mtu = if_data_saved.ifi_tso_v4_mtu;
4402 ifp->if_data.ifi_tso_v6_mtu = if_data_saved.ifi_tso_v6_mtu;
4403 ifnet_touch_lastchange(ifp);
4404
4405 VERIFY(ifp->if_output_sched_model == IFNET_SCHED_MODEL_NORMAL ||
4406 ifp->if_output_sched_model == IFNET_SCHED_MODEL_DRIVER_MANAGED);
4407
4408 /* By default, use SFB and enable flow advisory */
4409 sflags = PKTSCHEDF_QALG_SFB;
4410 if (if_flowadv)
4411 sflags |= PKTSCHEDF_QALG_FLOWCTL;
4412
4413 /* Initialize transmit queue(s) */
4414 err = ifclassq_setup(ifp, sflags, (dl_if->dl_if_flags & DLIF_REUSE));
4415 if (err != 0) {
4416 panic_plain("%s: ifp=%p couldn't initialize transmit queue; "
4417 "err=%d", __func__, ifp, err);
4418 /* NOTREACHED */
4419 }
4420
4421 /* Sanity checks on the input thread storage */
4422 dl_inp = &dl_if->dl_if_inpstorage;
4423 bzero(&dl_inp->stats, sizeof (dl_inp->stats));
4424 VERIFY(dl_inp->input_waiting == 0);
4425 VERIFY(dl_inp->wtot == 0);
4426 VERIFY(dl_inp->ifp == NULL);
4427 VERIFY(qhead(&dl_inp->rcvq_pkts) == NULL && qempty(&dl_inp->rcvq_pkts));
4428 VERIFY(qlimit(&dl_inp->rcvq_pkts) == 0);
4429 VERIFY(!dl_inp->net_affinity);
4430 VERIFY(ifp->if_inp == NULL);
4431 VERIFY(dl_inp->input_thr == THREAD_NULL);
4432 VERIFY(dl_inp->wloop_thr == THREAD_NULL);
4433 VERIFY(dl_inp->poll_thr == THREAD_NULL);
4434 VERIFY(dl_inp->tag == 0);
4435 VERIFY(dl_inp->mode == IFNET_MODEL_INPUT_POLL_OFF);
4436 bzero(&dl_inp->tstats, sizeof (dl_inp->tstats));
4437 bzero(&dl_inp->pstats, sizeof (dl_inp->pstats));
4438 bzero(&dl_inp->sstats, sizeof (dl_inp->sstats));
4439#if IFNET_INPUT_SANITY_CHK
4440 VERIFY(dl_inp->input_mbuf_cnt == 0);
4441#endif /* IFNET_INPUT_SANITY_CHK */
4442
4443 /*
4444 * A specific DLIL input thread is created per Ethernet/cellular
4445 * interface or for an interface which supports opportunistic
4446 * input polling. Pseudo interfaces or other types of interfaces
4447 * use the main input thread instead.
4448 */
4449 if ((net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) ||
4450 ifp->if_type == IFT_ETHER || ifp->if_type == IFT_CELLULAR) {
4451 ifp->if_inp = dl_inp;
4452 err = dlil_create_input_thread(ifp, ifp->if_inp);
4453 if (err != 0) {
4454 panic_plain("%s: ifp=%p couldn't get an input thread; "
4455 "err=%d", __func__, ifp, err);
4456 /* NOTREACHED */
4457 }
4458 }
4459
6d2010ae 4460 /*
316670eb
A
4461 * If the driver supports the new transmit model, create a workloop
4462 * starter thread to invoke the if_start callback where the packets
4463 * may be dequeued and transmitted.
6d2010ae 4464 */
316670eb
A
4465 if (ifp->if_eflags & IFEF_TXSTART) {
4466 VERIFY(ifp->if_start != NULL);
4467 VERIFY(ifp->if_start_thread == THREAD_NULL);
4468
4469 ifnet_set_start_cycle(ifp, NULL);
4470 ifp->if_start_active = 0;
4471 ifp->if_start_req = 0;
4472 if ((err = kernel_thread_start(ifnet_start_thread_fn, ifp,
4473 &ifp->if_start_thread)) != KERN_SUCCESS) {
4474 panic_plain("%s: ifp=%p couldn't get a start thread; "
4475 "err=%d", __func__, ifp, err);
6d2010ae
A
4476 /* NOTREACHED */
4477 }
316670eb
A
4478 ml_thread_policy(ifp->if_start_thread, MACHINE_GROUP,
4479 (MACHINE_NETWORK_GROUP|MACHINE_NETWORK_WORKLOOP));
4480 }
4481
4482 /*
4483 * If the driver supports the new receive model, create a poller
4484 * thread to invoke if_input_poll callback where the packets may
4485 * be dequeued from the driver and processed for reception.
4486 */
4487 if (ifp->if_eflags & IFEF_RXPOLL) {
4488 VERIFY(ifp->if_input_poll != NULL);
4489 VERIFY(ifp->if_input_ctl != NULL);
4490 VERIFY(ifp->if_poll_thread == THREAD_NULL);
4491
4492 ifnet_set_poll_cycle(ifp, NULL);
4493 ifp->if_poll_update = 0;
4494 ifp->if_poll_active = 0;
4495 ifp->if_poll_req = 0;
4496 if ((err = kernel_thread_start(ifnet_poll_thread_fn, ifp,
4497 &ifp->if_poll_thread)) != KERN_SUCCESS) {
4498 panic_plain("%s: ifp=%p couldn't get a poll thread; "
6d2010ae
A
4499 "err=%d", __func__, ifp, err);
4500 /* NOTREACHED */
4501 }
316670eb
A
4502 ml_thread_policy(ifp->if_poll_thread, MACHINE_GROUP,
4503 (MACHINE_NETWORK_GROUP|MACHINE_NETWORK_WORKLOOP));
91447636 4504 }
6d2010ae 4505
316670eb
A
4506 VERIFY(ifp->if_desc.ifd_maxlen == IF_DESCSIZE);
4507 VERIFY(ifp->if_desc.ifd_len == 0);
4508 VERIFY(ifp->if_desc.ifd_desc != NULL);
6d2010ae
A
4509
4510 /* Record attach PC stacktrace */
4511 ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_attach);
4512
4513 ifp->if_updatemcasts = 0;
4514 if (!LIST_EMPTY(&ifp->if_multiaddrs)) {
4515 struct ifmultiaddr *ifma;
4516 LIST_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
4517 IFMA_LOCK(ifma);
4518 if (ifma->ifma_addr->sa_family == AF_LINK ||
4519 ifma->ifma_addr->sa_family == AF_UNSPEC)
4520 ifp->if_updatemcasts++;
4521 IFMA_UNLOCK(ifma);
4522 }
4523
4524 printf("%s%d: attached with %d suspended link-layer multicast "
4525 "membership(s)\n", ifp->if_name, ifp->if_unit,
4526 ifp->if_updatemcasts);
4527 }
4528
0c530ab8 4529 ifnet_lock_done(ifp);
b0d623f7 4530 ifnet_head_done();
6d2010ae
A
4531
4532 lck_mtx_lock(&ifp->if_cached_route_lock);
4533 /* Enable forwarding cached route */
4534 ifp->if_fwd_cacheok = 1;
4535 /* Clean up any existing cached routes */
4536 if (ifp->if_fwd_route.ro_rt != NULL)
4537 rtfree(ifp->if_fwd_route.ro_rt);
4538 bzero(&ifp->if_fwd_route, sizeof (ifp->if_fwd_route));
4539 if (ifp->if_src_route.ro_rt != NULL)
4540 rtfree(ifp->if_src_route.ro_rt);
4541 bzero(&ifp->if_src_route, sizeof (ifp->if_src_route));
4542 if (ifp->if_src_route6.ro_rt != NULL)
4543 rtfree(ifp->if_src_route6.ro_rt);
4544 bzero(&ifp->if_src_route6, sizeof (ifp->if_src_route6));
4545 lck_mtx_unlock(&ifp->if_cached_route_lock);
4546
4547 ifnet_llreach_ifattach(ifp, (dl_if->dl_if_flags & DLIF_REUSE));
4548
b0d623f7 4549 /*
6d2010ae
A
4550 * Allocate and attach IGMPv3/MLDv2 interface specific variables
4551 * and trees; do this before the ifnet is marked as attached.
4552 * The ifnet keeps the reference to the info structures even after
4553 * the ifnet is detached, since the network-layer records still
4554 * refer to the info structures even after that. This also
4555 * makes it possible for them to still function after the ifnet
4556 * is recycled or reattached.
b0d623f7 4557 */
6d2010ae
A
4558#if INET
4559 if (IGMP_IFINFO(ifp) == NULL) {
4560 IGMP_IFINFO(ifp) = igmp_domifattach(ifp, M_WAITOK);
4561 VERIFY(IGMP_IFINFO(ifp) != NULL);
4562 } else {
4563 VERIFY(IGMP_IFINFO(ifp)->igi_ifp == ifp);
4564 igmp_domifreattach(IGMP_IFINFO(ifp));
4565 }
4566#endif /* INET */
4567#if INET6
4568 if (MLD_IFINFO(ifp) == NULL) {
4569 MLD_IFINFO(ifp) = mld_domifattach(ifp, M_WAITOK);
4570 VERIFY(MLD_IFINFO(ifp) != NULL);
4571 } else {
4572 VERIFY(MLD_IFINFO(ifp)->mli_ifp == ifp);
4573 mld_domifreattach(MLD_IFINFO(ifp));
4574 }
4575#endif /* INET6 */
b0d623f7 4576
6d2010ae
A
4577 /*
4578 * Finally, mark this ifnet as attached.
4579 */
4580 lck_mtx_lock(rnh_lock);
4581 ifnet_lock_exclusive(ifp);
316670eb
A
4582 /* Initialize Link Quality Metric (loopback [lo0] is always good) */
4583 ifp->if_lqm = (ifp == lo_ifp) ? IFNET_LQM_THRESH_GOOD :
4584 IFNET_LQM_THRESH_UNKNOWN;
6d2010ae
A
4585 lck_mtx_lock_spin(&ifp->if_ref_lock);
4586 ifp->if_refflags = IFRF_ATTACHED;
4587 lck_mtx_unlock(&ifp->if_ref_lock);
d1ecb069 4588 if (net_rtref) {
6d2010ae
A
4589 /* boot-args override; enable idle notification */
4590 (void) ifnet_set_idle_flags_locked(ifp, IFRF_IDLE_NOTIFY,
d1ecb069 4591 IFRF_IDLE_NOTIFY);
6d2010ae
A
4592 } else {
4593 /* apply previous request(s) to set the idle flags, if any */
4594 (void) ifnet_set_idle_flags_locked(ifp, ifp->if_idle_new_flags,
4595 ifp->if_idle_new_flags_mask);
4596
d1ecb069 4597 }
6d2010ae
A
4598 ifnet_lock_done(ifp);
4599 lck_mtx_unlock(rnh_lock);
7ddcb079 4600 dlil_if_unlock();
6d2010ae
A
4601
4602#if PF
4603 /*
4604 * Attach packet filter to this interface, if enabled.
4605 */
4606 pf_ifnet_hook(ifp, 1);
4607#endif /* PF */
d1ecb069 4608
2d21ac55 4609 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_ATTACHED, NULL, 0);
1c79356b 4610
6d2010ae
A
4611 if (dlil_verbose) {
4612 printf("%s%d: attached%s\n", ifp->if_name, ifp->if_unit,
4613 (dl_if->dl_if_flags & DLIF_REUSE) ? " (recycled)" : "");
4614 }
4615
4616 return (0);
4617}
4618
4619/*
4620 * Prepare the storage for the first/permanent link address, which must
4621 * must have the same lifetime as the ifnet itself. Although the link
4622 * address gets removed from if_addrhead and ifnet_addrs[] at detach time,
4623 * its location in memory must never change as it may still be referred
4624 * to by some parts of the system afterwards (unfortunate implementation
4625 * artifacts inherited from BSD.)
4626 *
4627 * Caller must hold ifnet lock as writer.
4628 */
4629static struct ifaddr *
4630dlil_alloc_lladdr(struct ifnet *ifp, const struct sockaddr_dl *ll_addr)
4631{
4632 struct ifaddr *ifa, *oifa;
4633 struct sockaddr_dl *asdl, *msdl;
4634 char workbuf[IFNAMSIZ*2];
4635 int namelen, masklen, socksize;
4636 struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
4637
4638 ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_EXCLUSIVE);
4639 VERIFY(ll_addr == NULL || ll_addr->sdl_alen == ifp->if_addrlen);
4640
4641 namelen = snprintf(workbuf, sizeof (workbuf), "%s%d",
4642 ifp->if_name, ifp->if_unit);
4643 masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + namelen;
4644 socksize = masklen + ifp->if_addrlen;
4645#define ROUNDUP(a) (1 + (((a) - 1) | (sizeof (u_int32_t) - 1)))
4646 if ((u_int32_t)socksize < sizeof (struct sockaddr_dl))
4647 socksize = sizeof(struct sockaddr_dl);
4648 socksize = ROUNDUP(socksize);
4649#undef ROUNDUP
4650
4651 ifa = ifp->if_lladdr;
4652 if (socksize > DLIL_SDLMAXLEN ||
4653 (ifa != NULL && ifa != &dl_if->dl_if_lladdr.ifa)) {
4654 /*
4655 * Rare, but in the event that the link address requires
4656 * more storage space than DLIL_SDLMAXLEN, allocate the
4657 * largest possible storages for address and mask, such
4658 * that we can reuse the same space when if_addrlen grows.
4659 * This same space will be used when if_addrlen shrinks.
4660 */
4661 if (ifa == NULL || ifa == &dl_if->dl_if_lladdr.ifa) {
4662 int ifasize = sizeof (*ifa) + 2 * SOCK_MAXADDRLEN;
4663 ifa = _MALLOC(ifasize, M_IFADDR, M_WAITOK | M_ZERO);
4664 if (ifa == NULL)
4665 return (NULL);
4666 ifa_lock_init(ifa);
4667 /* Don't set IFD_ALLOC, as this is permanent */
4668 ifa->ifa_debug = IFD_LINK;
4669 }
4670 IFA_LOCK(ifa);
4671 /* address and mask sockaddr_dl locations */
4672 asdl = (struct sockaddr_dl *)(ifa + 1);
4673 bzero(asdl, SOCK_MAXADDRLEN);
316670eb
A
4674 msdl = (struct sockaddr_dl *)(void *)
4675 ((char *)asdl + SOCK_MAXADDRLEN);
6d2010ae
A
4676 bzero(msdl, SOCK_MAXADDRLEN);
4677 } else {
4678 VERIFY(ifa == NULL || ifa == &dl_if->dl_if_lladdr.ifa);
4679 /*
4680 * Use the storage areas for address and mask within the
4681 * dlil_ifnet structure. This is the most common case.
4682 */
4683 if (ifa == NULL) {
4684 ifa = &dl_if->dl_if_lladdr.ifa;
4685 ifa_lock_init(ifa);
4686 /* Don't set IFD_ALLOC, as this is permanent */
4687 ifa->ifa_debug = IFD_LINK;
4688 }
4689 IFA_LOCK(ifa);
4690 /* address and mask sockaddr_dl locations */
316670eb 4691 asdl = (struct sockaddr_dl *)(void *)&dl_if->dl_if_lladdr.asdl;
6d2010ae 4692 bzero(asdl, sizeof (dl_if->dl_if_lladdr.asdl));
316670eb 4693 msdl = (struct sockaddr_dl *)(void *)&dl_if->dl_if_lladdr.msdl;
6d2010ae
A
4694 bzero(msdl, sizeof (dl_if->dl_if_lladdr.msdl));
4695 }
4696
4697 /* hold a permanent reference for the ifnet itself */
4698 IFA_ADDREF_LOCKED(ifa);
4699 oifa = ifp->if_lladdr;
4700 ifp->if_lladdr = ifa;
4701
4702 VERIFY(ifa->ifa_debug == IFD_LINK);
4703 ifa->ifa_ifp = ifp;
4704 ifa->ifa_rtrequest = link_rtrequest;
4705 ifa->ifa_addr = (struct sockaddr *)asdl;
4706 asdl->sdl_len = socksize;
4707 asdl->sdl_family = AF_LINK;
4708 bcopy(workbuf, asdl->sdl_data, namelen);
4709 asdl->sdl_nlen = namelen;
4710 asdl->sdl_index = ifp->if_index;
4711 asdl->sdl_type = ifp->if_type;
4712 if (ll_addr != NULL) {
4713 asdl->sdl_alen = ll_addr->sdl_alen;
4714 bcopy(CONST_LLADDR(ll_addr), LLADDR(asdl), asdl->sdl_alen);
4715 } else {
4716 asdl->sdl_alen = 0;
4717 }
4718 ifa->ifa_netmask = (struct sockaddr*)msdl;
4719 msdl->sdl_len = masklen;
4720 while (namelen != 0)
4721 msdl->sdl_data[--namelen] = 0xff;
4722 IFA_UNLOCK(ifa);
4723
4724 if (oifa != NULL)
4725 IFA_REMREF(oifa);
4726
4727 return (ifa);
4728}
4729
/*
 * Purge the addresses of every configured network-layer family
 * (INET, INET6, NETAT) from ifp.
 */
static void
if_purgeaddrs(struct ifnet *ifp)
{
#if INET
	in_purgeaddrs(ifp);
#endif /* INET */
#if INET6
	in6_purgeaddrs(ifp);
#endif /* INET6 */
#if NETAT
	at_purgeaddrs(ifp);
#endif /* NETAT */
}
4743
2d21ac55 4744errno_t
6d2010ae 4745ifnet_detach(ifnet_t ifp)
1c79356b 4746{
6d2010ae
A
4747 if (ifp == NULL)
4748 return (EINVAL);
4749
6d2010ae 4750 lck_mtx_lock(rnh_lock);
316670eb 4751 ifnet_head_lock_exclusive();
91447636 4752 ifnet_lock_exclusive(ifp);
6d2010ae
A
4753
4754 /*
4755 * Check to see if this interface has previously triggered
4756 * aggressive protocol draining; if so, decrement the global
4757 * refcnt and clear PR_AGGDRAIN on the route domain if
4758 * there are no more of such an interface around.
4759 */
4760 (void) ifnet_set_idle_flags_locked(ifp, 0, ~0);
4761
4762 lck_mtx_lock_spin(&ifp->if_ref_lock);
4763 if (!(ifp->if_refflags & IFRF_ATTACHED)) {
4764 lck_mtx_unlock(&ifp->if_ref_lock);
4765 ifnet_lock_done(ifp);
6d2010ae 4766 ifnet_head_done();
13f56ec4 4767 lck_mtx_unlock(rnh_lock);
6d2010ae
A
4768 return (EINVAL);
4769 } else if (ifp->if_refflags & IFRF_DETACHING) {
91447636 4770 /* Interface has already been detached */
6d2010ae 4771 lck_mtx_unlock(&ifp->if_ref_lock);
91447636 4772 ifnet_lock_done(ifp);
6d2010ae 4773 ifnet_head_done();
13f56ec4 4774 lck_mtx_unlock(rnh_lock);
6d2010ae 4775 return (ENXIO);
55e303ae 4776 }
6d2010ae
A
4777 /* Indicate this interface is being detached */
4778 ifp->if_refflags &= ~IFRF_ATTACHED;
4779 ifp->if_refflags |= IFRF_DETACHING;
4780 lck_mtx_unlock(&ifp->if_ref_lock);
4781
4782 if (dlil_verbose)
4783 printf("%s%d: detaching\n", ifp->if_name, ifp->if_unit);
4784
91447636 4785 /*
6d2010ae
A
4786 * Remove ifnet from the ifnet_head, ifindex2ifnet[]; it will
4787 * no longer be visible during lookups from this point.
91447636 4788 */
6d2010ae
A
4789 VERIFY(ifindex2ifnet[ifp->if_index] == ifp);
4790 TAILQ_REMOVE(&ifnet_head, ifp, if_link);
4791 ifp->if_link.tqe_next = NULL;
4792 ifp->if_link.tqe_prev = NULL;
4793 ifindex2ifnet[ifp->if_index] = NULL;
4794
4795 /* Record detach PC stacktrace */
4796 ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_detach);
4797
91447636 4798 ifnet_lock_done(ifp);
6d2010ae 4799 ifnet_head_done();
13f56ec4 4800 lck_mtx_unlock(rnh_lock);
6d2010ae 4801
316670eb
A
4802 /* Reset Link Quality Metric (unless loopback [lo0]) */
4803 if (ifp != lo_ifp)
4804 if_lqm_update(ifp, IFNET_LQM_THRESH_OFF);
4805
4806 /* Reset TCP local statistics */
4807 if (ifp->if_tcp_stat != NULL)
4808 bzero(ifp->if_tcp_stat, sizeof(*ifp->if_tcp_stat));
4809
4810 /* Reset UDP local statistics */
4811 if (ifp->if_udp_stat != NULL)
4812 bzero(ifp->if_udp_stat, sizeof(*ifp->if_udp_stat));
4813
2d21ac55
A
4814 /* Let BPF know we're detaching */
4815 bpfdetach(ifp);
6d2010ae
A
4816
4817 /* Mark the interface as DOWN */
4818 if_down(ifp);
4819
316670eb
A
4820 /* Drain send queue */
4821 ifclassq_teardown(ifp);
4822
6d2010ae
A
4823 /* Disable forwarding cached route */
4824 lck_mtx_lock(&ifp->if_cached_route_lock);
4825 ifp->if_fwd_cacheok = 0;
4826 lck_mtx_unlock(&ifp->if_cached_route_lock);
4827
d1ecb069 4828 /*
6d2010ae
A
4829 * Drain any deferred IGMPv3/MLDv2 query responses, but keep the
4830 * references to the info structures and leave them attached to
4831 * this ifnet.
d1ecb069 4832 */
6d2010ae
A
4833#if INET
4834 igmp_domifdetach(ifp);
4835#endif /* INET */
4836#if INET6
4837 mld_domifdetach(ifp);
4838#endif /* INET6 */
4839
4840 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHING, NULL, 0);
4841
4842 /* Let worker thread take care of the rest, to avoid reentrancy */
7ddcb079 4843 dlil_if_lock();
6d2010ae 4844 ifnet_detaching_enqueue(ifp);
7ddcb079 4845 dlil_if_unlock();
6d2010ae
A
4846
4847 return (0);
4848}
4849
4850static void
4851ifnet_detaching_enqueue(struct ifnet *ifp)
4852{
7ddcb079 4853 dlil_if_lock_assert();
6d2010ae
A
4854
4855 ++ifnet_detaching_cnt;
4856 VERIFY(ifnet_detaching_cnt != 0);
4857 TAILQ_INSERT_TAIL(&ifnet_detaching_head, ifp, if_detaching_link);
4858 wakeup((caddr_t)&ifnet_delayed_run);
4859}
4860
4861static struct ifnet *
4862ifnet_detaching_dequeue(void)
4863{
4864 struct ifnet *ifp;
4865
7ddcb079 4866 dlil_if_lock_assert();
6d2010ae
A
4867
4868 ifp = TAILQ_FIRST(&ifnet_detaching_head);
4869 VERIFY(ifnet_detaching_cnt != 0 || ifp == NULL);
4870 if (ifp != NULL) {
4871 VERIFY(ifnet_detaching_cnt != 0);
4872 --ifnet_detaching_cnt;
4873 TAILQ_REMOVE(&ifnet_detaching_head, ifp, if_detaching_link);
4874 ifp->if_detaching_link.tqe_next = NULL;
4875 ifp->if_detaching_link.tqe_prev = NULL;
4876 }
4877 return (ifp);
4878}
4879
316670eb
A
4880static int
4881ifnet_detacher_thread_cont(int err)
6d2010ae 4882{
316670eb 4883#pragma unused(err)
6d2010ae
A
4884 struct ifnet *ifp;
4885
4886 for (;;) {
316670eb 4887 dlil_if_lock_assert();
6d2010ae 4888 while (ifnet_detaching_cnt == 0) {
316670eb
A
4889 (void) msleep0(&ifnet_delayed_run, &dlil_ifnet_lock,
4890 (PZERO - 1), "ifnet_detacher_cont", 0,
4891 ifnet_detacher_thread_cont);
4892 /* NOTREACHED */
6d2010ae
A
4893 }
4894
4895 VERIFY(TAILQ_FIRST(&ifnet_detaching_head) != NULL);
4896
4897 /* Take care of detaching ifnet */
4898 ifp = ifnet_detaching_dequeue();
316670eb
A
4899 if (ifp != NULL) {
4900 dlil_if_unlock();
6d2010ae 4901 ifnet_detach_final(ifp);
316670eb
A
4902 dlil_if_lock();
4903 }
55e303ae 4904 }
316670eb
A
4905 /* NOTREACHED */
4906 return (0);
4907}
4908
4909static void
4910ifnet_detacher_thread_func(void *v, wait_result_t w)
4911{
4912#pragma unused(v, w)
4913 dlil_if_lock();
4914 (void) msleep0(&ifnet_delayed_run, &dlil_ifnet_lock,
4915 (PZERO - 1), "ifnet_detacher", 0, ifnet_detacher_thread_cont);
4916 /*
4917 * msleep0() shouldn't have returned as PCATCH was not set;
4918 * therefore assert in this case.
4919 */
4920 dlil_if_unlock();
4921 VERIFY(0);
6d2010ae 4922}
b0d623f7 4923
6d2010ae
A
/*
 * Second stage of interface detach, invoked from the detacher thread
 * continuation (ifnet_detacher_thread_cont) for an ifnet that has
 * IFRF_DETACHING set.  In order, it:
 *   - waits for outstanding IO references (if_refio) to drain;
 *   - detaches all interface filters;
 *   - purges addresses and unplumbs every attached protocol;
 *   - removes the link address from if_addrhead and ifnet_addrs[];
 *   - signals the starter, poller and DLIL input threads to terminate;
 *   - redirects all driver callbacks to the local ifp_if_* stubs;
 *   - flushes the send queue and cached routes;
 *   - posts KEV_DL_IF_DETACHED, calls the saved if_free callback,
 *     clears IFRF_DETACHING and drops the attach-time reference.
 * Panics if IFRF_DETACHING is not set on entry or at the end.
 */
static void
ifnet_detach_final(struct ifnet *ifp)
{
	struct ifnet_filter *filter, *filter_next;
	struct ifnet_filter_head fhead;
	struct dlil_threading_info *inp;
	struct ifaddr *ifa;
	ifnet_detached_func if_free;
	int i;

	lck_mtx_lock(&ifp->if_ref_lock);
	if (!(ifp->if_refflags & IFRF_DETACHING)) {
		panic("%s: flags mismatch (detaching not set) ifp=%p",
		    __func__, ifp);
		/* NOTREACHED */
	}

	/*
	 * Wait until the existing IO references get released
	 * before we proceed with ifnet_detach.  This is not a
	 * common case, so block without using a continuation.
	 */
	while (ifp->if_refio > 0) {
		printf("%s: Waiting for IO references on %s%d interface "
		    "to be released\n", __func__, ifp->if_name, ifp->if_unit);
		(void) msleep(&(ifp->if_refio), &ifp->if_ref_lock,
		    (PZERO - 1), "ifnet_ioref_wait", NULL);
	}
	lck_mtx_unlock(&ifp->if_ref_lock);

	/* Detach interface filters */
	lck_mtx_lock(&ifp->if_flt_lock);
	if_flt_monitor_enter(ifp);

	lck_mtx_assert(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
	/* Move the filter list aside so the ifnet's own list is empty */
	fhead = ifp->if_flt_head;
	TAILQ_INIT(&ifp->if_flt_head);

	for (filter = TAILQ_FIRST(&fhead); filter; filter = filter_next) {
		/* Fetch the successor first; the filter is detached below */
		filter_next = TAILQ_NEXT(filter, filt_next);
		lck_mtx_unlock(&ifp->if_flt_lock);

		dlil_detach_filter_internal(filter, 1);
		lck_mtx_lock(&ifp->if_flt_lock);
	}
	if_flt_monitor_leave(ifp);
	lck_mtx_unlock(&ifp->if_flt_lock);

	/* Tell upper layers to drop their network addresses */
	if_purgeaddrs(ifp);

	ifnet_lock_exclusive(ifp);

	/* Unplumb all protocols */
	for (i = 0; i < PROTO_HASH_SLOTS; i++) {
		struct if_proto *proto;

		proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
		while (proto != NULL) {
			protocol_family_t family = proto->protocol_family;
			/* proto_unplumb() is called with the ifnet lock dropped */
			ifnet_lock_done(ifp);
			proto_unplumb(family, ifp);
			ifnet_lock_exclusive(ifp);
			/* Re-read the bucket head after retaking the lock */
			proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
		}
		/* There should not be any protocols left */
		VERIFY(SLIST_EMPTY(&ifp->if_proto_hash[i]));
	}
	zfree(dlif_phash_zone, ifp->if_proto_hash);
	ifp->if_proto_hash = NULL;

	/* Detach (permanent) link address from if_addrhead */
	ifa = TAILQ_FIRST(&ifp->if_addrhead);
	VERIFY(ifnet_addrs[ifp->if_index - 1] == ifa);
	IFA_LOCK(ifa);
	if_detach_link_ifa(ifp, ifa);
	IFA_UNLOCK(ifa);

	/* Remove (permanent) link address from ifnet_addrs[] */
	IFA_REMREF(ifa);
	ifnet_addrs[ifp->if_index - 1] = NULL;

	/* This interface should not be on {ifnet_head,detaching} */
	VERIFY(ifp->if_link.tqe_next == NULL);
	VERIFY(ifp->if_link.tqe_prev == NULL);
	VERIFY(ifp->if_detaching_link.tqe_next == NULL);
	VERIFY(ifp->if_detaching_link.tqe_prev == NULL);

	/* Prefix list should be empty by now */
	VERIFY(TAILQ_EMPTY(&ifp->if_prefixhead));

	/* The slot should have been emptied */
	VERIFY(ifindex2ifnet[ifp->if_index] == NULL);

	/* There should not be any addresses left */
	VERIFY(TAILQ_EMPTY(&ifp->if_addrhead));

	/*
	 * Signal the starter thread to terminate itself.
	 */
	if (ifp->if_start_thread != THREAD_NULL) {
		lck_mtx_lock_spin(&ifp->if_start_lock);
		ifp->if_start_thread = THREAD_NULL;
		wakeup_one((caddr_t)&ifp->if_start_thread);
		lck_mtx_unlock(&ifp->if_start_lock);
	}

	/*
	 * Signal the poller thread to terminate itself.
	 */
	if (ifp->if_poll_thread != THREAD_NULL) {
		lck_mtx_lock_spin(&ifp->if_poll_lock);
		ifp->if_poll_thread = THREAD_NULL;
		wakeup_one((caddr_t)&ifp->if_poll_thread);
		lck_mtx_unlock(&ifp->if_poll_lock);
	}

	/*
	 * If thread affinity was set for the workloop thread, we will need
	 * to tear down the affinity and release the extra reference count
	 * taken at attach time.  Does not apply to lo0 or other interfaces
	 * without dedicated input threads.
	 */
	if ((inp = ifp->if_inp) != NULL) {
		VERIFY(inp != dlil_main_input_thread);

		if (inp->net_affinity) {
			struct thread *tp, *wtp, *ptp;

			/* Snapshot and clear the thread pointers under input_lck */
			lck_mtx_lock_spin(&inp->input_lck);
			wtp = inp->wloop_thr;
			inp->wloop_thr = THREAD_NULL;
			ptp = inp->poll_thr;
			inp->poll_thr = THREAD_NULL;
			tp = inp->input_thr;	/* don't nullify now */
			inp->tag = 0;
			inp->net_affinity = FALSE;
			lck_mtx_unlock(&inp->input_lck);

			/* Tear down poll thread affinity */
			if (ptp != NULL) {
				VERIFY(ifp->if_eflags & IFEF_RXPOLL);
				(void) dlil_affinity_set(ptp,
				    THREAD_AFFINITY_TAG_NULL);
				thread_deallocate(ptp);
			}

			/* Tear down workloop thread affinity */
			if (wtp != NULL) {
				(void) dlil_affinity_set(wtp,
				    THREAD_AFFINITY_TAG_NULL);
				thread_deallocate(wtp);
			}

			/* Tear down DLIL input thread affinity */
			(void) dlil_affinity_set(tp, THREAD_AFFINITY_TAG_NULL);
			thread_deallocate(tp);
		}

		/* disassociate ifp DLIL input thread */
		ifp->if_inp = NULL;

		/* Flag termination; wake the input thread unless it is running */
		lck_mtx_lock_spin(&inp->input_lck);
		inp->input_waiting |= DLIL_INPUT_TERMINATE;
		if (!(inp->input_waiting & DLIL_INPUT_RUNNING)) {
			wakeup_one((caddr_t)&inp->input_waiting);
		}
		lck_mtx_unlock(&inp->input_lck);
	}

	/* The driver might unload, so point these to ourselves */
	if_free = ifp->if_free;	/* saved; invoked near the end */
	ifp->if_output = ifp_if_output;
	ifp->if_pre_enqueue = ifp_if_output;
	ifp->if_start = ifp_if_start;
	ifp->if_output_ctl = ifp_if_ctl;
	ifp->if_input_poll = ifp_if_input_poll;
	ifp->if_input_ctl = ifp_if_ctl;
	ifp->if_ioctl = ifp_if_ioctl;
	ifp->if_set_bpf_tap = ifp_if_set_bpf_tap;
	ifp->if_free = ifp_if_free;
	ifp->if_demux = ifp_if_demux;
	ifp->if_event = ifp_if_event;
	ifp->if_framer = ifp_if_framer;
	ifp->if_add_proto = ifp_if_add_proto;
	ifp->if_del_proto = ifp_if_del_proto;
	ifp->if_check_multi = ifp_if_check_multi;

	/* wipe out interface description */
	VERIFY(ifp->if_desc.ifd_maxlen == IF_DESCSIZE);
	ifp->if_desc.ifd_len = 0;
	VERIFY(ifp->if_desc.ifd_desc != NULL);
	bzero(ifp->if_desc.ifd_desc, IF_DESCSIZE);

	ifnet_lock_done(ifp);

#if PF
	/*
	 * Detach this interface from packet filter, if enabled.
	 */
	pf_ifnet_hook(ifp, 0);
#endif /* PF */

	/* Filter list should be empty */
	lck_mtx_lock_spin(&ifp->if_flt_lock);
	VERIFY(TAILQ_EMPTY(&ifp->if_flt_head));
	VERIFY(ifp->if_flt_busy == 0);
	VERIFY(ifp->if_flt_waiters == 0);
	lck_mtx_unlock(&ifp->if_flt_lock);

	/* Last chance to drain send queue */
	if_qflush(ifp, 0);

	/* Last chance to cleanup any cached route */
	lck_mtx_lock(&ifp->if_cached_route_lock);
	VERIFY(!ifp->if_fwd_cacheok);
	if (ifp->if_fwd_route.ro_rt != NULL)
		rtfree(ifp->if_fwd_route.ro_rt);
	bzero(&ifp->if_fwd_route, sizeof (ifp->if_fwd_route));
	if (ifp->if_src_route.ro_rt != NULL)
		rtfree(ifp->if_src_route.ro_rt);
	bzero(&ifp->if_src_route, sizeof (ifp->if_src_route));
	if (ifp->if_src_route6.ro_rt != NULL)
		rtfree(ifp->if_src_route6.ro_rt);
	bzero(&ifp->if_src_route6, sizeof (ifp->if_src_route6));
	lck_mtx_unlock(&ifp->if_cached_route_lock);

	ifnet_llreach_ifdetach(ifp);

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHED, NULL, 0);

	/* Invoke the free callback that was saved before the stubs went in */
	if (if_free != NULL)
		if_free(ifp);

	/*
	 * Finally, mark this ifnet as detached.
	 */
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	if (!(ifp->if_refflags & IFRF_DETACHING)) {
		panic("%s: flags mismatch (detaching not set) ifp=%p",
		    __func__, ifp);
		/* NOTREACHED */
	}
	ifp->if_refflags &= ~IFRF_DETACHING;
	lck_mtx_unlock(&ifp->if_ref_lock);

	if (dlil_verbose)
		printf("%s%d: detached\n", ifp->if_name, ifp->if_unit);

	/* Release reference held during ifnet attach */
	ifnet_release(ifp);
}
9bccf70c 5176
91447636 5177static errno_t
6d2010ae 5178ifp_if_output(struct ifnet *ifp, struct mbuf *m)
9bccf70c 5179{
6d2010ae
A
5180#pragma unused(ifp)
5181 m_freem(m);
5182 return (0);
9bccf70c
A
5183}
5184
316670eb
A
/*
 * Stand-in start routine installed by ifnet_detach_final(): instead of
 * transmitting, purge whatever is queued on the interface.
 */
static void
ifp_if_start(struct ifnet *ifp)
{
	ifnet_purge(ifp);
}
5190
5191static void
5192ifp_if_input_poll(struct ifnet *ifp, u_int32_t flags, u_int32_t max_cnt,
5193 struct mbuf **m_head, struct mbuf **m_tail, u_int32_t *cnt, u_int32_t *len)
5194{
5195#pragma unused(ifp, flags, max_cnt)
5196 if (m_head != NULL)
5197 *m_head = NULL;
5198 if (m_tail != NULL)
5199 *m_tail = NULL;
5200 if (cnt != NULL)
5201 *cnt = 0;
5202 if (len != NULL)
5203 *len = 0;
5204}
5205
5206static errno_t
5207ifp_if_ctl(struct ifnet *ifp, ifnet_ctl_cmd_t cmd, u_int32_t arglen, void *arg)
5208{
5209#pragma unused(ifp, cmd, arglen, arg)
5210 return (EOPNOTSUPP);
5211}
5212
6d2010ae
A
5213static errno_t
5214ifp_if_demux(struct ifnet *ifp, struct mbuf *m, char *fh, protocol_family_t *pf)
9bccf70c 5215{
6d2010ae
A
5216#pragma unused(ifp, fh, pf)
5217 m_freem(m);
5218 return (EJUSTRETURN);
9bccf70c
A
5219}
5220
6d2010ae
A
5221static errno_t
5222ifp_if_add_proto(struct ifnet *ifp, protocol_family_t pf,
5223 const struct ifnet_demux_desc *da, u_int32_t dc)
9bccf70c 5224{
6d2010ae
A
5225#pragma unused(ifp, pf, da, dc)
5226 return (EINVAL);
9bccf70c
A
5227}
5228
91447636 5229static errno_t
6d2010ae 5230ifp_if_del_proto(struct ifnet *ifp, protocol_family_t pf)
9bccf70c 5231{
6d2010ae
A
5232#pragma unused(ifp, pf)
5233 return (EINVAL);
5234}
5235
5236static errno_t
5237ifp_if_check_multi(struct ifnet *ifp, const struct sockaddr *sa)
5238{
5239#pragma unused(ifp, sa)
5240 return (EOPNOTSUPP);
5241}
5242
316670eb
A
5243static errno_t ifp_if_framer(struct ifnet *ifp, struct mbuf **m,
5244const struct sockaddr *sa, const char *ll, const char *t
5245#if CONFIG_EMBEDDED
5246 ,
5247 u_int32_t *pre, u_int32_t *post
5248#endif /* CONFIG_EMBEDDED */
5249 )
6d2010ae
A
5250{
5251#pragma unused(ifp, m, sa, ll, t)
5252 m_freem(*m);
5253 *m = NULL;
316670eb
A
5254#if CONFIG_EMBEDDED
5255 *pre = 0;
5256 *post = 0;
5257#endif /* CONFIG_EMBEDDED */
6d2010ae
A
5258 return (EJUSTRETURN);
5259}
5260
316670eb 5261errno_t
6d2010ae
A
5262ifp_if_ioctl(struct ifnet *ifp, unsigned long cmd, void *arg)
5263{
5264#pragma unused(ifp, cmd, arg)
5265 return (EOPNOTSUPP);
5266}
5267
5268static errno_t
5269ifp_if_set_bpf_tap(struct ifnet *ifp, bpf_tap_mode tm, bpf_packet_func f)
5270{
5271#pragma unused(ifp, tm, f)
5272 /* XXX not sure what to do here */
5273 return (0);
5274}
5275
/*
 * Stand-in free routine installed by ifnet_detach_final(); intentionally
 * does nothing.
 */
static void
ifp_if_free(struct ifnet *ifp)
{
#pragma unused(ifp)
}
5281
/*
 * Stand-in event routine installed by ifnet_detach_final(); events are
 * silently ignored.
 */
static void
ifp_if_event(struct ifnet *ifp, const struct kev_msg *e)
{
#pragma unused(ifp, e)
}
5287
2d21ac55 5288__private_extern__
6d2010ae
A
5289int dlil_if_acquire(u_int32_t family, const void *uniqueid,
5290 size_t uniqueid_len, struct ifnet **ifp)
5291{
5292 struct ifnet *ifp1 = NULL;
5293 struct dlil_ifnet *dlifp1 = NULL;
5294 void *buf, *base, **pbuf;
5295 int ret = 0;
5296
7ddcb079 5297 dlil_if_lock();
6d2010ae
A
5298 TAILQ_FOREACH(dlifp1, &dlil_ifnet_head, dl_if_link) {
5299 ifp1 = (struct ifnet *)dlifp1;
5300
5301 if (ifp1->if_family != family)
5302 continue;
5303
5304 lck_mtx_lock(&dlifp1->dl_if_lock);
5305 /* same uniqueid and same len or no unique id specified */
5306 if ((uniqueid_len == dlifp1->dl_if_uniqueid_len) &&
5307 !bcmp(uniqueid, dlifp1->dl_if_uniqueid, uniqueid_len)) {
5308 /* check for matching interface in use */
5309 if (dlifp1->dl_if_flags & DLIF_INUSE) {
5310 if (uniqueid_len) {
5311 ret = EBUSY;
5312 lck_mtx_unlock(&dlifp1->dl_if_lock);
9bccf70c 5313 goto end;
6d2010ae
A
5314 }
5315 } else {
5316 dlifp1->dl_if_flags |= (DLIF_INUSE|DLIF_REUSE);
5317 lck_mtx_unlock(&dlifp1->dl_if_lock);
5318 *ifp = ifp1;
5319 goto end;
5320 }
5321 }
5322 lck_mtx_unlock(&dlifp1->dl_if_lock);
5323 }
5324
5325 /* no interface found, allocate a new one */
5326 buf = zalloc(dlif_zone);
5327 if (buf == NULL) {
5328 ret = ENOMEM;
5329 goto end;
5330 }
5331 bzero(buf, dlif_bufsize);
5332
5333 /* Get the 64-bit aligned base address for this object */
5334 base = (void *)P2ROUNDUP((intptr_t)buf + sizeof (u_int64_t),
5335 sizeof (u_int64_t));
5336 VERIFY(((intptr_t)base + dlif_size) <= ((intptr_t)buf + dlif_bufsize));
5337
5338 /*
5339 * Wind back a pointer size from the aligned base and
5340 * save the original address so we can free it later.
5341 */
5342 pbuf = (void **)((intptr_t)base - sizeof (void *));
5343 *pbuf = buf;
5344 dlifp1 = base;
5345
5346 if (uniqueid_len) {
5347 MALLOC(dlifp1->dl_if_uniqueid, void *, uniqueid_len,
5348 M_NKE, M_WAITOK);
5349 if (dlifp1->dl_if_uniqueid == NULL) {
5350 zfree(dlif_zone, dlifp1);
5351 ret = ENOMEM;
5352 goto end;
5353 }
5354 bcopy(uniqueid, dlifp1->dl_if_uniqueid, uniqueid_len);
5355 dlifp1->dl_if_uniqueid_len = uniqueid_len;
5356 }
5357
5358 ifp1 = (struct ifnet *)dlifp1;
5359 dlifp1->dl_if_flags = DLIF_INUSE;
5360 if (ifnet_debug) {
5361 dlifp1->dl_if_flags |= DLIF_DEBUG;
5362 dlifp1->dl_if_trace = dlil_if_trace;
5363 }
5364 ifp1->if_name = dlifp1->dl_if_namestorage;
316670eb
A
5365
5366 /* initialize interface description */
5367 ifp1->if_desc.ifd_maxlen = IF_DESCSIZE;
5368 ifp1->if_desc.ifd_len = 0;
5369 ifp1->if_desc.ifd_desc = dlifp1->dl_if_descstorage;
5370
2d21ac55 5371#if CONFIG_MACF_NET
6d2010ae 5372 mac_ifnet_label_init(ifp1);
2d21ac55 5373#endif
9bccf70c 5374
316670eb
A
5375 if ((ret = dlil_alloc_local_stats(ifp1)) != 0) {
5376 DLIL_PRINTF("%s: failed to allocate if local stats, "
5377 "error: %d\n", __func__, ret);
5378 /* This probably shouldn't be fatal */
5379 ret = 0;
5380 }
5381
6d2010ae
A
5382 lck_mtx_init(&dlifp1->dl_if_lock, ifnet_lock_group, ifnet_lock_attr);
5383 lck_rw_init(&ifp1->if_lock, ifnet_lock_group, ifnet_lock_attr);
5384 lck_mtx_init(&ifp1->if_ref_lock, ifnet_lock_group, ifnet_lock_attr);
5385 lck_mtx_init(&ifp1->if_flt_lock, ifnet_lock_group, ifnet_lock_attr);
6d2010ae
A
5386 lck_mtx_init(&ifp1->if_addrconfig_lock, ifnet_lock_group,
5387 ifnet_lock_attr);
5388 lck_rw_init(&ifp1->if_llreach_lock, ifnet_lock_group, ifnet_lock_attr);
5389
316670eb
A
5390 /* for send data paths */
5391 lck_mtx_init(&ifp1->if_start_lock, ifnet_snd_lock_group,
5392 ifnet_lock_attr);
5393 lck_mtx_init(&ifp1->if_cached_route_lock, ifnet_snd_lock_group,
5394 ifnet_lock_attr);
5395 lck_mtx_init(&ifp1->if_snd.ifcq_lock, ifnet_snd_lock_group,
5396 ifnet_lock_attr);
5397
5398 /* for receive data paths */
5399 lck_mtx_init(&ifp1->if_poll_lock, ifnet_rcv_lock_group,
5400 ifnet_lock_attr);
5401
6d2010ae
A
5402 TAILQ_INSERT_TAIL(&dlil_ifnet_head, dlifp1, dl_if_link);
5403
5404 *ifp = ifp1;
9bccf70c
A
5405
5406end:
7ddcb079 5407 dlil_if_unlock();
9bccf70c 5408
6d2010ae
A
5409 VERIFY(dlifp1 == NULL || (IS_P2ALIGNED(dlifp1, sizeof (u_int64_t)) &&
5410 IS_P2ALIGNED(&ifp1->if_data, sizeof (u_int64_t))));
5411
5412 return (ret);
9bccf70c
A
5413}
5414
2d21ac55 5415__private_extern__ void
6d2010ae
A
5416dlil_if_release(ifnet_t ifp)
5417{
5418 struct dlil_ifnet *dlifp = (struct dlil_ifnet *)ifp;
5419
5420 ifnet_lock_exclusive(ifp);
5421 lck_mtx_lock(&dlifp->dl_if_lock);
5422 dlifp->dl_if_flags &= ~DLIF_INUSE;
5423 strncpy(dlifp->dl_if_namestorage, ifp->if_name, IFNAMSIZ);
5424 ifp->if_name = dlifp->dl_if_namestorage;
5425 lck_mtx_unlock(&dlifp->dl_if_lock);
2d21ac55 5426#if CONFIG_MACF_NET
6d2010ae
A
5427 /*
5428 * We can either recycle the MAC label here or in dlil_if_acquire().
5429 * It seems logical to do it here but this means that anything that
5430 * still has a handle on ifp will now see it as unlabeled.
5431 * Since the interface is "dead" that may be OK. Revisit later.
5432 */
5433 mac_ifnet_label_recycle(ifp);
2d21ac55 5434#endif
6d2010ae 5435 ifnet_lock_done(ifp);
9bccf70c 5436}
4a3eedf9 5437
7ddcb079
A
5438__private_extern__ void
5439dlil_if_lock(void)
5440{
5441 lck_mtx_lock(&dlil_ifnet_lock);
5442}
5443
5444__private_extern__ void
5445dlil_if_unlock(void)
5446{
5447 lck_mtx_unlock(&dlil_ifnet_lock);
5448}
5449
5450__private_extern__ void
5451dlil_if_lock_assert(void)
5452{
5453 lck_mtx_assert(&dlil_ifnet_lock, LCK_MTX_ASSERT_OWNED);
5454}
5455
4a3eedf9
A
5456__private_extern__ void
5457dlil_proto_unplumb_all(struct ifnet *ifp)
5458{
5459 /*
5460 * if_proto_hash[0-3] are for PF_INET, PF_INET6, PF_APPLETALK
5461 * and PF_VLAN, where each bucket contains exactly one entry;
5462 * PF_VLAN does not need an explicit unplumb.
5463 *
5464 * if_proto_hash[4] is for other protocols; we expect anything
5465 * in this bucket to respond to the DETACHING event (which would
5466 * have happened by now) and do the unplumb then.
5467 */
5468 (void) proto_unplumb(PF_INET, ifp);
5469#if INET6
5470 (void) proto_unplumb(PF_INET6, ifp);
5471#endif /* INET6 */
5472#if NETAT
5473 (void) proto_unplumb(PF_APPLETALK, ifp);
5474#endif /* NETAT */
5475}
6d2010ae
A
5476
5477static void
5478ifp_src_route_copyout(struct ifnet *ifp, struct route *dst)
5479{
5480 lck_mtx_lock_spin(&ifp->if_cached_route_lock);
5481 lck_mtx_convert_spin(&ifp->if_cached_route_lock);
5482
5483 route_copyout(dst, &ifp->if_src_route, sizeof (*dst));
5484
5485 lck_mtx_unlock(&ifp->if_cached_route_lock);
5486}
5487
5488static void
5489ifp_src_route_copyin(struct ifnet *ifp, struct route *src)
5490{
5491 lck_mtx_lock_spin(&ifp->if_cached_route_lock);
5492 lck_mtx_convert_spin(&ifp->if_cached_route_lock);
5493
5494 if (ifp->if_fwd_cacheok) {
5495 route_copyin(src, &ifp->if_src_route, sizeof (*src));
5496 } else {
5497 rtfree(src->ro_rt);
5498 src->ro_rt = NULL;
5499 }
5500 lck_mtx_unlock(&ifp->if_cached_route_lock);
5501}
5502
5503#if INET6
5504static void
5505ifp_src_route6_copyout(struct ifnet *ifp, struct route_in6 *dst)
5506{
5507 lck_mtx_lock_spin(&ifp->if_cached_route_lock);
5508 lck_mtx_convert_spin(&ifp->if_cached_route_lock);
5509
5510 route_copyout((struct route *)dst, (struct route *)&ifp->if_src_route6,
5511 sizeof (*dst));
5512
5513 lck_mtx_unlock(&ifp->if_cached_route_lock);
5514}
5515
5516static void
5517ifp_src_route6_copyin(struct ifnet *ifp, struct route_in6 *src)
5518{
5519 lck_mtx_lock_spin(&ifp->if_cached_route_lock);
5520 lck_mtx_convert_spin(&ifp->if_cached_route_lock);
5521
5522 if (ifp->if_fwd_cacheok) {
5523 route_copyin((struct route *)src,
5524 (struct route *)&ifp->if_src_route6, sizeof (*src));
5525 } else {
5526 rtfree(src->ro_rt);
5527 src->ro_rt = NULL;
5528 }
5529 lck_mtx_unlock(&ifp->if_cached_route_lock);
5530}
5531#endif /* INET6 */
5532
5533struct rtentry *
5534ifnet_cached_rtlookup_inet(struct ifnet *ifp, struct in_addr src_ip)
5535{
5536 struct route src_rt;
316670eb
A
5537 struct sockaddr_in *dst;
5538
5539 dst = (struct sockaddr_in *)(void *)(&src_rt.ro_dst);
6d2010ae
A
5540
5541 ifp_src_route_copyout(ifp, &src_rt);
5542
5543 if (src_rt.ro_rt == NULL || !(src_rt.ro_rt->rt_flags & RTF_UP) ||
5544 src_ip.s_addr != dst->sin_addr.s_addr ||
5545 src_rt.ro_rt->generation_id != route_generation) {
5546 if (src_rt.ro_rt != NULL) {
5547 rtfree(src_rt.ro_rt);
5548 src_rt.ro_rt = NULL;
5549 } else if (dst->sin_family != AF_INET) {
5550 bzero(&src_rt.ro_dst, sizeof (src_rt.ro_dst));
5551 dst->sin_len = sizeof (src_rt.ro_dst);
5552 dst->sin_family = AF_INET;
5553 }
5554 dst->sin_addr = src_ip;
5555
5556 if (src_rt.ro_rt == NULL) {
5557 src_rt.ro_rt = rtalloc1_scoped((struct sockaddr *)dst,
5558 0, 0, ifp->if_index);
5559
5560 if (src_rt.ro_rt != NULL) {
5561 /* retain a ref, copyin consumes one */
5562 struct rtentry *rte = src_rt.ro_rt;
5563 RT_ADDREF(rte);
5564 ifp_src_route_copyin(ifp, &src_rt);
5565 src_rt.ro_rt = rte;
5566 }
5567 }
5568 }
5569
5570 return (src_rt.ro_rt);
5571}
5572
5573#if INET6
5574struct rtentry*
5575ifnet_cached_rtlookup_inet6(struct ifnet *ifp, struct in6_addr *src_ip6)
5576{
5577 struct route_in6 src_rt;
5578
5579 ifp_src_route6_copyout(ifp, &src_rt);
5580
5581 if (src_rt.ro_rt == NULL || !(src_rt.ro_rt->rt_flags & RTF_UP) ||
5582 !IN6_ARE_ADDR_EQUAL(src_ip6, &src_rt.ro_dst.sin6_addr) ||
5583 src_rt.ro_rt->generation_id != route_generation) {
5584 if (src_rt.ro_rt != NULL) {
5585 rtfree(src_rt.ro_rt);
5586 src_rt.ro_rt = NULL;
5587 } else if (src_rt.ro_dst.sin6_family != AF_INET6) {
5588 bzero(&src_rt.ro_dst, sizeof (src_rt.ro_dst));
5589 src_rt.ro_dst.sin6_len = sizeof (src_rt.ro_dst);
5590 src_rt.ro_dst.sin6_family = AF_INET6;
5591 }
5592 src_rt.ro_dst.sin6_scope_id = in6_addr2scopeid(ifp, src_ip6);
316670eb
A
5593 bcopy(src_ip6, &src_rt.ro_dst.sin6_addr,
5594 sizeof (src_rt.ro_dst.sin6_addr));
6d2010ae
A
5595
5596 if (src_rt.ro_rt == NULL) {
5597 src_rt.ro_rt = rtalloc1_scoped(
5598 (struct sockaddr *)&src_rt.ro_dst, 0, 0,
5599 ifp->if_index);
5600
5601 if (src_rt.ro_rt != NULL) {
5602 /* retain a ref, copyin consumes one */
5603 struct rtentry *rte = src_rt.ro_rt;
5604 RT_ADDREF(rte);
5605 ifp_src_route6_copyin(ifp, &src_rt);
5606 src_rt.ro_rt = rte;
5607 }
5608 }
5609 }
5610
5611 return (src_rt.ro_rt);
5612}
5613#endif /* INET6 */
316670eb
A
5614
5615void
5616if_lqm_update(struct ifnet *ifp, int lqm)
5617{
5618 struct kev_dl_link_quality_metric_data ev_lqm_data;
5619
5620 VERIFY(lqm >= IFNET_LQM_MIN && lqm <= IFNET_LQM_MAX);
5621
5622 /* Normalize to edge */
5623 if (lqm > IFNET_LQM_THRESH_UNKNOWN && lqm <= IFNET_LQM_THRESH_POOR)
5624 lqm = IFNET_LQM_THRESH_POOR;
5625 else if (lqm > IFNET_LQM_THRESH_POOR && lqm <= IFNET_LQM_THRESH_GOOD)
5626 lqm = IFNET_LQM_THRESH_GOOD;
5627
5628 ifnet_lock_exclusive(ifp);
5629 if (lqm == ifp->if_lqm) {
5630 ifnet_lock_done(ifp);
5631 return; /* nothing to update */
5632 }
5633 ifp->if_lqm = lqm;
5634 ifnet_lock_done(ifp);
5635
5636 bzero(&ev_lqm_data, sizeof (ev_lqm_data));
5637 ev_lqm_data.link_quality_metric = lqm;
5638
5639 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_LINK_QUALITY_METRIC_CHANGED,
5640 (struct net_event_data *)&ev_lqm_data, sizeof (ev_lqm_data));
5641}
5642
5643/* for uuid.c */
5644int
5645uuid_get_ethernet(u_int8_t *node)
5646{
5647 struct ifnet *ifp;
5648 struct sockaddr_dl *sdl;
5649
5650 ifnet_head_lock_shared();
5651 TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
5652 ifnet_lock_shared(ifp);
5653 IFA_LOCK_SPIN(ifp->if_lladdr);
5654 sdl = (struct sockaddr_dl *)(void *)ifp->if_lladdr->ifa_addr;
5655 if (sdl->sdl_type == IFT_ETHER) {
5656 memcpy(node, LLADDR(sdl), ETHER_ADDR_LEN);
5657 IFA_UNLOCK(ifp->if_lladdr);
5658 ifnet_lock_done(ifp);
5659 ifnet_head_done();
5660 return (0);
5661 }
5662 IFA_UNLOCK(ifp->if_lladdr);
5663 ifnet_lock_done(ifp);
5664 }
5665 ifnet_head_done();
5666
5667 return (-1);
5668}
5669
5670static int
5671sysctl_rxpoll SYSCTL_HANDLER_ARGS
5672{
5673#pragma unused(arg1, arg2)
5674 int i, err;
5675
5676 i = if_rxpoll;
5677
5678 err = sysctl_handle_int(oidp, &i, 0, req);
5679 if (err != 0 || req->newptr == USER_ADDR_NULL)
5680 return (err);
5681
5682 if (net_rxpoll == 0)
5683 return (ENXIO);
5684
5685 if_rxpoll = i;
5686 return (err);
5687}
5688
5689static int
5690sysctl_sndq_maxlen SYSCTL_HANDLER_ARGS
5691{
5692#pragma unused(arg1, arg2)
5693 int i, err;
5694
5695 i = if_sndq_maxlen;
5696
5697 err = sysctl_handle_int(oidp, &i, 0, req);
5698 if (err != 0 || req->newptr == USER_ADDR_NULL)
5699 return (err);
5700
5701 if (i < IF_SNDQ_MINLEN)
5702 i = IF_SNDQ_MINLEN;
5703
5704 if_sndq_maxlen = i;
5705 return (err);
5706}
5707
5708static int
5709sysctl_rcvq_maxlen SYSCTL_HANDLER_ARGS
5710{
5711#pragma unused(arg1, arg2)
5712 int i, err;
5713
5714 i = if_rcvq_maxlen;
5715
5716 err = sysctl_handle_int(oidp, &i, 0, req);
5717 if (err != 0 || req->newptr == USER_ADDR_NULL)
5718 return (err);
5719
5720 if (i < IF_RCVQ_MINLEN)
5721 i = IF_RCVQ_MINLEN;
5722
5723 if_rcvq_maxlen = i;
5724 return (err);
5725}
5726
5727void
5728ifnet_fclist_append(struct sfb *sp, struct sfb_fc_list *fcl)
5729{
5730 struct sfb_bin_fcentry *fce, *tfce;
5731
5732 lck_mtx_lock_spin(&ifnet_fclist_lock);
5733
5734 SLIST_FOREACH_SAFE(fce, fcl, fce_link, tfce) {
5735 SLIST_REMOVE(fcl, fce, sfb_bin_fcentry, fce_link);
5736 SLIST_INSERT_HEAD(&ifnet_fclist, fce, fce_link);
5737 sp->sfb_stats.flow_feedback++;
5738 }
5739 VERIFY(SLIST_EMPTY(fcl) && !SLIST_EMPTY(&ifnet_fclist));
5740
5741 wakeup(&ifnet_fclist);
5742
5743 lck_mtx_unlock(&ifnet_fclist_lock);
5744}
5745
5746struct sfb_bin_fcentry *
5747ifnet_fce_alloc(int how)
5748{
5749 struct sfb_bin_fcentry *fce;
5750
5751 fce = (how == M_WAITOK) ? zalloc(ifnet_fcezone) :
5752 zalloc_noblock(ifnet_fcezone);
5753 if (fce != NULL)
5754 bzero(fce, ifnet_fcezone_size);
5755
5756 return (fce);
5757}
5758
5759void
5760ifnet_fce_free(struct sfb_bin_fcentry *fce)
5761{
5762 zfree(ifnet_fcezone, fce);
5763}
5764
5765static void
5766ifnet_fc_init(void)
5767{
5768 thread_t thread = THREAD_NULL;
5769
5770 SLIST_INIT(&ifnet_fclist);
5771 lck_mtx_init(&ifnet_fclist_lock, ifnet_snd_lock_group, NULL);
5772
5773 ifnet_fcezone_size = P2ROUNDUP(sizeof (struct sfb_bin_fcentry),
5774 sizeof (u_int64_t));
5775 ifnet_fcezone = zinit(ifnet_fcezone_size,
5776 IFNET_FCEZONE_MAX * ifnet_fcezone_size, 0, IFNET_FCEZONE_NAME);
5777 if (ifnet_fcezone == NULL) {
5778 panic("%s: failed allocating %s", __func__, IFNET_FCEZONE_NAME);
5779 /* NOTREACHED */
5780 }
5781 zone_change(ifnet_fcezone, Z_EXPAND, TRUE);
5782 zone_change(ifnet_fcezone, Z_CALLERACCT, FALSE);
5783
5784 if (kernel_thread_start(ifnet_fc_thread_func,
5785 NULL, &thread) != KERN_SUCCESS) {
5786 panic("%s: couldn't create flow event advisory thread",
5787 __func__);
5788 /* NOTREACHED */
5789 }
5790 thread_deallocate(thread);
5791}
5792
5793static int
5794ifnet_fc_thread_cont(int err)
5795{
5796#pragma unused(err)
5797 struct sfb_bin_fcentry *fce;
5798 struct inp_fc_entry *infc;
5799
5800 for (;;) {
5801 lck_mtx_assert(&ifnet_fclist_lock, LCK_MTX_ASSERT_OWNED);
5802 while (SLIST_EMPTY(&ifnet_fclist)) {
5803 (void) msleep0(&ifnet_fclist, &ifnet_fclist_lock,
5804 (PSOCK | PSPIN), "ifnet_fc_cont", 0,
5805 ifnet_fc_thread_cont);
5806 /* NOTREACHED */
5807 }
5808
5809 fce = SLIST_FIRST(&ifnet_fclist);
5810 SLIST_REMOVE(&ifnet_fclist, fce, sfb_bin_fcentry, fce_link);
5811 SLIST_NEXT(fce, fce_link) = NULL;
5812 lck_mtx_unlock(&ifnet_fclist_lock);
5813
5814 infc = inp_fc_getinp(fce->fce_flowhash);
5815 if (infc == NULL) {
5816 ifnet_fce_free(fce);
5817 lck_mtx_lock_spin(&ifnet_fclist_lock);
5818 continue;
5819 }
5820 VERIFY(infc->infc_inp != NULL);
5821
5822 inp_fc_feedback(infc->infc_inp);
5823
5824 inp_fc_entry_free(infc);
5825 ifnet_fce_free(fce);
5826 lck_mtx_lock_spin(&ifnet_fclist_lock);
5827 }
5828}
5829
5830static void
5831ifnet_fc_thread_func(void *v, wait_result_t w)
5832{
5833#pragma unused(v, w)
5834 lck_mtx_lock(&ifnet_fclist_lock);
5835 (void) msleep0(&ifnet_fclist, &ifnet_fclist_lock,
5836 (PSOCK | PSPIN), "ifnet_fc", 0, ifnet_fc_thread_cont);
5837 /*
5838 * msleep0() shouldn't have returned as PCATCH was not set;
5839 * therefore assert in this case.
5840 */
5841 lck_mtx_unlock(&ifnet_fclist_lock);
5842 VERIFY(0);
5843}
5844
5845void
5846dlil_node_present(struct ifnet *ifp, struct sockaddr *sa,
5847 int32_t rssi, int lqm, int npm, u_int8_t srvinfo[48])
5848{
5849 struct kev_dl_node_presence kev;
5850 struct sockaddr_dl *sdl;
5851 struct sockaddr_in6 *sin6;
5852
5853 VERIFY(ifp);
5854 VERIFY(sa);
5855 VERIFY(sa->sa_family == AF_LINK || sa->sa_family == AF_INET6);
5856
5857 bzero(&kev, sizeof (kev));
5858 sin6 = &kev.sin6_node_address;
5859 sdl = &kev.sdl_node_address;
5860 nd6_alt_node_addr_decompose(ifp, sa, sdl, sin6);
5861 kev.rssi = rssi;
5862 kev.link_quality_metric = lqm;
5863 kev.node_proximity_metric = npm;
5864 bcopy(srvinfo, kev.node_service_info, sizeof (kev.node_service_info));
5865
5866 nd6_alt_node_present(ifp, sin6, sdl, rssi, lqm, npm);
5867 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_PRESENCE,
5868 &kev.link_data, sizeof (kev));
5869}
5870
5871void
5872dlil_node_absent(struct ifnet *ifp, struct sockaddr *sa)
5873{
5874 struct kev_dl_node_absence kev;
5875 struct sockaddr_in6 *sin6;
5876 struct sockaddr_dl *sdl;
5877
5878 VERIFY(ifp);
5879 VERIFY(sa);
5880 VERIFY(sa->sa_family == AF_LINK || sa->sa_family == AF_INET6);
5881
5882 bzero(&kev, sizeof (kev));
5883 sin6 = &kev.sin6_node_address;
5884 sdl = &kev.sdl_node_address;
5885 nd6_alt_node_addr_decompose(ifp, sa, sdl, sin6);
5886
5887 nd6_alt_node_absent(ifp, sin6);
5888 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_ABSENCE,
5889 &kev.link_data, sizeof (kev));
5890}
5891
5892errno_t
5893ifnet_getset_opportunistic(ifnet_t ifp, u_long cmd, struct ifreq *ifr,
5894 struct proc *p)
5895{
5896 u_int32_t level = IFNET_THROTTLE_OFF;
5897 errno_t result = 0;
5898
5899 VERIFY(cmd == SIOCSIFOPPORTUNISTIC || cmd == SIOCGIFOPPORTUNISTIC);
5900
5901 if (cmd == SIOCSIFOPPORTUNISTIC) {
5902 /*
5903 * XXX: Use priv_check_cred() instead of root check?
5904 */
5905 if ((result = proc_suser(p)) != 0)
5906 return (result);
5907
5908 if (ifr->ifr_opportunistic.ifo_flags ==
5909 IFRIFOF_BLOCK_OPPORTUNISTIC)
5910 level = IFNET_THROTTLE_OPPORTUNISTIC;
5911 else if (ifr->ifr_opportunistic.ifo_flags == 0)
5912 level = IFNET_THROTTLE_OFF;
5913 else
5914 result = EINVAL;
5915
5916 if (result == 0)
5917 result = ifnet_set_throttle(ifp, level);
5918 } else if ((result = ifnet_get_throttle(ifp, &level)) == 0) {
5919 ifr->ifr_opportunistic.ifo_flags = 0;
5920 if (level == IFNET_THROTTLE_OPPORTUNISTIC) {
5921 ifr->ifr_opportunistic.ifo_flags |=
5922 IFRIFOF_BLOCK_OPPORTUNISTIC;
5923 }
5924 }
5925
5926 /*
5927 * Return the count of current opportunistic connections
5928 * over the interface.
5929 */
5930 if (result == 0) {
5931 uint32_t flags = 0;
5932 flags |= (cmd == SIOCSIFOPPORTUNISTIC) ?
5933 INPCB_OPPORTUNISTIC_SETCMD : 0;
5934 flags |= (level == IFNET_THROTTLE_OPPORTUNISTIC) ?
5935 INPCB_OPPORTUNISTIC_THROTTLEON : 0;
5936 ifr->ifr_opportunistic.ifo_inuse =
5937 udp_count_opportunistic(ifp->if_index, flags) +
5938 tcp_count_opportunistic(ifp->if_index, flags);
5939 }
5940
5941 if (result == EALREADY)
5942 result = 0;
5943
5944 return (result);
5945}
5946
5947int
5948ifnet_get_throttle(struct ifnet *ifp, u_int32_t *level)
5949{
5950 struct ifclassq *ifq;
5951 int err = 0;
5952
5953 if (!(ifp->if_eflags & IFEF_TXSTART))
5954 return (ENXIO);
5955
5956 *level = IFNET_THROTTLE_OFF;
5957
5958 ifq = &ifp->if_snd;
5959 IFCQ_LOCK(ifq);
5960 /* Throttling works only for IFCQ, not ALTQ instances */
5961 if (IFCQ_IS_ENABLED(ifq))
5962 IFCQ_GET_THROTTLE(ifq, *level, err);
5963 IFCQ_UNLOCK(ifq);
5964
5965 return (err);
5966}
5967
5968int
5969ifnet_set_throttle(struct ifnet *ifp, u_int32_t level)
5970{
5971 struct ifclassq *ifq;
5972 int err = 0;
5973
5974 if (!(ifp->if_eflags & IFEF_TXSTART))
5975 return (ENXIO);
5976
5977 switch (level) {
5978 case IFNET_THROTTLE_OFF:
5979 case IFNET_THROTTLE_OPPORTUNISTIC:
5980#if PF_ALTQ
5981 /* Throttling works only for IFCQ, not ALTQ instances */
5982 if (ALTQ_IS_ENABLED(IFCQ_ALTQ(ifq)))
5983 return (ENXIO);
5984#endif /* PF_ALTQ */
5985 break;
5986 default:
5987 return (EINVAL);
5988 }
5989
5990 ifq = &ifp->if_snd;
5991 IFCQ_LOCK(ifq);
5992 if (IFCQ_IS_ENABLED(ifq))
5993 IFCQ_SET_THROTTLE(ifq, level, err);
5994 IFCQ_UNLOCK(ifq);
5995
5996 if (err == 0) {
5997 printf("%s%d: throttling level set to %d\n", ifp->if_name,
5998 ifp->if_unit, level);
5999 if (level == IFNET_THROTTLE_OFF)
6000 ifnet_start(ifp);
6001 }
6002
6003 return (err);
6004}