2 * Copyright (c) 1999-2018 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
29 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
30 * support for mandatory and extensible security protections. This notice
31 * is included in support of clause 2.2 (b) of the Apple Public License,
36 #include <sys/param.h>
37 #include <sys/systm.h>
38 #include <sys/kernel.h>
39 #include <sys/malloc.h>
41 #include <sys/socket.h>
42 #include <sys/domain.h>
44 #include <sys/random.h>
45 #include <sys/socketvar.h>
46 #include <net/if_dl.h>
48 #include <net/route.h>
49 #include <net/if_var.h>
51 #include <net/if_arp.h>
52 #include <net/iptap.h>
53 #include <net/pktap.h>
54 #include <sys/kern_event.h>
55 #include <sys/kdebug.h>
56 #include <sys/mcache.h>
57 #include <sys/syslog.h>
58 #include <sys/protosw.h>
61 #include <kern/assert.h>
62 #include <kern/task.h>
63 #include <kern/thread.h>
64 #include <kern/sched_prim.h>
65 #include <kern/locks.h>
66 #include <kern/zalloc.h>
68 #include <net/kpi_protocol.h>
69 #include <net/if_types.h>
70 #include <net/if_ipsec.h>
71 #include <net/if_llreach.h>
72 #include <net/if_utun.h>
73 #include <net/kpi_interfacefilter.h>
74 #include <net/classq/classq.h>
75 #include <net/classq/classq_sfb.h>
76 #include <net/flowhash.h>
77 #include <net/ntstat.h>
78 #include <net/if_llatbl.h>
79 #include <net/net_api_stats.h>
80 #include <net/if_ports_used.h>
83 #include <netinet/in_var.h>
84 #include <netinet/igmp_var.h>
85 #include <netinet/ip_var.h>
86 #include <netinet/tcp.h>
87 #include <netinet/tcp_var.h>
88 #include <netinet/udp.h>
89 #include <netinet/udp_var.h>
90 #include <netinet/if_ether.h>
91 #include <netinet/in_pcb.h>
92 #include <netinet/in_tclass.h>
96 #include <netinet6/in6_var.h>
97 #include <netinet6/nd6.h>
98 #include <netinet6/mld6_var.h>
99 #include <netinet6/scope6_var.h>
102 #include <libkern/OSAtomic.h>
103 #include <libkern/tree.h>
105 #include <dev/random/randomdev.h>
106 #include <machine/machine_routines.h>
108 #include <mach/thread_act.h>
109 #include <mach/sdt.h>
112 #include <sys/kauth.h>
113 #include <security/mac_framework.h>
114 #include <net/ethernet.h>
115 #include <net/firewire.h>
119 #include <net/pfvar.h>
121 #include <net/pktsched/pktsched.h>
124 #include <net/necp.h>
128 #define DBG_LAYER_BEG DLILDBG_CODE(DBG_DLIL_STATIC, 0)
129 #define DBG_LAYER_END DLILDBG_CODE(DBG_DLIL_STATIC, 2)
130 #define DBG_FNC_DLIL_INPUT DLILDBG_CODE(DBG_DLIL_STATIC, (1 << 8))
131 #define DBG_FNC_DLIL_OUTPUT DLILDBG_CODE(DBG_DLIL_STATIC, (2 << 8))
132 #define DBG_FNC_DLIL_IFOUT DLILDBG_CODE(DBG_DLIL_STATIC, (3 << 8))
134 #define MAX_FRAME_TYPE_SIZE 4 /* LONGWORDS */
135 #define MAX_LINKADDR 4 /* LONGWORDS */
136 #define M_NKE M_IFADDR
139 #define DLIL_PRINTF printf
141 #define DLIL_PRINTF kprintf
144 #define IF_DATA_REQUIRE_ALIGNED_64(f) \
145 _CASSERT(!(offsetof(struct if_data_internal, f) % sizeof (u_int64_t)))
147 #define IFNET_IF_DATA_REQUIRE_ALIGNED_64(f) \
148 _CASSERT(!(offsetof(struct ifnet, if_data.f) % sizeof (u_int64_t)))
156 * List of if_proto structures in if_proto_hash[] is protected by
157 * the ifnet lock. The rest of the fields are initialized at protocol
158 * attach time and never change, thus no lock required as long as
159 * a reference to it is valid, via if_proto_ref().
162 SLIST_ENTRY(if_proto
) next_hash
;
166 protocol_family_t protocol_family
;
170 proto_media_input input
;
171 proto_media_preout pre_output
;
172 proto_media_event event
;
173 proto_media_ioctl ioctl
;
174 proto_media_detached detached
;
175 proto_media_resolve_multi resolve_multi
;
176 proto_media_send_arp send_arp
;
179 proto_media_input_v2 input
;
180 proto_media_preout pre_output
;
181 proto_media_event event
;
182 proto_media_ioctl ioctl
;
183 proto_media_detached detached
;
184 proto_media_resolve_multi resolve_multi
;
185 proto_media_send_arp send_arp
;
190 SLIST_HEAD(proto_hash_entry
, if_proto
);
192 #define DLIL_SDLDATALEN \
193 (DLIL_SDLMAXLEN - offsetof(struct sockaddr_dl, sdl_data[0]))
196 struct ifnet dl_if
; /* public ifnet */
198 * DLIL private fields, protected by dl_if_lock
200 decl_lck_mtx_data(, dl_if_lock
);
201 TAILQ_ENTRY(dlil_ifnet
) dl_if_link
; /* dlil_ifnet link */
202 u_int32_t dl_if_flags
; /* flags (below) */
203 u_int32_t dl_if_refcnt
; /* refcnt */
204 void (*dl_if_trace
)(struct dlil_ifnet
*, int); /* ref trace callback */
205 void *dl_if_uniqueid
; /* unique interface id */
206 size_t dl_if_uniqueid_len
; /* length of the unique id */
207 char dl_if_namestorage
[IFNAMSIZ
]; /* interface name storage */
208 char dl_if_xnamestorage
[IFXNAMSIZ
]; /* external name storage */
210 struct ifaddr ifa
; /* lladdr ifa */
211 u_int8_t asdl
[DLIL_SDLMAXLEN
]; /* addr storage */
212 u_int8_t msdl
[DLIL_SDLMAXLEN
]; /* mask storage */
214 u_int8_t dl_if_descstorage
[IF_DESCSIZE
]; /* desc storage */
215 struct dlil_threading_info dl_if_inpstorage
; /* input thread storage */
216 ctrace_t dl_if_attach
; /* attach PC stacktrace */
217 ctrace_t dl_if_detach
; /* detach PC stacktrace */
220 /* Values for dl_if_flags (private to DLIL) */
221 #define DLIF_INUSE 0x1 /* DLIL ifnet recycler, ifnet in use */
222 #define DLIF_REUSE 0x2 /* DLIL ifnet recycles, ifnet is not new */
223 #define DLIF_DEBUG 0x4 /* has debugging info */
225 #define IF_REF_TRACE_HIST_SIZE 8 /* size of ref trace history */
228 __private_extern__
unsigned int if_ref_trace_hist_size
= IF_REF_TRACE_HIST_SIZE
;
230 struct dlil_ifnet_dbg
{
231 struct dlil_ifnet dldbg_dlif
; /* dlil_ifnet */
232 u_int16_t dldbg_if_refhold_cnt
; /* # ifnet references */
233 u_int16_t dldbg_if_refrele_cnt
; /* # ifnet releases */
235 * Circular lists of ifnet_{reference,release} callers.
237 ctrace_t dldbg_if_refhold
[IF_REF_TRACE_HIST_SIZE
];
238 ctrace_t dldbg_if_refrele
[IF_REF_TRACE_HIST_SIZE
];
241 #define DLIL_TO_IFP(s) (&s->dl_if)
242 #define IFP_TO_DLIL(s) ((struct dlil_ifnet *)s)
244 struct ifnet_filter
{
245 TAILQ_ENTRY(ifnet_filter
) filt_next
;
247 u_int32_t filt_flags
;
249 const char *filt_name
;
251 protocol_family_t filt_protocol
;
252 iff_input_func filt_input
;
253 iff_output_func filt_output
;
254 iff_event_func filt_event
;
255 iff_ioctl_func filt_ioctl
;
256 iff_detached_func filt_detached
;
259 struct proto_input_entry
;
261 static TAILQ_HEAD(, dlil_ifnet
) dlil_ifnet_head
;
262 static lck_grp_t
*dlil_lock_group
;
263 lck_grp_t
*ifnet_lock_group
;
264 static lck_grp_t
*ifnet_head_lock_group
;
265 static lck_grp_t
*ifnet_snd_lock_group
;
266 static lck_grp_t
*ifnet_rcv_lock_group
;
267 lck_attr_t
*ifnet_lock_attr
;
268 decl_lck_rw_data(static, ifnet_head_lock
);
269 decl_lck_mtx_data(static, dlil_ifnet_lock
);
270 u_int32_t dlil_filter_disable_tso_count
= 0;
273 static unsigned int ifnet_debug
= 1; /* debugging (enabled) */
275 static unsigned int ifnet_debug
; /* debugging (disabled) */
277 static unsigned int dlif_size
; /* size of dlil_ifnet to allocate */
278 static unsigned int dlif_bufsize
; /* size of dlif_size + headroom */
279 static struct zone
*dlif_zone
; /* zone for dlil_ifnet */
281 #define DLIF_ZONE_MAX 64 /* maximum elements in zone */
282 #define DLIF_ZONE_NAME "ifnet" /* zone name */
284 static unsigned int dlif_filt_size
; /* size of ifnet_filter */
285 static struct zone
*dlif_filt_zone
; /* zone for ifnet_filter */
287 #define DLIF_FILT_ZONE_MAX 8 /* maximum elements in zone */
288 #define DLIF_FILT_ZONE_NAME "ifnet_filter" /* zone name */
290 static unsigned int dlif_phash_size
; /* size of ifnet proto hash table */
291 static struct zone
*dlif_phash_zone
; /* zone for ifnet proto hash table */
293 #define DLIF_PHASH_ZONE_MAX DLIF_ZONE_MAX /* maximum elements in zone */
294 #define DLIF_PHASH_ZONE_NAME "ifnet_proto_hash" /* zone name */
296 static unsigned int dlif_proto_size
; /* size of if_proto */
297 static struct zone
*dlif_proto_zone
; /* zone for if_proto */
299 #define DLIF_PROTO_ZONE_MAX (DLIF_ZONE_MAX*2) /* maximum elements in zone */
300 #define DLIF_PROTO_ZONE_NAME "ifnet_proto" /* zone name */
302 static unsigned int dlif_tcpstat_size
; /* size of tcpstat_local to allocate */
303 static unsigned int dlif_tcpstat_bufsize
; /* size of dlif_tcpstat_size + headroom */
304 static struct zone
*dlif_tcpstat_zone
; /* zone for tcpstat_local */
306 #define DLIF_TCPSTAT_ZONE_MAX 1 /* maximum elements in zone */
307 #define DLIF_TCPSTAT_ZONE_NAME "ifnet_tcpstat" /* zone name */
309 static unsigned int dlif_udpstat_size
; /* size of udpstat_local to allocate */
310 static unsigned int dlif_udpstat_bufsize
; /* size of dlif_udpstat_size + headroom */
311 static struct zone
*dlif_udpstat_zone
; /* zone for udpstat_local */
313 #define DLIF_UDPSTAT_ZONE_MAX 1 /* maximum elements in zone */
314 #define DLIF_UDPSTAT_ZONE_NAME "ifnet_udpstat" /* zone name */
316 static u_int32_t net_rtref
;
318 static struct dlil_main_threading_info dlil_main_input_thread_info
;
319 __private_extern__
struct dlil_threading_info
*dlil_main_input_thread
=
320 (struct dlil_threading_info
*)&dlil_main_input_thread_info
;
322 static int dlil_event_internal(struct ifnet
*ifp
, struct kev_msg
*msg
, bool update_generation
);
323 static int dlil_detach_filter_internal(interface_filter_t filter
, int detached
);
324 static void dlil_if_trace(struct dlil_ifnet
*, int);
325 static void if_proto_ref(struct if_proto
*);
326 static void if_proto_free(struct if_proto
*);
327 static struct if_proto
*find_attached_proto(struct ifnet
*, u_int32_t
);
328 static u_int32_t
dlil_ifp_protolist(struct ifnet
*ifp
, protocol_family_t
*list
,
329 u_int32_t list_count
);
330 static void if_flt_monitor_busy(struct ifnet
*);
331 static void if_flt_monitor_unbusy(struct ifnet
*);
332 static void if_flt_monitor_enter(struct ifnet
*);
333 static void if_flt_monitor_leave(struct ifnet
*);
334 static int dlil_interface_filters_input(struct ifnet
*, struct mbuf
**,
335 char **, protocol_family_t
);
336 static int dlil_interface_filters_output(struct ifnet
*, struct mbuf
**,
338 static struct ifaddr
*dlil_alloc_lladdr(struct ifnet
*,
339 const struct sockaddr_dl
*);
340 static int ifnet_lookup(struct ifnet
*);
341 static void if_purgeaddrs(struct ifnet
*);
343 static errno_t
ifproto_media_input_v1(struct ifnet
*, protocol_family_t
,
344 struct mbuf
*, char *);
345 static errno_t
ifproto_media_input_v2(struct ifnet
*, protocol_family_t
,
347 static errno_t
ifproto_media_preout(struct ifnet
*, protocol_family_t
,
348 mbuf_t
*, const struct sockaddr
*, void *, char *, char *);
349 static void ifproto_media_event(struct ifnet
*, protocol_family_t
,
350 const struct kev_msg
*);
351 static errno_t
ifproto_media_ioctl(struct ifnet
*, protocol_family_t
,
352 unsigned long, void *);
353 static errno_t
ifproto_media_resolve_multi(ifnet_t
, const struct sockaddr
*,
354 struct sockaddr_dl
*, size_t);
355 static errno_t
ifproto_media_send_arp(struct ifnet
*, u_short
,
356 const struct sockaddr_dl
*, const struct sockaddr
*,
357 const struct sockaddr_dl
*, const struct sockaddr
*);
359 static errno_t
ifp_if_input(struct ifnet
*ifp
, struct mbuf
*m_head
,
360 struct mbuf
*m_tail
, const struct ifnet_stat_increment_param
*s
,
361 boolean_t poll
, struct thread
*tp
);
362 static void ifp_if_input_poll(struct ifnet
*, u_int32_t
, u_int32_t
,
363 struct mbuf
**, struct mbuf
**, u_int32_t
*, u_int32_t
*);
364 static errno_t
ifp_if_ctl(struct ifnet
*, ifnet_ctl_cmd_t
, u_int32_t
, void *);
365 static errno_t
ifp_if_demux(struct ifnet
*, struct mbuf
*, char *,
366 protocol_family_t
*);
367 static errno_t
ifp_if_add_proto(struct ifnet
*, protocol_family_t
,
368 const struct ifnet_demux_desc
*, u_int32_t
);
369 static errno_t
ifp_if_del_proto(struct ifnet
*, protocol_family_t
);
370 static errno_t
ifp_if_check_multi(struct ifnet
*, const struct sockaddr
*);
372 static errno_t
ifp_if_framer(struct ifnet
*, struct mbuf
**,
373 const struct sockaddr
*, const char *, const char *,
374 u_int32_t
*, u_int32_t
*);
376 static errno_t
ifp_if_framer(struct ifnet
*, struct mbuf
**,
377 const struct sockaddr
*, const char *, const char *);
378 #endif /* CONFIG_EMBEDDED */
379 static errno_t
ifp_if_framer_extended(struct ifnet
*, struct mbuf
**,
380 const struct sockaddr
*, const char *, const char *,
381 u_int32_t
*, u_int32_t
*);
382 static errno_t
ifp_if_set_bpf_tap(struct ifnet
*, bpf_tap_mode
, bpf_packet_func
);
383 static void ifp_if_free(struct ifnet
*);
384 static void ifp_if_event(struct ifnet
*, const struct kev_msg
*);
385 static __inline
void ifp_inc_traffic_class_in(struct ifnet
*, struct mbuf
*);
386 static __inline
void ifp_inc_traffic_class_out(struct ifnet
*, struct mbuf
*);
388 static void dlil_main_input_thread_func(void *, wait_result_t
);
389 static void dlil_input_thread_func(void *, wait_result_t
);
390 static void dlil_rxpoll_input_thread_func(void *, wait_result_t
);
391 static int dlil_create_input_thread(ifnet_t
, struct dlil_threading_info
*);
392 static void dlil_terminate_input_thread(struct dlil_threading_info
*);
393 static void dlil_input_stats_add(const struct ifnet_stat_increment_param
*,
394 struct dlil_threading_info
*, boolean_t
);
395 static void dlil_input_stats_sync(struct ifnet
*, struct dlil_threading_info
*);
396 static void dlil_input_packet_list_common(struct ifnet
*, struct mbuf
*,
397 u_int32_t
, ifnet_model_t
, boolean_t
);
398 static errno_t
ifnet_input_common(struct ifnet
*, struct mbuf
*, struct mbuf
*,
399 const struct ifnet_stat_increment_param
*, boolean_t
, boolean_t
);
401 #if DEBUG || DEVELOPMENT
402 static void dlil_verify_sum16(void);
403 #endif /* DEBUG || DEVELOPMENT */
404 static void dlil_output_cksum_dbg(struct ifnet
*, struct mbuf
*, uint32_t,
406 static void dlil_input_cksum_dbg(struct ifnet
*, struct mbuf
*, char *,
409 static void ifnet_detacher_thread_func(void *, wait_result_t
);
410 static int ifnet_detacher_thread_cont(int);
411 static void ifnet_detach_final(struct ifnet
*);
412 static void ifnet_detaching_enqueue(struct ifnet
*);
413 static struct ifnet
*ifnet_detaching_dequeue(void);
415 static void ifnet_start_thread_fn(void *, wait_result_t
);
416 static void ifnet_poll_thread_fn(void *, wait_result_t
);
417 static void ifnet_poll(struct ifnet
*);
418 static errno_t
ifnet_enqueue_common(struct ifnet
*, void *,
419 classq_pkt_type_t
, boolean_t
, boolean_t
*);
421 static void ifp_src_route_copyout(struct ifnet
*, struct route
*);
422 static void ifp_src_route_copyin(struct ifnet
*, struct route
*);
424 static void ifp_src_route6_copyout(struct ifnet
*, struct route_in6
*);
425 static void ifp_src_route6_copyin(struct ifnet
*, struct route_in6
*);
428 static int sysctl_rxpoll SYSCTL_HANDLER_ARGS
;
429 static int sysctl_rxpoll_mode_holdtime SYSCTL_HANDLER_ARGS
;
430 static int sysctl_rxpoll_sample_holdtime SYSCTL_HANDLER_ARGS
;
431 static int sysctl_rxpoll_interval_time SYSCTL_HANDLER_ARGS
;
432 static int sysctl_rxpoll_wlowat SYSCTL_HANDLER_ARGS
;
433 static int sysctl_rxpoll_whiwat SYSCTL_HANDLER_ARGS
;
434 static int sysctl_sndq_maxlen SYSCTL_HANDLER_ARGS
;
435 static int sysctl_rcvq_maxlen SYSCTL_HANDLER_ARGS
;
436 static int sysctl_hwcksum_dbg_mode SYSCTL_HANDLER_ARGS
;
437 static int sysctl_hwcksum_dbg_partial_rxoff_forced SYSCTL_HANDLER_ARGS
;
438 static int sysctl_hwcksum_dbg_partial_rxoff_adj SYSCTL_HANDLER_ARGS
;
440 struct chain_len_stats tx_chain_len_stats
;
441 static int sysctl_tx_chain_len_stats SYSCTL_HANDLER_ARGS
;
443 #if TEST_INPUT_THREAD_TERMINATION
444 static int sysctl_input_thread_termination_spin SYSCTL_HANDLER_ARGS
;
445 #endif /* TEST_INPUT_THREAD_TERMINATION */
447 /* The following are protected by dlil_ifnet_lock */
448 static TAILQ_HEAD(, ifnet
) ifnet_detaching_head
;
449 static u_int32_t ifnet_detaching_cnt
;
450 static void *ifnet_delayed_run
; /* wait channel for detaching thread */
452 decl_lck_mtx_data(static, ifnet_fc_lock
);
454 static uint32_t ifnet_flowhash_seed
;
456 struct ifnet_flowhash_key
{
457 char ifk_name
[IFNAMSIZ
];
461 uint32_t ifk_capabilities
;
462 uint32_t ifk_capenable
;
463 uint32_t ifk_output_sched_model
;
468 /* Flow control entry per interface */
469 struct ifnet_fc_entry
{
470 RB_ENTRY(ifnet_fc_entry
) ifce_entry
;
471 u_int32_t ifce_flowhash
;
472 struct ifnet
*ifce_ifp
;
475 static uint32_t ifnet_calc_flowhash(struct ifnet
*);
476 static int ifce_cmp(const struct ifnet_fc_entry
*,
477 const struct ifnet_fc_entry
*);
478 static int ifnet_fc_add(struct ifnet
*);
479 static struct ifnet_fc_entry
*ifnet_fc_get(u_int32_t
);
480 static void ifnet_fc_entry_free(struct ifnet_fc_entry
*);
482 /* protected by ifnet_fc_lock */
483 RB_HEAD(ifnet_fc_tree
, ifnet_fc_entry
) ifnet_fc_tree
;
484 RB_PROTOTYPE(ifnet_fc_tree
, ifnet_fc_entry
, ifce_entry
, ifce_cmp
);
485 RB_GENERATE(ifnet_fc_tree
, ifnet_fc_entry
, ifce_entry
, ifce_cmp
);
487 static unsigned int ifnet_fc_zone_size
; /* sizeof ifnet_fc_entry */
488 static struct zone
*ifnet_fc_zone
; /* ifnet_fc_entry zone */
490 #define IFNET_FC_ZONE_NAME "ifnet_fc_zone"
491 #define IFNET_FC_ZONE_MAX 32
493 extern void bpfdetach(struct ifnet
*);
494 extern void proto_input_run(void);
496 extern uint32_t udp_count_opportunistic(unsigned int ifindex
,
498 extern uint32_t tcp_count_opportunistic(unsigned int ifindex
,
501 __private_extern__
void link_rtrequest(int, struct rtentry
*, struct sockaddr
*);
504 #ifdef CONFIG_EMBEDDED
505 int dlil_lladdr_ckreq
= 1;
507 int dlil_lladdr_ckreq
= 0;
512 int dlil_verbose
= 1;
514 int dlil_verbose
= 0;
516 #if IFNET_INPUT_SANITY_CHK
517 /* sanity checking of input packet lists received */
518 static u_int32_t dlil_input_sanity_check
= 0;
519 #endif /* IFNET_INPUT_SANITY_CHK */
520 /* rate limit debug messages */
521 struct timespec dlil_dbgrate
= { 1, 0 };
523 SYSCTL_DECL(_net_link_generic_system
);
525 SYSCTL_INT(_net_link_generic_system
, OID_AUTO
, dlil_verbose
,
526 CTLFLAG_RW
| CTLFLAG_LOCKED
, &dlil_verbose
, 0, "Log DLIL error messages");
528 #define IF_SNDQ_MINLEN 32
529 u_int32_t if_sndq_maxlen
= IFQ_MAXLEN
;
530 SYSCTL_PROC(_net_link_generic_system
, OID_AUTO
, sndq_maxlen
,
531 CTLTYPE_INT
| CTLFLAG_RW
| CTLFLAG_LOCKED
, &if_sndq_maxlen
, IFQ_MAXLEN
,
532 sysctl_sndq_maxlen
, "I", "Default transmit queue max length");
534 #define IF_RCVQ_MINLEN 32
535 #define IF_RCVQ_MAXLEN 256
536 u_int32_t if_rcvq_maxlen
= IF_RCVQ_MAXLEN
;
537 SYSCTL_PROC(_net_link_generic_system
, OID_AUTO
, rcvq_maxlen
,
538 CTLTYPE_INT
| CTLFLAG_RW
| CTLFLAG_LOCKED
, &if_rcvq_maxlen
, IFQ_MAXLEN
,
539 sysctl_rcvq_maxlen
, "I", "Default receive queue max length");
541 #define IF_RXPOLL_DECAY 2 /* ilog2 of EWMA decay rate (4) */
542 static u_int32_t if_rxpoll_decay
= IF_RXPOLL_DECAY
;
543 SYSCTL_UINT(_net_link_generic_system
, OID_AUTO
, rxpoll_decay
,
544 CTLFLAG_RW
| CTLFLAG_LOCKED
, &if_rxpoll_decay
, IF_RXPOLL_DECAY
,
545 "ilog2 of EWMA decay rate of avg inbound packets");
547 #define IF_RXPOLL_MODE_HOLDTIME_MIN (10ULL * 1000 * 1000) /* 10 ms */
548 #define IF_RXPOLL_MODE_HOLDTIME (1000ULL * 1000 * 1000) /* 1 sec */
549 static u_int64_t if_rxpoll_mode_holdtime
= IF_RXPOLL_MODE_HOLDTIME
;
550 SYSCTL_PROC(_net_link_generic_system
, OID_AUTO
, rxpoll_freeze_time
,
551 CTLTYPE_QUAD
| CTLFLAG_RW
| CTLFLAG_LOCKED
, &if_rxpoll_mode_holdtime
,
552 IF_RXPOLL_MODE_HOLDTIME
, sysctl_rxpoll_mode_holdtime
,
553 "Q", "input poll mode freeze time");
555 #define IF_RXPOLL_SAMPLETIME_MIN (1ULL * 1000 * 1000) /* 1 ms */
556 #define IF_RXPOLL_SAMPLETIME (10ULL * 1000 * 1000) /* 10 ms */
557 static u_int64_t if_rxpoll_sample_holdtime
= IF_RXPOLL_SAMPLETIME
;
558 SYSCTL_PROC(_net_link_generic_system
, OID_AUTO
, rxpoll_sample_time
,
559 CTLTYPE_QUAD
| CTLFLAG_RW
| CTLFLAG_LOCKED
, &if_rxpoll_sample_holdtime
,
560 IF_RXPOLL_SAMPLETIME
, sysctl_rxpoll_sample_holdtime
,
561 "Q", "input poll sampling time");
563 #define IF_RXPOLL_INTERVALTIME_MIN (1ULL * 1000) /* 1 us */
564 #define IF_RXPOLL_INTERVALTIME (1ULL * 1000 * 1000) /* 1 ms */
565 static u_int64_t if_rxpoll_interval_time
= IF_RXPOLL_INTERVALTIME
;
566 SYSCTL_PROC(_net_link_generic_system
, OID_AUTO
, rxpoll_interval_time
,
567 CTLTYPE_QUAD
| CTLFLAG_RW
| CTLFLAG_LOCKED
, &if_rxpoll_interval_time
,
568 IF_RXPOLL_INTERVALTIME
, sysctl_rxpoll_interval_time
,
569 "Q", "input poll interval (time)");
571 #define IF_RXPOLL_INTERVAL_PKTS 0 /* 0 (disabled) */
572 static u_int32_t if_rxpoll_interval_pkts
= IF_RXPOLL_INTERVAL_PKTS
;
573 SYSCTL_UINT(_net_link_generic_system
, OID_AUTO
, rxpoll_interval_pkts
,
574 CTLFLAG_RW
| CTLFLAG_LOCKED
, &if_rxpoll_interval_pkts
,
575 IF_RXPOLL_INTERVAL_PKTS
, "input poll interval (packets)");
577 #define IF_RXPOLL_WLOWAT 10
578 static u_int32_t if_rxpoll_wlowat
= IF_RXPOLL_WLOWAT
;
579 SYSCTL_PROC(_net_link_generic_system
, OID_AUTO
, rxpoll_wakeups_lowat
,
580 CTLTYPE_INT
| CTLFLAG_RW
| CTLFLAG_LOCKED
, &if_rxpoll_wlowat
,
581 IF_RXPOLL_WLOWAT
, sysctl_rxpoll_wlowat
,
582 "I", "input poll wakeup low watermark");
584 #define IF_RXPOLL_WHIWAT 100
585 static u_int32_t if_rxpoll_whiwat
= IF_RXPOLL_WHIWAT
;
586 SYSCTL_PROC(_net_link_generic_system
, OID_AUTO
, rxpoll_wakeups_hiwat
,
587 CTLTYPE_INT
| CTLFLAG_RW
| CTLFLAG_LOCKED
, &if_rxpoll_whiwat
,
588 IF_RXPOLL_WHIWAT
, sysctl_rxpoll_whiwat
,
589 "I", "input poll wakeup high watermark");
591 static u_int32_t if_rxpoll_max
= 0; /* 0 (automatic) */
592 SYSCTL_UINT(_net_link_generic_system
, OID_AUTO
, rxpoll_max
,
593 CTLFLAG_RW
| CTLFLAG_LOCKED
, &if_rxpoll_max
, 0,
594 "max packets per poll call");
596 static u_int32_t if_rxpoll
= 1;
597 SYSCTL_PROC(_net_link_generic_system
, OID_AUTO
, rxpoll
,
598 CTLTYPE_INT
| CTLFLAG_RW
| CTLFLAG_LOCKED
, &if_rxpoll
, 0,
599 sysctl_rxpoll
, "I", "enable opportunistic input polling");
601 #if TEST_INPUT_THREAD_TERMINATION
602 static u_int32_t if_input_thread_termination_spin
= 0;
603 SYSCTL_PROC(_net_link_generic_system
, OID_AUTO
, input_thread_termination_spin
,
604 CTLTYPE_INT
| CTLFLAG_RW
| CTLFLAG_LOCKED
,
605 &if_input_thread_termination_spin
, 0,
606 sysctl_input_thread_termination_spin
,
607 "I", "input thread termination spin limit");
608 #endif /* TEST_INPUT_THREAD_TERMINATION */
610 static u_int32_t cur_dlil_input_threads
= 0;
611 SYSCTL_UINT(_net_link_generic_system
, OID_AUTO
, dlil_input_threads
,
612 CTLFLAG_RD
| CTLFLAG_LOCKED
, &cur_dlil_input_threads
, 0,
613 "Current number of DLIL input threads");
615 #if IFNET_INPUT_SANITY_CHK
616 SYSCTL_UINT(_net_link_generic_system
, OID_AUTO
, dlil_input_sanity_check
,
617 CTLFLAG_RW
| CTLFLAG_LOCKED
, &dlil_input_sanity_check
, 0,
618 "Turn on sanity checking in DLIL input");
619 #endif /* IFNET_INPUT_SANITY_CHK */
621 static u_int32_t if_flowadv
= 1;
622 SYSCTL_UINT(_net_link_generic_system
, OID_AUTO
, flow_advisory
,
623 CTLFLAG_RW
| CTLFLAG_LOCKED
, &if_flowadv
, 1,
624 "enable flow-advisory mechanism");
626 static u_int32_t if_delaybased_queue
= 1;
627 SYSCTL_UINT(_net_link_generic_system
, OID_AUTO
, delaybased_queue
,
628 CTLFLAG_RW
| CTLFLAG_LOCKED
, &if_delaybased_queue
, 1,
629 "enable delay based dynamic queue sizing");
631 static uint64_t hwcksum_in_invalidated
= 0;
632 SYSCTL_QUAD(_net_link_generic_system
, OID_AUTO
,
633 hwcksum_in_invalidated
, CTLFLAG_RD
| CTLFLAG_LOCKED
,
634 &hwcksum_in_invalidated
, "inbound packets with invalidated hardware cksum");
636 uint32_t hwcksum_dbg
= 0;
637 SYSCTL_UINT(_net_link_generic_system
, OID_AUTO
, hwcksum_dbg
,
638 CTLFLAG_RW
| CTLFLAG_LOCKED
, &hwcksum_dbg
, 0,
639 "enable hardware cksum debugging");
641 u_int32_t ifnet_start_delayed
= 0;
642 SYSCTL_UINT(_net_link_generic_system
, OID_AUTO
, start_delayed
,
643 CTLFLAG_RW
| CTLFLAG_LOCKED
, &ifnet_start_delayed
, 0,
644 "number of times start was delayed");
646 u_int32_t ifnet_delay_start_disabled
= 0;
647 SYSCTL_UINT(_net_link_generic_system
, OID_AUTO
, start_delay_disabled
,
648 CTLFLAG_RW
| CTLFLAG_LOCKED
, &ifnet_delay_start_disabled
, 0,
649 "number of times start was delayed");
651 #define HWCKSUM_DBG_PARTIAL_FORCED 0x1 /* forced partial checksum */
652 #define HWCKSUM_DBG_PARTIAL_RXOFF_ADJ 0x2 /* adjust start offset */
653 #define HWCKSUM_DBG_FINALIZE_FORCED 0x10 /* forced finalize */
654 #define HWCKSUM_DBG_MASK \
655 (HWCKSUM_DBG_PARTIAL_FORCED | HWCKSUM_DBG_PARTIAL_RXOFF_ADJ | \
656 HWCKSUM_DBG_FINALIZE_FORCED)
658 static uint32_t hwcksum_dbg_mode
= 0;
659 SYSCTL_PROC(_net_link_generic_system
, OID_AUTO
, hwcksum_dbg_mode
,
660 CTLTYPE_INT
| CTLFLAG_RW
| CTLFLAG_LOCKED
, &hwcksum_dbg_mode
,
661 0, sysctl_hwcksum_dbg_mode
, "I", "hardware cksum debugging mode");
663 static uint64_t hwcksum_dbg_partial_forced
= 0;
664 SYSCTL_QUAD(_net_link_generic_system
, OID_AUTO
,
665 hwcksum_dbg_partial_forced
, CTLFLAG_RD
| CTLFLAG_LOCKED
,
666 &hwcksum_dbg_partial_forced
, "packets forced using partial cksum");
668 static uint64_t hwcksum_dbg_partial_forced_bytes
= 0;
669 SYSCTL_QUAD(_net_link_generic_system
, OID_AUTO
,
670 hwcksum_dbg_partial_forced_bytes
, CTLFLAG_RD
| CTLFLAG_LOCKED
,
671 &hwcksum_dbg_partial_forced_bytes
, "bytes forced using partial cksum");
673 static uint32_t hwcksum_dbg_partial_rxoff_forced
= 0;
674 SYSCTL_PROC(_net_link_generic_system
, OID_AUTO
,
675 hwcksum_dbg_partial_rxoff_forced
, CTLTYPE_INT
| CTLFLAG_RW
| CTLFLAG_LOCKED
,
676 &hwcksum_dbg_partial_rxoff_forced
, 0,
677 sysctl_hwcksum_dbg_partial_rxoff_forced
, "I",
678 "forced partial cksum rx offset");
680 static uint32_t hwcksum_dbg_partial_rxoff_adj
= 0;
681 SYSCTL_PROC(_net_link_generic_system
, OID_AUTO
, hwcksum_dbg_partial_rxoff_adj
,
682 CTLTYPE_INT
| CTLFLAG_RW
| CTLFLAG_LOCKED
, &hwcksum_dbg_partial_rxoff_adj
,
683 0, sysctl_hwcksum_dbg_partial_rxoff_adj
, "I",
684 "adjusted partial cksum rx offset");
686 static uint64_t hwcksum_dbg_verified
= 0;
687 SYSCTL_QUAD(_net_link_generic_system
, OID_AUTO
,
688 hwcksum_dbg_verified
, CTLFLAG_RD
| CTLFLAG_LOCKED
,
689 &hwcksum_dbg_verified
, "packets verified for having good checksum");
691 static uint64_t hwcksum_dbg_bad_cksum
= 0;
692 SYSCTL_QUAD(_net_link_generic_system
, OID_AUTO
,
693 hwcksum_dbg_bad_cksum
, CTLFLAG_RD
| CTLFLAG_LOCKED
,
694 &hwcksum_dbg_bad_cksum
, "packets with bad hardware calculated checksum");
696 static uint64_t hwcksum_dbg_bad_rxoff
= 0;
697 SYSCTL_QUAD(_net_link_generic_system
, OID_AUTO
,
698 hwcksum_dbg_bad_rxoff
, CTLFLAG_RD
| CTLFLAG_LOCKED
,
699 &hwcksum_dbg_bad_rxoff
, "packets with invalid rxoff");
701 static uint64_t hwcksum_dbg_adjusted
= 0;
702 SYSCTL_QUAD(_net_link_generic_system
, OID_AUTO
,
703 hwcksum_dbg_adjusted
, CTLFLAG_RD
| CTLFLAG_LOCKED
,
704 &hwcksum_dbg_adjusted
, "packets with rxoff adjusted");
706 static uint64_t hwcksum_dbg_finalized_hdr
= 0;
707 SYSCTL_QUAD(_net_link_generic_system
, OID_AUTO
,
708 hwcksum_dbg_finalized_hdr
, CTLFLAG_RD
| CTLFLAG_LOCKED
,
709 &hwcksum_dbg_finalized_hdr
, "finalized headers");
711 static uint64_t hwcksum_dbg_finalized_data
= 0;
712 SYSCTL_QUAD(_net_link_generic_system
, OID_AUTO
,
713 hwcksum_dbg_finalized_data
, CTLFLAG_RD
| CTLFLAG_LOCKED
,
714 &hwcksum_dbg_finalized_data
, "finalized payloads");
716 uint32_t hwcksum_tx
= 1;
717 SYSCTL_UINT(_net_link_generic_system
, OID_AUTO
, hwcksum_tx
,
718 CTLFLAG_RW
| CTLFLAG_LOCKED
, &hwcksum_tx
, 0,
719 "enable transmit hardware checksum offload");
721 uint32_t hwcksum_rx
= 1;
722 SYSCTL_UINT(_net_link_generic_system
, OID_AUTO
, hwcksum_rx
,
723 CTLFLAG_RW
| CTLFLAG_LOCKED
, &hwcksum_rx
, 0,
724 "enable receive hardware checksum offload");
726 SYSCTL_PROC(_net_link_generic_system
, OID_AUTO
, tx_chain_len_stats
,
727 CTLFLAG_RD
| CTLFLAG_LOCKED
, 0, 9,
728 sysctl_tx_chain_len_stats
, "S", "");
730 uint32_t tx_chain_len_count
= 0;
731 SYSCTL_UINT(_net_link_generic_system
, OID_AUTO
, tx_chain_len_count
,
732 CTLFLAG_RW
| CTLFLAG_LOCKED
, &tx_chain_len_count
, 0, "");
734 static uint32_t threshold_notify
= 1; /* enable/disable */
735 SYSCTL_UINT(_net_link_generic_system
, OID_AUTO
, threshold_notify
,
736 CTLFLAG_RW
| CTLFLAG_LOCKED
, &threshold_notify
, 0, "");
738 static uint32_t threshold_interval
= 2; /* in seconds */
739 SYSCTL_UINT(_net_link_generic_system
, OID_AUTO
, threshold_interval
,
740 CTLFLAG_RW
| CTLFLAG_LOCKED
, &threshold_interval
, 0, "");
742 #if (DEVELOPMENT || DEBUG)
743 static int sysctl_get_kao_frames SYSCTL_HANDLER_ARGS
;
744 SYSCTL_NODE(_net_link_generic_system
, OID_AUTO
, get_kao_frames
,
745 CTLFLAG_RD
| CTLFLAG_LOCKED
, sysctl_get_kao_frames
, "");
746 #endif /* DEVELOPMENT || DEBUG */
748 struct net_api_stats net_api_stats
;
749 SYSCTL_STRUCT(_net
, OID_AUTO
, api_stats
, CTLFLAG_RD
|CTLFLAG_LOCKED
,
750 &net_api_stats
, net_api_stats
, "");
753 unsigned int net_rxpoll
= 1;
754 unsigned int net_affinity
= 1;
755 static kern_return_t
dlil_affinity_set(struct thread
*, u_int32_t
);
757 extern u_int32_t inject_buckets
;
759 static lck_grp_attr_t
*dlil_grp_attributes
= NULL
;
760 static lck_attr_t
*dlil_lck_attributes
= NULL
;
762 /* DLIL data threshold thread call */
763 static void dlil_dt_tcall_fn(thread_call_param_t
, thread_call_param_t
);
765 static void dlil_mit_tcall_fn(thread_call_param_t
, thread_call_param_t
);
767 uint32_t dlil_rcv_mit_pkts_min
= 5;
768 uint32_t dlil_rcv_mit_pkts_max
= 64;
769 uint32_t dlil_rcv_mit_interval
= (500 * 1000);
771 #if (DEVELOPMENT || DEBUG)
772 SYSCTL_UINT(_net_link_generic_system
, OID_AUTO
, rcv_mit_pkts_min
,
773 CTLFLAG_RW
| CTLFLAG_LOCKED
, &dlil_rcv_mit_pkts_min
, 0, "");
774 SYSCTL_UINT(_net_link_generic_system
, OID_AUTO
, rcv_mit_pkts_max
,
775 CTLFLAG_RW
| CTLFLAG_LOCKED
, &dlil_rcv_mit_pkts_max
, 0, "");
776 SYSCTL_UINT(_net_link_generic_system
, OID_AUTO
, rcv_mit_interval
,
777 CTLFLAG_RW
| CTLFLAG_LOCKED
, &dlil_rcv_mit_interval
, 0, "");
778 #endif /* DEVELOPMENT || DEBUG */
781 #define DLIL_INPUT_CHECK(m, ifp) { \
782 struct ifnet *_rcvif = mbuf_pkthdr_rcvif(m); \
783 if (_rcvif == NULL || (ifp != lo_ifp && _rcvif != ifp) || \
784 !(mbuf_flags(m) & MBUF_PKTHDR)) { \
785 panic_plain("%s: invalid mbuf %p\n", __func__, m); \
790 #define DLIL_EWMA(old, new, decay) do { \
792 if ((_avg = (old)) > 0) \
793 _avg = (((_avg << (decay)) - _avg) + (new)) >> (decay); \
799 #define MBPS (1ULL * 1000 * 1000)
800 #define GBPS (MBPS * 1000)
802 struct rxpoll_time_tbl
{
803 u_int64_t speed
; /* downlink speed */
804 u_int32_t plowat
; /* packets low watermark */
805 u_int32_t phiwat
; /* packets high watermark */
806 u_int32_t blowat
; /* bytes low watermark */
807 u_int32_t bhiwat
; /* bytes high watermark */
810 static struct rxpoll_time_tbl rxpoll_tbl
[] = {
811 { 10 * MBPS
, 2, 8, (1 * 1024), (6 * 1024) },
812 { 100 * MBPS
, 10, 40, (4 * 1024), (64 * 1024) },
813 { 1 * GBPS
, 10, 40, (4 * 1024), (64 * 1024) },
814 { 10 * GBPS
, 10, 40, (4 * 1024), (64 * 1024) },
815 { 100 * GBPS
, 10, 40, (4 * 1024), (64 * 1024) },
820 proto_hash_value(u_int32_t protocol_family
)
823 * dlil_proto_unplumb_all() depends on the mapping between
824 * the hash bucket index and the protocol family defined
825 * here; future changes must be applied there as well.
827 switch (protocol_family
) {
841 * Caller must already be holding ifnet lock.
843 static struct if_proto
*
844 find_attached_proto(struct ifnet
*ifp
, u_int32_t protocol_family
)
846 struct if_proto
*proto
= NULL
;
847 u_int32_t i
= proto_hash_value(protocol_family
);
849 ifnet_lock_assert(ifp
, IFNET_LCK_ASSERT_OWNED
);
851 if (ifp
->if_proto_hash
!= NULL
)
852 proto
= SLIST_FIRST(&ifp
->if_proto_hash
[i
]);
854 while (proto
!= NULL
&& proto
->protocol_family
!= protocol_family
)
855 proto
= SLIST_NEXT(proto
, next_hash
);
864 if_proto_ref(struct if_proto
*proto
)
866 atomic_add_32(&proto
->refcount
, 1);
869 extern void if_rtproto_del(struct ifnet
*ifp
, int protocol
);
872 if_proto_free(struct if_proto
*proto
)
875 struct ifnet
*ifp
= proto
->ifp
;
876 u_int32_t proto_family
= proto
->protocol_family
;
877 struct kev_dl_proto_data ev_pr_data
;
879 oldval
= atomic_add_32_ov(&proto
->refcount
, -1);
883 /* No more reference on this, protocol must have been detached */
884 VERIFY(proto
->detached
);
886 if (proto
->proto_kpi
== kProtoKPI_v1
) {
887 if (proto
->kpi
.v1
.detached
)
888 proto
->kpi
.v1
.detached(ifp
, proto
->protocol_family
);
890 if (proto
->proto_kpi
== kProtoKPI_v2
) {
891 if (proto
->kpi
.v2
.detached
)
892 proto
->kpi
.v2
.detached(ifp
, proto
->protocol_family
);
896 * Cleanup routes that may still be in the routing table for that
897 * interface/protocol pair.
899 if_rtproto_del(ifp
, proto_family
);
902 * The reserved field carries the number of protocol still attached
903 * (subject to change)
905 ifnet_lock_shared(ifp
);
906 ev_pr_data
.proto_family
= proto_family
;
907 ev_pr_data
.proto_remaining_count
= dlil_ifp_protolist(ifp
, NULL
, 0);
908 ifnet_lock_done(ifp
);
910 dlil_post_msg(ifp
, KEV_DL_SUBCLASS
, KEV_DL_PROTO_DETACHED
,
911 (struct net_event_data
*)&ev_pr_data
,
912 sizeof (struct kev_dl_proto_data
));
914 if (ev_pr_data
.proto_remaining_count
== 0) {
916 * The protocol count has gone to zero, mark the interface down.
917 * This used to be done by configd.KernelEventMonitor, but that
918 * is inherently prone to races (rdar://problem/30810208).
920 (void) ifnet_set_flags(ifp
, 0, IFF_UP
);
921 (void) ifnet_ioctl(ifp
, 0, SIOCSIFFLAGS
, NULL
);
922 dlil_post_sifflags_msg(ifp
);
925 zfree(dlif_proto_zone
, proto
);
928 __private_extern__
void
929 ifnet_lock_assert(struct ifnet
*ifp
, ifnet_lock_assert_t what
)
934 unsigned int type
= 0;
938 case IFNET_LCK_ASSERT_EXCLUSIVE
:
939 type
= LCK_RW_ASSERT_EXCLUSIVE
;
942 case IFNET_LCK_ASSERT_SHARED
:
943 type
= LCK_RW_ASSERT_SHARED
;
946 case IFNET_LCK_ASSERT_OWNED
:
947 type
= LCK_RW_ASSERT_HELD
;
950 case IFNET_LCK_ASSERT_NOTOWNED
:
951 /* nothing to do here for RW lock; bypass assert */
956 panic("bad ifnet assert type: %d", what
);
960 LCK_RW_ASSERT(&ifp
->if_lock
, type
);
963 __private_extern__
void
964 ifnet_lock_shared(struct ifnet
*ifp
)
966 lck_rw_lock_shared(&ifp
->if_lock
);
969 __private_extern__
void
970 ifnet_lock_exclusive(struct ifnet
*ifp
)
972 lck_rw_lock_exclusive(&ifp
->if_lock
);
975 __private_extern__
void
976 ifnet_lock_done(struct ifnet
*ifp
)
978 lck_rw_done(&ifp
->if_lock
);
982 __private_extern__
void
983 if_inetdata_lock_shared(struct ifnet
*ifp
)
985 lck_rw_lock_shared(&ifp
->if_inetdata_lock
);
988 __private_extern__
void
989 if_inetdata_lock_exclusive(struct ifnet
*ifp
)
991 lck_rw_lock_exclusive(&ifp
->if_inetdata_lock
);
994 __private_extern__
void
995 if_inetdata_lock_done(struct ifnet
*ifp
)
997 lck_rw_done(&ifp
->if_inetdata_lock
);
1002 __private_extern__
void
1003 if_inet6data_lock_shared(struct ifnet
*ifp
)
1005 lck_rw_lock_shared(&ifp
->if_inet6data_lock
);
1008 __private_extern__
void
1009 if_inet6data_lock_exclusive(struct ifnet
*ifp
)
1011 lck_rw_lock_exclusive(&ifp
->if_inet6data_lock
);
1014 __private_extern__
void
1015 if_inet6data_lock_done(struct ifnet
*ifp
)
1017 lck_rw_done(&ifp
->if_inet6data_lock
);
1021 __private_extern__
void
1022 ifnet_head_lock_shared(void)
1024 lck_rw_lock_shared(&ifnet_head_lock
);
1027 __private_extern__
void
1028 ifnet_head_lock_exclusive(void)
1030 lck_rw_lock_exclusive(&ifnet_head_lock
);
1033 __private_extern__
void
1034 ifnet_head_done(void)
1036 lck_rw_done(&ifnet_head_lock
);
1039 __private_extern__
void
1040 ifnet_head_assert_exclusive(void)
1042 LCK_RW_ASSERT(&ifnet_head_lock
, LCK_RW_ASSERT_EXCLUSIVE
);
1046 * dlil_ifp_protolist
1047 * - get the list of protocols attached to the interface, or just the number
1048 * of attached protocols
1049 * - if the number returned is greater than 'list_count', truncation occurred
1052 * - caller must already be holding ifnet lock.
1055 dlil_ifp_protolist(struct ifnet
*ifp
, protocol_family_t
*list
,
1056 u_int32_t list_count
)
1058 u_int32_t count
= 0;
1061 ifnet_lock_assert(ifp
, IFNET_LCK_ASSERT_OWNED
);
1063 if (ifp
->if_proto_hash
== NULL
)
1066 for (i
= 0; i
< PROTO_HASH_SLOTS
; i
++) {
1067 struct if_proto
*proto
;
1068 SLIST_FOREACH(proto
, &ifp
->if_proto_hash
[i
], next_hash
) {
1069 if (list
!= NULL
&& count
< list_count
) {
1070 list
[count
] = proto
->protocol_family
;
1079 __private_extern__ u_int32_t
1080 if_get_protolist(struct ifnet
* ifp
, u_int32_t
*protolist
, u_int32_t count
)
1082 ifnet_lock_shared(ifp
);
1083 count
= dlil_ifp_protolist(ifp
, protolist
, count
);
1084 ifnet_lock_done(ifp
);
1088 __private_extern__
void
1089 if_free_protolist(u_int32_t
*list
)
1091 _FREE(list
, M_TEMP
);
1094 __private_extern__
void
1095 dlil_post_msg(struct ifnet
*ifp
, u_int32_t event_subclass
,
1096 u_int32_t event_code
, struct net_event_data
*event_data
,
1097 u_int32_t event_data_len
)
1099 struct net_event_data ev_data
;
1100 struct kev_msg ev_msg
;
1102 bzero(&ev_msg
, sizeof (ev_msg
));
1103 bzero(&ev_data
, sizeof (ev_data
));
1105 * a net event always starts with a net_event_data structure
1106 * but the caller can generate a simple net event or
1107 * provide a longer event structure to post
1109 ev_msg
.vendor_code
= KEV_VENDOR_APPLE
;
1110 ev_msg
.kev_class
= KEV_NETWORK_CLASS
;
1111 ev_msg
.kev_subclass
= event_subclass
;
1112 ev_msg
.event_code
= event_code
;
1114 if (event_data
== NULL
) {
1115 event_data
= &ev_data
;
1116 event_data_len
= sizeof (struct net_event_data
);
1119 strlcpy(&event_data
->if_name
[0], ifp
->if_name
, IFNAMSIZ
);
1120 event_data
->if_family
= ifp
->if_family
;
1121 event_data
->if_unit
= (u_int32_t
)ifp
->if_unit
;
1123 ev_msg
.dv
[0].data_length
= event_data_len
;
1124 ev_msg
.dv
[0].data_ptr
= event_data
;
1125 ev_msg
.dv
[1].data_length
= 0;
1127 /* Don't update interface generation for quality and RRC state changess */
1128 bool update_generation
= (event_subclass
!= KEV_DL_SUBCLASS
||
1129 (event_code
!= KEV_DL_LINK_QUALITY_METRIC_CHANGED
&&
1130 event_code
!= KEV_DL_RRC_STATE_CHANGED
));
1132 dlil_event_internal(ifp
, &ev_msg
, update_generation
);
1135 __private_extern__
int
1136 dlil_alloc_local_stats(struct ifnet
*ifp
)
1139 void *buf
, *base
, **pbuf
;
1144 if (ifp
->if_tcp_stat
== NULL
&& ifp
->if_udp_stat
== NULL
) {
1145 /* allocate tcpstat_local structure */
1146 buf
= zalloc(dlif_tcpstat_zone
);
1151 bzero(buf
, dlif_tcpstat_bufsize
);
1153 /* Get the 64-bit aligned base address for this object */
1154 base
= (void *)P2ROUNDUP((intptr_t)buf
+ sizeof (u_int64_t
),
1155 sizeof (u_int64_t
));
1156 VERIFY(((intptr_t)base
+ dlif_tcpstat_size
) <=
1157 ((intptr_t)buf
+ dlif_tcpstat_bufsize
));
1160 * Wind back a pointer size from the aligned base and
1161 * save the original address so we can free it later.
1163 pbuf
= (void **)((intptr_t)base
- sizeof (void *));
1165 ifp
->if_tcp_stat
= base
;
1167 /* allocate udpstat_local structure */
1168 buf
= zalloc(dlif_udpstat_zone
);
1173 bzero(buf
, dlif_udpstat_bufsize
);
1175 /* Get the 64-bit aligned base address for this object */
1176 base
= (void *)P2ROUNDUP((intptr_t)buf
+ sizeof (u_int64_t
),
1177 sizeof (u_int64_t
));
1178 VERIFY(((intptr_t)base
+ dlif_udpstat_size
) <=
1179 ((intptr_t)buf
+ dlif_udpstat_bufsize
));
1182 * Wind back a pointer size from the aligned base and
1183 * save the original address so we can free it later.
1185 pbuf
= (void **)((intptr_t)base
- sizeof (void *));
1187 ifp
->if_udp_stat
= base
;
1189 VERIFY(IS_P2ALIGNED(ifp
->if_tcp_stat
, sizeof (u_int64_t
)) &&
1190 IS_P2ALIGNED(ifp
->if_udp_stat
, sizeof (u_int64_t
)));
1195 if (ifp
->if_ipv4_stat
== NULL
) {
1196 MALLOC(ifp
->if_ipv4_stat
, struct if_tcp_ecn_stat
*,
1197 sizeof (struct if_tcp_ecn_stat
), M_TEMP
, M_WAITOK
|M_ZERO
);
1198 if (ifp
->if_ipv4_stat
== NULL
) {
1204 if (ifp
->if_ipv6_stat
== NULL
) {
1205 MALLOC(ifp
->if_ipv6_stat
, struct if_tcp_ecn_stat
*,
1206 sizeof (struct if_tcp_ecn_stat
), M_TEMP
, M_WAITOK
|M_ZERO
);
1207 if (ifp
->if_ipv6_stat
== NULL
) {
1214 if (ifp
->if_tcp_stat
!= NULL
) {
1216 ((intptr_t)ifp
->if_tcp_stat
- sizeof (void *));
1217 zfree(dlif_tcpstat_zone
, *pbuf
);
1218 ifp
->if_tcp_stat
= NULL
;
1220 if (ifp
->if_udp_stat
!= NULL
) {
1222 ((intptr_t)ifp
->if_udp_stat
- sizeof (void *));
1223 zfree(dlif_udpstat_zone
, *pbuf
);
1224 ifp
->if_udp_stat
= NULL
;
1226 if (ifp
->if_ipv4_stat
!= NULL
) {
1227 FREE(ifp
->if_ipv4_stat
, M_TEMP
);
1228 ifp
->if_ipv4_stat
= NULL
;
1230 if (ifp
->if_ipv6_stat
!= NULL
) {
1231 FREE(ifp
->if_ipv6_stat
, M_TEMP
);
1232 ifp
->if_ipv6_stat
= NULL
;
1240 dlil_create_input_thread(ifnet_t ifp
, struct dlil_threading_info
*inp
)
1242 thread_continue_t func
;
1246 /* NULL ifp indicates the main input thread, called at dlil_init time */
1248 func
= dlil_main_input_thread_func
;
1249 VERIFY(inp
== dlil_main_input_thread
);
1250 (void) strlcat(inp
->input_name
,
1251 "main_input", DLIL_THREADNAME_LEN
);
1252 } else if (net_rxpoll
&& (ifp
->if_eflags
& IFEF_RXPOLL
)) {
1253 func
= dlil_rxpoll_input_thread_func
;
1254 VERIFY(inp
!= dlil_main_input_thread
);
1255 (void) snprintf(inp
->input_name
, DLIL_THREADNAME_LEN
,
1256 "%s_input_poll", if_name(ifp
));
1258 func
= dlil_input_thread_func
;
1259 VERIFY(inp
!= dlil_main_input_thread
);
1260 (void) snprintf(inp
->input_name
, DLIL_THREADNAME_LEN
,
1261 "%s_input", if_name(ifp
));
1263 VERIFY(inp
->input_thr
== THREAD_NULL
);
1265 inp
->lck_grp
= lck_grp_alloc_init(inp
->input_name
, dlil_grp_attributes
);
1266 lck_mtx_init(&inp
->input_lck
, inp
->lck_grp
, dlil_lck_attributes
);
1268 inp
->mode
= IFNET_MODEL_INPUT_POLL_OFF
;
1269 inp
->ifp
= ifp
; /* NULL for main input thread */
1271 net_timerclear(&inp
->mode_holdtime
);
1272 net_timerclear(&inp
->mode_lasttime
);
1273 net_timerclear(&inp
->sample_holdtime
);
1274 net_timerclear(&inp
->sample_lasttime
);
1275 net_timerclear(&inp
->dbg_lasttime
);
1278 * For interfaces that support opportunistic polling, set the
1279 * low and high watermarks for outstanding inbound packets/bytes.
1280 * Also define freeze times for transitioning between modes
1281 * and updating the average.
1283 if (ifp
!= NULL
&& net_rxpoll
&& (ifp
->if_eflags
& IFEF_RXPOLL
)) {
1284 limit
= MAX(if_rcvq_maxlen
, IF_RCVQ_MINLEN
);
1285 (void) dlil_rxpoll_set_params(ifp
, NULL
, FALSE
);
1287 limit
= (u_int32_t
)-1;
1290 _qinit(&inp
->rcvq_pkts
, Q_DROPTAIL
, limit
, QP_MBUF
);
1291 if (inp
== dlil_main_input_thread
) {
1292 struct dlil_main_threading_info
*inpm
=
1293 (struct dlil_main_threading_info
*)inp
;
1294 _qinit(&inpm
->lo_rcvq_pkts
, Q_DROPTAIL
, limit
, QP_MBUF
);
1297 error
= kernel_thread_start(func
, inp
, &inp
->input_thr
);
1298 if (error
== KERN_SUCCESS
) {
1299 ml_thread_policy(inp
->input_thr
, MACHINE_GROUP
,
1300 (MACHINE_NETWORK_GROUP
|MACHINE_NETWORK_NETISR
));
1302 * We create an affinity set so that the matching workloop
1303 * thread or the starter thread (for loopback) can be
1304 * scheduled on the same processor set as the input thread.
1307 struct thread
*tp
= inp
->input_thr
;
1310 * Randomize to reduce the probability
1311 * of affinity tag namespace collision.
1313 read_frandom(&tag
, sizeof (tag
));
1314 if (dlil_affinity_set(tp
, tag
) == KERN_SUCCESS
) {
1315 thread_reference(tp
);
1317 inp
->net_affinity
= TRUE
;
1320 } else if (inp
== dlil_main_input_thread
) {
1321 panic_plain("%s: couldn't create main input thread", __func__
);
1324 panic_plain("%s: couldn't create %s input thread", __func__
,
1328 OSAddAtomic(1, &cur_dlil_input_threads
);
1333 #if TEST_INPUT_THREAD_TERMINATION
1335 sysctl_input_thread_termination_spin SYSCTL_HANDLER_ARGS
1337 #pragma unused(arg1, arg2)
1341 i
= if_input_thread_termination_spin
;
1343 err
= sysctl_handle_int(oidp
, &i
, 0, req
);
1344 if (err
!= 0 || req
->newptr
== USER_ADDR_NULL
)
1347 if (net_rxpoll
== 0)
1350 if_input_thread_termination_spin
= i
;
1353 #endif /* TEST_INPUT_THREAD_TERMINATION */
1356 dlil_clean_threading_info(struct dlil_threading_info
*inp
)
1358 lck_mtx_destroy(&inp
->input_lck
, inp
->lck_grp
);
1359 lck_grp_free(inp
->lck_grp
);
1361 inp
->input_waiting
= 0;
1363 bzero(inp
->input_name
, sizeof (inp
->input_name
));
1365 VERIFY(qhead(&inp
->rcvq_pkts
) == NULL
&& qempty(&inp
->rcvq_pkts
));
1366 qlimit(&inp
->rcvq_pkts
) = 0;
1367 bzero(&inp
->stats
, sizeof (inp
->stats
));
1369 VERIFY(!inp
->net_affinity
);
1370 inp
->input_thr
= THREAD_NULL
;
1371 VERIFY(inp
->wloop_thr
== THREAD_NULL
);
1372 VERIFY(inp
->poll_thr
== THREAD_NULL
);
1373 VERIFY(inp
->tag
== 0);
1375 inp
->mode
= IFNET_MODEL_INPUT_POLL_OFF
;
1376 bzero(&inp
->tstats
, sizeof (inp
->tstats
));
1377 bzero(&inp
->pstats
, sizeof (inp
->pstats
));
1378 bzero(&inp
->sstats
, sizeof (inp
->sstats
));
1380 net_timerclear(&inp
->mode_holdtime
);
1381 net_timerclear(&inp
->mode_lasttime
);
1382 net_timerclear(&inp
->sample_holdtime
);
1383 net_timerclear(&inp
->sample_lasttime
);
1384 net_timerclear(&inp
->dbg_lasttime
);
1386 #if IFNET_INPUT_SANITY_CHK
1387 inp
->input_mbuf_cnt
= 0;
1388 #endif /* IFNET_INPUT_SANITY_CHK */
1392 dlil_terminate_input_thread(struct dlil_threading_info
*inp
)
1394 struct ifnet
*ifp
= inp
->ifp
;
1396 VERIFY(current_thread() == inp
->input_thr
);
1397 VERIFY(inp
!= dlil_main_input_thread
);
1399 OSAddAtomic(-1, &cur_dlil_input_threads
);
1401 #if TEST_INPUT_THREAD_TERMINATION
1402 { /* do something useless that won't get optimized away */
1404 for (uint32_t i
= 0;
1405 i
< if_input_thread_termination_spin
;
1409 printf("the value is %d\n", v
);
1411 #endif /* TEST_INPUT_THREAD_TERMINATION */
1413 lck_mtx_lock_spin(&inp
->input_lck
);
1414 VERIFY((inp
->input_waiting
& DLIL_INPUT_TERMINATE
) != 0);
1415 inp
->input_waiting
|= DLIL_INPUT_TERMINATE_COMPLETE
;
1416 wakeup_one((caddr_t
)&inp
->input_waiting
);
1417 lck_mtx_unlock(&inp
->input_lck
);
1419 /* for the extra refcnt from kernel_thread_start() */
1420 thread_deallocate(current_thread());
1423 printf("%s: input thread terminated\n",
1427 /* this is the end */
1428 thread_terminate(current_thread());
1432 static kern_return_t
1433 dlil_affinity_set(struct thread
*tp
, u_int32_t tag
)
1435 thread_affinity_policy_data_t policy
;
1437 bzero(&policy
, sizeof (policy
));
1438 policy
.affinity_tag
= tag
;
1439 return (thread_policy_set(tp
, THREAD_AFFINITY_POLICY
,
1440 (thread_policy_t
)&policy
, THREAD_AFFINITY_POLICY_COUNT
));
1446 thread_t thread
= THREAD_NULL
;
1449 * The following fields must be 64-bit aligned for atomic operations.
1451 IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets
);
1452 IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors
);
1453 IF_DATA_REQUIRE_ALIGNED_64(ifi_opackets
);
1454 IF_DATA_REQUIRE_ALIGNED_64(ifi_oerrors
);
1455 IF_DATA_REQUIRE_ALIGNED_64(ifi_collisions
);
1456 IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes
);
1457 IF_DATA_REQUIRE_ALIGNED_64(ifi_obytes
);
1458 IF_DATA_REQUIRE_ALIGNED_64(ifi_imcasts
);
1459 IF_DATA_REQUIRE_ALIGNED_64(ifi_omcasts
);
1460 IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops
);
1461 IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto
);
1462 IF_DATA_REQUIRE_ALIGNED_64(ifi_alignerrs
);
1463 IF_DATA_REQUIRE_ALIGNED_64(ifi_dt_bytes
);
1464 IF_DATA_REQUIRE_ALIGNED_64(ifi_fpackets
);
1465 IF_DATA_REQUIRE_ALIGNED_64(ifi_fbytes
);
1467 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets
);
1468 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors
);
1469 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_opackets
);
1470 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_oerrors
);
1471 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_collisions
);
1472 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes
);
1473 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_obytes
);
1474 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_imcasts
);
1475 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_omcasts
);
1476 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops
);
1477 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto
);
1478 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_alignerrs
);
1479 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_dt_bytes
);
1480 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_fpackets
);
1481 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_fbytes
);
1484 * These IF_HWASSIST_ flags must be equal to their IFNET_* counterparts.
1486 _CASSERT(IF_HWASSIST_CSUM_IP
== IFNET_CSUM_IP
);
1487 _CASSERT(IF_HWASSIST_CSUM_TCP
== IFNET_CSUM_TCP
);
1488 _CASSERT(IF_HWASSIST_CSUM_UDP
== IFNET_CSUM_UDP
);
1489 _CASSERT(IF_HWASSIST_CSUM_IP_FRAGS
== IFNET_CSUM_FRAGMENT
);
1490 _CASSERT(IF_HWASSIST_CSUM_FRAGMENT
== IFNET_IP_FRAGMENT
);
1491 _CASSERT(IF_HWASSIST_CSUM_TCPIPV6
== IFNET_CSUM_TCPIPV6
);
1492 _CASSERT(IF_HWASSIST_CSUM_UDPIPV6
== IFNET_CSUM_UDPIPV6
);
1493 _CASSERT(IF_HWASSIST_CSUM_FRAGMENT_IPV6
== IFNET_IPV6_FRAGMENT
);
1494 _CASSERT(IF_HWASSIST_CSUM_PARTIAL
== IFNET_CSUM_PARTIAL
);
1495 _CASSERT(IF_HWASSIST_CSUM_ZERO_INVERT
== IFNET_CSUM_ZERO_INVERT
);
1496 _CASSERT(IF_HWASSIST_VLAN_TAGGING
== IFNET_VLAN_TAGGING
);
1497 _CASSERT(IF_HWASSIST_VLAN_MTU
== IFNET_VLAN_MTU
);
1498 _CASSERT(IF_HWASSIST_TSO_V4
== IFNET_TSO_IPV4
);
1499 _CASSERT(IF_HWASSIST_TSO_V6
== IFNET_TSO_IPV6
);
1502 * ... as well as the mbuf checksum flags counterparts.
1504 _CASSERT(CSUM_IP
== IF_HWASSIST_CSUM_IP
);
1505 _CASSERT(CSUM_TCP
== IF_HWASSIST_CSUM_TCP
);
1506 _CASSERT(CSUM_UDP
== IF_HWASSIST_CSUM_UDP
);
1507 _CASSERT(CSUM_IP_FRAGS
== IF_HWASSIST_CSUM_IP_FRAGS
);
1508 _CASSERT(CSUM_FRAGMENT
== IF_HWASSIST_CSUM_FRAGMENT
);
1509 _CASSERT(CSUM_TCPIPV6
== IF_HWASSIST_CSUM_TCPIPV6
);
1510 _CASSERT(CSUM_UDPIPV6
== IF_HWASSIST_CSUM_UDPIPV6
);
1511 _CASSERT(CSUM_FRAGMENT_IPV6
== IF_HWASSIST_CSUM_FRAGMENT_IPV6
);
1512 _CASSERT(CSUM_PARTIAL
== IF_HWASSIST_CSUM_PARTIAL
);
1513 _CASSERT(CSUM_ZERO_INVERT
== IF_HWASSIST_CSUM_ZERO_INVERT
);
1514 _CASSERT(CSUM_VLAN_TAG_VALID
== IF_HWASSIST_VLAN_TAGGING
);
1517 * Make sure we have at least IF_LLREACH_MAXLEN in the llreach info.
1519 _CASSERT(IF_LLREACH_MAXLEN
<= IF_LLREACHINFO_ADDRLEN
);
1520 _CASSERT(IFNET_LLREACHINFO_ADDRLEN
== IF_LLREACHINFO_ADDRLEN
);
1522 _CASSERT(IFRLOGF_DLIL
== IFNET_LOGF_DLIL
);
1523 _CASSERT(IFRLOGF_FAMILY
== IFNET_LOGF_FAMILY
);
1524 _CASSERT(IFRLOGF_DRIVER
== IFNET_LOGF_DRIVER
);
1525 _CASSERT(IFRLOGF_FIRMWARE
== IFNET_LOGF_FIRMWARE
);
1527 _CASSERT(IFRLOGCAT_CONNECTIVITY
== IFNET_LOGCAT_CONNECTIVITY
);
1528 _CASSERT(IFRLOGCAT_QUALITY
== IFNET_LOGCAT_QUALITY
);
1529 _CASSERT(IFRLOGCAT_PERFORMANCE
== IFNET_LOGCAT_PERFORMANCE
);
1531 _CASSERT(IFRTYPE_FAMILY_ANY
== IFNET_FAMILY_ANY
);
1532 _CASSERT(IFRTYPE_FAMILY_LOOPBACK
== IFNET_FAMILY_LOOPBACK
);
1533 _CASSERT(IFRTYPE_FAMILY_ETHERNET
== IFNET_FAMILY_ETHERNET
);
1534 _CASSERT(IFRTYPE_FAMILY_SLIP
== IFNET_FAMILY_SLIP
);
1535 _CASSERT(IFRTYPE_FAMILY_TUN
== IFNET_FAMILY_TUN
);
1536 _CASSERT(IFRTYPE_FAMILY_VLAN
== IFNET_FAMILY_VLAN
);
1537 _CASSERT(IFRTYPE_FAMILY_PPP
== IFNET_FAMILY_PPP
);
1538 _CASSERT(IFRTYPE_FAMILY_PVC
== IFNET_FAMILY_PVC
);
1539 _CASSERT(IFRTYPE_FAMILY_DISC
== IFNET_FAMILY_DISC
);
1540 _CASSERT(IFRTYPE_FAMILY_MDECAP
== IFNET_FAMILY_MDECAP
);
1541 _CASSERT(IFRTYPE_FAMILY_GIF
== IFNET_FAMILY_GIF
);
1542 _CASSERT(IFRTYPE_FAMILY_FAITH
== IFNET_FAMILY_FAITH
);
1543 _CASSERT(IFRTYPE_FAMILY_STF
== IFNET_FAMILY_STF
);
1544 _CASSERT(IFRTYPE_FAMILY_FIREWIRE
== IFNET_FAMILY_FIREWIRE
);
1545 _CASSERT(IFRTYPE_FAMILY_BOND
== IFNET_FAMILY_BOND
);
1546 _CASSERT(IFRTYPE_FAMILY_CELLULAR
== IFNET_FAMILY_CELLULAR
);
1548 _CASSERT(IFRTYPE_SUBFAMILY_ANY
== IFNET_SUBFAMILY_ANY
);
1549 _CASSERT(IFRTYPE_SUBFAMILY_USB
== IFNET_SUBFAMILY_USB
);
1550 _CASSERT(IFRTYPE_SUBFAMILY_BLUETOOTH
== IFNET_SUBFAMILY_BLUETOOTH
);
1551 _CASSERT(IFRTYPE_SUBFAMILY_WIFI
== IFNET_SUBFAMILY_WIFI
);
1552 _CASSERT(IFRTYPE_SUBFAMILY_THUNDERBOLT
== IFNET_SUBFAMILY_THUNDERBOLT
);
1553 _CASSERT(IFRTYPE_SUBFAMILY_RESERVED
== IFNET_SUBFAMILY_RESERVED
);
1554 _CASSERT(IFRTYPE_SUBFAMILY_INTCOPROC
== IFNET_SUBFAMILY_INTCOPROC
);
1556 _CASSERT(DLIL_MODIDLEN
== IFNET_MODIDLEN
);
1557 _CASSERT(DLIL_MODARGLEN
== IFNET_MODARGLEN
);
1559 PE_parse_boot_argn("net_affinity", &net_affinity
,
1560 sizeof (net_affinity
));
1562 PE_parse_boot_argn("net_rxpoll", &net_rxpoll
, sizeof (net_rxpoll
));
1564 PE_parse_boot_argn("net_rtref", &net_rtref
, sizeof (net_rtref
));
1566 PE_parse_boot_argn("ifnet_debug", &ifnet_debug
, sizeof (ifnet_debug
));
1568 dlif_size
= (ifnet_debug
== 0) ? sizeof (struct dlil_ifnet
) :
1569 sizeof (struct dlil_ifnet_dbg
);
1570 /* Enforce 64-bit alignment for dlil_ifnet structure */
1571 dlif_bufsize
= dlif_size
+ sizeof (void *) + sizeof (u_int64_t
);
1572 dlif_bufsize
= P2ROUNDUP(dlif_bufsize
, sizeof (u_int64_t
));
1573 dlif_zone
= zinit(dlif_bufsize
, DLIF_ZONE_MAX
* dlif_bufsize
,
1575 if (dlif_zone
== NULL
) {
1576 panic_plain("%s: failed allocating %s", __func__
,
1580 zone_change(dlif_zone
, Z_EXPAND
, TRUE
);
1581 zone_change(dlif_zone
, Z_CALLERACCT
, FALSE
);
1583 dlif_filt_size
= sizeof (struct ifnet_filter
);
1584 dlif_filt_zone
= zinit(dlif_filt_size
,
1585 DLIF_FILT_ZONE_MAX
* dlif_filt_size
, 0, DLIF_FILT_ZONE_NAME
);
1586 if (dlif_filt_zone
== NULL
) {
1587 panic_plain("%s: failed allocating %s", __func__
,
1588 DLIF_FILT_ZONE_NAME
);
1591 zone_change(dlif_filt_zone
, Z_EXPAND
, TRUE
);
1592 zone_change(dlif_filt_zone
, Z_CALLERACCT
, FALSE
);
1594 dlif_phash_size
= sizeof (struct proto_hash_entry
) * PROTO_HASH_SLOTS
;
1595 dlif_phash_zone
= zinit(dlif_phash_size
,
1596 DLIF_PHASH_ZONE_MAX
* dlif_phash_size
, 0, DLIF_PHASH_ZONE_NAME
);
1597 if (dlif_phash_zone
== NULL
) {
1598 panic_plain("%s: failed allocating %s", __func__
,
1599 DLIF_PHASH_ZONE_NAME
);
1602 zone_change(dlif_phash_zone
, Z_EXPAND
, TRUE
);
1603 zone_change(dlif_phash_zone
, Z_CALLERACCT
, FALSE
);
1605 dlif_proto_size
= sizeof (struct if_proto
);
1606 dlif_proto_zone
= zinit(dlif_proto_size
,
1607 DLIF_PROTO_ZONE_MAX
* dlif_proto_size
, 0, DLIF_PROTO_ZONE_NAME
);
1608 if (dlif_proto_zone
== NULL
) {
1609 panic_plain("%s: failed allocating %s", __func__
,
1610 DLIF_PROTO_ZONE_NAME
);
1613 zone_change(dlif_proto_zone
, Z_EXPAND
, TRUE
);
1614 zone_change(dlif_proto_zone
, Z_CALLERACCT
, FALSE
);
1616 dlif_tcpstat_size
= sizeof (struct tcpstat_local
);
1617 /* Enforce 64-bit alignment for tcpstat_local structure */
1618 dlif_tcpstat_bufsize
=
1619 dlif_tcpstat_size
+ sizeof (void *) + sizeof (u_int64_t
);
1620 dlif_tcpstat_bufsize
=
1621 P2ROUNDUP(dlif_tcpstat_bufsize
, sizeof (u_int64_t
));
1622 dlif_tcpstat_zone
= zinit(dlif_tcpstat_bufsize
,
1623 DLIF_TCPSTAT_ZONE_MAX
* dlif_tcpstat_bufsize
, 0,
1624 DLIF_TCPSTAT_ZONE_NAME
);
1625 if (dlif_tcpstat_zone
== NULL
) {
1626 panic_plain("%s: failed allocating %s", __func__
,
1627 DLIF_TCPSTAT_ZONE_NAME
);
1630 zone_change(dlif_tcpstat_zone
, Z_EXPAND
, TRUE
);
1631 zone_change(dlif_tcpstat_zone
, Z_CALLERACCT
, FALSE
);
1633 dlif_udpstat_size
= sizeof (struct udpstat_local
);
1634 /* Enforce 64-bit alignment for udpstat_local structure */
1635 dlif_udpstat_bufsize
=
1636 dlif_udpstat_size
+ sizeof (void *) + sizeof (u_int64_t
);
1637 dlif_udpstat_bufsize
=
1638 P2ROUNDUP(dlif_udpstat_bufsize
, sizeof (u_int64_t
));
1639 dlif_udpstat_zone
= zinit(dlif_udpstat_bufsize
,
1640 DLIF_TCPSTAT_ZONE_MAX
* dlif_udpstat_bufsize
, 0,
1641 DLIF_UDPSTAT_ZONE_NAME
);
1642 if (dlif_udpstat_zone
== NULL
) {
1643 panic_plain("%s: failed allocating %s", __func__
,
1644 DLIF_UDPSTAT_ZONE_NAME
);
1647 zone_change(dlif_udpstat_zone
, Z_EXPAND
, TRUE
);
1648 zone_change(dlif_udpstat_zone
, Z_CALLERACCT
, FALSE
);
1650 ifnet_llreach_init();
1651 eventhandler_lists_ctxt_init(&ifnet_evhdlr_ctxt
);
1653 TAILQ_INIT(&dlil_ifnet_head
);
1654 TAILQ_INIT(&ifnet_head
);
1655 TAILQ_INIT(&ifnet_detaching_head
);
1656 TAILQ_INIT(&ifnet_ordered_head
);
	/* Setup the lock groups we will use */
	dlil_grp_attributes = lck_grp_attr_alloc_init();

	dlil_lock_group = lck_grp_alloc_init("DLIL internal locks",
	    dlil_grp_attributes);
	ifnet_lock_group = lck_grp_alloc_init("ifnet locks",
	    dlil_grp_attributes);
	ifnet_head_lock_group = lck_grp_alloc_init("ifnet head lock",
	    dlil_grp_attributes);
	ifnet_rcv_lock_group = lck_grp_alloc_init("ifnet rcv locks",
	    dlil_grp_attributes);
	ifnet_snd_lock_group = lck_grp_alloc_init("ifnet snd locks",
	    dlil_grp_attributes);

	/* Setup the lock attributes we will use */
	dlil_lck_attributes = lck_attr_alloc_init();

	ifnet_lock_attr = lck_attr_alloc_init();

	lck_rw_init(&ifnet_head_lock, ifnet_head_lock_group,
	    dlil_lck_attributes);
	lck_mtx_init(&dlil_ifnet_lock, dlil_lock_group, dlil_lck_attributes);

	/* Setup interface flow control related items */
	lck_mtx_init(&ifnet_fc_lock, dlil_lock_group, dlil_lck_attributes);

	ifnet_fc_zone_size = sizeof (struct ifnet_fc_entry);
	ifnet_fc_zone = zinit(ifnet_fc_zone_size,
	    IFNET_FC_ZONE_MAX * ifnet_fc_zone_size, 0, IFNET_FC_ZONE_NAME);
	if (ifnet_fc_zone == NULL) {
		panic_plain("%s: failed allocating %s", __func__,
		    IFNET_FC_ZONE_NAME);
		/* NOTREACHED */
	}
	zone_change(ifnet_fc_zone, Z_EXPAND, TRUE);
	zone_change(ifnet_fc_zone, Z_CALLERACCT, FALSE);

	/* Initialize interface address subsystem */
	/* Initialize the packet filter */
	/* Initialize queue algorithms */
	/* Initialize packet schedulers */
	/* Initialize flow advisory subsystem */
	/* Initialize the pktap virtual interface */
	/* Initialize the service class to dscp map */

	/* Initialize the interface port list */
	if_ports_used_init();

#if DEBUG || DEVELOPMENT
	/* Run self-tests */
	dlil_verify_sum16();
#endif /* DEBUG || DEVELOPMENT */

	/* Initialize link layer table */
	lltable_glbl_init();

	/*
	 * Create and start up the main DLIL input thread and the interface
	 * detacher threads once everything is initialized.
	 */
	dlil_create_input_thread(NULL, dlil_main_input_thread);

	if (kernel_thread_start(ifnet_detacher_thread_func,
	    NULL, &thread) != KERN_SUCCESS) {
		panic_plain("%s: couldn't create detacher thread", __func__);
		/* NOTREACHED */
	}
	thread_deallocate(thread);
static void
if_flt_monitor_busy(struct ifnet *ifp)
{
	LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);

	++ifp->if_flt_busy;
	VERIFY(ifp->if_flt_busy != 0);
}

static void
if_flt_monitor_unbusy(struct ifnet *ifp)
{
	if_flt_monitor_leave(ifp);
}

static void
if_flt_monitor_enter(struct ifnet *ifp)
{
	LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);

	while (ifp->if_flt_busy) {
		++ifp->if_flt_waiters;
		(void) msleep(&ifp->if_flt_head, &ifp->if_flt_lock,
		    (PZERO - 1), "if_flt_monitor", NULL);
	}
	if_flt_monitor_busy(ifp);
}

static void
if_flt_monitor_leave(struct ifnet *ifp)
{
	LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);

	VERIFY(ifp->if_flt_busy != 0);
	--ifp->if_flt_busy;

	if (ifp->if_flt_busy == 0 && ifp->if_flt_waiters > 0) {
		ifp->if_flt_waiters = 0;
		wakeup(&ifp->if_flt_head);
	}
}
__private_extern__ int
dlil_attach_filter(struct ifnet *ifp, const struct iff_filter *if_filter,
    interface_filter_t *filter_ref, u_int32_t flags)
{
	int retval = 0;
	struct ifnet_filter *filter = NULL;

	ifnet_head_lock_shared();
	/* Check that the interface is in the global list */
	if (!ifnet_lookup(ifp)) {
		retval = ENXIO;
		goto done;
	}

	filter = zalloc(dlif_filt_zone);
	if (filter == NULL) {
		retval = ENOMEM;
		goto done;
	}
	bzero(filter, dlif_filt_size);

	/* refcnt held above during lookup */
	filter->filt_flags = flags;
	filter->filt_ifp = ifp;
	filter->filt_cookie = if_filter->iff_cookie;
	filter->filt_name = if_filter->iff_name;
	filter->filt_protocol = if_filter->iff_protocol;
	/*
	 * Do not install filter callbacks for internal coproc interface
	 */
	if (!IFNET_IS_INTCOPROC(ifp)) {
		filter->filt_input = if_filter->iff_input;
		filter->filt_output = if_filter->iff_output;
		filter->filt_event = if_filter->iff_event;
		filter->filt_ioctl = if_filter->iff_ioctl;
	}
	filter->filt_detached = if_filter->iff_detached;

	lck_mtx_lock(&ifp->if_flt_lock);
	if_flt_monitor_enter(ifp);

	LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
	TAILQ_INSERT_TAIL(&ifp->if_flt_head, filter, filt_next);

	if_flt_monitor_leave(ifp);
	lck_mtx_unlock(&ifp->if_flt_lock);

	*filter_ref = filter;

	/*
	 * Bump filter count and route_generation ID to let TCP
	 * know it shouldn't do TSO on this connection
	 */
	if ((filter->filt_flags & DLIL_IFF_TSO) == 0) {
		OSAddAtomic(1, &dlil_filter_disable_tso_count);
		routegenid_update();
	}
	OSIncrementAtomic64(&net_api_stats.nas_iflt_attach_count);
	INC_ATOMIC_INT64_LIM(net_api_stats.nas_iflt_attach_total);
	if ((filter->filt_flags & DLIL_IFF_INTERNAL)) {
		INC_ATOMIC_INT64_LIM(net_api_stats.nas_iflt_attach_os_total);
	}
	if (dlil_verbose) {
		printf("%s: %s filter attached\n", if_name(ifp),
		    if_filter->iff_name);
	}
done:
	ifnet_head_done();
	if (retval != 0 && ifp != NULL) {
		DLIL_PRINTF("%s: failed to attach %s (err=%d)\n",
		    if_name(ifp), if_filter->iff_name, retval);
	}
	if (retval != 0 && filter != NULL)
		zfree(dlif_filt_zone, filter);

	return (retval);
}
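
/*
 * Illustrative sketch (example only, not part of the original file): how a
 * client of the interface-filter KPI typically exercises the attach path
 * implemented by dlil_attach_filter() above.  The example_* names are
 * hypothetical; iflt_attach() and struct iff_filter are the public KPI
 * declared in <net/kpi_interfacefilter.h>.
 */
#if 0	/* example only, compiled out */
static errno_t
example_iff_input(void *cookie, ifnet_t ifp, protocol_family_t proto,
    mbuf_t *data, char **frame_ptr)
{
#pragma unused(cookie, ifp, proto, data, frame_ptr)
	return (0);	/* returning 0 lets the packet continue up the stack */
}

static void
example_iff_detached(void *cookie, ifnet_t ifp)
{
#pragma unused(cookie, ifp)
	/* last callback for this filter; safe to release the cookie here */
}

static errno_t
example_attach_filter(ifnet_t ifp, interface_filter_t *ref)
{
	struct iff_filter flt;

	bzero(&flt, sizeof (flt));
	flt.iff_cookie = NULL;
	flt.iff_name = "com.example.filter";
	flt.iff_protocol = 0;		/* 0 means all protocols */
	flt.iff_input = example_iff_input;
	flt.iff_detached = example_iff_detached;

	return (iflt_attach(ifp, &flt, ref));
}
#endif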
static int
dlil_detach_filter_internal(interface_filter_t filter, int detached)
{
	int retval = 0;

	if (detached == 0) {
		ifnet_t ifp = NULL;

		ifnet_head_lock_shared();
		TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
			interface_filter_t entry = NULL;

			lck_mtx_lock(&ifp->if_flt_lock);
			TAILQ_FOREACH(entry, &ifp->if_flt_head, filt_next) {
				if (entry != filter || entry->filt_skip)
					continue;
				/*
				 * We've found a match; since it's possible
				 * that the thread gets blocked in the monitor,
				 * we do the lock dance.  Interface should
				 * not be detached since we still have a use
				 * count held during filter attach.
				 */
				entry->filt_skip = 1;	/* skip input/output */
				lck_mtx_unlock(&ifp->if_flt_lock);
				ifnet_head_done();

				lck_mtx_lock(&ifp->if_flt_lock);
				if_flt_monitor_enter(ifp);
				LCK_MTX_ASSERT(&ifp->if_flt_lock,
				    LCK_MTX_ASSERT_OWNED);

				/* Remove the filter from the list */
				TAILQ_REMOVE(&ifp->if_flt_head, filter,
				    filt_next);

				if_flt_monitor_leave(ifp);
				lck_mtx_unlock(&ifp->if_flt_lock);
				if (dlil_verbose) {
					printf("%s: %s filter detached\n",
					    if_name(ifp), filter->filt_name);
				}
				goto destroy;
			}
			lck_mtx_unlock(&ifp->if_flt_lock);
		}
		ifnet_head_done();

		/* filter parameter is not a valid filter ref */
		retval = EINVAL;
		goto done;
	}

	if (dlil_verbose)
		printf("%s filter detached\n", filter->filt_name);

destroy:

	/* Call the detached function if there is one */
	if (filter->filt_detached)
		filter->filt_detached(filter->filt_cookie, filter->filt_ifp);

	/*
	 * Decrease filter count and route_generation ID to let TCP
	 * know it should reevaluate doing TSO or not
	 */
	if ((filter->filt_flags & DLIL_IFF_TSO) == 0) {
		OSAddAtomic(-1, &dlil_filter_disable_tso_count);
		routegenid_update();
	}

	VERIFY(OSDecrementAtomic64(&net_api_stats.nas_iflt_attach_count) > 0);

	/* Free the filter */
	zfree(dlif_filt_zone, filter);

done:
	if (retval != 0 && filter != NULL) {
		DLIL_PRINTF("failed to detach %s filter (err=%d)\n",
		    filter->filt_name, retval);
	}

	return (retval);
}

__private_extern__ void
dlil_detach_filter(interface_filter_t filter)
{
	if (filter == NULL)
		return;
	dlil_detach_filter_internal(filter, 0);
}
/*
 * Main input thread:
 *
 *   a) handles all inbound packets for lo0
 *   b) handles all inbound packets for interfaces with no dedicated
 *      input thread (e.g. anything but Ethernet/PDP or those that support
 *      opportunistic polling.)
 *   c) protocol registrations
 *   d) packet injections
 */
__attribute__((noreturn))
static void
dlil_main_input_thread_func(void *v, wait_result_t w)
{
#pragma unused(w)
	struct dlil_main_threading_info *inpm = v;
	struct dlil_threading_info *inp = v;

	VERIFY(inp == dlil_main_input_thread);
	VERIFY(inp->ifp == NULL);
	VERIFY(inp->mode == IFNET_MODEL_INPUT_POLL_OFF);

	while (1) {
		struct mbuf *m = NULL, *m_loop = NULL;
		u_int32_t m_cnt, m_cnt_loop;
		boolean_t proto_req;

		lck_mtx_lock_spin(&inp->input_lck);

		/* Wait until there is work to be done */
		while (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) {
			inp->input_waiting &= ~DLIL_INPUT_RUNNING;
			(void) msleep(&inp->input_waiting, &inp->input_lck,
			    (PZERO - 1) | PSPIN, inp->input_name, NULL);
		}

		inp->input_waiting |= DLIL_INPUT_RUNNING;
		inp->input_waiting &= ~DLIL_INPUT_WAITING;

		/* Main input thread cannot be terminated */
		VERIFY(!(inp->input_waiting & DLIL_INPUT_TERMINATE));

		proto_req = (inp->input_waiting &
		    (DLIL_PROTO_WAITING | DLIL_PROTO_REGISTER));

		/* Packets for non-dedicated interfaces other than lo0 */
		m_cnt = qlen(&inp->rcvq_pkts);
		m = _getq_all(&inp->rcvq_pkts, NULL, NULL, NULL);

		/* Packets exclusive to lo0 */
		m_cnt_loop = qlen(&inpm->lo_rcvq_pkts);
		m_loop = _getq_all(&inpm->lo_rcvq_pkts, NULL, NULL, NULL);

		lck_mtx_unlock(&inp->input_lck);

		/*
		 * NOTE warning %%% attention !!!!
		 * We should think about putting some thread starvation
		 * safeguards if we deal with long chains of packets.
		 */
		if (m_loop != NULL) {
			dlil_input_packet_list_extended(lo_ifp, m_loop,
			    m_cnt_loop, inp->mode);
		}

		if (m != NULL) {
			dlil_input_packet_list_extended(NULL, m,
			    m_cnt, inp->mode);
		}

		if (proto_req)
			proto_input_run();
	}

	/* NOTREACHED */
	VERIFY(0);	/* we should never get here */
}
2036 * Input thread for interfaces with legacy input model.
2039 dlil_input_thread_func(void *v
, wait_result_t w
)
2042 char thread_name
[MAXTHREADNAMESIZE
];
2043 struct dlil_threading_info
*inp
= v
;
2044 struct ifnet
*ifp
= inp
->ifp
;
2046 /* Construct the name for this thread, and then apply it. */
2047 bzero(thread_name
, sizeof(thread_name
));
2048 snprintf(thread_name
, sizeof(thread_name
), "dlil_input_%s", ifp
->if_xname
);
2049 thread_set_thread_name(inp
->input_thr
, thread_name
);
2051 VERIFY(inp
!= dlil_main_input_thread
);
2052 VERIFY(ifp
!= NULL
);
2053 VERIFY(!(ifp
->if_eflags
& IFEF_RXPOLL
) || !net_rxpoll
);
2054 VERIFY(inp
->mode
== IFNET_MODEL_INPUT_POLL_OFF
);
2057 struct mbuf
*m
= NULL
;
2060 lck_mtx_lock_spin(&inp
->input_lck
);
2062 /* Wait until there is work to be done */
2063 while (!(inp
->input_waiting
& ~DLIL_INPUT_RUNNING
)) {
2064 inp
->input_waiting
&= ~DLIL_INPUT_RUNNING
;
2065 (void) msleep(&inp
->input_waiting
, &inp
->input_lck
,
2066 (PZERO
- 1) | PSPIN
, inp
->input_name
, NULL
);
2069 inp
->input_waiting
|= DLIL_INPUT_RUNNING
;
2070 inp
->input_waiting
&= ~DLIL_INPUT_WAITING
;
2073 * Protocol registration and injection must always use
2074 * the main input thread; in theory the latter can utilize
2075 * the corresponding input thread where the packet arrived
2076 * on, but that requires our knowing the interface in advance
2077 * (and the benefits might not worth the trouble.)
2079 VERIFY(!(inp
->input_waiting
&
2080 (DLIL_PROTO_WAITING
|DLIL_PROTO_REGISTER
)));
2082 /* Packets for this interface */
2083 m_cnt
= qlen(&inp
->rcvq_pkts
);
2084 m
= _getq_all(&inp
->rcvq_pkts
, NULL
, NULL
, NULL
);
2086 if (inp
->input_waiting
& DLIL_INPUT_TERMINATE
) {
2087 lck_mtx_unlock(&inp
->input_lck
);
2089 /* Free up pending packets */
2093 dlil_terminate_input_thread(inp
);
2100 dlil_input_stats_sync(ifp
, inp
);
2102 lck_mtx_unlock(&inp
->input_lck
);
2105 * NOTE warning %%% attention !!!!
2106 * We should think about putting some thread starvation
2107 * safeguards if we deal with long chains of packets.
2110 dlil_input_packet_list_extended(NULL
, m
,
2115 VERIFY(0); /* we should never get here */
2119 * Input thread for interfaces with opportunistic polling input model.
2122 dlil_rxpoll_input_thread_func(void *v
, wait_result_t w
)
2125 struct dlil_threading_info
*inp
= v
;
2126 struct ifnet
*ifp
= inp
->ifp
;
2129 VERIFY(inp
!= dlil_main_input_thread
);
2130 VERIFY(ifp
!= NULL
&& (ifp
->if_eflags
& IFEF_RXPOLL
));
2133 struct mbuf
*m
= NULL
;
2134 u_int32_t m_cnt
, m_size
, poll_req
= 0;
2136 struct timespec now
, delta
;
2139 lck_mtx_lock_spin(&inp
->input_lck
);
2141 if ((ival
= inp
->rxpoll_ival
) < IF_RXPOLL_INTERVALTIME_MIN
)
2142 ival
= IF_RXPOLL_INTERVALTIME_MIN
;
2144 /* Link parameters changed? */
2145 if (ifp
->if_poll_update
!= 0) {
2146 ifp
->if_poll_update
= 0;
2147 (void) dlil_rxpoll_set_params(ifp
, NULL
, TRUE
);
2150 /* Current operating mode */
2153 /* Wait until there is work to be done */
2154 while (!(inp
->input_waiting
& ~DLIL_INPUT_RUNNING
)) {
2155 inp
->input_waiting
&= ~DLIL_INPUT_RUNNING
;
2156 (void) msleep(&inp
->input_waiting
, &inp
->input_lck
,
2157 (PZERO
- 1) | PSPIN
, inp
->input_name
, NULL
);
2160 inp
->input_waiting
|= DLIL_INPUT_RUNNING
;
2161 inp
->input_waiting
&= ~DLIL_INPUT_WAITING
;
2164 * Protocol registration and injection must always use
2165 * the main input thread; in theory the latter can utilize
2166 * the corresponding input thread where the packet arrived
2167 * on, but that requires our knowing the interface in advance
2168 * (and the benefits might not worth the trouble.)
2170 VERIFY(!(inp
->input_waiting
&
2171 (DLIL_PROTO_WAITING
|DLIL_PROTO_REGISTER
)));
2173 if (inp
->input_waiting
& DLIL_INPUT_TERMINATE
) {
2174 /* Free up pending packets */
2175 lck_mtx_convert_spin(&inp
->input_lck
);
2176 _flushq(&inp
->rcvq_pkts
);
2177 if (inp
->input_mit_tcall
!= NULL
) {
2178 if (thread_call_isactive(inp
->input_mit_tcall
))
2179 thread_call_cancel(inp
->input_mit_tcall
);
2181 lck_mtx_unlock(&inp
->input_lck
);
2183 dlil_terminate_input_thread(inp
);
2188 /* Total count of all packets */
2189 m_cnt
= qlen(&inp
->rcvq_pkts
);
2191 /* Total bytes of all packets */
2192 m_size
= qsize(&inp
->rcvq_pkts
);
2194 /* Packets for this interface */
2195 m
= _getq_all(&inp
->rcvq_pkts
, NULL
, NULL
, NULL
);
2196 VERIFY(m
!= NULL
|| m_cnt
== 0);
2199 if (!net_timerisset(&inp
->sample_lasttime
))
2200 *(&inp
->sample_lasttime
) = *(&now
);
2202 net_timersub(&now
, &inp
->sample_lasttime
, &delta
);
2203 if (if_rxpoll
&& net_timerisset(&inp
->sample_holdtime
)) {
2204 u_int32_t ptot
, btot
;
2206 /* Accumulate statistics for current sampling */
2207 PKTCNTR_ADD(&inp
->sstats
, m_cnt
, m_size
);
2209 if (net_timercmp(&delta
, &inp
->sample_holdtime
, <))
2212 *(&inp
->sample_lasttime
) = *(&now
);
2214 /* Calculate min/max of inbound bytes */
2215 btot
= (u_int32_t
)inp
->sstats
.bytes
;
2216 if (inp
->rxpoll_bmin
== 0 || inp
->rxpoll_bmin
> btot
)
2217 inp
->rxpoll_bmin
= btot
;
2218 if (btot
> inp
->rxpoll_bmax
)
2219 inp
->rxpoll_bmax
= btot
;
2221 /* Calculate EWMA of inbound bytes */
2222 DLIL_EWMA(inp
->rxpoll_bavg
, btot
, if_rxpoll_decay
);
2224 /* Calculate min/max of inbound packets */
2225 ptot
= (u_int32_t
)inp
->sstats
.packets
;
2226 if (inp
->rxpoll_pmin
== 0 || inp
->rxpoll_pmin
> ptot
)
2227 inp
->rxpoll_pmin
= ptot
;
2228 if (ptot
> inp
->rxpoll_pmax
)
2229 inp
->rxpoll_pmax
= ptot
;
2231 /* Calculate EWMA of inbound packets */
2232 DLIL_EWMA(inp
->rxpoll_pavg
, ptot
, if_rxpoll_decay
);
2234 /* Reset sampling statistics */
2235 PKTCNTR_CLEAR(&inp
->sstats
);
2237 /* Calculate EWMA of wakeup requests */
2238 DLIL_EWMA(inp
->rxpoll_wavg
, inp
->wtot
, if_rxpoll_decay
);
2242 if (!net_timerisset(&inp
->dbg_lasttime
))
2243 *(&inp
->dbg_lasttime
) = *(&now
);
2244 net_timersub(&now
, &inp
->dbg_lasttime
, &delta
);
2245 if (net_timercmp(&delta
, &dlil_dbgrate
, >=)) {
2246 *(&inp
->dbg_lasttime
) = *(&now
);
2247 printf("%s: [%s] pkts avg %d max %d "
2248 "limits [%d/%d], wreq avg %d "
2249 "limits [%d/%d], bytes avg %d "
2250 "limits [%d/%d]\n", if_name(ifp
),
2252 IFNET_MODEL_INPUT_POLL_ON
) ?
2253 "ON" : "OFF", inp
->rxpoll_pavg
,
2262 inp
->rxpoll_bhiwat
);
2266 /* Perform mode transition, if necessary */
2267 if (!net_timerisset(&inp
->mode_lasttime
))
2268 *(&inp
->mode_lasttime
) = *(&now
);
2270 net_timersub(&now
, &inp
->mode_lasttime
, &delta
);
2271 if (net_timercmp(&delta
, &inp
->mode_holdtime
, <))
2274 if (inp
->rxpoll_pavg
<= inp
->rxpoll_plowat
&&
2275 inp
->rxpoll_bavg
<= inp
->rxpoll_blowat
&&
2276 inp
->mode
!= IFNET_MODEL_INPUT_POLL_OFF
) {
2277 mode
= IFNET_MODEL_INPUT_POLL_OFF
;
2278 } else if (inp
->rxpoll_pavg
>= inp
->rxpoll_phiwat
&&
2279 (inp
->rxpoll_bavg
>= inp
->rxpoll_bhiwat
||
2280 inp
->rxpoll_wavg
>= inp
->rxpoll_whiwat
) &&
2281 inp
->mode
!= IFNET_MODEL_INPUT_POLL_ON
) {
2282 mode
= IFNET_MODEL_INPUT_POLL_ON
;
2285 if (mode
!= inp
->mode
) {
2287 *(&inp
->mode_lasttime
) = *(&now
);
2292 dlil_input_stats_sync(ifp
, inp
);
2294 lck_mtx_unlock(&inp
->input_lck
);
2297 * If there's a mode change and interface is still attached,
2298 * perform a downcall to the driver for the new mode. Also
2299 * hold an IO refcnt on the interface to prevent it from
2300 * being detached (will be release below.)
2302 if (poll_req
!= 0 && ifnet_is_attached(ifp
, 1)) {
2303 struct ifnet_model_params p
= { mode
, { 0 } };
2307 printf("%s: polling is now %s, "
2308 "pkts avg %d max %d limits [%d/%d], "
2309 "wreq avg %d limits [%d/%d], "
2310 "bytes avg %d limits [%d/%d]\n",
2312 (mode
== IFNET_MODEL_INPUT_POLL_ON
) ?
2313 "ON" : "OFF", inp
->rxpoll_pavg
,
2314 inp
->rxpoll_pmax
, inp
->rxpoll_plowat
,
2315 inp
->rxpoll_phiwat
, inp
->rxpoll_wavg
,
2316 inp
->rxpoll_wlowat
, inp
->rxpoll_whiwat
,
2317 inp
->rxpoll_bavg
, inp
->rxpoll_blowat
,
2318 inp
->rxpoll_bhiwat
);
2321 if ((err
= ((*ifp
->if_input_ctl
)(ifp
,
2322 IFNET_CTL_SET_INPUT_MODEL
, sizeof (p
), &p
))) != 0) {
2323 printf("%s: error setting polling mode "
2324 "to %s (%d)\n", if_name(ifp
),
2325 (mode
== IFNET_MODEL_INPUT_POLL_ON
) ?
2330 case IFNET_MODEL_INPUT_POLL_OFF
:
2331 ifnet_set_poll_cycle(ifp
, NULL
);
2332 inp
->rxpoll_offreq
++;
2334 inp
->rxpoll_offerr
++;
2337 case IFNET_MODEL_INPUT_POLL_ON
:
2338 net_nsectimer(&ival
, &ts
);
2339 ifnet_set_poll_cycle(ifp
, &ts
);
2341 inp
->rxpoll_onreq
++;
2343 inp
->rxpoll_onerr
++;
2351 /* Release the IO refcnt */
2352 ifnet_decr_iorefcnt(ifp
);
2356 * NOTE warning %%% attention !!!!
2357 * We should think about putting some thread starvation
2358 * safeguards if we deal with long chains of packets.
2361 dlil_input_packet_list_extended(NULL
, m
, m_cnt
, mode
);
2365 VERIFY(0); /* we should never get here */
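
/*
 * Illustrative sketch (example only, not part of the original file): the
 * poller mode transition applied by the rxpoll input thread above, restated
 * as a pure helper.  Mode goes to POLL_OFF once both the packet and byte
 * EWMAs fall to their low watermarks, and to POLL_ON once the packet EWMA
 * reaches its high watermark and either the byte or the wakeup-request EWMA
 * does too; otherwise the current mode is kept.
 */
#if 0	/* example only, compiled out */
static ifnet_model_t
example_rxpoll_next_mode(const struct dlil_threading_info *inp,
    ifnet_model_t cur)
{
	if (inp->rxpoll_pavg <= inp->rxpoll_plowat &&
	    inp->rxpoll_bavg <= inp->rxpoll_blowat &&
	    cur != IFNET_MODEL_INPUT_POLL_OFF)
		return (IFNET_MODEL_INPUT_POLL_OFF);

	if (inp->rxpoll_pavg >= inp->rxpoll_phiwat &&
	    (inp->rxpoll_bavg >= inp->rxpoll_bhiwat ||
	    inp->rxpoll_wavg >= inp->rxpoll_whiwat) &&
	    cur != IFNET_MODEL_INPUT_POLL_ON)
		return (IFNET_MODEL_INPUT_POLL_ON);

	return (cur);
}
#endif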
2369 * Must be called on an attached ifnet (caller is expected to check.)
2370 * Caller may pass NULL for poll parameters to indicate "auto-tuning."
2373 dlil_rxpoll_set_params(struct ifnet
*ifp
, struct ifnet_poll_params
*p
,
2376 struct dlil_threading_info
*inp
;
2377 u_int64_t sample_holdtime
, inbw
;
2379 VERIFY(ifp
!= NULL
);
2380 if (!(ifp
->if_eflags
& IFEF_RXPOLL
) || (inp
= ifp
->if_inp
) == NULL
)
2384 if ((p
->packets_lowat
== 0 && p
->packets_hiwat
!= 0) ||
2385 (p
->packets_lowat
!= 0 && p
->packets_hiwat
== 0))
2387 if (p
->packets_lowat
!= 0 && /* hiwat must be non-zero */
2388 p
->packets_lowat
>= p
->packets_hiwat
)
2390 if ((p
->bytes_lowat
== 0 && p
->bytes_hiwat
!= 0) ||
2391 (p
->bytes_lowat
!= 0 && p
->bytes_hiwat
== 0))
2393 if (p
->bytes_lowat
!= 0 && /* hiwat must be non-zero */
2394 p
->bytes_lowat
>= p
->bytes_hiwat
)
2396 if (p
->interval_time
!= 0 &&
2397 p
->interval_time
< IF_RXPOLL_INTERVALTIME_MIN
)
2398 p
->interval_time
= IF_RXPOLL_INTERVALTIME_MIN
;
2402 lck_mtx_lock(&inp
->input_lck
);
2404 LCK_MTX_ASSERT(&inp
->input_lck
, LCK_MTX_ASSERT_OWNED
);
2407 * Normally, we'd reset the parameters to the auto-tuned values
2408 * if the the input thread detects a change in link rate. If the
2409 * driver provides its own parameters right after a link rate
2410 * changes, but before the input thread gets to run, we want to
2411 * make sure to keep the driver's values. Clearing if_poll_update
2412 * will achieve that.
2414 if (p
!= NULL
&& !locked
&& ifp
->if_poll_update
!= 0)
2415 ifp
->if_poll_update
= 0;
2417 if ((inbw
= ifnet_input_linkrate(ifp
)) == 0 && p
== NULL
) {
2418 sample_holdtime
= 0; /* polling is disabled */
2419 inp
->rxpoll_wlowat
= inp
->rxpoll_plowat
=
2420 inp
->rxpoll_blowat
= 0;
2421 inp
->rxpoll_whiwat
= inp
->rxpoll_phiwat
=
2422 inp
->rxpoll_bhiwat
= (u_int32_t
)-1;
2423 inp
->rxpoll_plim
= 0;
2424 inp
->rxpoll_ival
= IF_RXPOLL_INTERVALTIME_MIN
;
2426 u_int32_t plowat
, phiwat
, blowat
, bhiwat
, plim
;
2430 for (n
= 0, i
= 0; rxpoll_tbl
[i
].speed
!= 0; i
++) {
2431 if (inbw
< rxpoll_tbl
[i
].speed
)
2435 /* auto-tune if caller didn't specify a value */
2436 plowat
= ((p
== NULL
|| p
->packets_lowat
== 0) ?
2437 rxpoll_tbl
[n
].plowat
: p
->packets_lowat
);
2438 phiwat
= ((p
== NULL
|| p
->packets_hiwat
== 0) ?
2439 rxpoll_tbl
[n
].phiwat
: p
->packets_hiwat
);
2440 blowat
= ((p
== NULL
|| p
->bytes_lowat
== 0) ?
2441 rxpoll_tbl
[n
].blowat
: p
->bytes_lowat
);
2442 bhiwat
= ((p
== NULL
|| p
->bytes_hiwat
== 0) ?
2443 rxpoll_tbl
[n
].bhiwat
: p
->bytes_hiwat
);
2444 plim
= ((p
== NULL
|| p
->packets_limit
== 0) ?
2445 if_rxpoll_max
: p
->packets_limit
);
2446 ival
= ((p
== NULL
|| p
->interval_time
== 0) ?
2447 if_rxpoll_interval_time
: p
->interval_time
);
2449 VERIFY(plowat
!= 0 && phiwat
!= 0);
2450 VERIFY(blowat
!= 0 && bhiwat
!= 0);
2451 VERIFY(ival
>= IF_RXPOLL_INTERVALTIME_MIN
);
2453 sample_holdtime
= if_rxpoll_sample_holdtime
;
2454 inp
->rxpoll_wlowat
= if_rxpoll_wlowat
;
2455 inp
->rxpoll_whiwat
= if_rxpoll_whiwat
;
2456 inp
->rxpoll_plowat
= plowat
;
2457 inp
->rxpoll_phiwat
= phiwat
;
2458 inp
->rxpoll_blowat
= blowat
;
2459 inp
->rxpoll_bhiwat
= bhiwat
;
2460 inp
->rxpoll_plim
= plim
;
2461 inp
->rxpoll_ival
= ival
;
2464 net_nsectimer(&if_rxpoll_mode_holdtime
, &inp
->mode_holdtime
);
2465 net_nsectimer(&sample_holdtime
, &inp
->sample_holdtime
);
2468 printf("%s: speed %llu bps, sample per %llu nsec, "
2469 "poll interval %llu nsec, pkts per poll %u, "
2470 "pkt limits [%u/%u], wreq limits [%u/%u], "
2471 "bytes limits [%u/%u]\n", if_name(ifp
),
2472 inbw
, sample_holdtime
, inp
->rxpoll_ival
, inp
->rxpoll_plim
,
2473 inp
->rxpoll_plowat
, inp
->rxpoll_phiwat
, inp
->rxpoll_wlowat
,
2474 inp
->rxpoll_whiwat
, inp
->rxpoll_blowat
, inp
->rxpoll_bhiwat
);
2478 lck_mtx_unlock(&inp
->input_lck
);
/*
 * Must be called on an attached ifnet (caller is expected to check.)
 */
errno_t
dlil_rxpoll_get_params(struct ifnet *ifp, struct ifnet_poll_params *p)
{
	struct dlil_threading_info *inp;

	VERIFY(ifp != NULL && p != NULL);
	if (!(ifp->if_eflags & IFEF_RXPOLL) || (inp = ifp->if_inp) == NULL)
		return (ENXIO);

	bzero(p, sizeof (*p));

	lck_mtx_lock(&inp->input_lck);
	p->packets_limit = inp->rxpoll_plim;
	p->packets_lowat = inp->rxpoll_plowat;
	p->packets_hiwat = inp->rxpoll_phiwat;
	p->bytes_lowat = inp->rxpoll_blowat;
	p->bytes_hiwat = inp->rxpoll_bhiwat;
	p->interval_time = inp->rxpoll_ival;
	lck_mtx_unlock(&inp->input_lck);

	return (0);
}
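
/*
 * Illustrative sketch (example only): reading back the effective poll
 * parameters with dlil_rxpoll_get_params().  As noted above, the caller is
 * expected to operate on an attached, IFEF_RXPOLL-capable ifnet.
 */
#if 0	/* example only, compiled out */
static void
example_log_rxpoll_params(struct ifnet *ifp)
{
	struct ifnet_poll_params p;

	if (dlil_rxpoll_get_params(ifp, &p) == 0) {
		printf("%s: poll limit %u pkts, pkt watermarks [%u/%u], "
		    "byte watermarks [%u/%u], interval %llu nsec\n",
		    if_name(ifp), p.packets_limit,
		    p.packets_lowat, p.packets_hiwat,
		    p.bytes_lowat, p.bytes_hiwat,
		    (unsigned long long)p.interval_time);
	}
}
#endif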
errno_t
ifnet_input(struct ifnet *ifp, struct mbuf *m_head,
    const struct ifnet_stat_increment_param *s)
{
	return (ifnet_input_common(ifp, m_head, NULL, s, FALSE, FALSE));
}

errno_t
ifnet_input_extended(struct ifnet *ifp, struct mbuf *m_head,
    struct mbuf *m_tail, const struct ifnet_stat_increment_param *s)
{
	return (ifnet_input_common(ifp, m_head, m_tail, s, TRUE, FALSE));
}
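
/*
 * Illustrative sketch (example only): how a driver typically hands a batch
 * of received packets to the stack through the two wrappers above.  The
 * plain ifnet_input() variant lets DLIL walk the chain itself; the extended
 * variant requires the chain tail plus accurate packet/byte counts.
 */
#if 0	/* example only, compiled out */
static void
example_deliver_rx_chain(ifnet_t ifp, mbuf_t head, mbuf_t tail,
    u_int32_t pkts, u_int32_t bytes)
{
	struct ifnet_stat_increment_param s;

	bzero(&s, sizeof (s));
	s.packets_in = pkts;
	s.bytes_in = bytes;

	/* counts are mandatory for the extended variant */
	(void) ifnet_input_extended(ifp, head, tail, &s);
}
#endif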
2524 ifnet_input_common(struct ifnet
*ifp
, struct mbuf
*m_head
, struct mbuf
*m_tail
,
2525 const struct ifnet_stat_increment_param
*s
, boolean_t ext
, boolean_t poll
)
2527 dlil_input_func input_func
;
2528 struct ifnet_stat_increment_param _s
;
2529 u_int32_t m_cnt
= 0, m_size
= 0;
2533 if ((m_head
== NULL
&& !poll
) || (s
== NULL
&& ext
)) {
2535 mbuf_freem_list(m_head
);
2539 VERIFY(m_head
!= NULL
|| (s
== NULL
&& m_tail
== NULL
&& !ext
&& poll
));
2540 VERIFY(m_tail
== NULL
|| ext
);
2541 VERIFY(s
!= NULL
|| !ext
);
2544 * Drop the packet(s) if the parameters are invalid, or if the
2545 * interface is no longer attached; else hold an IO refcnt to
2546 * prevent it from being detached (will be released below.)
2548 if (ifp
== NULL
|| (ifp
!= lo_ifp
&& !ifnet_is_attached(ifp
, 1))) {
2550 mbuf_freem_list(m_head
);
2554 input_func
= ifp
->if_input_dlil
;
2555 VERIFY(input_func
!= NULL
);
2557 if (m_tail
== NULL
) {
2559 while (m_head
!= NULL
) {
2560 #if IFNET_INPUT_SANITY_CHK
2561 if (dlil_input_sanity_check
!= 0)
2562 DLIL_INPUT_CHECK(last
, ifp
);
2563 #endif /* IFNET_INPUT_SANITY_CHK */
2565 m_size
+= m_length(last
);
2566 if (mbuf_nextpkt(last
) == NULL
)
2568 last
= mbuf_nextpkt(last
);
2572 #if IFNET_INPUT_SANITY_CHK
2573 if (dlil_input_sanity_check
!= 0) {
2576 DLIL_INPUT_CHECK(last
, ifp
);
2578 m_size
+= m_length(last
);
2579 if (mbuf_nextpkt(last
) == NULL
)
2581 last
= mbuf_nextpkt(last
);
2584 m_cnt
= s
->packets_in
;
2585 m_size
= s
->bytes_in
;
2589 m_cnt
= s
->packets_in
;
2590 m_size
= s
->bytes_in
;
2592 #endif /* IFNET_INPUT_SANITY_CHK */
2595 if (last
!= m_tail
) {
2596 panic_plain("%s: invalid input packet chain for %s, "
2597 "tail mbuf %p instead of %p\n", __func__
, if_name(ifp
),
2602 * Assert packet count only for the extended variant, for backwards
2603 * compatibility, since this came directly from the device driver.
2604 * Relax this assertion for input bytes, as the driver may have
2605 * included the link-layer headers in the computation; hence
2606 * m_size is just an approximation.
2608 if (ext
&& s
->packets_in
!= m_cnt
) {
2609 panic_plain("%s: input packet count mismatch for %s, "
2610 "%d instead of %d\n", __func__
, if_name(ifp
),
2611 s
->packets_in
, m_cnt
);
2615 bzero(&_s
, sizeof (_s
));
2620 _s
.packets_in
= m_cnt
;
2621 _s
.bytes_in
= m_size
;
2623 err
= (*input_func
)(ifp
, m_head
, m_tail
, s
, poll
, current_thread());
2625 if (ifp
!= lo_ifp
) {
2626 /* Release the IO refcnt */
2627 ifnet_decr_iorefcnt(ifp
);
static errno_t
dlil_output_handler(struct ifnet *ifp, struct mbuf *m)
{
	return (ifp->if_output(ifp, m));
}
2641 dlil_input_handler(struct ifnet
*ifp
, struct mbuf
*m_head
,
2642 struct mbuf
*m_tail
, const struct ifnet_stat_increment_param
*s
,
2643 boolean_t poll
, struct thread
*tp
)
2645 struct dlil_threading_info
*inp
;
2646 u_int32_t m_cnt
= s
->packets_in
;
2647 u_int32_t m_size
= s
->bytes_in
;
2649 if ((inp
= ifp
->if_inp
) == NULL
)
2650 inp
= dlil_main_input_thread
;
2653 * If there is a matching DLIL input thread associated with an
2654 * affinity set, associate this thread with the same set. We
2655 * will only do this once.
2657 lck_mtx_lock_spin(&inp
->input_lck
);
2658 if (inp
!= dlil_main_input_thread
&& inp
->net_affinity
&& tp
!= NULL
&&
2659 ((!poll
&& inp
->wloop_thr
== THREAD_NULL
) ||
2660 (poll
&& inp
->poll_thr
== THREAD_NULL
))) {
2661 u_int32_t tag
= inp
->tag
;
2664 VERIFY(inp
->poll_thr
== THREAD_NULL
);
2667 VERIFY(inp
->wloop_thr
== THREAD_NULL
);
2668 inp
->wloop_thr
= tp
;
2670 lck_mtx_unlock(&inp
->input_lck
);
2672 /* Associate the current thread with the new affinity tag */
2673 (void) dlil_affinity_set(tp
, tag
);
2676 * Take a reference on the current thread; during detach,
2677 * we will need to refer to it in order to tear down its
2680 thread_reference(tp
);
2681 lck_mtx_lock_spin(&inp
->input_lck
);
2684 VERIFY(m_head
!= NULL
|| (m_tail
== NULL
&& m_cnt
== 0));
2687 * Because of loopbacked multicast we cannot stuff the ifp in
2688 * the rcvif of the packet header: loopback (lo0) packets use a
2689 * dedicated list so that we can later associate them with lo_ifp
2690 * on their way up the stack. Packets for other interfaces without
2691 * dedicated input threads go to the regular list.
2693 if (m_head
!= NULL
) {
2694 if (inp
== dlil_main_input_thread
&& ifp
== lo_ifp
) {
2695 struct dlil_main_threading_info
*inpm
=
2696 (struct dlil_main_threading_info
*)inp
;
2697 _addq_multi(&inpm
->lo_rcvq_pkts
, m_head
, m_tail
,
2700 _addq_multi(&inp
->rcvq_pkts
, m_head
, m_tail
,
2705 #if IFNET_INPUT_SANITY_CHK
2706 if (dlil_input_sanity_check
!= 0) {
2710 for (m0
= m_head
, count
= 0; m0
; m0
= mbuf_nextpkt(m0
))
2713 if (count
!= m_cnt
) {
2714 panic_plain("%s: invalid packet count %d "
2715 "(expected %d)\n", if_name(ifp
),
2720 inp
->input_mbuf_cnt
+= m_cnt
;
2722 #endif /* IFNET_INPUT_SANITY_CHK */
2724 dlil_input_stats_add(s
, inp
, poll
);
2726 * If we're using the main input thread, synchronize the
2727 * stats now since we have the interface context. All
2728 * other cases involving dedicated input threads will
2729 * have their stats synchronized there.
2731 if (inp
== dlil_main_input_thread
)
2732 dlil_input_stats_sync(ifp
, inp
);
2734 if (inp
->input_mit_tcall
&&
2735 qlen(&inp
->rcvq_pkts
) >= dlil_rcv_mit_pkts_min
&&
2736 qlen(&inp
->rcvq_pkts
) < dlil_rcv_mit_pkts_max
&&
2737 (ifp
->if_family
== IFNET_FAMILY_ETHERNET
||
2738 ifp
->if_type
== IFT_CELLULAR
)
2740 if (!thread_call_isactive(inp
->input_mit_tcall
)) {
2742 clock_interval_to_deadline(dlil_rcv_mit_interval
,
2744 (void) thread_call_enter_delayed(
2745 inp
->input_mit_tcall
, deadline
);
2748 inp
->input_waiting
|= DLIL_INPUT_WAITING
;
2749 if (!(inp
->input_waiting
& DLIL_INPUT_RUNNING
)) {
2751 wakeup_one((caddr_t
)&inp
->input_waiting
);
2754 lck_mtx_unlock(&inp
->input_lck
);
static void
ifnet_start_common(struct ifnet *ifp, boolean_t resetfc)
{
	if (!(ifp->if_eflags & IFEF_TXSTART))
		return;
	/*
	 * If the starter thread is inactive, signal it to do work,
	 * unless the interface is being flow controlled from below,
	 * e.g. a virtual interface being flow controlled by a real
	 * network interface beneath it, or it's been disabled via
	 * a call to ifnet_disable_output().
	 */
	lck_mtx_lock_spin(&ifp->if_start_lock);
	if (resetfc) {
		ifp->if_start_flags &= ~IFSF_FLOW_CONTROLLED;
	} else if (ifp->if_start_flags & IFSF_FLOW_CONTROLLED) {
		lck_mtx_unlock(&ifp->if_start_lock);
		return;
	}
	ifp->if_start_req++;
	if (!ifp->if_start_active && ifp->if_start_thread != THREAD_NULL &&
	    (resetfc || !(ifp->if_eflags & IFEF_ENQUEUE_MULTI) ||
	    IFCQ_LEN(&ifp->if_snd) >= ifp->if_start_delay_qlen ||
	    ifp->if_start_delayed == 0)) {
		(void) thread_wakeup_thread((caddr_t)&ifp->if_start_thread,
		    ifp->if_start_thread);
	}
	lck_mtx_unlock(&ifp->if_start_lock);
}

void
ifnet_start(struct ifnet *ifp)
{
	ifnet_start_common(ifp, FALSE);
}
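
/*
 * Illustrative sketch (example only): the dequeue loop a TXSTART driver's
 * if_start callback typically runs when the starter thread below invokes
 * it.  example_hw_tx() is a hypothetical hardware transmit routine;
 * ifnet_dequeue() is defined later in this file.
 */
#if 0	/* example only, compiled out */
static void
example_if_start(struct ifnet *ifp)
{
	struct mbuf *m;

	for (;;) {
		if (ifnet_dequeue(ifp, &m) != 0)
			break;		/* send queue drained (or error) */
		example_hw_tx(ifp, m);	/* hypothetical; consumes the mbuf */
	}
}
#endif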
2797 ifnet_start_thread_fn(void *v
, wait_result_t w
)
2800 struct ifnet
*ifp
= v
;
2801 char ifname
[IFNAMSIZ
+ 1];
2802 char thread_name
[MAXTHREADNAMESIZE
];
2803 struct timespec
*ts
= NULL
;
2804 struct ifclassq
*ifq
= &ifp
->if_snd
;
2805 struct timespec delay_start_ts
;
2807 /* Construct the name for this thread, and then apply it. */
2808 bzero(thread_name
, sizeof(thread_name
));
2809 (void) snprintf(thread_name
, sizeof (thread_name
),
2810 "ifnet_start_%s", ifp
->if_xname
);
2811 thread_set_thread_name(ifp
->if_start_thread
, thread_name
);
2814 * Treat the dedicated starter thread for lo0 as equivalent to
2815 * the driver workloop thread; if net_affinity is enabled for
2816 * the main input thread, associate this starter thread to it
2817 * by binding them with the same affinity tag. This is done
2818 * only once (as we only have one lo_ifp which never goes away.)
2820 if (ifp
== lo_ifp
) {
2821 struct dlil_threading_info
*inp
= dlil_main_input_thread
;
2822 struct thread
*tp
= current_thread();
2824 lck_mtx_lock(&inp
->input_lck
);
2825 if (inp
->net_affinity
) {
2826 u_int32_t tag
= inp
->tag
;
2828 VERIFY(inp
->wloop_thr
== THREAD_NULL
);
2829 VERIFY(inp
->poll_thr
== THREAD_NULL
);
2830 inp
->wloop_thr
= tp
;
2831 lck_mtx_unlock(&inp
->input_lck
);
2833 /* Associate this thread with the affinity tag */
2834 (void) dlil_affinity_set(tp
, tag
);
2836 lck_mtx_unlock(&inp
->input_lck
);
2840 (void) snprintf(ifname
, sizeof (ifname
), "%s_starter", if_name(ifp
));
2842 lck_mtx_lock_spin(&ifp
->if_start_lock
);
2845 if (ifp
->if_start_thread
!= NULL
) {
2846 (void) msleep(&ifp
->if_start_thread
,
2847 &ifp
->if_start_lock
,
2848 (PZERO
- 1) | PSPIN
, ifname
, ts
);
2850 /* interface is detached? */
2851 if (ifp
->if_start_thread
== THREAD_NULL
) {
2852 ifnet_set_start_cycle(ifp
, NULL
);
2853 lck_mtx_unlock(&ifp
->if_start_lock
);
2857 printf("%s: starter thread terminated\n",
2861 /* for the extra refcnt from kernel_thread_start() */
2862 thread_deallocate(current_thread());
2863 /* this is the end */
2864 thread_terminate(current_thread());
2869 ifp
->if_start_active
= 1;
2872 u_int32_t req
= ifp
->if_start_req
;
2873 if (!IFCQ_IS_EMPTY(ifq
) &&
2874 (ifp
->if_eflags
& IFEF_ENQUEUE_MULTI
) &&
2875 ifp
->if_start_delayed
== 0 &&
2876 IFCQ_LEN(ifq
) < ifp
->if_start_delay_qlen
&&
2877 (ifp
->if_eflags
& IFEF_DELAY_START
)) {
2878 ifp
->if_start_delayed
= 1;
2879 ifnet_start_delayed
++;
2882 ifp
->if_start_delayed
= 0;
2884 lck_mtx_unlock(&ifp
->if_start_lock
);
2887 * If no longer attached, don't call start because ifp
2888 * is being destroyed; else hold an IO refcnt to
2889 * prevent the interface from being detached (will be
2892 if (!ifnet_is_attached(ifp
, 1)) {
2893 lck_mtx_lock_spin(&ifp
->if_start_lock
);
2897 /* invoke the driver's start routine */
2898 ((*ifp
->if_start
)(ifp
));
2901 * Release the io ref count taken by ifnet_is_attached.
2903 ifnet_decr_iorefcnt(ifp
);
2905 lck_mtx_lock_spin(&ifp
->if_start_lock
);
2908 * If there's no pending request or if the
2909 * interface has been disabled, we're done.
2911 if (req
== ifp
->if_start_req
||
2912 (ifp
->if_start_flags
& IFSF_FLOW_CONTROLLED
)) {
2917 ifp
->if_start_req
= 0;
2918 ifp
->if_start_active
= 0;
2921 * Wakeup N ns from now if rate-controlled by TBR, and if
2922 * there are still packets in the send queue which haven't
2923 * been dequeued so far; else sleep indefinitely (ts = NULL)
2924 * until ifnet_start() is called again.
2926 ts
= ((IFCQ_TBR_IS_ENABLED(ifq
) && !IFCQ_IS_EMPTY(ifq
)) ?
2927 &ifp
->if_start_cycle
: NULL
);
2929 if (ts
== NULL
&& ifp
->if_start_delayed
== 1) {
2930 delay_start_ts
.tv_sec
= 0;
2931 delay_start_ts
.tv_nsec
= ifp
->if_start_delay_timeout
;
2932 ts
= &delay_start_ts
;
2935 if (ts
!= NULL
&& ts
->tv_sec
== 0 && ts
->tv_nsec
== 0)
void
ifnet_set_start_cycle(struct ifnet *ifp, struct timespec *ts)
{
	if (ts == NULL)
		bzero(&ifp->if_start_cycle, sizeof (ifp->if_start_cycle));
	else
		*(&ifp->if_start_cycle) = *ts;

	if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose)
		printf("%s: restart interval set to %lu nsec\n",
		    if_name(ifp), ts->tv_nsec);
}

static void
ifnet_poll(struct ifnet *ifp)
{
	/*
	 * If the poller thread is inactive, signal it to do work.
	 */
	lck_mtx_lock_spin(&ifp->if_poll_lock);
	ifp->if_poll_req++;
	if (!ifp->if_poll_active && ifp->if_poll_thread != THREAD_NULL) {
		wakeup_one((caddr_t)&ifp->if_poll_thread);
	}
	lck_mtx_unlock(&ifp->if_poll_lock);
}
2970 ifnet_poll_thread_fn(void *v
, wait_result_t w
)
2973 struct dlil_threading_info
*inp
;
2974 struct ifnet
*ifp
= v
;
2975 char ifname
[IFNAMSIZ
+ 1];
2976 struct timespec
*ts
= NULL
;
2977 struct ifnet_stat_increment_param s
;
2979 snprintf(ifname
, sizeof (ifname
), "%s_poller",
2981 bzero(&s
, sizeof (s
));
2983 lck_mtx_lock_spin(&ifp
->if_poll_lock
);
2986 VERIFY(inp
!= NULL
);
2989 if (ifp
->if_poll_thread
!= THREAD_NULL
) {
2990 (void) msleep(&ifp
->if_poll_thread
, &ifp
->if_poll_lock
,
2991 (PZERO
- 1) | PSPIN
, ifname
, ts
);
2994 /* interface is detached (maybe while asleep)? */
2995 if (ifp
->if_poll_thread
== THREAD_NULL
) {
2996 ifnet_set_poll_cycle(ifp
, NULL
);
2997 lck_mtx_unlock(&ifp
->if_poll_lock
);
3000 printf("%s: poller thread terminated\n",
3004 /* for the extra refcnt from kernel_thread_start() */
3005 thread_deallocate(current_thread());
3006 /* this is the end */
3007 thread_terminate(current_thread());
3012 ifp
->if_poll_active
= 1;
3014 struct mbuf
*m_head
, *m_tail
;
3015 u_int32_t m_lim
, m_cnt
, m_totlen
;
3016 u_int16_t req
= ifp
->if_poll_req
;
3018 lck_mtx_unlock(&ifp
->if_poll_lock
);
3021 * If no longer attached, there's nothing to do;
3022 * else hold an IO refcnt to prevent the interface
3023 * from being detached (will be released below.)
3025 if (!ifnet_is_attached(ifp
, 1)) {
3026 lck_mtx_lock_spin(&ifp
->if_poll_lock
);
3030 m_lim
= (inp
->rxpoll_plim
!= 0) ? inp
->rxpoll_plim
:
3031 MAX((qlimit(&inp
->rcvq_pkts
)),
3032 (inp
->rxpoll_phiwat
<< 2));
3034 if (dlil_verbose
> 1) {
3035 printf("%s: polling up to %d pkts, "
3036 "pkts avg %d max %d, wreq avg %d, "
3038 if_name(ifp
), m_lim
,
3039 inp
->rxpoll_pavg
, inp
->rxpoll_pmax
,
3040 inp
->rxpoll_wavg
, inp
->rxpoll_bavg
);
3043 /* invoke the driver's input poll routine */
3044 ((*ifp
->if_input_poll
)(ifp
, 0, m_lim
, &m_head
, &m_tail
,
3045 &m_cnt
, &m_totlen
));
3047 if (m_head
!= NULL
) {
3048 VERIFY(m_tail
!= NULL
&& m_cnt
> 0);
3050 if (dlil_verbose
> 1) {
3051 printf("%s: polled %d pkts, "
3052 "pkts avg %d max %d, wreq avg %d, "
3054 if_name(ifp
), m_cnt
,
3055 inp
->rxpoll_pavg
, inp
->rxpoll_pmax
,
3056 inp
->rxpoll_wavg
, inp
->rxpoll_bavg
);
3059 /* stats are required for extended variant */
3060 s
.packets_in
= m_cnt
;
3061 s
.bytes_in
= m_totlen
;
3063 (void) ifnet_input_common(ifp
, m_head
, m_tail
,
3066 if (dlil_verbose
> 1) {
3067 printf("%s: no packets, "
3068 "pkts avg %d max %d, wreq avg %d, "
3070 if_name(ifp
), inp
->rxpoll_pavg
,
3071 inp
->rxpoll_pmax
, inp
->rxpoll_wavg
,
3075 (void) ifnet_input_common(ifp
, NULL
, NULL
,
3079 /* Release the io ref count */
3080 ifnet_decr_iorefcnt(ifp
);
3082 lck_mtx_lock_spin(&ifp
->if_poll_lock
);
3084 /* if there's no pending request, we're done */
3085 if (req
== ifp
->if_poll_req
) {
3089 ifp
->if_poll_req
= 0;
3090 ifp
->if_poll_active
= 0;
3093 * Wakeup N ns from now, else sleep indefinitely (ts = NULL)
3094 * until ifnet_poll() is called again.
3096 ts
= &ifp
->if_poll_cycle
;
3097 if (ts
->tv_sec
== 0 && ts
->tv_nsec
== 0)
void
ifnet_set_poll_cycle(struct ifnet *ifp, struct timespec *ts)
{
	if (ts == NULL)
		bzero(&ifp->if_poll_cycle, sizeof (ifp->if_poll_cycle));
	else
		*(&ifp->if_poll_cycle) = *ts;

	if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose)
		printf("%s: poll interval set to %lu nsec\n",
		    if_name(ifp), ts->tv_nsec);
}

void
ifnet_purge(struct ifnet *ifp)
{
	if (ifp != NULL && (ifp->if_eflags & IFEF_TXSTART))
		if_qflush(ifp, 0);
}
void
ifnet_update_sndq(struct ifclassq *ifq, cqev_t ev)
{
	IFCQ_LOCK_ASSERT_HELD(ifq);

	if (!(IFCQ_IS_READY(ifq)))
		return;

	if (IFCQ_TBR_IS_ENABLED(ifq)) {
		struct tb_profile tb = { ifq->ifcq_tbr.tbr_rate_raw,
		    ifq->ifcq_tbr.tbr_percent, 0 };
		(void) ifclassq_tbr_set(ifq, &tb, FALSE);
	}

	ifclassq_update(ifq, ev);
}

void
ifnet_update_rcv(struct ifnet *ifp, cqev_t ev)
{
	switch (ev) {
	case CLASSQ_EV_LINK_BANDWIDTH:
		if (net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL))
			ifp->if_poll_update++;
		break;

	default:
		break;
	}
}

errno_t
ifnet_set_output_sched_model(struct ifnet *ifp, u_int32_t model)
{
	struct ifclassq *ifq;
	u_int32_t omodel;
	errno_t err;

	if (ifp == NULL || model >= IFNET_SCHED_MODEL_MAX)
		return (EINVAL);
	else if (!(ifp->if_eflags & IFEF_TXSTART))
		return (ENXIO);

	ifq = &ifp->if_snd;
	IFCQ_LOCK(ifq);
	omodel = ifp->if_output_sched_model;
	ifp->if_output_sched_model = model;
	if ((err = ifclassq_pktsched_setup(ifq)) != 0)
		ifp->if_output_sched_model = omodel;
	IFCQ_UNLOCK(ifq);

	return (err);
}
errno_t
ifnet_set_sndq_maxlen(struct ifnet *ifp, u_int32_t maxqlen)
{
	if (ifp == NULL)
		return (EINVAL);
	else if (!(ifp->if_eflags & IFEF_TXSTART))
		return (ENXIO);

	ifclassq_set_maxlen(&ifp->if_snd, maxqlen);

	return (0);
}

errno_t
ifnet_get_sndq_maxlen(struct ifnet *ifp, u_int32_t *maxqlen)
{
	if (ifp == NULL || maxqlen == NULL)
		return (EINVAL);
	else if (!(ifp->if_eflags & IFEF_TXSTART))
		return (ENXIO);

	*maxqlen = ifclassq_get_maxlen(&ifp->if_snd);

	return (0);
}

errno_t
ifnet_get_sndq_len(struct ifnet *ifp, u_int32_t *pkts)
{
	errno_t err;

	if (ifp == NULL || pkts == NULL)
		err = EINVAL;
	else if (!(ifp->if_eflags & IFEF_TXSTART))
		err = ENXIO;
	else
		err = ifclassq_get_len(&ifp->if_snd, MBUF_SC_UNSPEC,
		    pkts, NULL);

	return (err);
}

errno_t
ifnet_get_service_class_sndq_len(struct ifnet *ifp, mbuf_svc_class_t sc,
    u_int32_t *pkts, u_int32_t *bytes)
{
	errno_t err;

	if (ifp == NULL || !MBUF_VALID_SC(sc) ||
	    (pkts == NULL && bytes == NULL))
		err = EINVAL;
	else if (!(ifp->if_eflags & IFEF_TXSTART))
		err = ENXIO;
	else
		err = ifclassq_get_len(&ifp->if_snd, sc, pkts, bytes);

	return (err);
}
errno_t
ifnet_set_rcvq_maxlen(struct ifnet *ifp, u_int32_t maxqlen)
{
	struct dlil_threading_info *inp;

	if (ifp == NULL)
		return (EINVAL);
	else if (!(ifp->if_eflags & IFEF_RXPOLL) || ifp->if_inp == NULL)
		return (ENXIO);

	if (maxqlen == 0)
		maxqlen = if_rcvq_maxlen;
	else if (maxqlen < IF_RCVQ_MINLEN)
		maxqlen = IF_RCVQ_MINLEN;

	inp = ifp->if_inp;
	lck_mtx_lock(&inp->input_lck);
	qlimit(&inp->rcvq_pkts) = maxqlen;
	lck_mtx_unlock(&inp->input_lck);

	return (0);
}

errno_t
ifnet_get_rcvq_maxlen(struct ifnet *ifp, u_int32_t *maxqlen)
{
	struct dlil_threading_info *inp;

	if (ifp == NULL || maxqlen == NULL)
		return (EINVAL);
	else if (!(ifp->if_eflags & IFEF_RXPOLL) || ifp->if_inp == NULL)
		return (ENXIO);

	inp = ifp->if_inp;
	lck_mtx_lock(&inp->input_lck);
	*maxqlen = qlimit(&inp->rcvq_pkts);
	lck_mtx_unlock(&inp->input_lck);

	return (0);
}
void
ifnet_enqueue_multi_setup(struct ifnet *ifp, uint16_t delay_qlen,
    uint16_t delay_timeout)
{
	if (delay_qlen > 0 && delay_timeout > 0) {
		ifp->if_eflags |= IFEF_ENQUEUE_MULTI;
		ifp->if_start_delay_qlen = min(100, delay_qlen);
		ifp->if_start_delay_timeout = min(20000, delay_timeout);
		/* convert timeout to nanoseconds */
		ifp->if_start_delay_timeout *= 1000;
		kprintf("%s: forced IFEF_ENQUEUE_MULTI qlen %u timeout %u\n",
		    ifp->if_xname, (uint32_t)delay_qlen,
		    (uint32_t)delay_timeout);
	} else {
		ifp->if_eflags &= ~IFEF_ENQUEUE_MULTI;
	}
}
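
/*
 * Illustrative sketch (example only): enabling start-callback coalescing
 * through ifnet_enqueue_multi_setup() above.  The queue length is clamped
 * to 100 packets and the timeout to 20000 usec before being stored in
 * nanoseconds, so the 2000 usec used here ends up as 2000000 nsec.
 */
#if 0	/* example only, compiled out */
static void
example_enable_tx_coalescing(struct ifnet *ifp)
{
	/* coalesce up to 16 packets or 2000 usec worth of enqueues */
	ifnet_enqueue_multi_setup(ifp, 16, 2000);
}
#endif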
3295 static inline errno_t
3296 ifnet_enqueue_common(struct ifnet
*ifp
, void *p
, classq_pkt_type_t ptype
,
3297 boolean_t flush
, boolean_t
*pdrop
)
3299 volatile uint64_t *fg_ts
= NULL
;
3300 volatile uint64_t *rt_ts
= NULL
;
3302 struct timespec now
;
3303 u_int64_t now_nsec
= 0;
3306 ASSERT(ifp
->if_eflags
& IFEF_TXSTART
);
3309 * If packet already carries a timestamp, either from dlil_output()
3310 * or from flowswitch, use it here. Otherwise, record timestamp.
3311 * PKTF_TS_VALID is always cleared prior to entering classq, i.e.
3312 * the timestamp value is used internally there.
3316 ASSERT(m
->m_flags
& M_PKTHDR
);
3317 ASSERT(m
->m_nextpkt
== NULL
);
3319 if (!(m
->m_pkthdr
.pkt_flags
& PKTF_TS_VALID
) ||
3320 m
->m_pkthdr
.pkt_timestamp
== 0) {
3322 net_timernsec(&now
, &now_nsec
);
3323 m
->m_pkthdr
.pkt_timestamp
= now_nsec
;
3325 m
->m_pkthdr
.pkt_flags
&= ~PKTF_TS_VALID
;
3327 * If the packet service class is not background,
3328 * update the timestamp to indicate recent activity
3329 * on a foreground socket.
3331 if ((m
->m_pkthdr
.pkt_flags
& PKTF_FLOW_ID
) &&
3332 m
->m_pkthdr
.pkt_flowsrc
== FLOWSRC_INPCB
) {
3333 if (!(m
->m_pkthdr
.pkt_flags
& PKTF_SO_BACKGROUND
)) {
3334 ifp
->if_fg_sendts
= _net_uptime
;
3336 *fg_ts
= _net_uptime
;
3338 if (m
->m_pkthdr
.pkt_flags
& PKTF_SO_REALTIME
) {
3339 ifp
->if_rt_sendts
= _net_uptime
;
3341 *rt_ts
= _net_uptime
;
3352 if (ifp
->if_eflags
& IFEF_ENQUEUE_MULTI
) {
3353 if (now_nsec
== 0) {
3355 net_timernsec(&now
, &now_nsec
);
3358 * If the driver chose to delay start callback for
3359 * coalescing multiple packets, Then use the following
3360 * heuristics to make sure that start callback will
3361 * be delayed only when bulk data transfer is detected.
3362 * 1. number of packets enqueued in (delay_win * 2) is
3363 * greater than or equal to the delay qlen.
3364 * 2. If delay_start is enabled it will stay enabled for
3365 * another 10 idle windows. This is to take into account
3366 * variable RTT and burst traffic.
3367 * 3. If the time elapsed since last enqueue is more
3368 * than 200ms we disable delaying start callback. This is
3369 * is to take idle time into account.
3371 u_int64_t dwin
= (ifp
->if_start_delay_timeout
<< 1);
3372 if (ifp
->if_start_delay_swin
> 0) {
3373 if ((ifp
->if_start_delay_swin
+ dwin
) > now_nsec
) {
3374 ifp
->if_start_delay_cnt
++;
3375 } else if ((now_nsec
- ifp
->if_start_delay_swin
)
3376 >= (200 * 1000 * 1000)) {
3377 ifp
->if_start_delay_swin
= now_nsec
;
3378 ifp
->if_start_delay_cnt
= 1;
3379 ifp
->if_start_delay_idle
= 0;
3380 if (ifp
->if_eflags
& IFEF_DELAY_START
) {
3382 ~(IFEF_DELAY_START
);
3383 ifnet_delay_start_disabled
++;
3386 if (ifp
->if_start_delay_cnt
>=
3387 ifp
->if_start_delay_qlen
) {
3388 ifp
->if_eflags
|= IFEF_DELAY_START
;
3389 ifp
->if_start_delay_idle
= 0;
3391 if (ifp
->if_start_delay_idle
>= 10) {
3392 ifp
->if_eflags
&= ~(IFEF_DELAY_START
);
3393 ifnet_delay_start_disabled
++;
3395 ifp
->if_start_delay_idle
++;
3398 ifp
->if_start_delay_swin
= now_nsec
;
3399 ifp
->if_start_delay_cnt
= 1;
3402 ifp
->if_start_delay_swin
= now_nsec
;
3403 ifp
->if_start_delay_cnt
= 1;
3404 ifp
->if_start_delay_idle
= 0;
3405 ifp
->if_eflags
&= ~(IFEF_DELAY_START
);
3408 ifp
->if_eflags
&= ~(IFEF_DELAY_START
);
3413 /* enqueue the packet (caller consumes object) */
3414 error
= ifclassq_enqueue(&ifp
->if_snd
, m
, QP_MBUF
, pdrop
);
3424 * Tell the driver to start dequeueing; do this even when the queue
3425 * for the packet is suspended (EQSUSPENDED), as the driver could still
3426 * be dequeueing from other unsuspended queues.
3428 if (!(ifp
->if_eflags
& IFEF_ENQUEUE_MULTI
) &&
3429 ((error
== 0 && flush
) || error
== EQFULL
|| error
== EQSUSPENDED
))
errno_t
ifnet_enqueue(struct ifnet *ifp, struct mbuf *m)
{
	boolean_t pdrop;

	return (ifnet_enqueue_mbuf(ifp, m, TRUE, &pdrop));
}

errno_t
ifnet_enqueue_mbuf(struct ifnet *ifp, struct mbuf *m, boolean_t flush,
    boolean_t *pdrop)
{
	if (ifp == NULL || m == NULL || !(m->m_flags & M_PKTHDR) ||
	    m->m_nextpkt != NULL) {
		if (m != NULL) {
			m_freem_list(m);
			*pdrop = TRUE;
		}
		return (EINVAL);
	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    !IF_FULLY_ATTACHED(ifp)) {
		/* flag tested without lock for performance */
		m_freem(m);
		*pdrop = TRUE;
		return (ENXIO);
	} else if (!(ifp->if_flags & IFF_UP)) {
		m_freem(m);
		*pdrop = TRUE;
		return (ENETDOWN);
	}

	return (ifnet_enqueue_common(ifp, m, QP_MBUF, flush, pdrop));
}
3470 ifnet_dequeue(struct ifnet
*ifp
, struct mbuf
**mp
)
3473 classq_pkt_type_t ptype
;
3474 if (ifp
== NULL
|| mp
== NULL
)
3476 else if (!(ifp
->if_eflags
& IFEF_TXSTART
) ||
3477 ifp
->if_output_sched_model
>= IFNET_SCHED_MODEL_MAX
)
3479 if (!ifnet_is_attached(ifp
, 1))
3482 rc
= ifclassq_dequeue(&ifp
->if_snd
, 1, CLASSQ_DEQUEUE_MAX_BYTE_LIMIT
,
3483 (void **)mp
, NULL
, NULL
, NULL
, &ptype
);
3484 VERIFY((*mp
== NULL
) || (ptype
== QP_MBUF
));
3485 ifnet_decr_iorefcnt(ifp
);
3491 ifnet_dequeue_service_class(struct ifnet
*ifp
, mbuf_svc_class_t sc
,
3495 classq_pkt_type_t ptype
;
3496 if (ifp
== NULL
|| mp
== NULL
|| !MBUF_VALID_SC(sc
))
3498 else if (!(ifp
->if_eflags
& IFEF_TXSTART
) ||
3499 ifp
->if_output_sched_model
>= IFNET_SCHED_MODEL_MAX
)
3501 if (!ifnet_is_attached(ifp
, 1))
3504 rc
= ifclassq_dequeue_sc(&ifp
->if_snd
, sc
, 1,
3505 CLASSQ_DEQUEUE_MAX_BYTE_LIMIT
, (void **)mp
, NULL
, NULL
,
3507 VERIFY((*mp
== NULL
) || (ptype
== QP_MBUF
));
3508 ifnet_decr_iorefcnt(ifp
);
3513 ifnet_dequeue_multi(struct ifnet
*ifp
, u_int32_t pkt_limit
,
3514 struct mbuf
**head
, struct mbuf
**tail
, u_int32_t
*cnt
, u_int32_t
*len
)
3517 classq_pkt_type_t ptype
;
3518 if (ifp
== NULL
|| head
== NULL
|| pkt_limit
< 1)
3520 else if (!(ifp
->if_eflags
& IFEF_TXSTART
) ||
3521 ifp
->if_output_sched_model
>= IFNET_SCHED_MODEL_MAX
)
3523 if (!ifnet_is_attached(ifp
, 1))
3526 rc
= ifclassq_dequeue(&ifp
->if_snd
, pkt_limit
,
3527 CLASSQ_DEQUEUE_MAX_BYTE_LIMIT
, (void **)head
, (void **)tail
, cnt
,
3529 VERIFY((*head
== NULL
) || (ptype
== QP_MBUF
));
3530 ifnet_decr_iorefcnt(ifp
);
3535 ifnet_dequeue_multi_bytes(struct ifnet
*ifp
, u_int32_t byte_limit
,
3536 struct mbuf
**head
, struct mbuf
**tail
, u_int32_t
*cnt
, u_int32_t
*len
)
3539 classq_pkt_type_t ptype
;
3540 if (ifp
== NULL
|| head
== NULL
|| byte_limit
< 1)
3542 else if (!(ifp
->if_eflags
& IFEF_TXSTART
) ||
3543 ifp
->if_output_sched_model
>= IFNET_SCHED_MODEL_MAX
)
3545 if (!ifnet_is_attached(ifp
, 1))
3548 rc
= ifclassq_dequeue(&ifp
->if_snd
, CLASSQ_DEQUEUE_MAX_PKT_LIMIT
,
3549 byte_limit
, (void **)head
, (void **)tail
, cnt
, len
, &ptype
);
3550 VERIFY((*head
== NULL
) || (ptype
== QP_MBUF
));
3551 ifnet_decr_iorefcnt(ifp
);
3556 ifnet_dequeue_service_class_multi(struct ifnet
*ifp
, mbuf_svc_class_t sc
,
3557 u_int32_t pkt_limit
, struct mbuf
**head
, struct mbuf
**tail
, u_int32_t
*cnt
,
3561 classq_pkt_type_t ptype
;
3562 if (ifp
== NULL
|| head
== NULL
|| pkt_limit
< 1 ||
3565 else if (!(ifp
->if_eflags
& IFEF_TXSTART
) ||
3566 ifp
->if_output_sched_model
>= IFNET_SCHED_MODEL_MAX
)
3568 if (!ifnet_is_attached(ifp
, 1))
3571 rc
= ifclassq_dequeue_sc(&ifp
->if_snd
, sc
, pkt_limit
,
3572 CLASSQ_DEQUEUE_MAX_BYTE_LIMIT
, (void **)head
,
3573 (void **)tail
, cnt
, len
, &ptype
);
3574 VERIFY((*head
== NULL
) || (ptype
== QP_MBUF
));
3575 ifnet_decr_iorefcnt(ifp
);
#if !CONFIG_EMBEDDED
static errno_t
ifnet_framer_stub(struct ifnet *ifp, struct mbuf **m,
    const struct sockaddr *dest, const char *dest_linkaddr,
    const char *frame_type, u_int32_t *pre, u_int32_t *post)
{
	if (pre != NULL)
		*pre = 0;
	if (post != NULL)
		*post = 0;

	return (ifp->if_framer_legacy(ifp, m, dest, dest_linkaddr, frame_type));
}
#endif /* !CONFIG_EMBEDDED */
3595 dlil_interface_filters_input(struct ifnet
*ifp
, struct mbuf
**m_p
,
3596 char **frame_header_p
, protocol_family_t protocol_family
)
3598 struct ifnet_filter
*filter
;
3601 * Pass the inbound packet to the interface filters
3603 lck_mtx_lock_spin(&ifp
->if_flt_lock
);
3604 /* prevent filter list from changing in case we drop the lock */
3605 if_flt_monitor_busy(ifp
);
3606 TAILQ_FOREACH(filter
, &ifp
->if_flt_head
, filt_next
) {
3609 if (!filter
->filt_skip
&& filter
->filt_input
!= NULL
&&
3610 (filter
->filt_protocol
== 0 ||
3611 filter
->filt_protocol
== protocol_family
)) {
3612 lck_mtx_unlock(&ifp
->if_flt_lock
);
3614 result
= (*filter
->filt_input
)(filter
->filt_cookie
,
3615 ifp
, protocol_family
, m_p
, frame_header_p
);
3617 lck_mtx_lock_spin(&ifp
->if_flt_lock
);
3619 /* we're done with the filter list */
3620 if_flt_monitor_unbusy(ifp
);
3621 lck_mtx_unlock(&ifp
->if_flt_lock
);
3626 /* we're done with the filter list */
3627 if_flt_monitor_unbusy(ifp
);
3628 lck_mtx_unlock(&ifp
->if_flt_lock
);
3631 * Strip away M_PROTO1 bit prior to sending packet up the stack as
3632 * it is meant to be local to a subsystem -- if_bridge for M_PROTO1
3635 (*m_p
)->m_flags
&= ~M_PROTO1
;
3641 dlil_interface_filters_output(struct ifnet
*ifp
, struct mbuf
**m_p
,
3642 protocol_family_t protocol_family
)
3644 struct ifnet_filter
*filter
;
3647 * Pass the outbound packet to the interface filters
3649 lck_mtx_lock_spin(&ifp
->if_flt_lock
);
3650 /* prevent filter list from changing in case we drop the lock */
3651 if_flt_monitor_busy(ifp
);
3652 TAILQ_FOREACH(filter
, &ifp
->if_flt_head
, filt_next
) {
3655 if (!filter
->filt_skip
&& filter
->filt_output
!= NULL
&&
3656 (filter
->filt_protocol
== 0 ||
3657 filter
->filt_protocol
== protocol_family
)) {
3658 lck_mtx_unlock(&ifp
->if_flt_lock
);
3660 result
= filter
->filt_output(filter
->filt_cookie
, ifp
,
3661 protocol_family
, m_p
);
3663 lck_mtx_lock_spin(&ifp
->if_flt_lock
);
3665 /* we're done with the filter list */
3666 if_flt_monitor_unbusy(ifp
);
3667 lck_mtx_unlock(&ifp
->if_flt_lock
);
3672 /* we're done with the filter list */
3673 if_flt_monitor_unbusy(ifp
);
3674 lck_mtx_unlock(&ifp
->if_flt_lock
);
static void
dlil_ifproto_input(struct if_proto * ifproto, mbuf_t m)
{
	int error;

	if (ifproto->proto_kpi == kProtoKPI_v1) {
		/* Version 1 protocols get one packet at a time */
		while (m != NULL) {
			char *	frame_header;
			mbuf_t	next_packet;

			next_packet = m->m_nextpkt;
			m->m_nextpkt = NULL;
			frame_header = m->m_pkthdr.pkt_hdr;
			m->m_pkthdr.pkt_hdr = NULL;
			error = (*ifproto->kpi.v1.input)(ifproto->ifp,
			    ifproto->protocol_family, m, frame_header);
			if (error != 0 && error != EJUSTRETURN)
				m_freem(m);
			m = next_packet;
		}
	} else if (ifproto->proto_kpi == kProtoKPI_v2) {
		/* Version 2 protocols support packet lists */
		error = (*ifproto->kpi.v2.input)(ifproto->ifp,
		    ifproto->protocol_family, m);
		if (error != 0 && error != EJUSTRETURN)
			m_freem_list(m);
	}
}
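
/*
 * Illustrative sketch (example only, not part of the original file): the v2
 * protocol input shape consumed by dlil_ifproto_input() above.  A v2 handler
 * receives a whole packet list and is responsible for consuming it; the
 * callback is normally registered via ifnet_attach_protocol_v2() from
 * <net/kpi_interface.h> (treat that registration path as an assumption here).
 */
#if 0	/* example only, compiled out */
static errno_t
example_proto_input_v2(ifnet_t ifp, protocol_family_t protocol,
    mbuf_t packet_list)
{
#pragma unused(ifp, protocol)
	mbuf_t m, next;

	/* walk and consume the chain; DLIL will not free it on our behalf */
	for (m = packet_list; m != NULL; m = next) {
		next = mbuf_nextpkt(m);
		mbuf_setnextpkt(m, NULL);
		mbuf_freem(m);
	}
	return (0);
}
#endif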
static void
dlil_input_stats_add(const struct ifnet_stat_increment_param *s,
    struct dlil_threading_info *inp, boolean_t poll)
{
	struct ifnet_stat_increment_param *d = &inp->stats;

	if (s->packets_in != 0)
		d->packets_in += s->packets_in;
	if (s->bytes_in != 0)
		d->bytes_in += s->bytes_in;
	if (s->errors_in != 0)
		d->errors_in += s->errors_in;

	if (s->packets_out != 0)
		d->packets_out += s->packets_out;
	if (s->bytes_out != 0)
		d->bytes_out += s->bytes_out;
	if (s->errors_out != 0)
		d->errors_out += s->errors_out;

	if (s->collisions != 0)
		d->collisions += s->collisions;
	if (s->dropped != 0)
		d->dropped += s->dropped;

	if (poll)
		PKTCNTR_ADD(&inp->tstats, s->packets_in, s->bytes_in);
}
static void
dlil_input_stats_sync(struct ifnet *ifp, struct dlil_threading_info *inp)
{
	struct ifnet_stat_increment_param *s = &inp->stats;

	/*
	 * Use of atomic operations is unavoidable here because
	 * these stats may also be incremented elsewhere via KPIs.
	 */
	if (s->packets_in != 0) {
		atomic_add_64(&ifp->if_data.ifi_ipackets, s->packets_in);
		s->packets_in = 0;
	}
	if (s->bytes_in != 0) {
		atomic_add_64(&ifp->if_data.ifi_ibytes, s->bytes_in);
		s->bytes_in = 0;
	}
	if (s->errors_in != 0) {
		atomic_add_64(&ifp->if_data.ifi_ierrors, s->errors_in);
		s->errors_in = 0;
	}

	if (s->packets_out != 0) {
		atomic_add_64(&ifp->if_data.ifi_opackets, s->packets_out);
		s->packets_out = 0;
	}
	if (s->bytes_out != 0) {
		atomic_add_64(&ifp->if_data.ifi_obytes, s->bytes_out);
		s->bytes_out = 0;
	}
	if (s->errors_out != 0) {
		atomic_add_64(&ifp->if_data.ifi_oerrors, s->errors_out);
		s->errors_out = 0;
	}

	if (s->collisions != 0) {
		atomic_add_64(&ifp->if_data.ifi_collisions, s->collisions);
		s->collisions = 0;
	}
	if (s->dropped != 0) {
		atomic_add_64(&ifp->if_data.ifi_iqdrops, s->dropped);
		s->dropped = 0;
	}

	if (ifp->if_data_threshold != 0) {
		lck_mtx_convert_spin(&inp->input_lck);
		ifnet_notify_data_threshold(ifp);
	}

	/*
	 * No need for atomic operations as they are modified here
	 * only from within the DLIL input thread context.
	 */
	if (inp->tstats.packets != 0) {
		inp->pstats.ifi_poll_packets += inp->tstats.packets;
		inp->tstats.packets = 0;
	}
	if (inp->tstats.bytes != 0) {
		inp->pstats.ifi_poll_bytes += inp->tstats.bytes;
		inp->tstats.bytes = 0;
	}
}
__private_extern__ void
dlil_input_packet_list(struct ifnet *ifp, struct mbuf *m)
{
	return (dlil_input_packet_list_common(ifp, m, 0,
	    IFNET_MODEL_INPUT_POLL_OFF, FALSE));
}

__private_extern__ void
dlil_input_packet_list_extended(struct ifnet *ifp, struct mbuf *m,
    u_int32_t cnt, ifnet_model_t mode)
{
	return (dlil_input_packet_list_common(ifp, m, cnt, mode, TRUE));
}
static void
dlil_input_packet_list_common(struct ifnet *ifp_param, struct mbuf *m,
    u_int32_t cnt, ifnet_model_t mode, boolean_t ext)
{
	int error = 0;
	protocol_family_t protocol_family;
	mbuf_t next_packet;
	ifnet_t ifp = ifp_param;
	char *frame_header;
	struct if_proto *last_ifproto = NULL;
	mbuf_t pkt_first = NULL;
	mbuf_t *pkt_next = NULL;
	u_int32_t poll_thresh = 0, poll_ival = 0;

	KERNEL_DEBUG(DBG_FNC_DLIL_INPUT | DBG_FUNC_START, 0, 0, 0, 0, 0);

	if (ext && mode == IFNET_MODEL_INPUT_POLL_ON && cnt > 1 &&
	    (poll_ival = if_rxpoll_interval_pkts) > 0)
		poll_thresh = cnt;

	while (m != NULL) {
		struct if_proto *ifproto = NULL;
		int iorefcnt = 0;
		uint32_t pktf_mask;	/* pkt flags to preserve */

		if (ifp_param == NULL)
			ifp = m->m_pkthdr.rcvif;

		if ((ifp->if_eflags & IFEF_RXPOLL) && poll_thresh != 0 &&
		    poll_ival > 0 && (--poll_thresh % poll_ival) == 0)
			ifnet_poll(ifp);

		/* Check if this mbuf looks valid */
		MBUF_INPUT_CHECK(m, ifp);

		next_packet = m->m_nextpkt;
		m->m_nextpkt = NULL;
		frame_header = m->m_pkthdr.pkt_hdr;
		m->m_pkthdr.pkt_hdr = NULL;

		/*
		 * Get an IO reference count if the interface is not
		 * loopback (lo0) and it is attached; lo0 never goes
		 * away, so optimize for that.
		 */
		if (ifp != lo_ifp) {
			if (!ifnet_is_attached(ifp, 1)) {
				m_freem(m);
				goto next;
			}
			iorefcnt = 1;
			/*
			 * Preserve the time stamp if it was set.
			 */
			pktf_mask = PKTF_TS_VALID;
		} else {
			/*
			 * If this arrived on lo0, preserve interface addr
			 * info to allow for connectivity between loopback
			 * and local interface addresses.
			 */
			pktf_mask = (PKTF_LOOP|PKTF_IFAINFO);
		}

		/* make sure packet comes in clean */
		m_classifier_init(m, pktf_mask);

		ifp_inc_traffic_class_in(ifp, m);

		/* find which protocol family this packet is for */
		ifnet_lock_shared(ifp);
		error = (*ifp->if_demux)(ifp, m, frame_header,
		    &protocol_family);
		ifnet_lock_done(ifp);
		if (error != 0) {
			if (error == EJUSTRETURN)
				goto next;
			protocol_family = 0;
		}

		if (hwcksum_dbg != 0 && !(ifp->if_flags & IFF_LOOPBACK) &&
		    !(m->m_pkthdr.pkt_flags & PKTF_LOOP))
			dlil_input_cksum_dbg(ifp, m, frame_header,
			    protocol_family);

		/*
		 * For partial checksum offload, we expect the driver to
		 * set the start offset indicating the start of the span
		 * that is covered by the hardware-computed checksum;
		 * adjust this start offset accordingly because the data
		 * pointer has been advanced beyond the link-layer header.
		 *
		 * Don't adjust if the interface is a bridge member, as
		 * the adjustment will occur from the context of the
		 * bridge interface during input.
		 */
		if (ifp->if_bridge == NULL && (m->m_pkthdr.csum_flags &
		    (CSUM_DATA_VALID | CSUM_PARTIAL)) ==
		    (CSUM_DATA_VALID | CSUM_PARTIAL)) {
			int adj;
			if (frame_header == NULL ||
			    frame_header < (char *)mbuf_datastart(m) ||
			    frame_header > (char *)m->m_data ||
			    (adj = (m->m_data - frame_header)) >
			    m->m_pkthdr.csum_rx_start) {
				m->m_pkthdr.csum_data = 0;
				m->m_pkthdr.csum_flags &= ~CSUM_DATA_VALID;
				hwcksum_in_invalidated++;
			} else {
				m->m_pkthdr.csum_rx_start -= adj;
			}
		}

		pktap_input(ifp, protocol_family, m, frame_header);

		if (m->m_flags & (M_BCAST|M_MCAST))
			atomic_add_64(&ifp->if_imcasts, 1);

		/* run interface filters, exclude VLAN packets PR-3586856 */
		if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) == 0) {
			error = dlil_interface_filters_input(ifp, &m,
			    &frame_header, protocol_family);
			if (error != 0) {
				if (error != EJUSTRETURN)
					m_freem(m);
				goto next;
			}
		}
		if (error != 0 || ((m->m_flags & M_PROMISC) != 0)) {
			m_freem(m);
			goto next;
		}

		/* Lookup the protocol attachment to this interface */
		if (protocol_family == 0) {
			ifproto = NULL;
		} else if (last_ifproto != NULL && last_ifproto->ifp == ifp &&
		    (last_ifproto->protocol_family == protocol_family)) {
			VERIFY(ifproto == NULL);
			ifproto = last_ifproto;
			if_proto_ref(last_ifproto);
		} else {
			VERIFY(ifproto == NULL);
			ifnet_lock_shared(ifp);
			/* callee holds a proto refcnt upon success */
			ifproto = find_attached_proto(ifp, protocol_family);
			ifnet_lock_done(ifp);
		}
		if (ifproto == NULL) {
			/* no protocol for this packet, discard */
			m_freem(m);
			goto next;
		}
		if (ifproto != last_ifproto) {
			if (last_ifproto != NULL) {
				/* pass up the list for the previous protocol */
				dlil_ifproto_input(last_ifproto, pkt_first);
				pkt_first = NULL;
				if_proto_free(last_ifproto);
			}
			last_ifproto = ifproto;
			if_proto_ref(ifproto);
		}
		/* extend the list */
		m->m_pkthdr.pkt_hdr = frame_header;
		if (pkt_first == NULL) {
			pkt_first = m;
		} else {
			*pkt_next = m;
		}
		pkt_next = &m->m_nextpkt;

next:
		if (next_packet == NULL && last_ifproto != NULL) {
			/* pass up the last list of packets */
			dlil_ifproto_input(last_ifproto, pkt_first);
			if_proto_free(last_ifproto);
			last_ifproto = NULL;
		}
		if (ifproto != NULL) {
			if_proto_free(ifproto);
			ifproto = NULL;
		}

		m = next_packet;

		/* update the driver's multicast filter, if needed */
		if (ifp->if_updatemcasts > 0 && if_mcasts_update(ifp) == 0)
			ifp->if_updatemcasts = 0;
		if (iorefcnt == 1)
			ifnet_decr_iorefcnt(ifp);
	}

	KERNEL_DEBUG(DBG_FNC_DLIL_INPUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
}
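/*
 * Ask the driver to re-program its multicast filter by issuing a
 * SIOCADDMULTI ioctl with no argument; this restores link-layer multicast
 * membership(s) that were left suspended, typically when an interface is
 * recycled and reattached.
 */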
static int
if_mcasts_update(struct ifnet *ifp)
{
	int err;

	err = ifnet_ioctl(ifp, 0, SIOCADDMULTI, NULL);
	if (err == EAFNOSUPPORT)
		err = 0;
	printf("%s: %s %d suspended link-layer multicast membership(s) "
	    "(err=%d)\n", if_name(ifp),
	    (err == 0 ? "successfully restored" : "failed to restore"),
	    ifp->if_updatemcasts, err);

	/* just return success */
	return (0);
}
/* If ifp is set, we will increment the generation for the interface */
int
dlil_post_complete_msg(struct ifnet *ifp, struct kev_msg *event)
{
	if (ifp != NULL) {
		ifnet_increment_generation(ifp);
	}

	necp_update_all_clients();

	return (kev_post_msg(event));
}
__private_extern__ void
dlil_post_sifflags_msg(struct ifnet *ifp)
{
	struct kev_msg ev_msg;
	struct net_event_data ev_data;

	bzero(&ev_data, sizeof (ev_data));
	bzero(&ev_msg, sizeof (ev_msg));
	ev_msg.vendor_code = KEV_VENDOR_APPLE;
	ev_msg.kev_class = KEV_NETWORK_CLASS;
	ev_msg.kev_subclass = KEV_DL_SUBCLASS;
	ev_msg.event_code = KEV_DL_SIFFLAGS;
	strlcpy(&ev_data.if_name[0], ifp->if_name, IFNAMSIZ);
	ev_data.if_family = ifp->if_family;
	ev_data.if_unit = (u_int32_t) ifp->if_unit;
	ev_msg.dv[0].data_length = sizeof(struct net_event_data);
	ev_msg.dv[0].data_ptr = &ev_data;
	ev_msg.dv[1].data_length = 0;
	dlil_post_complete_msg(ifp, &ev_msg);
}
#define	TMP_IF_PROTO_ARR_SIZE	10
static int
dlil_event_internal(struct ifnet *ifp, struct kev_msg *event, bool update_generation)
{
	struct ifnet_filter *filter = NULL;
	struct if_proto *proto = NULL;
	int if_proto_count = 0;
	struct if_proto **tmp_ifproto_arr = NULL;
	struct if_proto *tmp_ifproto_stack_arr[TMP_IF_PROTO_ARR_SIZE] = {NULL};
	int tmp_ifproto_arr_idx = 0;
	bool tmp_malloc = false;

	/*
	 * Pass the event to the interface filters
	 */
	lck_mtx_lock_spin(&ifp->if_flt_lock);
	/* prevent filter list from changing in case we drop the lock */
	if_flt_monitor_busy(ifp);
	TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
		if (filter->filt_event != NULL) {
			lck_mtx_unlock(&ifp->if_flt_lock);

			filter->filt_event(filter->filt_cookie, ifp,
			    filter->filt_protocol, event);

			lck_mtx_lock_spin(&ifp->if_flt_lock);
		}
	}
	/* we're done with the filter list */
	if_flt_monitor_unbusy(ifp);
	lck_mtx_unlock(&ifp->if_flt_lock);

	/* Get an io ref count if the interface is attached */
	if (!ifnet_is_attached(ifp, 1))
		goto done;

	/*
	 * An embedded tmp_list_entry in if_proto may still get
	 * over-written by another thread after giving up ifnet lock,
	 * therefore we are avoiding embedded pointers here.
	 */
	ifnet_lock_shared(ifp);
	if_proto_count = dlil_ifp_protolist(ifp, NULL, 0);
	if (if_proto_count) {
		int i;
		VERIFY(ifp->if_proto_hash != NULL);
		if (if_proto_count <= TMP_IF_PROTO_ARR_SIZE) {
			tmp_ifproto_arr = tmp_ifproto_stack_arr;
		} else {
			MALLOC(tmp_ifproto_arr, struct if_proto **,
			    sizeof (*tmp_ifproto_arr) * if_proto_count,
			    M_TEMP, M_WAITOK);
			if (tmp_ifproto_arr == NULL) {
				ifnet_lock_done(ifp);
				goto cleanup;
			}
			tmp_malloc = true;
		}

		for (i = 0; i < PROTO_HASH_SLOTS; i++) {
			SLIST_FOREACH(proto, &ifp->if_proto_hash[i],
			    next_hash) {
				if_proto_ref(proto);
				tmp_ifproto_arr[tmp_ifproto_arr_idx] = proto;
				tmp_ifproto_arr_idx++;
			}
		}
		VERIFY(if_proto_count == tmp_ifproto_arr_idx);
	}
	ifnet_lock_done(ifp);

	for (tmp_ifproto_arr_idx = 0; tmp_ifproto_arr_idx < if_proto_count;
	    tmp_ifproto_arr_idx++) {
		proto = tmp_ifproto_arr[tmp_ifproto_arr_idx];
		VERIFY(proto != NULL);
		proto_media_event eventp =
		    (proto->proto_kpi == kProtoKPI_v1 ?
		    proto->kpi.v1.event :
		    proto->kpi.v2.event);

		if (eventp != NULL) {
			eventp(ifp, proto->protocol_family,
			    event);
		}
		if_proto_free(proto);
	}

cleanup:
	if (tmp_malloc) {
		FREE(tmp_ifproto_arr, M_TEMP);
	}

	/* Pass the event to the interface */
	if (ifp->if_event != NULL)
		ifp->if_event(ifp, event);

	/* Release the io ref count */
	ifnet_decr_iorefcnt(ifp);
done:
	return (dlil_post_complete_msg(update_generation ? ifp : NULL, event));
}
errno_t
ifnet_event(ifnet_t ifp, struct kern_event_msg *event)
{
	struct kev_msg kev_msg;
	int result = 0;

	if (ifp == NULL || event == NULL)
		return (EINVAL);

	bzero(&kev_msg, sizeof (kev_msg));
	kev_msg.vendor_code = event->vendor_code;
	kev_msg.kev_class = event->kev_class;
	kev_msg.kev_subclass = event->kev_subclass;
	kev_msg.event_code = event->event_code;
	kev_msg.dv[0].data_ptr = &event->event_data[0];
	kev_msg.dv[0].data_length = event->total_size - KEV_MSG_HEADER_SIZE;
	kev_msg.dv[1].data_length = 0;

	result = dlil_event_internal(ifp, &kev_msg, TRUE);

	return (result);
}
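/*
 * Infer a socket type from the IP/IPv6 protocol field of an outbound
 * packet; SOCK_RAW is returned when nothing more specific applies. The
 * result is handed to the mac_ifnet_check_transmit() call in dlil_output()
 * below.
 */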
#include <netinet/ip6.h>
#include <netinet/ip.h>
static int
dlil_get_socket_type(struct mbuf **mp, int family, int raw)
{
	struct mbuf *m;
	struct ip *ip;
	struct ip6_hdr *ip6;
	int type = SOCK_RAW;

	if (!raw) {
		switch (family) {
		case PF_INET:
			m = m_pullup(*mp, sizeof(struct ip));
			if (m == NULL)
				break;
			*mp = m;
			ip = mtod(m, struct ip *);
			if (ip->ip_p == IPPROTO_TCP)
				type = SOCK_STREAM;
			else if (ip->ip_p == IPPROTO_UDP)
				type = SOCK_DGRAM;
			break;
		case PF_INET6:
			m = m_pullup(*mp, sizeof(struct ip6_hdr));
			if (m == NULL)
				break;
			*mp = m;
			ip6 = mtod(m, struct ip6_hdr *);
			if (ip6->ip6_nxt == IPPROTO_TCP)
				type = SOCK_STREAM;
			else if (ip6->ip6_nxt == IPPROTO_UDP)
				type = SOCK_DGRAM;
			break;
		}
	}

	return (type);
}
static void
dlil_count_chain_len(mbuf_t m, struct chain_len_stats *cls)
{
	mbuf_t n = m;
	int chainlen = 0;

	while (n != NULL) {
		chainlen++;
		n = n->m_nextpkt;
	}
	switch (chainlen) {
	case 0:
		break;
	case 1:
		atomic_add_64(&cls->cls_one, 1);
		break;
	case 2:
		atomic_add_64(&cls->cls_two, 1);
		break;
	case 3:
		atomic_add_64(&cls->cls_three, 1);
		break;
	case 4:
		atomic_add_64(&cls->cls_four, 1);
		break;
	case 5:
	default:
		atomic_add_64(&cls->cls_five_or_more, 1);
		break;
	}
}
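/*
 * tx_chain_len_stats is a small histogram (one, two, three, four, five or
 * more) of the number of packets handed to the driver per call; it is
 * updated from the dlil_count_chain_len() call in dlil_output() below when
 * tx_chain_len_count is enabled.
 */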
/*
 * dlil_output
 *
 * Caller should have a lock on the protocol domain if the protocol
 * doesn't support finer grained locking. In most cases, the lock
 * will be held from the socket layer and won't be released until
 * we return back to the socket layer.
 *
 * This does mean that we must take a protocol lock before we take
 * an interface lock if we're going to take both. This makes sense
 * because a protocol is likely to interact with an ifp while it
 * is under the protocol lock.
 *
 * An advisory code will be returned if adv is not null. This
 * can be used to provide feedback about interface queues to the
 * application.
 */
errno_t
dlil_output(ifnet_t ifp, protocol_family_t proto_family, mbuf_t packetlist,
    void *route, const struct sockaddr *dest, int raw, struct flowadv *adv)
{
	char *frame_type = NULL;
	char *dst_linkaddr = NULL;
	int retval = 0;
	char frame_type_buffer[MAX_FRAME_TYPE_SIZE * 4];
	char dst_linkaddr_buffer[MAX_LINKADDR * 4];
	struct if_proto *proto = NULL;
	mbuf_t m = NULL;
	mbuf_t send_head = NULL;
	mbuf_t *send_tail = &send_head;
	int iorefcnt = 0;
	u_int32_t pre = 0, post = 0;
	u_int32_t fpkts = 0, fbytes = 0;
	int32_t flen = 0;
	struct timespec now;
	u_int64_t now_nsec;

	KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_START, 0, 0, 0, 0, 0);

	/*
	 * Get an io refcnt if the interface is attached to prevent ifnet_detach
	 * from happening while this operation is in progress
	 */
	if (!ifnet_is_attached(ifp, 1)) {
		retval = ENXIO;
		goto cleanup;
	}
	iorefcnt = 1;

	VERIFY(ifp->if_output_dlil != NULL);

	/* update the driver's multicast filter, if needed */
	if (ifp->if_updatemcasts > 0 && if_mcasts_update(ifp) == 0)
		ifp->if_updatemcasts = 0;

	frame_type = frame_type_buffer;
	dst_linkaddr = dst_linkaddr_buffer;

	if (raw == 0) {
		ifnet_lock_shared(ifp);
		/* callee holds a proto refcnt upon success */
		proto = find_attached_proto(ifp, proto_family);
		if (proto == NULL) {
			ifnet_lock_done(ifp);
			retval = ENXIO;
			goto cleanup;
		}
		ifnet_lock_done(ifp);
	}

preout_again:
	if (packetlist == NULL)
		goto cleanup;

	m = packetlist;
	packetlist = packetlist->m_nextpkt;
	m->m_nextpkt = NULL;

	if (raw == 0) {
		proto_media_preout preoutp = (proto->proto_kpi == kProtoKPI_v1 ?
		    proto->kpi.v1.pre_output : proto->kpi.v2.pre_output);
		retval = 0;
		if (preoutp != NULL) {
			retval = preoutp(ifp, proto_family, &m, dest, route,
			    frame_type, dst_linkaddr);

			if (retval != 0) {
				if (retval == EJUSTRETURN)
					goto preout_again;
				m_freem(m);
				goto cleanup;
			}
		}
	}

	retval = mac_ifnet_check_transmit(ifp, m, proto_family,
	    dlil_get_socket_type(&m, proto_family, raw));
	if (retval != 0) {
		m_freem(m);
		goto cleanup;
	}

	do {
#if CONFIG_DTRACE
		if (!raw && proto_family == PF_INET) {
			struct ip *ip = mtod(m, struct ip *);
			DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL,
			    struct ip *, ip, struct ifnet *, ifp,
			    struct ip *, ip, struct ip6_hdr *, NULL);

		} else if (!raw && proto_family == PF_INET6) {
			struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
			DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL,
			    struct ip6_hdr *, ip6, struct ifnet *, ifp,
			    struct ip *, NULL, struct ip6_hdr *, ip6);
		}
#endif /* CONFIG_DTRACE */

		if (raw == 0 && ifp->if_framer != NULL) {
			int rcvif_set = 0;

			/*
			 * If this is a broadcast packet that needs to be
			 * looped back into the system, set the inbound ifp
			 * to that of the outbound ifp. This will allow
			 * us to determine that it is a legitimate packet
			 * for the system. Only set the ifp if it's not
			 * already set, just to be safe.
			 */
			if ((m->m_flags & (M_BCAST | M_LOOP)) &&
			    m->m_pkthdr.rcvif == NULL) {
				m->m_pkthdr.rcvif = ifp;
				rcvif_set = 1;
			}

			retval = ifp->if_framer(ifp, &m, dest, dst_linkaddr,
			    frame_type, &pre, &post);
			if (retval != 0) {
				if (retval != EJUSTRETURN)
					m_freem(m);
				goto next;
			}

			/*
			 * For partial checksum offload, adjust the start
			 * and stuff offsets based on the prepended header.
			 */
			if ((m->m_pkthdr.csum_flags &
			    (CSUM_DATA_VALID | CSUM_PARTIAL)) ==
			    (CSUM_DATA_VALID | CSUM_PARTIAL)) {
				m->m_pkthdr.csum_tx_stuff += pre;
				m->m_pkthdr.csum_tx_start += pre;
			}

			if (hwcksum_dbg != 0 && !(ifp->if_flags & IFF_LOOPBACK))
				dlil_output_cksum_dbg(ifp, m, pre,
				    proto_family);

			/*
			 * Clear the ifp if it was set above, and to be
			 * safe, only if it is still the same as the
			 * outbound ifp we have in context. If it was
			 * looped back, then a copy of it was sent to the
			 * loopback interface with the rcvif set, and we
			 * are clearing the one that will go down to the
			 * layer below.
			 */
			if (rcvif_set && m->m_pkthdr.rcvif == ifp)
				m->m_pkthdr.rcvif = NULL;
		}

		/*
		 * Let interface filters (if any) do their thing ...
		 */
		/* Do not pass VLAN tagged packets to filters PR-3586856 */
		if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) == 0) {
			retval = dlil_interface_filters_output(ifp,
			    &m, proto_family);
			if (retval != 0) {
				if (retval != EJUSTRETURN)
					m_freem(m);
				goto next;
			}
		}
		/*
		 * Strip away M_PROTO1 bit prior to sending packet
		 * to the driver as this field may be used by the driver
		 */
		m->m_flags &= ~M_PROTO1;

		/*
		 * If the underlying interface is not capable of handling a
		 * packet whose data portion spans across physically disjoint
		 * pages, we need to "normalize" the packet so that we pass
		 * down a chain of mbufs where each mbuf points to a span that
		 * resides in the system page boundary. If the packet does
		 * not cross page(s), the following is a no-op.
		 */
		if (!(ifp->if_hwassist & IFNET_MULTIPAGES)) {
			if ((m = m_normalize(m)) == NULL)
				goto next;
		}

		/*
		 * If this is a TSO packet, make sure the interface still
		 * advertise TSO capability.
		 */
		if (TSO_IPV4_NOTOK(ifp, m) || TSO_IPV6_NOTOK(ifp, m)) {
			retval = EMSGSIZE;
			m_freem(m);
			goto cleanup;
		}

		ifp_inc_traffic_class_out(ifp, m);
		pktap_output(ifp, proto_family, m, pre, post);

		/*
		 * Count the number of elements in the mbuf chain
		 */
		if (tx_chain_len_count) {
			dlil_count_chain_len(m, &tx_chain_len_stats);
		}

		/*
		 * Record timestamp; ifnet_enqueue() will use this info
		 * rather than redoing the work. An optimization could
		 * involve doing this just once at the top, if there are
		 * no interface filters attached, but that's probably
		 * not a big deal.
		 */
		nanouptime(&now);
		net_timernsec(&now, &now_nsec);
		(void) mbuf_set_timestamp(m, now_nsec, TRUE);

		/*
		 * Discard partial sum information if this packet originated
		 * from another interface; the packet would already have the
		 * final checksum and we shouldn't recompute it.
		 */
		if ((m->m_pkthdr.pkt_flags & PKTF_FORWARDED) &&
		    (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID|CSUM_PARTIAL)) ==
		    (CSUM_DATA_VALID|CSUM_PARTIAL)) {
			m->m_pkthdr.csum_flags &= ~CSUM_TX_FLAGS;
			m->m_pkthdr.csum_data = 0;
		}

		/*
		 * Finally, call the driver.
		 */
		if (ifp->if_eflags & (IFEF_SENDLIST | IFEF_ENQUEUE_MULTI)) {
			if (m->m_pkthdr.pkt_flags & PKTF_FORWARDED) {
				flen += (m_pktlen(m) - (pre + post));
				m->m_pkthdr.pkt_flags &= ~PKTF_FORWARDED;
			}
			*send_tail = m;
			send_tail = &m->m_nextpkt;
		} else {
			if (m->m_pkthdr.pkt_flags & PKTF_FORWARDED) {
				flen = (m_pktlen(m) - (pre + post));
				m->m_pkthdr.pkt_flags &= ~PKTF_FORWARDED;
			} else {
				flen = 0;
			}
			KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START,
			    0, 0, 0, 0, 0);
			retval = (*ifp->if_output_dlil)(ifp, m);
			if (retval == EQFULL || retval == EQSUSPENDED) {
				if (adv != NULL && adv->code == FADV_SUCCESS) {
					adv->code = (retval == EQFULL ?
					    FADV_FLOW_CONTROLLED :
					    FADV_SUSPENDED);
				}
				retval = 0;
			}
			if (retval == 0 && flen > 0) {
				fbytes += flen;
				fpkts++;
			}
			if (retval != 0 && dlil_verbose) {
				printf("%s: output error on %s retval = %d\n",
				    __func__, if_name(ifp),
				    retval);
			}
			KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END,
			    0, 0, 0, 0, 0);
		}
		KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0, 0, 0, 0, 0);

next:
		m = packetlist;
		if (m != NULL) {
			packetlist = packetlist->m_nextpkt;
			m->m_nextpkt = NULL;
		}
	} while (m != NULL);

	if (send_head != NULL) {
		KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START,
		    0, 0, 0, 0, 0);
		if (ifp->if_eflags & IFEF_SENDLIST) {
			retval = (*ifp->if_output_dlil)(ifp, send_head);
			if (retval == EQFULL || retval == EQSUSPENDED) {
				if (adv != NULL) {
					adv->code = (retval == EQFULL ?
					    FADV_FLOW_CONTROLLED :
					    FADV_SUSPENDED);
				}
				retval = 0;
			}
			if (retval == 0 && flen > 0) {
				fbytes += flen;
				fpkts++;
			}
			if (retval != 0 && dlil_verbose) {
				printf("%s: output error on %s retval = %d\n",
				    __func__, if_name(ifp), retval);
			}
		} else {
			struct mbuf *send_m;
			VERIFY(ifp->if_eflags & IFEF_ENQUEUE_MULTI);
			while (send_head != NULL) {
				send_m = send_head;
				send_head = send_m->m_nextpkt;
				send_m->m_nextpkt = NULL;
				retval = (*ifp->if_output_dlil)(ifp, send_m);
				if (retval == EQFULL || retval == EQSUSPENDED) {
					if (adv != NULL) {
						adv->code = (retval == EQFULL ?
						    FADV_FLOW_CONTROLLED :
						    FADV_SUSPENDED);
					}
					retval = 0;
				}
				if (retval == 0 && flen > 0) {
					fbytes += flen;
					fpkts++;
				}
				if (retval != 0 && dlil_verbose) {
					printf("%s: output error on %s "
					    "retval = %d\n",
					    __func__, if_name(ifp), retval);
				}
			}
		}
		KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
	}

	KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_END, 0, 0, 0, 0, 0);

cleanup:
	if (fbytes > 0)
		ifp->if_fbytes += fbytes;
	if (fpkts > 0)
		ifp->if_fpackets += fpkts;
	if (proto != NULL)
		if_proto_free(proto);
	if (packetlist) /* if any packets are left, clean up */
		mbuf_freem_list(packetlist);
	if (retval == EJUSTRETURN)
		retval = 0;
	if (iorefcnt == 1)
		ifnet_decr_iorefcnt(ifp);

	return (retval);
}
errno_t
ifnet_ioctl(ifnet_t ifp, protocol_family_t proto_fam, u_long ioctl_code,
    void *ioctl_arg)
{
	struct ifnet_filter *filter;
	int retval = EOPNOTSUPP;
	int result = 0;

	if (ifp == NULL || ioctl_code == 0)
		return (EINVAL);

	/* Get an io ref count if the interface is attached */
	if (!ifnet_is_attached(ifp, 1))
		return (EOPNOTSUPP);

	/*
	 * Run the interface filters first.
	 * We want to run all filters before calling the protocol,
	 * interface family, or interface.
	 */
	lck_mtx_lock_spin(&ifp->if_flt_lock);
	/* prevent filter list from changing in case we drop the lock */
	if_flt_monitor_busy(ifp);
	TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
		if (filter->filt_ioctl != NULL && (filter->filt_protocol == 0 ||
		    filter->filt_protocol == proto_fam)) {
			lck_mtx_unlock(&ifp->if_flt_lock);

			result = filter->filt_ioctl(filter->filt_cookie, ifp,
			    proto_fam, ioctl_code, ioctl_arg);

			lck_mtx_lock_spin(&ifp->if_flt_lock);

			/* Only update retval if no one has handled the ioctl */
			if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
				if (result == ENOTSUP)
					result = EOPNOTSUPP;
				retval = result;
				if (retval != 0 && retval != EOPNOTSUPP) {
					/* we're done with the filter list */
					if_flt_monitor_unbusy(ifp);
					lck_mtx_unlock(&ifp->if_flt_lock);
					goto cleanup;
				}
			}
		}
	}
	/* we're done with the filter list */
	if_flt_monitor_unbusy(ifp);
	lck_mtx_unlock(&ifp->if_flt_lock);

	/* Allow the protocol to handle the ioctl */
	if (proto_fam != 0) {
		struct if_proto *proto;

		/* callee holds a proto refcnt upon success */
		ifnet_lock_shared(ifp);
		proto = find_attached_proto(ifp, proto_fam);
		ifnet_lock_done(ifp);
		if (proto != NULL) {
			proto_media_ioctl ioctlp =
			    (proto->proto_kpi == kProtoKPI_v1 ?
			    proto->kpi.v1.ioctl : proto->kpi.v2.ioctl);
			result = EOPNOTSUPP;
			if (ioctlp != NULL)
				result = ioctlp(ifp, proto_fam, ioctl_code,
				    ioctl_arg);
			if_proto_free(proto);

			/* Only update retval if no one has handled the ioctl */
			if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
				if (result == ENOTSUP)
					result = EOPNOTSUPP;
				retval = result;
				if (retval && retval != EOPNOTSUPP)
					goto cleanup;
			}
		}
	}

	/* retval is either 0 or EOPNOTSUPP */

	/*
	 * Let the interface handle this ioctl.
	 * If it returns EOPNOTSUPP, ignore that, we may have
	 * already handled this in the protocol or family.
	 */
	if (ifp->if_ioctl)
		result = (*ifp->if_ioctl)(ifp, ioctl_code, ioctl_arg);

	/* Only update retval if no one has handled the ioctl */
	if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
		if (result == ENOTSUP)
			result = EOPNOTSUPP;
		retval = result;
		if (retval && retval != EOPNOTSUPP) {
			goto cleanup;
		}
	}

cleanup:
	if (retval == EJUSTRETURN)
		retval = 0;

	ifnet_decr_iorefcnt(ifp);

	return (retval);
}
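/*
 * The three-stage dispatch above (interface filters, then the protocol,
 * then the interface itself) treats EOPNOTSUPP and EJUSTRETURN as "not
 * handled yet", so the first component returning anything else effectively
 * owns the ioctl result.
 */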
__private_extern__ errno_t
dlil_set_bpf_tap(ifnet_t ifp, bpf_tap_mode mode, bpf_packet_func callback)
{
	errno_t error = 0;

	if (ifp->if_set_bpf_tap) {
		/* Get an io reference on the interface if it is attached */
		if (!ifnet_is_attached(ifp, 1))
			return (ENXIO);
		error = ifp->if_set_bpf_tap(ifp, mode, callback);
		ifnet_decr_iorefcnt(ifp);
	}
	return (error);
}
errno_t
dlil_resolve_multi(struct ifnet *ifp, const struct sockaddr *proto_addr,
    struct sockaddr *ll_addr, size_t ll_len)
{
	errno_t result = EOPNOTSUPP;
	struct if_proto *proto;
	const struct sockaddr *verify;
	proto_media_resolve_multi resolvep;

	if (!ifnet_is_attached(ifp, 1))
		return (result);

	bzero(ll_addr, ll_len);

	/* Call the protocol first; callee holds a proto refcnt upon success */
	ifnet_lock_shared(ifp);
	proto = find_attached_proto(ifp, proto_addr->sa_family);
	ifnet_lock_done(ifp);
	if (proto != NULL) {
		resolvep = (proto->proto_kpi == kProtoKPI_v1 ?
		    proto->kpi.v1.resolve_multi : proto->kpi.v2.resolve_multi);
		if (resolvep != NULL)
			result = resolvep(ifp, proto_addr,
			    (struct sockaddr_dl *)(void *)ll_addr, ll_len);
		if_proto_free(proto);
	}

	/* Let the interface verify the multicast address */
	if ((result == EOPNOTSUPP || result == 0) && ifp->if_check_multi) {
		if (result == 0)
			verify = ll_addr;
		else
			verify = proto_addr;
		result = ifp->if_check_multi(ifp, verify);
	}

	ifnet_decr_iorefcnt(ifp);

	return (result);
}
__private_extern__ errno_t
dlil_send_arp_internal(ifnet_t ifp, u_short arpop,
    const struct sockaddr_dl *sender_hw, const struct sockaddr *sender_proto,
    const struct sockaddr_dl *target_hw, const struct sockaddr *target_proto)
{
	struct if_proto *proto;
	errno_t result = 0;

	/* callee holds a proto refcnt upon success */
	ifnet_lock_shared(ifp);
	proto = find_attached_proto(ifp, target_proto->sa_family);
	ifnet_lock_done(ifp);
	if (proto == NULL) {
		result = ENOTSUP;
	} else {
		proto_media_send_arp arpp;
		arpp = (proto->proto_kpi == kProtoKPI_v1 ?
		    proto->kpi.v1.send_arp : proto->kpi.v2.send_arp);
		if (arpp == NULL) {
			result = ENOTSUP;
		} else {
			switch (arpop) {
			case ARPOP_REQUEST:
				arpstat.txrequests++;
				if (target_hw != NULL)
					arpstat.txurequests++;
				break;
			case ARPOP_REPLY:
				arpstat.txreplies++;
				break;
			}
			result = arpp(ifp, arpop, sender_hw, sender_proto,
			    target_hw, target_proto);
		}
		if_proto_free(proto);
	}

	return (result);
}
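/*
 * Thread marks: a per-uthread bitmask (uu_network_marks) used to note that
 * the current thread is already inside a particular networking context.
 * push/pop return an opaque token encoded as an offset from a static base
 * address, so that a pop only clears the bits its matching push set.
 *
 * Typical pairing (sketch only; MARK_BIT stands for one of the real
 * NET_THREAD_* mark values defined elsewhere):
 *
 *	net_thread_marks_t marks = net_thread_marks_push(MARK_BIT);
 *	... code that must not recurse into the marked context ...
 *	net_thread_marks_pop(marks);
 */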
struct net_thread_marks { };
static const struct net_thread_marks net_thread_marks_base = { };

__private_extern__ const net_thread_marks_t net_thread_marks_none =
    &net_thread_marks_base;

__private_extern__ net_thread_marks_t
net_thread_marks_push(u_int32_t push)
{
	static const char *const base = (const void*)&net_thread_marks_base;
	u_int32_t pop = 0;

	if (push != 0) {
		struct uthread *uth = get_bsdthread_info(current_thread());

		pop = push & ~uth->uu_network_marks;
		if (pop != 0)
			uth->uu_network_marks |= pop;
	}

	return ((net_thread_marks_t)&base[pop]);
}

__private_extern__ net_thread_marks_t
net_thread_unmarks_push(u_int32_t unpush)
{
	static const char *const base = (const void*)&net_thread_marks_base;
	u_int32_t unpop = 0;

	if (unpush != 0) {
		struct uthread *uth = get_bsdthread_info(current_thread());

		unpop = unpush & uth->uu_network_marks;
		if (unpop != 0)
			uth->uu_network_marks &= ~unpop;
	}

	return ((net_thread_marks_t)&base[unpop]);
}

__private_extern__ void
net_thread_marks_pop(net_thread_marks_t popx)
{
	static const char *const base = (const void*)&net_thread_marks_base;
	const ptrdiff_t pop = (const char *)popx - (const char *)base;

	if (pop != 0) {
		static const ptrdiff_t ones = (ptrdiff_t)(u_int32_t)~0U;
		struct uthread *uth = get_bsdthread_info(current_thread());

		VERIFY((pop & ones) == pop);
		VERIFY((ptrdiff_t)(uth->uu_network_marks & pop) == pop);
		uth->uu_network_marks &= ~pop;
	}
}

__private_extern__ void
net_thread_unmarks_pop(net_thread_marks_t unpopx)
{
	static const char *const base = (const void*)&net_thread_marks_base;
	ptrdiff_t unpop = (const char *)unpopx - (const char *)base;

	if (unpop != 0) {
		static const ptrdiff_t ones = (ptrdiff_t)(u_int32_t)~0U;
		struct uthread *uth = get_bsdthread_info(current_thread());

		VERIFY((unpop & ones) == unpop);
		VERIFY((ptrdiff_t)(uth->uu_network_marks & unpop) == 0);
		uth->uu_network_marks |= unpop;
	}
}

__private_extern__ u_int32_t
net_thread_is_marked(u_int32_t check)
{
	if (check != 0) {
		struct uthread *uth = get_bsdthread_info(current_thread());
		return (uth->uu_network_marks & check);
	} else
		return (0);
}

__private_extern__ u_int32_t
net_thread_is_unmarked(u_int32_t check)
{
	if (check != 0) {
		struct uthread *uth = get_bsdthread_info(current_thread());
		return (~uth->uu_network_marks & check);
	} else
		return (0);
}
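/*
 * An ARP announcement (gratuitous ARP) carries the same IPv4 address in
 * both the sender and target fields; dlil_send_arp() uses this check to
 * keep announcements on the originating interface only.
 */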
static __inline__ int
_is_announcement(const struct sockaddr_in * sender_sin,
    const struct sockaddr_in * target_sin)
{
	if (sender_sin == NULL) {
		return (FALSE);
	}
	return (sender_sin->sin_addr.s_addr == target_sin->sin_addr.s_addr);
}
__private_extern__ errno_t
dlil_send_arp(ifnet_t ifp, u_short arpop, const struct sockaddr_dl *sender_hw,
    const struct sockaddr *sender_proto, const struct sockaddr_dl *target_hw,
    const struct sockaddr *target_proto0, u_int32_t rtflags)
{
	errno_t result = 0;
	const struct sockaddr_in * sender_sin;
	const struct sockaddr_in * target_sin;
	struct sockaddr_inarp target_proto_sinarp;
	struct sockaddr *target_proto = (void *)(uintptr_t)target_proto0;

	if (target_proto == NULL || (sender_proto != NULL &&
	    sender_proto->sa_family != target_proto->sa_family))
		return (EINVAL);

	/*
	 * If the target is a (default) router, provide that
	 * information to the send_arp callback routine.
	 */
	if (rtflags & RTF_ROUTER) {
		bcopy(target_proto, &target_proto_sinarp,
		    sizeof (struct sockaddr_in));
		target_proto_sinarp.sin_other |= SIN_ROUTER;
		target_proto = (struct sockaddr *)&target_proto_sinarp;
	}

	/*
	 * If this is an ARP request and the target IP is IPv4LL,
	 * send the request on all interfaces. The exception is
	 * an announcement, which must only appear on the specific
	 * interface.
	 */
	sender_sin = (struct sockaddr_in *)(void *)(uintptr_t)sender_proto;
	target_sin = (struct sockaddr_in *)(void *)(uintptr_t)target_proto;
	if (target_proto->sa_family == AF_INET &&
	    IN_LINKLOCAL(ntohl(target_sin->sin_addr.s_addr)) &&
	    ipv4_ll_arp_aware != 0 && arpop == ARPOP_REQUEST &&
	    !_is_announcement(target_sin, sender_sin)) {
		ifnet_t *ifp_list;
		u_int32_t count;
		u_int32_t ifp_on;

		result = ENOTSUP;

		if (ifnet_list_get(IFNET_FAMILY_ANY, &ifp_list, &count) == 0) {
			for (ifp_on = 0; ifp_on < count; ifp_on++) {
				errno_t new_result;
				ifaddr_t source_hw = NULL;
				ifaddr_t source_ip = NULL;
				struct sockaddr_in source_ip_copy;
				struct ifnet *cur_ifp = ifp_list[ifp_on];

				/*
				 * Only arp on interfaces marked for IPv4LL
				 * ARPing. This may mean that we don't ARP on
				 * the interface the subnet route points to.
				 */
				if (!(cur_ifp->if_eflags & IFEF_ARPLL))
					continue;

				/* Find the source IP address */
				ifnet_lock_shared(cur_ifp);
				source_hw = cur_ifp->if_lladdr;
				TAILQ_FOREACH(source_ip, &cur_ifp->if_addrhead,
				    ifa_link) {
					IFA_LOCK(source_ip);
					if (source_ip->ifa_addr != NULL &&
					    source_ip->ifa_addr->sa_family ==
					    AF_INET) {
						/* Copy the source IP address */
						source_ip_copy =
						    *(struct sockaddr_in *)
						    (void *)source_ip->ifa_addr;
						IFA_UNLOCK(source_ip);
						break;
					}
					IFA_UNLOCK(source_ip);
				}

				/* No IP Source, don't arp */
				if (source_ip == NULL) {
					ifnet_lock_done(cur_ifp);
					continue;
				}

				IFA_ADDREF(source_hw);
				ifnet_lock_done(cur_ifp);

				/* Send the ARP */
				new_result = dlil_send_arp_internal(cur_ifp,
				    arpop, (struct sockaddr_dl *)(void *)
				    source_hw->ifa_addr,
				    (struct sockaddr *)&source_ip_copy, NULL,
				    target_proto);

				IFA_REMREF(source_hw);
				if (result == ENOTSUP) {
					result = new_result;
				}
			}
			ifnet_list_free(ifp_list);
		}
	} else {
		result = dlil_send_arp_internal(ifp, arpop, sender_hw,
		    sender_proto, target_hw, target_proto);
	}

	return (result);
}
/*
 * Caller must hold ifnet head lock.
 */
static int
ifnet_lookup(struct ifnet *ifp)
{
	struct ifnet *_ifp;

	LCK_RW_ASSERT(&ifnet_head_lock, LCK_RW_ASSERT_HELD);
	TAILQ_FOREACH(_ifp, &ifnet_head, if_link) {
		if (_ifp == ifp)
			break;
	}
	return (_ifp != NULL);
}

/*
 * Caller has to pass a non-zero refio argument to get a
 * IO reference count. This will prevent ifnet_detach from
 * being called when there are outstanding io reference counts.
 */
int
ifnet_is_attached(struct ifnet *ifp, int refio)
{
	int ret;

	lck_mtx_lock_spin(&ifp->if_ref_lock);
	if ((ret = IF_FULLY_ATTACHED(ifp))) {
		if (refio > 0)
			ifp->if_refio++;
	}
	lck_mtx_unlock(&ifp->if_ref_lock);

	return (ret);
}

/*
 * Caller must ensure the interface is attached; the assumption is that
 * there is at least an outstanding IO reference count held already.
 * Most callers would call ifnet_is_attached() instead.
 */
void
ifnet_incr_iorefcnt(struct ifnet *ifp)
{
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	VERIFY(IF_FULLY_ATTACHED(ifp));
	VERIFY(ifp->if_refio > 0);
	ifp->if_refio++;
	lck_mtx_unlock(&ifp->if_ref_lock);
}
void
ifnet_decr_iorefcnt(struct ifnet *ifp)
{
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	VERIFY(ifp->if_refio > 0);
	VERIFY(ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING));
	ifp->if_refio--;

	/*
	 * if there are no more outstanding io references, wakeup the
	 * ifnet_detach thread if detaching flag is set.
	 */
	if (ifp->if_refio == 0 && (ifp->if_refflags & IFRF_DETACHING))
		wakeup(&(ifp->if_refio));

	lck_mtx_unlock(&ifp->if_ref_lock);
}
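/*
 * Debug-only reference tracing for dlil_ifnet: when DLIF_DEBUG is set,
 * record a backtrace for every refhold/refrele into the per-interface
 * history buffers so that refcount imbalances can be diagnosed later.
 */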
static void
dlil_if_trace(struct dlil_ifnet *dl_if, int refhold)
{
	struct dlil_ifnet_dbg *dl_if_dbg = (struct dlil_ifnet_dbg *)dl_if;
	ctrace_t *tr;
	u_int32_t idx;
	u_int16_t *cnt;

	if (!(dl_if->dl_if_flags & DLIF_DEBUG)) {
		panic("%s: dl_if %p has no debug structure", __func__, dl_if);
		/* NOTREACHED */
	}

	if (refhold) {
		cnt = &dl_if_dbg->dldbg_if_refhold_cnt;
		tr = dl_if_dbg->dldbg_if_refhold;
	} else {
		cnt = &dl_if_dbg->dldbg_if_refrele_cnt;
		tr = dl_if_dbg->dldbg_if_refrele;
	}

	idx = atomic_add_16_ov(cnt, 1) % IF_REF_TRACE_HIST_SIZE;
	ctrace_record(&tr[idx]);
}
errno_t
dlil_if_ref(struct ifnet *ifp)
{
	struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;

	if (dl_if == NULL)
		return (EINVAL);

	lck_mtx_lock_spin(&dl_if->dl_if_lock);
	++dl_if->dl_if_refcnt;
	if (dl_if->dl_if_refcnt == 0) {
		panic("%s: wraparound refcnt for ifp=%p", __func__, ifp);
		/* NOTREACHED */
	}
	if (dl_if->dl_if_trace != NULL)
		(*dl_if->dl_if_trace)(dl_if, TRUE);
	lck_mtx_unlock(&dl_if->dl_if_lock);

	return (0);
}
errno_t
dlil_if_free(struct ifnet *ifp)
{
	struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
	bool need_release = FALSE;

	if (dl_if == NULL)
		return (EINVAL);

	lck_mtx_lock_spin(&dl_if->dl_if_lock);
	switch (dl_if->dl_if_refcnt) {
	case 0:
		panic("%s: negative refcnt for ifp=%p", __func__, ifp);
		/* NOTREACHED */
		break;
	case 1:
		if ((ifp->if_refflags & IFRF_EMBRYONIC) != 0) {
			need_release = TRUE;
		}
		break;
	default:
		break;
	}
	--dl_if->dl_if_refcnt;
	if (dl_if->dl_if_trace != NULL)
		(*dl_if->dl_if_trace)(dl_if, FALSE);
	lck_mtx_unlock(&dl_if->dl_if_lock);
	if (need_release) {
		dlil_if_release(ifp);
	}
	return (0);
}
static errno_t
dlil_attach_protocol_internal(struct if_proto *proto,
    const struct ifnet_demux_desc *demux_list, u_int32_t demux_count,
    uint32_t * proto_count)
{
	struct kev_dl_proto_data ev_pr_data;
	struct ifnet *ifp = proto->ifp;
	int retval = 0;
	u_int32_t hash_value = proto_hash_value(proto->protocol_family);
	struct if_proto *prev_proto;
	struct if_proto *_proto;

	/* callee holds a proto refcnt upon success */
	ifnet_lock_exclusive(ifp);
	_proto = find_attached_proto(ifp, proto->protocol_family);
	if (_proto != NULL) {
		ifnet_lock_done(ifp);
		if_proto_free(_proto);
		return (EEXIST);
	}

	/*
	 * Call family module add_proto routine so it can refine the
	 * demux descriptors as it wishes.
	 */
	retval = ifp->if_add_proto(ifp, proto->protocol_family, demux_list,
	    demux_count);
	if (retval) {
		ifnet_lock_done(ifp);
		return (retval);
	}

	/*
	 * Insert the protocol in the hash
	 */
	prev_proto = SLIST_FIRST(&ifp->if_proto_hash[hash_value]);
	while (prev_proto != NULL && SLIST_NEXT(prev_proto, next_hash) != NULL)
		prev_proto = SLIST_NEXT(prev_proto, next_hash);
	if (prev_proto)
		SLIST_INSERT_AFTER(prev_proto, proto, next_hash);
	else
		SLIST_INSERT_HEAD(&ifp->if_proto_hash[hash_value],
		    proto, next_hash);

	/* hold a proto refcnt for attach */
	if_proto_ref(proto);

	/*
	 * The reserved field carries the number of protocol still attached
	 * (subject to change)
	 */
	ev_pr_data.proto_family = proto->protocol_family;
	ev_pr_data.proto_remaining_count = dlil_ifp_protolist(ifp, NULL, 0);

	ifnet_lock_done(ifp);

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_ATTACHED,
	    (struct net_event_data *)&ev_pr_data,
	    sizeof (struct kev_dl_proto_data));
	if (proto_count != NULL) {
		*proto_count = ev_pr_data.proto_remaining_count;
	}
	return (retval);
}
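/*
 * Public v1/v2 protocol attach entry points. Both allocate an if_proto,
 * copy the caller-supplied KPI callbacks, and defer the hash insertion and
 * KEV_DL_PROTO_ATTACHED event to dlil_attach_protocol_internal() above.
 */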
errno_t
ifnet_attach_protocol(ifnet_t ifp, protocol_family_t protocol,
    const struct ifnet_attach_proto_param *proto_details)
{
	int retval = 0;
	struct if_proto *ifproto = NULL;
	uint32_t proto_count = 0;

	ifnet_head_lock_shared();
	if (ifp == NULL || protocol == 0 || proto_details == NULL) {
		retval = EINVAL;
		goto end;
	}
	/* Check that the interface is in the global list */
	if (!ifnet_lookup(ifp)) {
		retval = ENXIO;
		goto end;
	}

	ifproto = zalloc(dlif_proto_zone);
	if (ifproto == NULL) {
		retval = ENOMEM;
		goto end;
	}
	bzero(ifproto, dlif_proto_size);

	/* refcnt held above during lookup */
	ifproto->ifp = ifp;
	ifproto->protocol_family = protocol;
	ifproto->proto_kpi = kProtoKPI_v1;
	ifproto->kpi.v1.input = proto_details->input;
	ifproto->kpi.v1.pre_output = proto_details->pre_output;
	ifproto->kpi.v1.event = proto_details->event;
	ifproto->kpi.v1.ioctl = proto_details->ioctl;
	ifproto->kpi.v1.detached = proto_details->detached;
	ifproto->kpi.v1.resolve_multi = proto_details->resolve;
	ifproto->kpi.v1.send_arp = proto_details->send_arp;

	retval = dlil_attach_protocol_internal(ifproto,
	    proto_details->demux_list, proto_details->demux_count,
	    &proto_count);

end:
	if (retval != 0 && retval != EEXIST && ifp != NULL) {
		DLIL_PRINTF("%s: failed to attach v1 protocol %d (err=%d)\n",
		    if_name(ifp), protocol, retval);
	} else {
		if (dlil_verbose) {
			printf("%s: attached v1 protocol %d (count = %d)\n",
			    if_name(ifp),
			    protocol, proto_count);
		}
	}
	ifnet_head_done();
	if (retval == 0) {
		/*
		 * A protocol has been attached, mark the interface up.
		 * This used to be done by configd.KernelEventMonitor, but that
		 * is inherently prone to races (rdar://problem/30810208).
		 */
		(void) ifnet_set_flags(ifp, IFF_UP, IFF_UP);
		(void) ifnet_ioctl(ifp, 0, SIOCSIFFLAGS, NULL);
		dlil_post_sifflags_msg(ifp);
	} else if (ifproto != NULL) {
		zfree(dlif_proto_zone, ifproto);
	}
	return (retval);
}
errno_t
ifnet_attach_protocol_v2(ifnet_t ifp, protocol_family_t protocol,
    const struct ifnet_attach_proto_param_v2 *proto_details)
{
	int retval = 0;
	struct if_proto *ifproto = NULL;
	uint32_t proto_count = 0;

	ifnet_head_lock_shared();
	if (ifp == NULL || protocol == 0 || proto_details == NULL) {
		retval = EINVAL;
		goto end;
	}
	/* Check that the interface is in the global list */
	if (!ifnet_lookup(ifp)) {
		retval = ENXIO;
		goto end;
	}

	ifproto = zalloc(dlif_proto_zone);
	if (ifproto == NULL) {
		retval = ENOMEM;
		goto end;
	}
	bzero(ifproto, sizeof(*ifproto));

	/* refcnt held above during lookup */
	ifproto->ifp = ifp;
	ifproto->protocol_family = protocol;
	ifproto->proto_kpi = kProtoKPI_v2;
	ifproto->kpi.v2.input = proto_details->input;
	ifproto->kpi.v2.pre_output = proto_details->pre_output;
	ifproto->kpi.v2.event = proto_details->event;
	ifproto->kpi.v2.ioctl = proto_details->ioctl;
	ifproto->kpi.v2.detached = proto_details->detached;
	ifproto->kpi.v2.resolve_multi = proto_details->resolve;
	ifproto->kpi.v2.send_arp = proto_details->send_arp;

	retval = dlil_attach_protocol_internal(ifproto,
	    proto_details->demux_list, proto_details->demux_count,
	    &proto_count);

end:
	if (retval != 0 && retval != EEXIST && ifp != NULL) {
		DLIL_PRINTF("%s: failed to attach v2 protocol %d (err=%d)\n",
		    if_name(ifp), protocol, retval);
	} else {
		if (dlil_verbose) {
			printf("%s: attached v2 protocol %d (count = %d)\n",
			    if_name(ifp),
			    protocol, proto_count);
		}
	}
	ifnet_head_done();
	if (retval == 0) {
		/*
		 * A protocol has been attached, mark the interface up.
		 * This used to be done by configd.KernelEventMonitor, but that
		 * is inherently prone to races (rdar://problem/30810208).
		 */
		(void) ifnet_set_flags(ifp, IFF_UP, IFF_UP);
		(void) ifnet_ioctl(ifp, 0, SIOCSIFFLAGS, NULL);
		dlil_post_sifflags_msg(ifp);
	} else if (ifproto != NULL) {
		zfree(dlif_proto_zone, ifproto);
	}
	return (retval);
}
errno_t
ifnet_detach_protocol(ifnet_t ifp, protocol_family_t proto_family)
{
	struct if_proto *proto = NULL;
	int retval = 0;

	if (ifp == NULL || proto_family == 0) {
		retval = EINVAL;
		goto end;
	}

	ifnet_lock_exclusive(ifp);
	/* callee holds a proto refcnt upon success */
	proto = find_attached_proto(ifp, proto_family);
	if (proto == NULL) {
		retval = ENXIO;
		ifnet_lock_done(ifp);
		goto end;
	}

	/* call family module del_proto */
	if (ifp->if_del_proto)
		ifp->if_del_proto(ifp, proto->protocol_family);

	SLIST_REMOVE(&ifp->if_proto_hash[proto_hash_value(proto_family)],
	    proto, if_proto, next_hash);

	if (proto->proto_kpi == kProtoKPI_v1) {
		proto->kpi.v1.input = ifproto_media_input_v1;
		proto->kpi.v1.pre_output = ifproto_media_preout;
		proto->kpi.v1.event = ifproto_media_event;
		proto->kpi.v1.ioctl = ifproto_media_ioctl;
		proto->kpi.v1.resolve_multi = ifproto_media_resolve_multi;
		proto->kpi.v1.send_arp = ifproto_media_send_arp;
	} else {
		proto->kpi.v2.input = ifproto_media_input_v2;
		proto->kpi.v2.pre_output = ifproto_media_preout;
		proto->kpi.v2.event = ifproto_media_event;
		proto->kpi.v2.ioctl = ifproto_media_ioctl;
		proto->kpi.v2.resolve_multi = ifproto_media_resolve_multi;
		proto->kpi.v2.send_arp = ifproto_media_send_arp;
	}
	proto->detached = 1;
	ifnet_lock_done(ifp);

	if (dlil_verbose) {
		printf("%s: detached %s protocol %d\n", if_name(ifp),
		    (proto->proto_kpi == kProtoKPI_v1) ?
		    "v1" : "v2", proto_family);
	}

	/* release proto refcnt held during protocol attach */
	if_proto_free(proto);

	/*
	 * Release proto refcnt held during lookup; the rest of
	 * protocol detach steps will happen when the last proto
	 * reference is released.
	 */
	if_proto_free(proto);

end:
	return (retval);
}
static errno_t
ifproto_media_input_v1(struct ifnet *ifp, protocol_family_t protocol,
    struct mbuf *packet, char *header)
{
#pragma unused(ifp, protocol, packet, header)
	return (ENXIO);
}

static errno_t
ifproto_media_input_v2(struct ifnet *ifp, protocol_family_t protocol,
    struct mbuf *packet)
{
#pragma unused(ifp, protocol, packet)
	return (ENXIO);
}

static errno_t
ifproto_media_preout(struct ifnet *ifp, protocol_family_t protocol,
    mbuf_t *packet, const struct sockaddr *dest, void *route, char *frame_type,
    char *link_layer_dest)
{
#pragma unused(ifp, protocol, packet, dest, route, frame_type, link_layer_dest)
	return (ENXIO);
}

static void
ifproto_media_event(struct ifnet *ifp, protocol_family_t protocol,
    const struct kev_msg *event)
{
#pragma unused(ifp, protocol, event)
}

static errno_t
ifproto_media_ioctl(struct ifnet *ifp, protocol_family_t protocol,
    unsigned long command, void *argument)
{
#pragma unused(ifp, protocol, command, argument)
	return (ENXIO);
}

static errno_t
ifproto_media_resolve_multi(ifnet_t ifp, const struct sockaddr *proto_addr,
    struct sockaddr_dl *out_ll, size_t ll_len)
{
#pragma unused(ifp, proto_addr, out_ll, ll_len)
	return (ENXIO);
}

static errno_t
ifproto_media_send_arp(struct ifnet *ifp, u_short arpop,
    const struct sockaddr_dl *sender_hw, const struct sockaddr *sender_proto,
    const struct sockaddr_dl *target_hw, const struct sockaddr *target_proto)
{
#pragma unused(ifp, arpop, sender_hw, sender_proto, target_hw, target_proto)
	return (ENXIO);
}
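/*
 * The default "unsupported" KPI callbacks above are installed into a
 * protocol's kpi vector by ifnet_detach_protocol(), so that late callers
 * fail gracefully instead of dereferencing stale callback pointers.
 */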
extern int if_next_index(void);
extern int tcp_ecn_outbound;

errno_t
ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr)
{
	struct ifnet *tmp_if;
	struct ifaddr *ifa;
	struct if_data_internal if_data_saved;
	struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
	struct dlil_threading_info *dl_inp;
	u_int32_t sflags = 0;
	int err;

	if (ifp == NULL)
		return (EINVAL);

	/*
	 * Serialize ifnet attach using dlil_ifnet_lock, in order to
	 * prevent the interface from being configured while it is
	 * embryonic, as ifnet_head_lock is dropped and reacquired
	 * below prior to marking the ifnet with IFRF_ATTACHED.
	 */
	dlil_if_lock();
	ifnet_head_lock_exclusive();
	/* Verify we aren't already on the list */
	TAILQ_FOREACH(tmp_if, &ifnet_head, if_link) {
		if (tmp_if == ifp) {
			ifnet_head_done();
			dlil_if_unlock();
			return (EEXIST);
		}
	}

	lck_mtx_lock_spin(&ifp->if_ref_lock);
	if (!(ifp->if_refflags & IFRF_EMBRYONIC)) {
		panic_plain("%s: flags mismatch (embryonic not set) ifp=%p",
		    __func__, ifp);
		/* NOTREACHED */
	}
	lck_mtx_unlock(&ifp->if_ref_lock);

	ifnet_lock_exclusive(ifp);

	/* Sanity check */
	VERIFY(ifp->if_detaching_link.tqe_next == NULL);
	VERIFY(ifp->if_detaching_link.tqe_prev == NULL);

	if (ll_addr != NULL) {
		if (ifp->if_addrlen == 0) {
			ifp->if_addrlen = ll_addr->sdl_alen;
		} else if (ll_addr->sdl_alen != ifp->if_addrlen) {
			ifnet_lock_done(ifp);
			ifnet_head_done();
			dlil_if_unlock();
			return (EINVAL);
		}
	}

	/*
	 * Allow interfaces without protocol families to attach
	 * only if they have the necessary fields filled out.
	 */
	if (ifp->if_add_proto == NULL || ifp->if_del_proto == NULL) {
		DLIL_PRINTF("%s: Attempt to attach interface without "
		    "family module - %d\n", __func__, ifp->if_family);
		ifnet_lock_done(ifp);
		ifnet_head_done();
		dlil_if_unlock();
		return (ENODEV);
	}

	/* Allocate protocol hash table */
	VERIFY(ifp->if_proto_hash == NULL);
	ifp->if_proto_hash = zalloc(dlif_phash_zone);
	if (ifp->if_proto_hash == NULL) {
		ifnet_lock_done(ifp);
		ifnet_head_done();
		dlil_if_unlock();
		return (ENOBUFS);
	}
	bzero(ifp->if_proto_hash, dlif_phash_size);

	lck_mtx_lock_spin(&ifp->if_flt_lock);
	VERIFY(TAILQ_EMPTY(&ifp->if_flt_head));
	TAILQ_INIT(&ifp->if_flt_head);
	VERIFY(ifp->if_flt_busy == 0);
	VERIFY(ifp->if_flt_waiters == 0);
	lck_mtx_unlock(&ifp->if_flt_lock);

	if (!(dl_if->dl_if_flags & DLIF_REUSE)) {
		VERIFY(LIST_EMPTY(&ifp->if_multiaddrs));
		LIST_INIT(&ifp->if_multiaddrs);
	}

	VERIFY(ifp->if_allhostsinm == NULL);
	VERIFY(TAILQ_EMPTY(&ifp->if_addrhead));
	TAILQ_INIT(&ifp->if_addrhead);

	if (ifp->if_index == 0) {
		int idx = if_next_index();

		if (idx == -1) {
			ifp->if_index = 0;
			ifnet_lock_done(ifp);
			ifnet_head_done();
			dlil_if_unlock();
			return (ENOBUFS);
		}
		ifp->if_index = idx;
	}
	/* There should not be anything occupying this slot */
	VERIFY(ifindex2ifnet[ifp->if_index] == NULL);

	/* allocate (if needed) and initialize a link address */
	ifa = dlil_alloc_lladdr(ifp, ll_addr);
	if (ifa == NULL) {
		ifnet_lock_done(ifp);
		ifnet_head_done();
		dlil_if_unlock();
		return (ENOBUFS);
	}

	VERIFY(ifnet_addrs[ifp->if_index - 1] == NULL);
	ifnet_addrs[ifp->if_index - 1] = ifa;

	/* make this address the first on the list */
	IFA_LOCK(ifa);
	/* hold a reference for ifnet_addrs[] */
	IFA_ADDREF_LOCKED(ifa);
	/* if_attach_link_ifa() holds a reference for ifa_link */
	if_attach_link_ifa(ifp, ifa);
	IFA_UNLOCK(ifa);

	mac_ifnet_label_associate(ifp);

	TAILQ_INSERT_TAIL(&ifnet_head, ifp, if_link);
	ifindex2ifnet[ifp->if_index] = ifp;

	/* Hold a reference to the underlying dlil_ifnet */
	ifnet_reference(ifp);

	/* Clear stats (save and restore other fields that we care) */
	if_data_saved = ifp->if_data;
	bzero(&ifp->if_data, sizeof (ifp->if_data));
	ifp->if_data.ifi_type = if_data_saved.ifi_type;
	ifp->if_data.ifi_typelen = if_data_saved.ifi_typelen;
	ifp->if_data.ifi_physical = if_data_saved.ifi_physical;
	ifp->if_data.ifi_addrlen = if_data_saved.ifi_addrlen;
	ifp->if_data.ifi_hdrlen = if_data_saved.ifi_hdrlen;
	ifp->if_data.ifi_mtu = if_data_saved.ifi_mtu;
	ifp->if_data.ifi_baudrate = if_data_saved.ifi_baudrate;
	ifp->if_data.ifi_hwassist = if_data_saved.ifi_hwassist;
	ifp->if_data.ifi_tso_v4_mtu = if_data_saved.ifi_tso_v4_mtu;
	ifp->if_data.ifi_tso_v6_mtu = if_data_saved.ifi_tso_v6_mtu;
	ifnet_touch_lastchange(ifp);

	VERIFY(ifp->if_output_sched_model == IFNET_SCHED_MODEL_NORMAL ||
	    ifp->if_output_sched_model == IFNET_SCHED_MODEL_DRIVER_MANAGED ||
	    ifp->if_output_sched_model == IFNET_SCHED_MODEL_FQ_CODEL);

	/* By default, use SFB and enable flow advisory */
	sflags = PKTSCHEDF_QALG_SFB;
	if (if_flowadv)
		sflags |= PKTSCHEDF_QALG_FLOWCTL;

	if (if_delaybased_queue)
		sflags |= PKTSCHEDF_QALG_DELAYBASED;

	if (ifp->if_output_sched_model ==
	    IFNET_SCHED_MODEL_DRIVER_MANAGED)
		sflags |= PKTSCHEDF_QALG_DRIVER_MANAGED;

	/* Initialize transmit queue(s) */
	err = ifclassq_setup(ifp, sflags, (dl_if->dl_if_flags & DLIF_REUSE));
	if (err != 0) {
		panic_plain("%s: ifp=%p couldn't initialize transmit queue; "
		    "err=%d", __func__, ifp, err);
		/* NOTREACHED */
	}

	/* Sanity checks on the input thread storage */
	dl_inp = &dl_if->dl_if_inpstorage;
	bzero(&dl_inp->stats, sizeof (dl_inp->stats));
	VERIFY(dl_inp->input_waiting == 0);
	VERIFY(dl_inp->wtot == 0);
	VERIFY(dl_inp->ifp == NULL);
	VERIFY(qhead(&dl_inp->rcvq_pkts) == NULL && qempty(&dl_inp->rcvq_pkts));
	VERIFY(qlimit(&dl_inp->rcvq_pkts) == 0);
	VERIFY(!dl_inp->net_affinity);
	VERIFY(ifp->if_inp == NULL);
	VERIFY(dl_inp->input_thr == THREAD_NULL);
	VERIFY(dl_inp->wloop_thr == THREAD_NULL);
	VERIFY(dl_inp->poll_thr == THREAD_NULL);
	VERIFY(dl_inp->tag == 0);
	VERIFY(dl_inp->mode == IFNET_MODEL_INPUT_POLL_OFF);
	bzero(&dl_inp->tstats, sizeof (dl_inp->tstats));
	bzero(&dl_inp->pstats, sizeof (dl_inp->pstats));
	bzero(&dl_inp->sstats, sizeof (dl_inp->sstats));
#if IFNET_INPUT_SANITY_CHK
	VERIFY(dl_inp->input_mbuf_cnt == 0);
#endif /* IFNET_INPUT_SANITY_CHK */

	/*
	 * A specific DLIL input thread is created per Ethernet/cellular
	 * interface or for an interface which supports opportunistic
	 * input polling. Pseudo interfaces or other types of interfaces
	 * use the main input thread instead.
	 */
	if ((net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) ||
	    ifp->if_type == IFT_ETHER || ifp->if_type == IFT_CELLULAR) {
		ifp->if_inp = dl_inp;
		err = dlil_create_input_thread(ifp, ifp->if_inp);
		if (err != 0) {
			panic_plain("%s: ifp=%p couldn't get an input thread; "
			    "err=%d", __func__, ifp, err);
			/* NOTREACHED */
		}
	}

	if (ifp->if_inp != NULL && ifp->if_inp->input_mit_tcall == NULL) {
		ifp->if_inp->input_mit_tcall =
		    thread_call_allocate_with_priority(dlil_mit_tcall_fn,
		    ifp, THREAD_CALL_PRIORITY_KERNEL);
	}

	/*
	 * If the driver supports the new transmit model, calculate flow hash
	 * and create a workloop starter thread to invoke the if_start callback
	 * where the packets may be dequeued and transmitted.
	 */
	if (ifp->if_eflags & IFEF_TXSTART) {
		ifp->if_flowhash = ifnet_calc_flowhash(ifp);
		VERIFY(ifp->if_flowhash != 0);
		VERIFY(ifp->if_start_thread == THREAD_NULL);

		ifnet_set_start_cycle(ifp, NULL);
		ifp->if_start_active = 0;
		ifp->if_start_req = 0;
		ifp->if_start_flags = 0;
		VERIFY(ifp->if_start != NULL);
		if ((err = kernel_thread_start(ifnet_start_thread_fn,
		    ifp, &ifp->if_start_thread)) != KERN_SUCCESS) {
			panic_plain("%s: "
			    "ifp=%p couldn't get a start thread; "
			    "err=%d", __func__, ifp, err);
			/* NOTREACHED */
		}
		ml_thread_policy(ifp->if_start_thread, MACHINE_GROUP,
		    (MACHINE_NETWORK_GROUP|MACHINE_NETWORK_WORKLOOP));
	} else {
		ifp->if_flowhash = 0;
	}

	/*
	 * If the driver supports the new receive model, create a poller
	 * thread to invoke if_input_poll callback where the packets may
	 * be dequeued from the driver and processed for reception.
	 */
	if (ifp->if_eflags & IFEF_RXPOLL) {
		VERIFY(ifp->if_input_poll != NULL);
		VERIFY(ifp->if_input_ctl != NULL);
		VERIFY(ifp->if_poll_thread == THREAD_NULL);

		ifnet_set_poll_cycle(ifp, NULL);
		ifp->if_poll_update = 0;
		ifp->if_poll_active = 0;
		ifp->if_poll_req = 0;
		if ((err = kernel_thread_start(ifnet_poll_thread_fn, ifp,
		    &ifp->if_poll_thread)) != KERN_SUCCESS) {
			panic_plain("%s: ifp=%p couldn't get a poll thread; "
			    "err=%d", __func__, ifp, err);
			/* NOTREACHED */
		}
		ml_thread_policy(ifp->if_poll_thread, MACHINE_GROUP,
		    (MACHINE_NETWORK_GROUP|MACHINE_NETWORK_WORKLOOP));
	}

	VERIFY(ifp->if_desc.ifd_maxlen == IF_DESCSIZE);
	VERIFY(ifp->if_desc.ifd_len == 0);
	VERIFY(ifp->if_desc.ifd_desc != NULL);

	/* Record attach PC stacktrace */
	ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_attach);

	ifp->if_updatemcasts = 0;
	if (!LIST_EMPTY(&ifp->if_multiaddrs)) {
		struct ifmultiaddr *ifma;
		LIST_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
			IFMA_LOCK(ifma);
			if (ifma->ifma_addr->sa_family == AF_LINK ||
			    ifma->ifma_addr->sa_family == AF_UNSPEC)
				ifp->if_updatemcasts++;
			IFMA_UNLOCK(ifma);
		}

		printf("%s: attached with %d suspended link-layer multicast "
		    "membership(s)\n", if_name(ifp),
		    ifp->if_updatemcasts);
	}

	/* Clear logging parameters */
	bzero(&ifp->if_log, sizeof (ifp->if_log));

	/* Clear foreground/realtime activity timestamps */
	ifp->if_fg_sendts = 0;
	ifp->if_rt_sendts = 0;

	VERIFY(ifp->if_delegated.ifp == NULL);
	VERIFY(ifp->if_delegated.type == 0);
	VERIFY(ifp->if_delegated.family == 0);
	VERIFY(ifp->if_delegated.subfamily == 0);
	VERIFY(ifp->if_delegated.expensive == 0);

	VERIFY(ifp->if_agentids == NULL);
	VERIFY(ifp->if_agentcount == 0);

	/* Reset interface state */
	bzero(&ifp->if_interface_state, sizeof(ifp->if_interface_state));
	ifp->if_interface_state.valid_bitmask |=
	    IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
	ifp->if_interface_state.interface_availability =
	    IF_INTERFACE_STATE_INTERFACE_AVAILABLE;

	/* Initialize Link Quality Metric (loopback [lo0] is always good) */
	if (ifp == lo_ifp) {
		ifp->if_interface_state.lqm_state = IFNET_LQM_THRESH_GOOD;
		ifp->if_interface_state.valid_bitmask |=
		    IF_INTERFACE_STATE_LQM_STATE_VALID;
	} else {
		ifp->if_interface_state.lqm_state = IFNET_LQM_THRESH_UNKNOWN;
	}

	/*
	 * Enable ECN capability on this interface depending on the
	 * value of ECN global setting
	 */
	if (tcp_ecn_outbound == 2 && !IFNET_IS_CELLULAR(ifp)) {
		ifp->if_eflags |= IFEF_ECN_ENABLE;
		ifp->if_eflags &= ~IFEF_ECN_DISABLE;
	}

	/*
	 * Built-in Cyclops always on policy for WiFi infra
	 */
	if (IFNET_IS_WIFI_INFRA(ifp) && net_qos_policy_wifi_enabled != 0) {
		errno_t error;

		error = if_set_qosmarking_mode(ifp,
		    IFRTYPE_QOSMARKING_FASTLANE);
		if (error != 0) {
			printf("%s if_set_qosmarking_mode(%s) error %d\n",
			    __func__, ifp->if_xname, error);
		} else {
			ifp->if_eflags |= IFEF_QOSMARKING_ENABLED;
#if (DEVELOPMENT || DEBUG)
			printf("%s fastlane enabled on %s\n",
			    __func__, ifp->if_xname);
#endif /* (DEVELOPMENT || DEBUG) */
		}
	}

	ifnet_lock_done(ifp);
	ifnet_head_done();

	lck_mtx_lock(&ifp->if_cached_route_lock);
	/* Enable forwarding cached route */
	ifp->if_fwd_cacheok = 1;
	/* Clean up any existing cached routes */
	ROUTE_RELEASE(&ifp->if_fwd_route);
	bzero(&ifp->if_fwd_route, sizeof (ifp->if_fwd_route));
	ROUTE_RELEASE(&ifp->if_src_route);
	bzero(&ifp->if_src_route, sizeof (ifp->if_src_route));
	ROUTE_RELEASE(&ifp->if_src_route6);
	bzero(&ifp->if_src_route6, sizeof (ifp->if_src_route6));
	lck_mtx_unlock(&ifp->if_cached_route_lock);

	ifnet_llreach_ifattach(ifp, (dl_if->dl_if_flags & DLIF_REUSE));

	/*
	 * Allocate and attach IGMPv3/MLDv2 interface specific variables
	 * and trees; do this before the ifnet is marked as attached.
	 * The ifnet keeps the reference to the info structures even after
	 * the ifnet is detached, since the network-layer records still
	 * refer to the info structures even after that. This also
	 * makes it possible for them to still function after the ifnet
	 * is recycled or reattached.
	 */
	if (IGMP_IFINFO(ifp) == NULL) {
		IGMP_IFINFO(ifp) = igmp_domifattach(ifp, M_WAITOK);
		VERIFY(IGMP_IFINFO(ifp) != NULL);
	} else {
		VERIFY(IGMP_IFINFO(ifp)->igi_ifp == ifp);
		igmp_domifreattach(IGMP_IFINFO(ifp));
	}

	if (MLD_IFINFO(ifp) == NULL) {
		MLD_IFINFO(ifp) = mld_domifattach(ifp, M_WAITOK);
		VERIFY(MLD_IFINFO(ifp) != NULL);
	} else {
		VERIFY(MLD_IFINFO(ifp)->mli_ifp == ifp);
		mld_domifreattach(MLD_IFINFO(ifp));
	}

	VERIFY(ifp->if_data_threshold == 0);
	VERIFY(ifp->if_dt_tcall != NULL);

	/*
	 * Finally, mark this ifnet as attached.
	 */
	lck_mtx_lock(rnh_lock);
	ifnet_lock_exclusive(ifp);
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	ifp->if_refflags = IFRF_ATTACHED;	/* clears embryonic */
	lck_mtx_unlock(&ifp->if_ref_lock);
	if (net_rtref) {
		/* boot-args override; enable idle notification */
		(void) ifnet_set_idle_flags_locked(ifp, IFRF_IDLE_NOTIFY,
		    IFRF_IDLE_NOTIFY);
	} else {
		/* apply previous request(s) to set the idle flags, if any */
		(void) ifnet_set_idle_flags_locked(ifp, ifp->if_idle_new_flags,
		    ifp->if_idle_new_flags_mask);
	}
	ifnet_lock_done(ifp);
	lck_mtx_unlock(rnh_lock);
	dlil_if_unlock();

#if PF
	/*
	 * Attach packet filter to this interface, if enabled.
	 */
	pf_ifnet_hook(ifp, 1);
#endif /* PF */

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_ATTACHED, NULL, 0);

	if (dlil_verbose) {
		printf("%s: attached%s\n", if_name(ifp),
		    (dl_if->dl_if_flags & DLIF_REUSE) ? " (recycled)" : "");
	}

	return (0);
}
/*
 * Prepare the storage for the first/permanent link address, which must
 * have the same lifetime as the ifnet itself. Although the link
 * address gets removed from if_addrhead and ifnet_addrs[] at detach time,
 * its location in memory must never change as it may still be referred
 * to by some parts of the system afterwards (unfortunate implementation
 * artifacts inherited from BSD.)
 *
 * Caller must hold ifnet lock as writer.
 */
static struct ifaddr *
dlil_alloc_lladdr(struct ifnet *ifp, const struct sockaddr_dl *ll_addr)
{
	struct ifaddr *ifa, *oifa;
	struct sockaddr_dl *asdl, *msdl;
	char workbuf[IFNAMSIZ*2];
	int namelen, masklen, socksize;
	struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;

	ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_EXCLUSIVE);
	VERIFY(ll_addr == NULL || ll_addr->sdl_alen == ifp->if_addrlen);

	namelen = snprintf(workbuf, sizeof (workbuf), "%s",
	    if_name(ifp));
	masklen = offsetof(struct sockaddr_dl, sdl_data[0])
	    + ((namelen > 0) ? namelen : 0);
	socksize = masklen + ifp->if_addrlen;
#define ROUNDUP(a) (1 + (((a) - 1) | (sizeof (u_int32_t) - 1)))
	if ((u_int32_t)socksize < sizeof (struct sockaddr_dl))
		socksize = sizeof(struct sockaddr_dl);
	socksize = ROUNDUP(socksize);
#undef ROUNDUP

	ifa = ifp->if_lladdr;
	if (socksize > DLIL_SDLMAXLEN ||
	    (ifa != NULL && ifa != &dl_if->dl_if_lladdr.ifa)) {
		/*
		 * Rare, but in the event that the link address requires
		 * more storage space than DLIL_SDLMAXLEN, allocate the
		 * largest possible storages for address and mask, such
		 * that we can reuse the same space when if_addrlen grows.
		 * This same space will be used when if_addrlen shrinks.
		 */
		if (ifa == NULL || ifa == &dl_if->dl_if_lladdr.ifa) {
			int ifasize = sizeof (*ifa) + 2 * SOCK_MAXADDRLEN;
			ifa = _MALLOC(ifasize, M_IFADDR, M_WAITOK | M_ZERO);
			if (ifa == NULL)
				return (NULL);
			/* Don't set IFD_ALLOC, as this is permanent */
			ifa->ifa_debug = IFD_LINK;
		}
		IFA_LOCK(ifa);
		/* address and mask sockaddr_dl locations */
		asdl = (struct sockaddr_dl *)(ifa + 1);
		bzero(asdl, SOCK_MAXADDRLEN);
		msdl = (struct sockaddr_dl *)(void *)
		    ((char *)asdl + SOCK_MAXADDRLEN);
		bzero(msdl, SOCK_MAXADDRLEN);
	} else {
		VERIFY(ifa == NULL || ifa == &dl_if->dl_if_lladdr.ifa);
		/*
		 * Use the storage areas for address and mask within the
		 * dlil_ifnet structure. This is the most common case.
		 */
		if (ifa == NULL) {
			ifa = &dl_if->dl_if_lladdr.ifa;
			/* Don't set IFD_ALLOC, as this is permanent */
			ifa->ifa_debug = IFD_LINK;
		}
		IFA_LOCK(ifa);
		/* address and mask sockaddr_dl locations */
		asdl = (struct sockaddr_dl *)(void *)&dl_if->dl_if_lladdr.asdl;
		bzero(asdl, sizeof (dl_if->dl_if_lladdr.asdl));
		msdl = (struct sockaddr_dl *)(void *)&dl_if->dl_if_lladdr.msdl;
		bzero(msdl, sizeof (dl_if->dl_if_lladdr.msdl));
	}

	/* hold a permanent reference for the ifnet itself */
	IFA_ADDREF_LOCKED(ifa);
	oifa = ifp->if_lladdr;
	ifp->if_lladdr = ifa;

	VERIFY(ifa->ifa_debug == IFD_LINK);
	ifa->ifa_ifp = ifp;
	ifa->ifa_rtrequest = link_rtrequest;
	ifa->ifa_addr = (struct sockaddr *)asdl;
	asdl->sdl_len = socksize;
	asdl->sdl_family = AF_LINK;
	if (namelen > 0) {
		bcopy(workbuf, asdl->sdl_data, min(namelen,
		    sizeof (asdl->sdl_data)));
		asdl->sdl_nlen = namelen;
	} else {
		asdl->sdl_nlen = 0;
	}
	asdl->sdl_index = ifp->if_index;
	asdl->sdl_type = ifp->if_type;
	if (ll_addr != NULL) {
		asdl->sdl_alen = ll_addr->sdl_alen;
		bcopy(CONST_LLADDR(ll_addr), LLADDR(asdl), asdl->sdl_alen);
	} else {
		asdl->sdl_alen = 0;
	}
	ifa->ifa_netmask = (struct sockaddr *)msdl;
	msdl->sdl_len = masklen;
	while (namelen > 0)
		msdl->sdl_data[--namelen] = 0xff;
	IFA_UNLOCK(ifa);

	if (oifa != NULL)
		IFA_REMREF(oifa);

	return (ifa);
}
void
if_purgeaddrs(struct ifnet *ifp)
{
	in_purgeaddrs(ifp);
	in6_purgeaddrs(ifp);
}
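/*
 * First stage of interface detach: mark the ifnet IFRF_DETACHING, pull it
 * out of ifnet_head and ifindex2ifnet[] so lookups no longer find it,
 * reset per-interface state and post KEV_DL_IF_DETACHING; the remaining
 * teardown is handed off to the detacher worker thread.
 */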
errno_t
ifnet_detach(ifnet_t ifp)
{
	struct ifnet *delegated_ifp;
	struct nd_ifinfo *ndi = NULL;

	if (ifp == NULL)
		return (EINVAL);

	ndi = ND_IFINFO(ifp);
	if (NULL != ndi)
		ndi->cga_initialized = FALSE;

	lck_mtx_lock(rnh_lock);
	ifnet_head_lock_exclusive();
	ifnet_lock_exclusive(ifp);

	/*
	 * Check to see if this interface has previously triggered
	 * aggressive protocol draining; if so, decrement the global
	 * refcnt and clear PR_AGGDRAIN on the route domain if
	 * there are no more of such an interface around.
	 */
	(void) ifnet_set_idle_flags_locked(ifp, 0, ~0);

	lck_mtx_lock_spin(&ifp->if_ref_lock);
	if (!(ifp->if_refflags & IFRF_ATTACHED)) {
		lck_mtx_unlock(&ifp->if_ref_lock);
		ifnet_lock_done(ifp);
		ifnet_head_done();
		lck_mtx_unlock(rnh_lock);
		return (EINVAL);
	} else if (ifp->if_refflags & IFRF_DETACHING) {
		/* Interface has already been detached */
		lck_mtx_unlock(&ifp->if_ref_lock);
		ifnet_lock_done(ifp);
		ifnet_head_done();
		lck_mtx_unlock(rnh_lock);
		return (ENXIO);
	}
	VERIFY(!(ifp->if_refflags & IFRF_EMBRYONIC));
	/* Indicate this interface is being detached */
	ifp->if_refflags &= ~IFRF_ATTACHED;
	ifp->if_refflags |= IFRF_DETACHING;
	lck_mtx_unlock(&ifp->if_ref_lock);

	if (dlil_verbose)
		printf("%s: detaching\n", if_name(ifp));

	/* clean up flow control entry object if there's any */
	if (ifp->if_eflags & IFEF_TXSTART) {
		ifnet_flowadv(ifp->if_flowhash);
	}

	/* Reset ECN enable/disable flags */
	ifp->if_eflags &= ~IFEF_ECN_DISABLE;
	ifp->if_eflags &= ~IFEF_ECN_ENABLE;

	/*
	 * Remove ifnet from the ifnet_head, ifindex2ifnet[]; it will
	 * no longer be visible during lookups from this point.
	 */
	VERIFY(ifindex2ifnet[ifp->if_index] == ifp);
	TAILQ_REMOVE(&ifnet_head, ifp, if_link);
	ifp->if_link.tqe_next = NULL;
	ifp->if_link.tqe_prev = NULL;
	if (ifp->if_ordered_link.tqe_next != NULL ||
	    ifp->if_ordered_link.tqe_prev != NULL) {
		ifnet_remove_from_ordered_list(ifp);
	}
	ifindex2ifnet[ifp->if_index] = NULL;

	/* 18717626 - reset IFEF_IPV4_ROUTER and IFEF_IPV6_ROUTER */
	ifp->if_eflags &= ~(IFEF_IPV4_ROUTER | IFEF_IPV6_ROUTER);

	/* Record detach PC stacktrace */
	ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_detach);

	/* Clear logging parameters */
	bzero(&ifp->if_log, sizeof (ifp->if_log));

	/* Clear delegated interface info (reference released below) */
	delegated_ifp = ifp->if_delegated.ifp;
	bzero(&ifp->if_delegated, sizeof (ifp->if_delegated));

	/* Reset interface state */
	bzero(&ifp->if_interface_state, sizeof(ifp->if_interface_state));

	ifnet_lock_done(ifp);
	ifnet_head_done();
	lck_mtx_unlock(rnh_lock);

	/* Release reference held on the delegated interface */
	if (delegated_ifp != NULL)
		ifnet_release(delegated_ifp);

	/* Reset Link Quality Metric (unless loopback [lo0]) */
	if (ifp != lo_ifp)
		if_lqm_update(ifp, IFNET_LQM_THRESH_OFF, 0);

	/* Reset TCP local statistics */
	if (ifp->if_tcp_stat != NULL)
		bzero(ifp->if_tcp_stat, sizeof(*ifp->if_tcp_stat));

	/* Reset UDP local statistics */
	if (ifp->if_udp_stat != NULL)
		bzero(ifp->if_udp_stat, sizeof(*ifp->if_udp_stat));

	/* Reset ifnet IPv4 stats */
	if (ifp->if_ipv4_stat != NULL)
		bzero(ifp->if_ipv4_stat, sizeof(*ifp->if_ipv4_stat));

	/* Reset ifnet IPv6 stats */
	if (ifp->if_ipv6_stat != NULL)
		bzero(ifp->if_ipv6_stat, sizeof(*ifp->if_ipv6_stat));

	/* Release memory held for interface link status report */
	if (ifp->if_link_status != NULL) {
		FREE(ifp->if_link_status, M_TEMP);
		ifp->if_link_status = NULL;
	}

	/* Clear agent IDs */
	if (ifp->if_agentids != NULL) {
		FREE(ifp->if_agentids, M_NETAGENT);
		ifp->if_agentids = NULL;
	}
	ifp->if_agentcount = 0;

	/* Let BPF know we're detaching */
	bpfdetach(ifp);

	/* Mark the interface as DOWN */
	if_down(ifp);

	/* Disable forwarding cached route */
	lck_mtx_lock(&ifp->if_cached_route_lock);
	ifp->if_fwd_cacheok = 0;
	lck_mtx_unlock(&ifp->if_cached_route_lock);

	/* Disable data threshold and wait for any pending event posting */
	ifp->if_data_threshold = 0;
	VERIFY(ifp->if_dt_tcall != NULL);
	(void) thread_call_cancel_wait(ifp->if_dt_tcall);

	/*
	 * Drain any deferred IGMPv3/MLDv2 query responses, but keep the
	 * references to the info structures and leave them attached to
	 * this ifnet.
	 */
	igmp_domifdetach(ifp);
	mld_domifdetach(ifp);

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHING, NULL, 0);

	/* Let worker thread take care of the rest, to avoid reentrancy */
6277 ifnet_detaching_enqueue(ifp
);
6284 ifnet_detaching_enqueue(struct ifnet
*ifp
)
6286 dlil_if_lock_assert();
6288 ++ifnet_detaching_cnt
;
6289 VERIFY(ifnet_detaching_cnt
!= 0);
6290 TAILQ_INSERT_TAIL(&ifnet_detaching_head
, ifp
, if_detaching_link
);
6291 wakeup((caddr_t
)&ifnet_delayed_run
);
6294 static struct ifnet
*
6295 ifnet_detaching_dequeue(void)
6299 dlil_if_lock_assert();
6301 ifp
= TAILQ_FIRST(&ifnet_detaching_head
);
6302 VERIFY(ifnet_detaching_cnt
!= 0 || ifp
== NULL
);
6304 VERIFY(ifnet_detaching_cnt
!= 0);
6305 --ifnet_detaching_cnt
;
6306 TAILQ_REMOVE(&ifnet_detaching_head
, ifp
, if_detaching_link
);
6307 ifp
->if_detaching_link
.tqe_next
= NULL
;
6308 ifp
->if_detaching_link
.tqe_prev
= NULL
;
6314 ifnet_detacher_thread_cont(int err
)
6320 dlil_if_lock_assert();
6321 while (ifnet_detaching_cnt
== 0) {
6322 (void) msleep0(&ifnet_delayed_run
, &dlil_ifnet_lock
,
6323 (PZERO
- 1), "ifnet_detacher_cont", 0,
6324 ifnet_detacher_thread_cont
);
6328 VERIFY(TAILQ_FIRST(&ifnet_detaching_head
) != NULL
);
6330 /* Take care of detaching ifnet */
6331 ifp
= ifnet_detaching_dequeue();
6334 ifnet_detach_final(ifp
);
6341 ifnet_detacher_thread_func(void *v
, wait_result_t w
)
6343 #pragma unused(v, w)
6345 (void) msleep0(&ifnet_delayed_run
, &dlil_ifnet_lock
,
6346 (PZERO
- 1), "ifnet_detacher", 0, ifnet_detacher_thread_cont
);
6348 * msleep0() shouldn't have returned as PCATCH was not set;
6349 * therefore assert in this case.
6356 ifnet_detach_final(struct ifnet
*ifp
)
6358 struct ifnet_filter
*filter
, *filter_next
;
6359 struct ifnet_filter_head fhead
;
6360 struct dlil_threading_info
*inp
;
6362 ifnet_detached_func if_free
;
6365 lck_mtx_lock(&ifp
->if_ref_lock
);
6366 if (!(ifp
->if_refflags
& IFRF_DETACHING
)) {
6367 panic("%s: flags mismatch (detaching not set) ifp=%p",
6373 * Wait until the existing IO references get released
6374 * before we proceed with ifnet_detach. This is not a
6375 * common case, so block without using a continuation.
6377 while (ifp
->if_refio
> 0) {
6378 printf("%s: Waiting for IO references on %s interface "
6379 "to be released\n", __func__
, if_name(ifp
));
6380 (void) msleep(&(ifp
->if_refio
), &ifp
->if_ref_lock
,
6381 (PZERO
- 1), "ifnet_ioref_wait", NULL
);
6383 lck_mtx_unlock(&ifp
->if_ref_lock
);
6385 /* Drain and destroy send queue */
6386 ifclassq_teardown(ifp
);
6388 /* Detach interface filters */
6389 lck_mtx_lock(&ifp
->if_flt_lock
);
6390 if_flt_monitor_enter(ifp
);
6392 LCK_MTX_ASSERT(&ifp
->if_flt_lock
, LCK_MTX_ASSERT_OWNED
);
6393 fhead
= ifp
->if_flt_head
;
6394 TAILQ_INIT(&ifp
->if_flt_head
);
6396 for (filter
= TAILQ_FIRST(&fhead
); filter
; filter
= filter_next
) {
6397 filter_next
= TAILQ_NEXT(filter
, filt_next
);
6398 lck_mtx_unlock(&ifp
->if_flt_lock
);
6400 dlil_detach_filter_internal(filter
, 1);
6401 lck_mtx_lock(&ifp
->if_flt_lock
);
6403 if_flt_monitor_leave(ifp
);
6404 lck_mtx_unlock(&ifp
->if_flt_lock
);
6406 /* Tell upper layers to drop their network addresses */
6409 ifnet_lock_exclusive(ifp
);
6411 /* Uplumb all protocols */
6412 for (i
= 0; i
< PROTO_HASH_SLOTS
; i
++) {
6413 struct if_proto
*proto
;
6415 proto
= SLIST_FIRST(&ifp
->if_proto_hash
[i
]);
6416 while (proto
!= NULL
) {
6417 protocol_family_t family
= proto
->protocol_family
;
6418 ifnet_lock_done(ifp
);
6419 proto_unplumb(family
, ifp
);
6420 ifnet_lock_exclusive(ifp
);
6421 proto
= SLIST_FIRST(&ifp
->if_proto_hash
[i
]);
6423 /* There should not be any protocols left */
6424 VERIFY(SLIST_EMPTY(&ifp
->if_proto_hash
[i
]));
6426 zfree(dlif_phash_zone
, ifp
->if_proto_hash
);
6427 ifp
->if_proto_hash
= NULL
;
6429 /* Detach (permanent) link address from if_addrhead */
6430 ifa
= TAILQ_FIRST(&ifp
->if_addrhead
);
6431 VERIFY(ifnet_addrs
[ifp
->if_index
- 1] == ifa
);
6433 if_detach_link_ifa(ifp
, ifa
);
6436 /* Remove (permanent) link address from ifnet_addrs[] */
6438 ifnet_addrs
[ifp
->if_index
- 1] = NULL
;
6440 /* This interface should not be on {ifnet_head,detaching} */
6441 VERIFY(ifp
->if_link
.tqe_next
== NULL
);
6442 VERIFY(ifp
->if_link
.tqe_prev
== NULL
);
6443 VERIFY(ifp
->if_detaching_link
.tqe_next
== NULL
);
6444 VERIFY(ifp
->if_detaching_link
.tqe_prev
== NULL
);
6445 VERIFY(ifp
->if_ordered_link
.tqe_next
== NULL
);
6446 VERIFY(ifp
->if_ordered_link
.tqe_prev
== NULL
);
6448 /* The slot should have been emptied */
6449 VERIFY(ifindex2ifnet
[ifp
->if_index
] == NULL
);
6451 /* There should not be any addresses left */
6452 VERIFY(TAILQ_EMPTY(&ifp
->if_addrhead
));
6455 * Signal the starter thread to terminate itself.
6457 if (ifp
->if_start_thread
!= THREAD_NULL
) {
6458 lck_mtx_lock_spin(&ifp
->if_start_lock
);
6459 ifp
->if_start_flags
= 0;
6460 ifp
->if_start_thread
= THREAD_NULL
;
6461 wakeup_one((caddr_t
)&ifp
->if_start_thread
);
6462 lck_mtx_unlock(&ifp
->if_start_lock
);
6466 * Signal the poller thread to terminate itself.
6468 if (ifp
->if_poll_thread
!= THREAD_NULL
) {
6469 lck_mtx_lock_spin(&ifp
->if_poll_lock
);
6470 ifp
->if_poll_thread
= THREAD_NULL
;
6471 wakeup_one((caddr_t
)&ifp
->if_poll_thread
);
6472 lck_mtx_unlock(&ifp
->if_poll_lock
);
6476 * If thread affinity was set for the workloop thread, we will need
6477 * to tear down the affinity and release the extra reference count
6478 * taken at attach time. Does not apply to lo0 or other interfaces
6479 * without dedicated input threads.
6481 if ((inp
= ifp
->if_inp
) != NULL
) {
6482 VERIFY(inp
!= dlil_main_input_thread
);
6484 if (inp
->net_affinity
) {
6485 struct thread
*tp
, *wtp
, *ptp
;
6487 lck_mtx_lock_spin(&inp
->input_lck
);
6488 wtp
= inp
->wloop_thr
;
6489 inp
->wloop_thr
= THREAD_NULL
;
6490 ptp
= inp
->poll_thr
;
6491 inp
->poll_thr
= THREAD_NULL
;
6492 tp
= inp
->input_thr
; /* don't nullify now */
6494 inp
->net_affinity
= FALSE
;
6495 lck_mtx_unlock(&inp
->input_lck
);
6497 /* Tear down poll thread affinity */
6499 VERIFY(ifp
->if_eflags
& IFEF_RXPOLL
);
6500 (void) dlil_affinity_set(ptp
,
6501 THREAD_AFFINITY_TAG_NULL
);
6502 thread_deallocate(ptp
);
6505 /* Tear down workloop thread affinity */
6507 (void) dlil_affinity_set(wtp
,
6508 THREAD_AFFINITY_TAG_NULL
);
6509 thread_deallocate(wtp
);
6512 /* Tear down DLIL input thread affinity */
6513 (void) dlil_affinity_set(tp
, THREAD_AFFINITY_TAG_NULL
);
6514 thread_deallocate(tp
);
6517 /* disassociate ifp DLIL input thread */
6520 /* tell the input thread to terminate */
6521 lck_mtx_lock_spin(&inp
->input_lck
);
6522 inp
->input_waiting
|= DLIL_INPUT_TERMINATE
;
6523 if (!(inp
->input_waiting
& DLIL_INPUT_RUNNING
)) {
6524 wakeup_one((caddr_t
)&inp
->input_waiting
);
6526 lck_mtx_unlock(&inp
->input_lck
);
6527 ifnet_lock_done(ifp
);
6529 /* wait for the input thread to terminate */
6530 lck_mtx_lock_spin(&inp
->input_lck
);
6531 while ((inp
->input_waiting
& DLIL_INPUT_TERMINATE_COMPLETE
)
6533 (void) msleep(&inp
->input_waiting
, &inp
->input_lck
,
6534 (PZERO
- 1) | PSPIN
, inp
->input_name
, NULL
);
6536 lck_mtx_unlock(&inp
->input_lck
);
6537 ifnet_lock_exclusive(ifp
);
6539 /* clean-up input thread state */
6540 dlil_clean_threading_info(inp
);
6544 /* The driver might unload, so point these to ourselves */
6545 if_free
= ifp
->if_free
;
6546 ifp
->if_output_dlil
= ifp_if_output
;
6547 ifp
->if_output
= ifp_if_output
;
6548 ifp
->if_pre_enqueue
= ifp_if_output
;
6549 ifp
->if_start
= ifp_if_start
;
6550 ifp
->if_output_ctl
= ifp_if_ctl
;
6551 ifp
->if_input_dlil
= ifp_if_input
;
6552 ifp
->if_input_poll
= ifp_if_input_poll
;
6553 ifp
->if_input_ctl
= ifp_if_ctl
;
6554 ifp
->if_ioctl
= ifp_if_ioctl
;
6555 ifp
->if_set_bpf_tap
= ifp_if_set_bpf_tap
;
6556 ifp
->if_free
= ifp_if_free
;
6557 ifp
->if_demux
= ifp_if_demux
;
6558 ifp
->if_event
= ifp_if_event
;
6559 ifp
->if_framer_legacy
= ifp_if_framer
;
6560 ifp
->if_framer
= ifp_if_framer_extended
;
6561 ifp
->if_add_proto
= ifp_if_add_proto
;
6562 ifp
->if_del_proto
= ifp_if_del_proto
;
6563 ifp
->if_check_multi
= ifp_if_check_multi
;
6565 /* wipe out interface description */
6566 VERIFY(ifp
->if_desc
.ifd_maxlen
== IF_DESCSIZE
);
6567 ifp
->if_desc
.ifd_len
= 0;
6568 VERIFY(ifp
->if_desc
.ifd_desc
!= NULL
);
6569 bzero(ifp
->if_desc
.ifd_desc
, IF_DESCSIZE
);
6571 /* there shouldn't be any delegation by now */
6572 VERIFY(ifp
->if_delegated
.ifp
== NULL
);
6573 VERIFY(ifp
->if_delegated
.type
== 0);
6574 VERIFY(ifp
->if_delegated
.family
== 0);
6575 VERIFY(ifp
->if_delegated
.subfamily
== 0);
6576 VERIFY(ifp
->if_delegated
.expensive
== 0);
6578 /* QoS marking get cleared */
6579 ifp
->if_eflags
&= ~IFEF_QOSMARKING_ENABLED
;
6580 if_set_qosmarking_mode(ifp
, IFRTYPE_QOSMARKING_MODE_NONE
);
6583 ifnet_lock_done(ifp
);
6587 * Detach this interface from packet filter, if enabled.
6589 pf_ifnet_hook(ifp
, 0);
6592 /* Filter list should be empty */
6593 lck_mtx_lock_spin(&ifp
->if_flt_lock
);
6594 VERIFY(TAILQ_EMPTY(&ifp
->if_flt_head
));
6595 VERIFY(ifp
->if_flt_busy
== 0);
6596 VERIFY(ifp
->if_flt_waiters
== 0);
6597 lck_mtx_unlock(&ifp
->if_flt_lock
);
6599 /* Last chance to drain send queue */
6602 /* Last chance to cleanup any cached route */
6603 lck_mtx_lock(&ifp
->if_cached_route_lock
);
6604 VERIFY(!ifp
->if_fwd_cacheok
);
6605 ROUTE_RELEASE(&ifp
->if_fwd_route
);
6606 bzero(&ifp
->if_fwd_route
, sizeof (ifp
->if_fwd_route
));
6607 ROUTE_RELEASE(&ifp
->if_src_route
);
6608 bzero(&ifp
->if_src_route
, sizeof (ifp
->if_src_route
));
6609 ROUTE_RELEASE(&ifp
->if_src_route6
);
6610 bzero(&ifp
->if_src_route6
, sizeof (ifp
->if_src_route6
));
6611 lck_mtx_unlock(&ifp
->if_cached_route_lock
);
6613 VERIFY(ifp
->if_data_threshold
== 0);
6614 VERIFY(ifp
->if_dt_tcall
!= NULL
);
6615 VERIFY(!thread_call_isactive(ifp
->if_dt_tcall
));
6617 ifnet_llreach_ifdetach(ifp
);
6619 dlil_post_msg(ifp
, KEV_DL_SUBCLASS
, KEV_DL_IF_DETACHED
, NULL
, 0);
6622 * Finally, mark this ifnet as detached.
6624 lck_mtx_lock_spin(&ifp
->if_ref_lock
);
6625 if (!(ifp
->if_refflags
& IFRF_DETACHING
)) {
6626 panic("%s: flags mismatch (detaching not set) ifp=%p",
6630 ifp
->if_refflags
&= ~IFRF_DETACHING
;
6631 lck_mtx_unlock(&ifp
->if_ref_lock
);
6632 if (if_free
!= NULL
)
6636 printf("%s: detached\n", if_name(ifp
));
6638 /* Release reference held during ifnet attach */
6643 ifp_if_output(struct ifnet
*ifp
, struct mbuf
*m
)
6651 ifp_if_start(struct ifnet
*ifp
)
6657 ifp_if_input(struct ifnet
*ifp
, struct mbuf
*m_head
,
6658 struct mbuf
*m_tail
, const struct ifnet_stat_increment_param
*s
,
6659 boolean_t poll
, struct thread
*tp
)
6661 #pragma unused(ifp, m_tail, s, poll, tp)
6662 m_freem_list(m_head
);
6667 ifp_if_input_poll(struct ifnet
*ifp
, u_int32_t flags
, u_int32_t max_cnt
,
6668 struct mbuf
**m_head
, struct mbuf
**m_tail
, u_int32_t
*cnt
, u_int32_t
*len
)
6670 #pragma unused(ifp, flags, max_cnt)
6682 ifp_if_ctl(struct ifnet
*ifp
, ifnet_ctl_cmd_t cmd
, u_int32_t arglen
, void *arg
)
6684 #pragma unused(ifp, cmd, arglen, arg)
6685 return (EOPNOTSUPP
);
6689 ifp_if_demux(struct ifnet
*ifp
, struct mbuf
*m
, char *fh
, protocol_family_t
*pf
)
6691 #pragma unused(ifp, fh, pf)
6693 return (EJUSTRETURN
);
6697 ifp_if_add_proto(struct ifnet
*ifp
, protocol_family_t pf
,
6698 const struct ifnet_demux_desc
*da
, u_int32_t dc
)
6700 #pragma unused(ifp, pf, da, dc)
6705 ifp_if_del_proto(struct ifnet
*ifp
, protocol_family_t pf
)
6707 #pragma unused(ifp, pf)
6712 ifp_if_check_multi(struct ifnet
*ifp
, const struct sockaddr
*sa
)
6714 #pragma unused(ifp, sa)
6715 return (EOPNOTSUPP
);
6720 ifp_if_framer(struct ifnet
*ifp
, struct mbuf
**m
,
6721 const struct sockaddr
*sa
, const char *ll
, const char *t
,
6722 u_int32_t
*pre
, u_int32_t
*post
)
6725 ifp_if_framer(struct ifnet
*ifp
, struct mbuf
**m
,
6726 const struct sockaddr
*sa
, const char *ll
, const char *t
)
6727 #endif /* !CONFIG_EMBEDDED */
6729 #pragma unused(ifp, m, sa, ll, t)
6731 return (ifp_if_framer_extended(ifp
, m
, sa
, ll
, t
, pre
, post
));
6733 return (ifp_if_framer_extended(ifp
, m
, sa
, ll
, t
, NULL
, NULL
));
6734 #endif /* !CONFIG_EMBEDDED */
6738 ifp_if_framer_extended(struct ifnet
*ifp
, struct mbuf
**m
,
6739 const struct sockaddr
*sa
, const char *ll
, const char *t
,
6740 u_int32_t
*pre
, u_int32_t
*post
)
6742 #pragma unused(ifp, sa, ll, t)
6751 return (EJUSTRETURN
);
6755 ifp_if_ioctl(struct ifnet
*ifp
, unsigned long cmd
, void *arg
)
6757 #pragma unused(ifp, cmd, arg)
6758 return (EOPNOTSUPP
);
6762 ifp_if_set_bpf_tap(struct ifnet
*ifp
, bpf_tap_mode tm
, bpf_packet_func f
)
6764 #pragma unused(ifp, tm, f)
6765 /* XXX not sure what to do here */
6770 ifp_if_free(struct ifnet
*ifp
)
6776 ifp_if_event(struct ifnet
*ifp
, const struct kev_msg
*e
)
6778 #pragma unused(ifp, e)
6781 int dlil_if_acquire(u_int32_t family
, const void *uniqueid
,
6782 size_t uniqueid_len
, const char *ifxname
, struct ifnet
**ifp
)
6784 struct ifnet
*ifp1
= NULL
;
6785 struct dlil_ifnet
*dlifp1
= NULL
;
6786 void *buf
, *base
, **pbuf
;
6789 VERIFY(*ifp
== NULL
);
6792 * We absolutely can't have an interface with the same name
6794 * To make sure of that list has to be traversed completely
6796 TAILQ_FOREACH(dlifp1
, &dlil_ifnet_head
, dl_if_link
) {
6797 ifp1
= (struct ifnet
*)dlifp1
;
6799 if (ifp1
->if_family
!= family
)
6803 * If interface is in use, return EBUSY if either unique id
6804 * or interface extended names are the same
6806 lck_mtx_lock(&dlifp1
->dl_if_lock
);
6807 if (strncmp(ifxname
, ifp1
->if_xname
, IFXNAMSIZ
) == 0) {
6808 if (dlifp1
->dl_if_flags
& DLIF_INUSE
) {
6809 lck_mtx_unlock(&dlifp1
->dl_if_lock
);
6816 if (uniqueid_len
== dlifp1
->dl_if_uniqueid_len
&&
6817 bcmp(uniqueid
, dlifp1
->dl_if_uniqueid
, uniqueid_len
) == 0) {
6818 if (dlifp1
->dl_if_flags
& DLIF_INUSE
) {
6819 lck_mtx_unlock(&dlifp1
->dl_if_lock
);
6823 dlifp1
->dl_if_flags
|= (DLIF_INUSE
|DLIF_REUSE
);
6824 /* Cache the first interface that can be recycled */
6828 * XXX Do not break or jump to end as we have to traverse
6829 * the whole list to ensure there are no name collisions
6834 lck_mtx_unlock(&dlifp1
->dl_if_lock
);
6837 /* If there's an interface that can be recycled, use that */
6841 /* no interface found, allocate a new one */
6842 buf
= zalloc(dlif_zone
);
6847 bzero(buf
, dlif_bufsize
);
6849 /* Get the 64-bit aligned base address for this object */
6850 base
= (void *)P2ROUNDUP((intptr_t)buf
+ sizeof (u_int64_t
),
6851 sizeof (u_int64_t
));
6852 VERIFY(((intptr_t)base
+ dlif_size
) <= ((intptr_t)buf
+ dlif_bufsize
));
6855 * Wind back a pointer size from the aligned base and
6856 * save the original address so we can free it later.
6858 pbuf
= (void **)((intptr_t)base
- sizeof (void *));
6863 MALLOC(dlifp1
->dl_if_uniqueid
, void *, uniqueid_len
,
6865 if (dlifp1
->dl_if_uniqueid
== NULL
) {
6866 zfree(dlif_zone
, buf
);
6870 bcopy(uniqueid
, dlifp1
->dl_if_uniqueid
, uniqueid_len
);
6871 dlifp1
->dl_if_uniqueid_len
= uniqueid_len
;
6874 ifp1
= (struct ifnet
*)dlifp1
;
6875 dlifp1
->dl_if_flags
= DLIF_INUSE
;
6877 dlifp1
->dl_if_flags
|= DLIF_DEBUG
;
6878 dlifp1
->dl_if_trace
= dlil_if_trace
;
6880 ifp1
->if_name
= dlifp1
->dl_if_namestorage
;
6881 ifp1
->if_xname
= dlifp1
->dl_if_xnamestorage
;
6883 /* initialize interface description */
6884 ifp1
->if_desc
.ifd_maxlen
= IF_DESCSIZE
;
6885 ifp1
->if_desc
.ifd_len
= 0;
6886 ifp1
->if_desc
.ifd_desc
= dlifp1
->dl_if_descstorage
;
6890 mac_ifnet_label_init(ifp1
);
6893 if ((ret
= dlil_alloc_local_stats(ifp1
)) != 0) {
6894 DLIL_PRINTF("%s: failed to allocate if local stats, "
6895 "error: %d\n", __func__
, ret
);
6896 /* This probably shouldn't be fatal */
6900 lck_mtx_init(&dlifp1
->dl_if_lock
, ifnet_lock_group
, ifnet_lock_attr
);
6901 lck_rw_init(&ifp1
->if_lock
, ifnet_lock_group
, ifnet_lock_attr
);
6902 lck_mtx_init(&ifp1
->if_ref_lock
, ifnet_lock_group
, ifnet_lock_attr
);
6903 lck_mtx_init(&ifp1
->if_flt_lock
, ifnet_lock_group
, ifnet_lock_attr
);
6904 lck_mtx_init(&ifp1
->if_addrconfig_lock
, ifnet_lock_group
,
6906 lck_rw_init(&ifp1
->if_llreach_lock
, ifnet_lock_group
, ifnet_lock_attr
);
6908 lck_rw_init(&ifp1
->if_inetdata_lock
, ifnet_lock_group
,
6910 ifp1
->if_inetdata
= NULL
;
6913 lck_rw_init(&ifp1
->if_inet6data_lock
, ifnet_lock_group
,
6915 ifp1
->if_inet6data
= NULL
;
6917 lck_rw_init(&ifp1
->if_link_status_lock
, ifnet_lock_group
,
6919 ifp1
->if_link_status
= NULL
;
6921 /* for send data paths */
6922 lck_mtx_init(&ifp1
->if_start_lock
, ifnet_snd_lock_group
,
6924 lck_mtx_init(&ifp1
->if_cached_route_lock
, ifnet_snd_lock_group
,
6926 lck_mtx_init(&ifp1
->if_snd
.ifcq_lock
, ifnet_snd_lock_group
,
6929 /* for receive data paths */
6930 lck_mtx_init(&ifp1
->if_poll_lock
, ifnet_rcv_lock_group
,
6933 /* thread call allocation is done with sleeping zalloc */
6934 ifp1
->if_dt_tcall
= thread_call_allocate_with_options(dlil_dt_tcall_fn
,
6935 ifp1
, THREAD_CALL_PRIORITY_KERNEL
, THREAD_CALL_OPTIONS_ONCE
);
6936 if (ifp1
->if_dt_tcall
== NULL
) {
6937 panic_plain("%s: couldn't create if_dt_tcall", __func__
);
6941 TAILQ_INSERT_TAIL(&dlil_ifnet_head
, dlifp1
, dl_if_link
);
6948 VERIFY(dlifp1
== NULL
|| (IS_P2ALIGNED(dlifp1
, sizeof (u_int64_t
)) &&
6949 IS_P2ALIGNED(&ifp1
->if_data
, sizeof (u_int64_t
))));
6954 __private_extern__
void
6955 dlil_if_release(ifnet_t ifp
)
6957 struct dlil_ifnet
*dlifp
= (struct dlil_ifnet
*)ifp
;
6959 VERIFY(OSDecrementAtomic64(&net_api_stats
.nas_ifnet_alloc_count
) > 0);
6960 if (!(ifp
->if_xflags
& IFXF_ALLOC_KPI
)) {
6961 VERIFY(OSDecrementAtomic64(&net_api_stats
.nas_ifnet_alloc_os_count
) > 0);
6964 ifnet_lock_exclusive(ifp
);
6965 lck_mtx_lock(&dlifp
->dl_if_lock
);
6966 dlifp
->dl_if_flags
&= ~DLIF_INUSE
;
6967 strlcpy(dlifp
->dl_if_namestorage
, ifp
->if_name
, IFNAMSIZ
);
6968 ifp
->if_name
= dlifp
->dl_if_namestorage
;
6969 /* Reset external name (name + unit) */
6970 ifp
->if_xname
= dlifp
->dl_if_xnamestorage
;
6971 snprintf(__DECONST(char *, ifp
->if_xname
), IFXNAMSIZ
,
6972 "%s?", ifp
->if_name
);
6973 lck_mtx_unlock(&dlifp
->dl_if_lock
);
6976 * We can either recycle the MAC label here or in dlil_if_acquire().
6977 * It seems logical to do it here but this means that anything that
6978 * still has a handle on ifp will now see it as unlabeled.
6979 * Since the interface is "dead" that may be OK. Revisit later.
6981 mac_ifnet_label_recycle(ifp
);
6983 ifnet_lock_done(ifp
);
6986 __private_extern__
void
6989 lck_mtx_lock(&dlil_ifnet_lock
);
6992 __private_extern__
void
6993 dlil_if_unlock(void)
6995 lck_mtx_unlock(&dlil_ifnet_lock
);
6998 __private_extern__
void
6999 dlil_if_lock_assert(void)
7001 LCK_MTX_ASSERT(&dlil_ifnet_lock
, LCK_MTX_ASSERT_OWNED
);
7004 __private_extern__
void
7005 dlil_proto_unplumb_all(struct ifnet
*ifp
)
7008 * if_proto_hash[0-2] are for PF_INET, PF_INET6 and PF_VLAN, where
7009 * each bucket contains exactly one entry; PF_VLAN does not need an
7012 * if_proto_hash[3] is for other protocols; we expect anything
7013 * in this bucket to respond to the DETACHING event (which would
7014 * have happened by now) and do the unplumb then.
7016 (void) proto_unplumb(PF_INET
, ifp
);
7018 (void) proto_unplumb(PF_INET6
, ifp
);
7023 ifp_src_route_copyout(struct ifnet
*ifp
, struct route
*dst
)
7025 lck_mtx_lock_spin(&ifp
->if_cached_route_lock
);
7026 lck_mtx_convert_spin(&ifp
->if_cached_route_lock
);
7028 route_copyout(dst
, &ifp
->if_src_route
, sizeof (*dst
));
7030 lck_mtx_unlock(&ifp
->if_cached_route_lock
);
7034 ifp_src_route_copyin(struct ifnet
*ifp
, struct route
*src
)
7036 lck_mtx_lock_spin(&ifp
->if_cached_route_lock
);
7037 lck_mtx_convert_spin(&ifp
->if_cached_route_lock
);
7039 if (ifp
->if_fwd_cacheok
) {
7040 route_copyin(src
, &ifp
->if_src_route
, sizeof (*src
));
7044 lck_mtx_unlock(&ifp
->if_cached_route_lock
);
7049 ifp_src_route6_copyout(struct ifnet
*ifp
, struct route_in6
*dst
)
7051 lck_mtx_lock_spin(&ifp
->if_cached_route_lock
);
7052 lck_mtx_convert_spin(&ifp
->if_cached_route_lock
);
7054 route_copyout((struct route
*)dst
, (struct route
*)&ifp
->if_src_route6
,
7057 lck_mtx_unlock(&ifp
->if_cached_route_lock
);
7061 ifp_src_route6_copyin(struct ifnet
*ifp
, struct route_in6
*src
)
7063 lck_mtx_lock_spin(&ifp
->if_cached_route_lock
);
7064 lck_mtx_convert_spin(&ifp
->if_cached_route_lock
);
7066 if (ifp
->if_fwd_cacheok
) {
7067 route_copyin((struct route
*)src
,
7068 (struct route
*)&ifp
->if_src_route6
, sizeof (*src
));
7072 lck_mtx_unlock(&ifp
->if_cached_route_lock
);
7077 ifnet_cached_rtlookup_inet(struct ifnet
*ifp
, struct in_addr src_ip
)
7079 struct route src_rt
;
7080 struct sockaddr_in
*dst
;
7082 dst
= (struct sockaddr_in
*)(void *)(&src_rt
.ro_dst
);
7084 ifp_src_route_copyout(ifp
, &src_rt
);
7086 if (ROUTE_UNUSABLE(&src_rt
) || src_ip
.s_addr
!= dst
->sin_addr
.s_addr
) {
7087 ROUTE_RELEASE(&src_rt
);
7088 if (dst
->sin_family
!= AF_INET
) {
7089 bzero(&src_rt
.ro_dst
, sizeof (src_rt
.ro_dst
));
7090 dst
->sin_len
= sizeof (src_rt
.ro_dst
);
7091 dst
->sin_family
= AF_INET
;
7093 dst
->sin_addr
= src_ip
;
7095 VERIFY(src_rt
.ro_rt
== NULL
);
7096 src_rt
.ro_rt
= rtalloc1_scoped((struct sockaddr
*)dst
,
7097 0, 0, ifp
->if_index
);
7099 if (src_rt
.ro_rt
!= NULL
) {
7100 /* retain a ref, copyin consumes one */
7101 struct rtentry
*rte
= src_rt
.ro_rt
;
7103 ifp_src_route_copyin(ifp
, &src_rt
);
7108 return (src_rt
.ro_rt
);
7113 ifnet_cached_rtlookup_inet6(struct ifnet
*ifp
, struct in6_addr
*src_ip6
)
7115 struct route_in6 src_rt
;
7117 ifp_src_route6_copyout(ifp
, &src_rt
);
7119 if (ROUTE_UNUSABLE(&src_rt
) ||
7120 !IN6_ARE_ADDR_EQUAL(src_ip6
, &src_rt
.ro_dst
.sin6_addr
)) {
7121 ROUTE_RELEASE(&src_rt
);
7122 if (src_rt
.ro_dst
.sin6_family
!= AF_INET6
) {
7123 bzero(&src_rt
.ro_dst
, sizeof (src_rt
.ro_dst
));
7124 src_rt
.ro_dst
.sin6_len
= sizeof (src_rt
.ro_dst
);
7125 src_rt
.ro_dst
.sin6_family
= AF_INET6
;
7127 src_rt
.ro_dst
.sin6_scope_id
= in6_addr2scopeid(ifp
, src_ip6
);
7128 bcopy(src_ip6
, &src_rt
.ro_dst
.sin6_addr
,
7129 sizeof (src_rt
.ro_dst
.sin6_addr
));
7131 if (src_rt
.ro_rt
== NULL
) {
7132 src_rt
.ro_rt
= rtalloc1_scoped(
7133 (struct sockaddr
*)&src_rt
.ro_dst
, 0, 0,
7136 if (src_rt
.ro_rt
!= NULL
) {
7137 /* retain a ref, copyin consumes one */
7138 struct rtentry
*rte
= src_rt
.ro_rt
;
7140 ifp_src_route6_copyin(ifp
, &src_rt
);
7146 return (src_rt
.ro_rt
);
7151 if_lqm_update(struct ifnet
*ifp
, int lqm
, int locked
)
7153 struct kev_dl_link_quality_metric_data ev_lqm_data
;
7155 VERIFY(lqm
>= IFNET_LQM_MIN
&& lqm
<= IFNET_LQM_MAX
);
7157 /* Normalize to edge */
7158 if (lqm
>= 0 && lqm
<= IFNET_LQM_THRESH_ABORT
) {
7159 lqm
= IFNET_LQM_THRESH_ABORT
;
7160 atomic_bitset_32(&tcbinfo
.ipi_flags
,
7161 INPCBINFO_HANDLE_LQM_ABORT
);
7162 inpcb_timer_sched(&tcbinfo
, INPCB_TIMER_FAST
);
7163 } else if (lqm
> IFNET_LQM_THRESH_ABORT
&&
7164 lqm
<= IFNET_LQM_THRESH_MINIMALLY_VIABLE
) {
7165 lqm
= IFNET_LQM_THRESH_MINIMALLY_VIABLE
;
7166 } else if (lqm
> IFNET_LQM_THRESH_MINIMALLY_VIABLE
&&
7167 lqm
<= IFNET_LQM_THRESH_POOR
) {
7168 lqm
= IFNET_LQM_THRESH_POOR
;
7169 } else if (lqm
> IFNET_LQM_THRESH_POOR
&&
7170 lqm
<= IFNET_LQM_THRESH_GOOD
) {
7171 lqm
= IFNET_LQM_THRESH_GOOD
;
7175 * Take the lock if needed
7178 ifnet_lock_exclusive(ifp
);
7180 if (lqm
== ifp
->if_interface_state
.lqm_state
&&
7181 (ifp
->if_interface_state
.valid_bitmask
&
7182 IF_INTERFACE_STATE_LQM_STATE_VALID
)) {
7184 * Release the lock if was not held by the caller
7187 ifnet_lock_done(ifp
);
7188 return; /* nothing to update */
7190 ifp
->if_interface_state
.valid_bitmask
|=
7191 IF_INTERFACE_STATE_LQM_STATE_VALID
;
7192 ifp
->if_interface_state
.lqm_state
= lqm
;
7195 * Don't want to hold the lock when issuing kernel events
7197 ifnet_lock_done(ifp
);
7199 bzero(&ev_lqm_data
, sizeof (ev_lqm_data
));
7200 ev_lqm_data
.link_quality_metric
= lqm
;
7202 dlil_post_msg(ifp
, KEV_DL_SUBCLASS
, KEV_DL_LINK_QUALITY_METRIC_CHANGED
,
7203 (struct net_event_data
*)&ev_lqm_data
, sizeof (ev_lqm_data
));
7206 * Reacquire the lock for the caller
7209 ifnet_lock_exclusive(ifp
);
7213 if_rrc_state_update(struct ifnet
*ifp
, unsigned int rrc_state
)
7215 struct kev_dl_rrc_state kev
;
7217 if (rrc_state
== ifp
->if_interface_state
.rrc_state
&&
7218 (ifp
->if_interface_state
.valid_bitmask
&
7219 IF_INTERFACE_STATE_RRC_STATE_VALID
))
7222 ifp
->if_interface_state
.valid_bitmask
|=
7223 IF_INTERFACE_STATE_RRC_STATE_VALID
;
7225 ifp
->if_interface_state
.rrc_state
= rrc_state
;
7228 * Don't want to hold the lock when issuing kernel events
7230 ifnet_lock_done(ifp
);
7232 bzero(&kev
, sizeof(struct kev_dl_rrc_state
));
7233 kev
.rrc_state
= rrc_state
;
7235 dlil_post_msg(ifp
, KEV_DL_SUBCLASS
, KEV_DL_RRC_STATE_CHANGED
,
7236 (struct net_event_data
*)&kev
, sizeof(struct kev_dl_rrc_state
));
7238 ifnet_lock_exclusive(ifp
);
7242 if_state_update(struct ifnet
*ifp
,
7243 struct if_interface_state
*if_interface_state
)
7245 u_short if_index_available
= 0;
7247 ifnet_lock_exclusive(ifp
);
7249 if ((ifp
->if_type
!= IFT_CELLULAR
) &&
7250 (if_interface_state
->valid_bitmask
&
7251 IF_INTERFACE_STATE_RRC_STATE_VALID
)) {
7252 ifnet_lock_done(ifp
);
7255 if ((if_interface_state
->valid_bitmask
&
7256 IF_INTERFACE_STATE_LQM_STATE_VALID
) &&
7257 (if_interface_state
->lqm_state
< IFNET_LQM_MIN
||
7258 if_interface_state
->lqm_state
> IFNET_LQM_MAX
)) {
7259 ifnet_lock_done(ifp
);
7262 if ((if_interface_state
->valid_bitmask
&
7263 IF_INTERFACE_STATE_RRC_STATE_VALID
) &&
7264 if_interface_state
->rrc_state
!=
7265 IF_INTERFACE_STATE_RRC_STATE_IDLE
&&
7266 if_interface_state
->rrc_state
!=
7267 IF_INTERFACE_STATE_RRC_STATE_CONNECTED
) {
7268 ifnet_lock_done(ifp
);
7272 if (if_interface_state
->valid_bitmask
&
7273 IF_INTERFACE_STATE_LQM_STATE_VALID
) {
7274 if_lqm_update(ifp
, if_interface_state
->lqm_state
, 1);
7276 if (if_interface_state
->valid_bitmask
&
7277 IF_INTERFACE_STATE_RRC_STATE_VALID
) {
7278 if_rrc_state_update(ifp
, if_interface_state
->rrc_state
);
7280 if (if_interface_state
->valid_bitmask
&
7281 IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID
) {
7282 ifp
->if_interface_state
.valid_bitmask
|=
7283 IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID
;
7284 ifp
->if_interface_state
.interface_availability
=
7285 if_interface_state
->interface_availability
;
7287 if (ifp
->if_interface_state
.interface_availability
==
7288 IF_INTERFACE_STATE_INTERFACE_AVAILABLE
) {
7289 if_index_available
= ifp
->if_index
;
7292 ifnet_lock_done(ifp
);
7295 * Check if the TCP connections going on this interface should be
7296 * forced to send probe packets instead of waiting for TCP timers
7297 * to fire. This will be done when there is an explicit
7298 * notification that the interface became available.
7300 if (if_index_available
> 0)
7301 tcp_interface_send_probe(if_index_available
);
7307 if_get_state(struct ifnet
*ifp
,
7308 struct if_interface_state
*if_interface_state
)
7310 ifnet_lock_shared(ifp
);
7312 if_interface_state
->valid_bitmask
= 0;
7314 if (ifp
->if_interface_state
.valid_bitmask
&
7315 IF_INTERFACE_STATE_RRC_STATE_VALID
) {
7316 if_interface_state
->valid_bitmask
|=
7317 IF_INTERFACE_STATE_RRC_STATE_VALID
;
7318 if_interface_state
->rrc_state
=
7319 ifp
->if_interface_state
.rrc_state
;
7321 if (ifp
->if_interface_state
.valid_bitmask
&
7322 IF_INTERFACE_STATE_LQM_STATE_VALID
) {
7323 if_interface_state
->valid_bitmask
|=
7324 IF_INTERFACE_STATE_LQM_STATE_VALID
;
7325 if_interface_state
->lqm_state
=
7326 ifp
->if_interface_state
.lqm_state
;
7328 if (ifp
->if_interface_state
.valid_bitmask
&
7329 IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID
) {
7330 if_interface_state
->valid_bitmask
|=
7331 IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID
;
7332 if_interface_state
->interface_availability
=
7333 ifp
->if_interface_state
.interface_availability
;
7336 ifnet_lock_done(ifp
);
7340 if_probe_connectivity(struct ifnet
*ifp
, u_int32_t conn_probe
)
7342 ifnet_lock_exclusive(ifp
);
7343 if (conn_probe
> 1) {
7344 ifnet_lock_done(ifp
);
7347 if (conn_probe
== 0)
7348 ifp
->if_eflags
&= ~IFEF_PROBE_CONNECTIVITY
;
7350 ifp
->if_eflags
|= IFEF_PROBE_CONNECTIVITY
;
7351 ifnet_lock_done(ifp
);
7354 necp_update_all_clients();
7357 tcp_probe_connectivity(ifp
, conn_probe
);
7363 uuid_get_ethernet(u_int8_t
*node
)
7366 struct sockaddr_dl
*sdl
;
7368 ifnet_head_lock_shared();
7369 TAILQ_FOREACH(ifp
, &ifnet_head
, if_link
) {
7370 ifnet_lock_shared(ifp
);
7371 IFA_LOCK_SPIN(ifp
->if_lladdr
);
7372 sdl
= (struct sockaddr_dl
*)(void *)ifp
->if_lladdr
->ifa_addr
;
7373 if (sdl
->sdl_type
== IFT_ETHER
) {
7374 memcpy(node
, LLADDR(sdl
), ETHER_ADDR_LEN
);
7375 IFA_UNLOCK(ifp
->if_lladdr
);
7376 ifnet_lock_done(ifp
);
7380 IFA_UNLOCK(ifp
->if_lladdr
);
7381 ifnet_lock_done(ifp
);
7389 sysctl_rxpoll SYSCTL_HANDLER_ARGS
7391 #pragma unused(arg1, arg2)
7397 err
= sysctl_handle_int(oidp
, &i
, 0, req
);
7398 if (err
!= 0 || req
->newptr
== USER_ADDR_NULL
)
7401 if (net_rxpoll
== 0)
7409 sysctl_rxpoll_mode_holdtime SYSCTL_HANDLER_ARGS
7411 #pragma unused(arg1, arg2)
7415 q
= if_rxpoll_mode_holdtime
;
7417 err
= sysctl_handle_quad(oidp
, &q
, 0, req
);
7418 if (err
!= 0 || req
->newptr
== USER_ADDR_NULL
)
7421 if (q
< IF_RXPOLL_MODE_HOLDTIME_MIN
)
7422 q
= IF_RXPOLL_MODE_HOLDTIME_MIN
;
7424 if_rxpoll_mode_holdtime
= q
;
7430 sysctl_rxpoll_sample_holdtime SYSCTL_HANDLER_ARGS
7432 #pragma unused(arg1, arg2)
7436 q
= if_rxpoll_sample_holdtime
;
7438 err
= sysctl_handle_quad(oidp
, &q
, 0, req
);
7439 if (err
!= 0 || req
->newptr
== USER_ADDR_NULL
)
7442 if (q
< IF_RXPOLL_SAMPLETIME_MIN
)
7443 q
= IF_RXPOLL_SAMPLETIME_MIN
;
7445 if_rxpoll_sample_holdtime
= q
;
7451 sysctl_rxpoll_interval_time SYSCTL_HANDLER_ARGS
7453 #pragma unused(arg1, arg2)
7457 q
= if_rxpoll_interval_time
;
7459 err
= sysctl_handle_quad(oidp
, &q
, 0, req
);
7460 if (err
!= 0 || req
->newptr
== USER_ADDR_NULL
)
7463 if (q
< IF_RXPOLL_INTERVALTIME_MIN
)
7464 q
= IF_RXPOLL_INTERVALTIME_MIN
;
7466 if_rxpoll_interval_time
= q
;
7472 sysctl_rxpoll_wlowat SYSCTL_HANDLER_ARGS
7474 #pragma unused(arg1, arg2)
7478 i
= if_rxpoll_wlowat
;
7480 err
= sysctl_handle_int(oidp
, &i
, 0, req
);
7481 if (err
!= 0 || req
->newptr
== USER_ADDR_NULL
)
7484 if (i
== 0 || i
>= if_rxpoll_whiwat
)
7487 if_rxpoll_wlowat
= i
;
7492 sysctl_rxpoll_whiwat SYSCTL_HANDLER_ARGS
7494 #pragma unused(arg1, arg2)
7498 i
= if_rxpoll_whiwat
;
7500 err
= sysctl_handle_int(oidp
, &i
, 0, req
);
7501 if (err
!= 0 || req
->newptr
== USER_ADDR_NULL
)
7504 if (i
<= if_rxpoll_wlowat
)
7507 if_rxpoll_whiwat
= i
;
7512 sysctl_sndq_maxlen SYSCTL_HANDLER_ARGS
7514 #pragma unused(arg1, arg2)
7519 err
= sysctl_handle_int(oidp
, &i
, 0, req
);
7520 if (err
!= 0 || req
->newptr
== USER_ADDR_NULL
)
7523 if (i
< IF_SNDQ_MINLEN
)
7531 sysctl_rcvq_maxlen SYSCTL_HANDLER_ARGS
7533 #pragma unused(arg1, arg2)
7538 err
= sysctl_handle_int(oidp
, &i
, 0, req
);
7539 if (err
!= 0 || req
->newptr
== USER_ADDR_NULL
)
7542 if (i
< IF_RCVQ_MINLEN
)
7550 dlil_node_present(struct ifnet
*ifp
, struct sockaddr
*sa
,
7551 int32_t rssi
, int lqm
, int npm
, u_int8_t srvinfo
[48])
7553 struct kev_dl_node_presence kev
;
7554 struct sockaddr_dl
*sdl
;
7555 struct sockaddr_in6
*sin6
;
7559 VERIFY(sa
->sa_family
== AF_LINK
|| sa
->sa_family
== AF_INET6
);
7561 bzero(&kev
, sizeof (kev
));
7562 sin6
= &kev
.sin6_node_address
;
7563 sdl
= &kev
.sdl_node_address
;
7564 nd6_alt_node_addr_decompose(ifp
, sa
, sdl
, sin6
);
7566 kev
.link_quality_metric
= lqm
;
7567 kev
.node_proximity_metric
= npm
;
7568 bcopy(srvinfo
, kev
.node_service_info
, sizeof (kev
.node_service_info
));
7570 nd6_alt_node_present(ifp
, sin6
, sdl
, rssi
, lqm
, npm
);
7571 dlil_post_msg(ifp
, KEV_DL_SUBCLASS
, KEV_DL_NODE_PRESENCE
,
7572 &kev
.link_data
, sizeof (kev
));
7576 dlil_node_absent(struct ifnet
*ifp
, struct sockaddr
*sa
)
7578 struct kev_dl_node_absence kev
;
7579 struct sockaddr_in6
*sin6
;
7580 struct sockaddr_dl
*sdl
;
7584 VERIFY(sa
->sa_family
== AF_LINK
|| sa
->sa_family
== AF_INET6
);
7586 bzero(&kev
, sizeof (kev
));
7587 sin6
= &kev
.sin6_node_address
;
7588 sdl
= &kev
.sdl_node_address
;
7589 nd6_alt_node_addr_decompose(ifp
, sa
, sdl
, sin6
);
7591 nd6_alt_node_absent(ifp
, sin6
);
7592 dlil_post_msg(ifp
, KEV_DL_SUBCLASS
, KEV_DL_NODE_ABSENCE
,
7593 &kev
.link_data
, sizeof (kev
));
7597 dlil_ifaddr_bytes(const struct sockaddr_dl
*sdl
, size_t *sizep
,
7598 kauth_cred_t
*credp
)
7600 const u_int8_t
*bytes
;
7603 bytes
= CONST_LLADDR(sdl
);
7604 size
= sdl
->sdl_alen
;
7607 if (dlil_lladdr_ckreq
) {
7608 switch (sdl
->sdl_type
) {
7617 if (credp
&& mac_system_check_info(*credp
, "net.link.addr")) {
7618 static const u_int8_t unspec
[FIREWIRE_EUI64_LEN
] = {
7626 #pragma unused(credp)
7629 if (sizep
!= NULL
) *sizep
= size
;
7634 dlil_report_issues(struct ifnet
*ifp
, u_int8_t modid
[DLIL_MODIDLEN
],
7635 u_int8_t info
[DLIL_MODARGLEN
])
7637 struct kev_dl_issues kev
;
7640 VERIFY(ifp
!= NULL
);
7641 VERIFY(modid
!= NULL
);
7642 _CASSERT(sizeof (kev
.modid
) == DLIL_MODIDLEN
);
7643 _CASSERT(sizeof (kev
.info
) == DLIL_MODARGLEN
);
7645 bzero(&kev
, sizeof (kev
));
7648 kev
.timestamp
= tv
.tv_sec
;
7649 bcopy(modid
, &kev
.modid
, DLIL_MODIDLEN
);
7651 bcopy(info
, &kev
.info
, DLIL_MODARGLEN
);
7653 dlil_post_msg(ifp
, KEV_DL_SUBCLASS
, KEV_DL_ISSUES
,
7654 &kev
.link_data
, sizeof (kev
));
7658 ifnet_getset_opportunistic(ifnet_t ifp
, u_long cmd
, struct ifreq
*ifr
,
7661 u_int32_t level
= IFNET_THROTTLE_OFF
;
7664 VERIFY(cmd
== SIOCSIFOPPORTUNISTIC
|| cmd
== SIOCGIFOPPORTUNISTIC
);
7666 if (cmd
== SIOCSIFOPPORTUNISTIC
) {
7668 * XXX: Use priv_check_cred() instead of root check?
7670 if ((result
= proc_suser(p
)) != 0)
7673 if (ifr
->ifr_opportunistic
.ifo_flags
==
7674 IFRIFOF_BLOCK_OPPORTUNISTIC
)
7675 level
= IFNET_THROTTLE_OPPORTUNISTIC
;
7676 else if (ifr
->ifr_opportunistic
.ifo_flags
== 0)
7677 level
= IFNET_THROTTLE_OFF
;
7682 result
= ifnet_set_throttle(ifp
, level
);
7683 } else if ((result
= ifnet_get_throttle(ifp
, &level
)) == 0) {
7684 ifr
->ifr_opportunistic
.ifo_flags
= 0;
7685 if (level
== IFNET_THROTTLE_OPPORTUNISTIC
) {
7686 ifr
->ifr_opportunistic
.ifo_flags
|=
7687 IFRIFOF_BLOCK_OPPORTUNISTIC
;
7692 * Return the count of current opportunistic connections
7693 * over the interface.
7697 flags
|= (cmd
== SIOCSIFOPPORTUNISTIC
) ?
7698 INPCB_OPPORTUNISTIC_SETCMD
: 0;
7699 flags
|= (level
== IFNET_THROTTLE_OPPORTUNISTIC
) ?
7700 INPCB_OPPORTUNISTIC_THROTTLEON
: 0;
7701 ifr
->ifr_opportunistic
.ifo_inuse
=
7702 udp_count_opportunistic(ifp
->if_index
, flags
) +
7703 tcp_count_opportunistic(ifp
->if_index
, flags
);
7706 if (result
== EALREADY
)
7713 ifnet_get_throttle(struct ifnet
*ifp
, u_int32_t
*level
)
7715 struct ifclassq
*ifq
;
7718 if (!(ifp
->if_eflags
& IFEF_TXSTART
))
7721 *level
= IFNET_THROTTLE_OFF
;
7725 /* Throttling works only for IFCQ, not ALTQ instances */
7726 if (IFCQ_IS_ENABLED(ifq
))
7727 IFCQ_GET_THROTTLE(ifq
, *level
, err
);
7734 ifnet_set_throttle(struct ifnet
*ifp
, u_int32_t level
)
7736 struct ifclassq
*ifq
;
7739 if (!(ifp
->if_eflags
& IFEF_TXSTART
))
7745 case IFNET_THROTTLE_OFF
:
7746 case IFNET_THROTTLE_OPPORTUNISTIC
:
7753 if (IFCQ_IS_ENABLED(ifq
))
7754 IFCQ_SET_THROTTLE(ifq
, level
, err
);
7758 printf("%s: throttling level set to %d\n", if_name(ifp
),
7760 if (level
== IFNET_THROTTLE_OFF
)
7768 ifnet_getset_log(ifnet_t ifp
, u_long cmd
, struct ifreq
*ifr
,
7774 int level
, category
, subcategory
;
7776 VERIFY(cmd
== SIOCSIFLOG
|| cmd
== SIOCGIFLOG
);
7778 if (cmd
== SIOCSIFLOG
) {
7779 if ((result
= priv_check_cred(kauth_cred_get(),
7780 PRIV_NET_INTERFACE_CONTROL
, 0)) != 0)
7783 level
= ifr
->ifr_log
.ifl_level
;
7784 if (level
< IFNET_LOG_MIN
|| level
> IFNET_LOG_MAX
)
7787 flags
= ifr
->ifr_log
.ifl_flags
;
7788 if ((flags
&= IFNET_LOGF_MASK
) == 0)
7791 category
= ifr
->ifr_log
.ifl_category
;
7792 subcategory
= ifr
->ifr_log
.ifl_subcategory
;
7795 result
= ifnet_set_log(ifp
, level
, flags
,
7796 category
, subcategory
);
7798 result
= ifnet_get_log(ifp
, &level
, &flags
, &category
,
7801 ifr
->ifr_log
.ifl_level
= level
;
7802 ifr
->ifr_log
.ifl_flags
= flags
;
7803 ifr
->ifr_log
.ifl_category
= category
;
7804 ifr
->ifr_log
.ifl_subcategory
= subcategory
;
7812 ifnet_set_log(struct ifnet
*ifp
, int32_t level
, uint32_t flags
,
7813 int32_t category
, int32_t subcategory
)
7817 VERIFY(level
>= IFNET_LOG_MIN
&& level
<= IFNET_LOG_MAX
);
7818 VERIFY(flags
& IFNET_LOGF_MASK
);
7821 * The logging level applies to all facilities; make sure to
7822 * update them all with the most current level.
7824 flags
|= ifp
->if_log
.flags
;
7826 if (ifp
->if_output_ctl
!= NULL
) {
7827 struct ifnet_log_params l
;
7829 bzero(&l
, sizeof (l
));
7832 l
.flags
&= ~IFNET_LOGF_DLIL
;
7833 l
.category
= category
;
7834 l
.subcategory
= subcategory
;
7836 /* Send this request to lower layers */
7838 err
= ifp
->if_output_ctl(ifp
, IFNET_CTL_SET_LOG
,
7841 } else if ((flags
& ~IFNET_LOGF_DLIL
) && ifp
->if_output_ctl
== NULL
) {
7843 * If targeted to the lower layers without an output
7844 * control callback registered on the interface, just
7845 * silently ignore facilities other than ours.
7847 flags
&= IFNET_LOGF_DLIL
;
7848 if (flags
== 0 && (!(ifp
->if_log
.flags
& IFNET_LOGF_DLIL
)))
7853 if ((ifp
->if_log
.level
= level
) == IFNET_LOG_DEFAULT
)
7854 ifp
->if_log
.flags
= 0;
7856 ifp
->if_log
.flags
|= flags
;
7858 log(LOG_INFO
, "%s: logging level set to %d flags=%b "
7859 "arg=%b, category=%d subcategory=%d\n", if_name(ifp
),
7860 ifp
->if_log
.level
, ifp
->if_log
.flags
,
7861 IFNET_LOGF_BITS
, flags
, IFNET_LOGF_BITS
,
7862 category
, subcategory
);
7869 ifnet_get_log(struct ifnet
*ifp
, int32_t *level
, uint32_t *flags
,
7870 int32_t *category
, int32_t *subcategory
)
7873 *level
= ifp
->if_log
.level
;
7875 *flags
= ifp
->if_log
.flags
;
7876 if (category
!= NULL
)
7877 *category
= ifp
->if_log
.category
;
7878 if (subcategory
!= NULL
)
7879 *subcategory
= ifp
->if_log
.subcategory
;
7885 ifnet_notify_address(struct ifnet
*ifp
, int af
)
7887 struct ifnet_notify_address_params na
;
7890 (void) pf_ifaddr_hook(ifp
);
7893 if (ifp
->if_output_ctl
== NULL
)
7894 return (EOPNOTSUPP
);
7896 bzero(&na
, sizeof (na
));
7897 na
.address_family
= af
;
7899 return (ifp
->if_output_ctl(ifp
, IFNET_CTL_NOTIFY_ADDRESS
,
7904 ifnet_flowid(struct ifnet
*ifp
, uint32_t *flowid
)
7906 if (ifp
== NULL
|| flowid
== NULL
) {
7908 } else if (!(ifp
->if_eflags
& IFEF_TXSTART
) ||
7909 !IF_FULLY_ATTACHED(ifp
)) {
7913 *flowid
= ifp
->if_flowhash
;
7919 ifnet_disable_output(struct ifnet
*ifp
)
7925 } else if (!(ifp
->if_eflags
& IFEF_TXSTART
) ||
7926 !IF_FULLY_ATTACHED(ifp
)) {
7930 if ((err
= ifnet_fc_add(ifp
)) == 0) {
7931 lck_mtx_lock_spin(&ifp
->if_start_lock
);
7932 ifp
->if_start_flags
|= IFSF_FLOW_CONTROLLED
;
7933 lck_mtx_unlock(&ifp
->if_start_lock
);
7939 ifnet_enable_output(struct ifnet
*ifp
)
7943 } else if (!(ifp
->if_eflags
& IFEF_TXSTART
) ||
7944 !IF_FULLY_ATTACHED(ifp
)) {
7948 ifnet_start_common(ifp
, TRUE
);
7953 ifnet_flowadv(uint32_t flowhash
)
7955 struct ifnet_fc_entry
*ifce
;
7958 ifce
= ifnet_fc_get(flowhash
);
7962 VERIFY(ifce
->ifce_ifp
!= NULL
);
7963 ifp
= ifce
->ifce_ifp
;
7965 /* flow hash gets recalculated per attach, so check */
7966 if (ifnet_is_attached(ifp
, 1)) {
7967 if (ifp
->if_flowhash
== flowhash
)
7968 (void) ifnet_enable_output(ifp
);
7969 ifnet_decr_iorefcnt(ifp
);
7971 ifnet_fc_entry_free(ifce
);
7975 * Function to compare ifnet_fc_entries in ifnet flow control tree
7978 ifce_cmp(const struct ifnet_fc_entry
*fc1
, const struct ifnet_fc_entry
*fc2
)
7980 return (fc1
->ifce_flowhash
- fc2
->ifce_flowhash
);
7984 ifnet_fc_add(struct ifnet
*ifp
)
7986 struct ifnet_fc_entry keyfc
, *ifce
;
7989 VERIFY(ifp
!= NULL
&& (ifp
->if_eflags
& IFEF_TXSTART
));
7990 VERIFY(ifp
->if_flowhash
!= 0);
7991 flowhash
= ifp
->if_flowhash
;
7993 bzero(&keyfc
, sizeof (keyfc
));
7994 keyfc
.ifce_flowhash
= flowhash
;
7996 lck_mtx_lock_spin(&ifnet_fc_lock
);
7997 ifce
= RB_FIND(ifnet_fc_tree
, &ifnet_fc_tree
, &keyfc
);
7998 if (ifce
!= NULL
&& ifce
->ifce_ifp
== ifp
) {
7999 /* Entry is already in ifnet_fc_tree, return */
8000 lck_mtx_unlock(&ifnet_fc_lock
);
8006 * There is a different fc entry with the same flow hash
8007 * but different ifp pointer. There can be a collision
8008 * on flow hash but the probability is low. Let's just
8009 * avoid adding a second one when there is a collision.
8011 lck_mtx_unlock(&ifnet_fc_lock
);
8015 /* become regular mutex */
8016 lck_mtx_convert_spin(&ifnet_fc_lock
);
8018 ifce
= zalloc(ifnet_fc_zone
);
8020 /* memory allocation failed */
8021 lck_mtx_unlock(&ifnet_fc_lock
);
8024 bzero(ifce
, ifnet_fc_zone_size
);
8026 ifce
->ifce_flowhash
= flowhash
;
8027 ifce
->ifce_ifp
= ifp
;
8029 RB_INSERT(ifnet_fc_tree
, &ifnet_fc_tree
, ifce
);
8030 lck_mtx_unlock(&ifnet_fc_lock
);
8034 static struct ifnet_fc_entry
*
8035 ifnet_fc_get(uint32_t flowhash
)
8037 struct ifnet_fc_entry keyfc
, *ifce
;
8040 bzero(&keyfc
, sizeof (keyfc
));
8041 keyfc
.ifce_flowhash
= flowhash
;
8043 lck_mtx_lock_spin(&ifnet_fc_lock
);
8044 ifce
= RB_FIND(ifnet_fc_tree
, &ifnet_fc_tree
, &keyfc
);
8046 /* Entry is not present in ifnet_fc_tree, return */
8047 lck_mtx_unlock(&ifnet_fc_lock
);
8051 RB_REMOVE(ifnet_fc_tree
, &ifnet_fc_tree
, ifce
);
8053 VERIFY(ifce
->ifce_ifp
!= NULL
);
8054 ifp
= ifce
->ifce_ifp
;
8056 /* become regular mutex */
8057 lck_mtx_convert_spin(&ifnet_fc_lock
);
8059 if (!ifnet_is_attached(ifp
, 0)) {
8061 * This ifp is not attached or in the process of being
8062 * detached; just don't process it.
8064 ifnet_fc_entry_free(ifce
);
8067 lck_mtx_unlock(&ifnet_fc_lock
);
8073 ifnet_fc_entry_free(struct ifnet_fc_entry
*ifce
)
8075 zfree(ifnet_fc_zone
, ifce
);
8079 ifnet_calc_flowhash(struct ifnet
*ifp
)
8081 struct ifnet_flowhash_key fh
__attribute__((aligned(8)));
8082 uint32_t flowhash
= 0;
8084 if (ifnet_flowhash_seed
== 0)
8085 ifnet_flowhash_seed
= RandomULong();
8087 bzero(&fh
, sizeof (fh
));
8089 (void) snprintf(fh
.ifk_name
, sizeof (fh
.ifk_name
), "%s", ifp
->if_name
);
8090 fh
.ifk_unit
= ifp
->if_unit
;
8091 fh
.ifk_flags
= ifp
->if_flags
;
8092 fh
.ifk_eflags
= ifp
->if_eflags
;
8093 fh
.ifk_capabilities
= ifp
->if_capabilities
;
8094 fh
.ifk_capenable
= ifp
->if_capenable
;
8095 fh
.ifk_output_sched_model
= ifp
->if_output_sched_model
;
8096 fh
.ifk_rand1
= RandomULong();
8097 fh
.ifk_rand2
= RandomULong();
8100 flowhash
= net_flowhash(&fh
, sizeof (fh
), ifnet_flowhash_seed
);
8101 if (flowhash
== 0) {
8102 /* try to get a non-zero flowhash */
8103 ifnet_flowhash_seed
= RandomULong();
8111 ifnet_set_netsignature(struct ifnet
*ifp
, uint8_t family
, uint8_t len
,
8112 uint16_t flags
, uint8_t *data
)
8114 #pragma unused(flags)
8119 if_inetdata_lock_exclusive(ifp
);
8120 if (IN_IFEXTRA(ifp
) != NULL
) {
8122 /* Allow clearing the signature */
8123 IN_IFEXTRA(ifp
)->netsig_len
= 0;
8124 bzero(IN_IFEXTRA(ifp
)->netsig
,
8125 sizeof (IN_IFEXTRA(ifp
)->netsig
));
8126 if_inetdata_lock_done(ifp
);
8128 } else if (len
> sizeof (IN_IFEXTRA(ifp
)->netsig
)) {
8130 if_inetdata_lock_done(ifp
);
8133 IN_IFEXTRA(ifp
)->netsig_len
= len
;
8134 bcopy(data
, IN_IFEXTRA(ifp
)->netsig
, len
);
8138 if_inetdata_lock_done(ifp
);
8142 if_inet6data_lock_exclusive(ifp
);
8143 if (IN6_IFEXTRA(ifp
) != NULL
) {
8145 /* Allow clearing the signature */
8146 IN6_IFEXTRA(ifp
)->netsig_len
= 0;
8147 bzero(IN6_IFEXTRA(ifp
)->netsig
,
8148 sizeof (IN6_IFEXTRA(ifp
)->netsig
));
8149 if_inet6data_lock_done(ifp
);
8151 } else if (len
> sizeof (IN6_IFEXTRA(ifp
)->netsig
)) {
8153 if_inet6data_lock_done(ifp
);
8156 IN6_IFEXTRA(ifp
)->netsig_len
= len
;
8157 bcopy(data
, IN6_IFEXTRA(ifp
)->netsig
, len
);
8161 if_inet6data_lock_done(ifp
);
8173 ifnet_get_netsignature(struct ifnet
*ifp
, uint8_t family
, uint8_t *len
,
8174 uint16_t *flags
, uint8_t *data
)
8178 if (ifp
== NULL
|| len
== NULL
|| data
== NULL
)
8183 if_inetdata_lock_shared(ifp
);
8184 if (IN_IFEXTRA(ifp
) != NULL
) {
8185 if (*len
== 0 || *len
< IN_IFEXTRA(ifp
)->netsig_len
) {
8187 if_inetdata_lock_done(ifp
);
8190 if ((*len
= IN_IFEXTRA(ifp
)->netsig_len
) > 0)
8191 bcopy(IN_IFEXTRA(ifp
)->netsig
, data
, *len
);
8197 if_inetdata_lock_done(ifp
);
8201 if_inet6data_lock_shared(ifp
);
8202 if (IN6_IFEXTRA(ifp
) != NULL
) {
8203 if (*len
== 0 || *len
< IN6_IFEXTRA(ifp
)->netsig_len
) {
8205 if_inet6data_lock_done(ifp
);
8208 if ((*len
= IN6_IFEXTRA(ifp
)->netsig_len
) > 0)
8209 bcopy(IN6_IFEXTRA(ifp
)->netsig
, data
, *len
);
8215 if_inet6data_lock_done(ifp
);
8223 if (error
== 0 && flags
!= NULL
)
8231 ifnet_set_nat64prefix(struct ifnet
*ifp
, struct ipv6_prefix
*prefixes
)
8233 int i
, error
= 0, one_set
= 0;
8235 if_inet6data_lock_exclusive(ifp
);
8237 if (IN6_IFEXTRA(ifp
) == NULL
) {
8242 for (i
= 0; i
< NAT64_MAX_NUM_PREFIXES
; i
++) {
8243 uint32_t prefix_len
=
8244 prefixes
[i
].prefix_len
;
8245 struct in6_addr
*prefix
=
8246 &prefixes
[i
].ipv6_prefix
;
8248 if (prefix_len
== 0) {
8249 /* Allow clearing the signature */
8250 IN6_IFEXTRA(ifp
)->nat64_prefixes
[i
].prefix_len
= 0;
8251 bzero(&IN6_IFEXTRA(ifp
)->nat64_prefixes
[i
].ipv6_prefix
,
8252 sizeof(struct in6_addr
));
8255 } else if (prefix_len
!= NAT64_PREFIX_LEN_32
&&
8256 prefix_len
!= NAT64_PREFIX_LEN_40
&&
8257 prefix_len
!= NAT64_PREFIX_LEN_48
&&
8258 prefix_len
!= NAT64_PREFIX_LEN_56
&&
8259 prefix_len
!= NAT64_PREFIX_LEN_64
&&
8260 prefix_len
!= NAT64_PREFIX_LEN_96
) {
8265 if (IN6_IS_SCOPE_EMBED(prefix
)) {
8270 IN6_IFEXTRA(ifp
)->nat64_prefixes
[i
].prefix_len
= prefix_len
;
8271 bcopy(prefix
, &IN6_IFEXTRA(ifp
)->nat64_prefixes
[i
].ipv6_prefix
,
8272 sizeof(struct in6_addr
));
8277 if_inet6data_lock_done(ifp
);
8279 if (error
== 0 && one_set
!= 0)
8280 necp_update_all_clients();
8286 ifnet_get_nat64prefix(struct ifnet
*ifp
, struct ipv6_prefix
*prefixes
)
8288 int i
, found_one
= 0, error
= 0;
8293 if_inet6data_lock_shared(ifp
);
8295 if (IN6_IFEXTRA(ifp
) == NULL
) {
8300 for (i
= 0; i
< NAT64_MAX_NUM_PREFIXES
; i
++) {
8301 if (IN6_IFEXTRA(ifp
)->nat64_prefixes
[i
].prefix_len
!= 0)
8305 if (found_one
== 0) {
8311 bcopy(IN6_IFEXTRA(ifp
)->nat64_prefixes
, prefixes
,
8312 sizeof(IN6_IFEXTRA(ifp
)->nat64_prefixes
));
8315 if_inet6data_lock_done(ifp
);
8322 dlil_output_cksum_dbg(struct ifnet
*ifp
, struct mbuf
*m
, uint32_t hoff
,
8323 protocol_family_t pf
)
8328 if (!(hwcksum_dbg_mode
& HWCKSUM_DBG_FINALIZE_FORCED
) ||
8329 (m
->m_pkthdr
.csum_flags
& (CSUM_TSO_IPV4
|CSUM_TSO_IPV6
)))
8334 did_sw
= in_finalize_cksum(m
, hoff
, m
->m_pkthdr
.csum_flags
);
8335 if (did_sw
& CSUM_DELAY_IP
)
8336 hwcksum_dbg_finalized_hdr
++;
8337 if (did_sw
& CSUM_DELAY_DATA
)
8338 hwcksum_dbg_finalized_data
++;
8343 * Checksum offload should not have been enabled when
8344 * extension headers exist; that also means that we
8345 * cannot force-finalize packets with extension headers.
8346 * Indicate to the callee should it skip such case by
8347 * setting optlen to -1.
8349 did_sw
= in6_finalize_cksum(m
, hoff
, -1, -1,
8350 m
->m_pkthdr
.csum_flags
);
8351 if (did_sw
& CSUM_DELAY_IPV6_DATA
)
8352 hwcksum_dbg_finalized_data
++;
8361 dlil_input_cksum_dbg(struct ifnet
*ifp
, struct mbuf
*m
, char *frame_header
,
8362 protocol_family_t pf
)
8367 if (frame_header
== NULL
||
8368 frame_header
< (char *)mbuf_datastart(m
) ||
8369 frame_header
> (char *)m
->m_data
) {
8370 printf("%s: frame header pointer 0x%llx out of range "
8371 "[0x%llx,0x%llx] for mbuf 0x%llx\n", if_name(ifp
),
8372 (uint64_t)VM_KERNEL_ADDRPERM(frame_header
),
8373 (uint64_t)VM_KERNEL_ADDRPERM(mbuf_datastart(m
)),
8374 (uint64_t)VM_KERNEL_ADDRPERM(m
->m_data
),
8375 (uint64_t)VM_KERNEL_ADDRPERM(m
));
8378 hlen
= (m
->m_data
- frame_header
);
8391 * Force partial checksum offload; useful to simulate cases
8392 * where the hardware does not support partial checksum offload,
8393 * in order to validate correctness throughout the layers above.
8395 if (hwcksum_dbg_mode
& HWCKSUM_DBG_PARTIAL_FORCED
) {
8396 uint32_t foff
= hwcksum_dbg_partial_rxoff_forced
;
8398 if (foff
> (uint32_t)m
->m_pkthdr
.len
)
8401 m
->m_pkthdr
.csum_flags
&= ~CSUM_RX_FLAGS
;
8403 /* Compute 16-bit 1's complement sum from forced offset */
8404 sum
= m_sum16(m
, foff
, (m
->m_pkthdr
.len
- foff
));
8406 m
->m_pkthdr
.csum_flags
|= (CSUM_DATA_VALID
| CSUM_PARTIAL
);
8407 m
->m_pkthdr
.csum_rx_val
= sum
;
8408 m
->m_pkthdr
.csum_rx_start
= (foff
+ hlen
);
8410 hwcksum_dbg_partial_forced
++;
8411 hwcksum_dbg_partial_forced_bytes
+= m
->m_pkthdr
.len
;
8415 * Partial checksum offload verification (and adjustment);
8416 * useful to validate and test cases where the hardware
8417 * supports partial checksum offload.
8419 if ((m
->m_pkthdr
.csum_flags
&
8420 (CSUM_DATA_VALID
| CSUM_PARTIAL
| CSUM_PSEUDO_HDR
)) ==
8421 (CSUM_DATA_VALID
| CSUM_PARTIAL
)) {
8424 /* Start offset must begin after frame header */
8425 rxoff
= m
->m_pkthdr
.csum_rx_start
;
8427 hwcksum_dbg_bad_rxoff
++;
8429 printf("%s: partial cksum start offset %d "
8430 "is less than frame header length %d for "
8431 "mbuf 0x%llx\n", if_name(ifp
), rxoff
, hlen
,
8432 (uint64_t)VM_KERNEL_ADDRPERM(m
));
8438 if (!(hwcksum_dbg_mode
& HWCKSUM_DBG_PARTIAL_FORCED
)) {
8440 * Compute the expected 16-bit 1's complement sum;
8441 * skip this if we've already computed it above
8442 * when partial checksum offload is forced.
8444 sum
= m_sum16(m
, rxoff
, (m
->m_pkthdr
.len
- rxoff
));
8446 /* Hardware or driver is buggy */
8447 if (sum
!= m
->m_pkthdr
.csum_rx_val
) {
8448 hwcksum_dbg_bad_cksum
++;
8450 printf("%s: bad partial cksum value "
8451 "0x%x (expected 0x%x) for mbuf "
8452 "0x%llx [rx_start %d]\n",
8454 m
->m_pkthdr
.csum_rx_val
, sum
,
8455 (uint64_t)VM_KERNEL_ADDRPERM(m
),
8456 m
->m_pkthdr
.csum_rx_start
);
8461 hwcksum_dbg_verified
++;
8464 * This code allows us to emulate various hardwares that
8465 * perform 16-bit 1's complement sum beginning at various
8466 * start offset values.
8468 if (hwcksum_dbg_mode
& HWCKSUM_DBG_PARTIAL_RXOFF_ADJ
) {
8469 uint32_t aoff
= hwcksum_dbg_partial_rxoff_adj
;
8471 if (aoff
== rxoff
|| aoff
> (uint32_t)m
->m_pkthdr
.len
)
8474 sum
= m_adj_sum16(m
, rxoff
, aoff
,
8475 m_pktlen(m
) - aoff
, sum
);
8477 m
->m_pkthdr
.csum_rx_val
= sum
;
8478 m
->m_pkthdr
.csum_rx_start
= (aoff
+ hlen
);
8480 hwcksum_dbg_adjusted
++;
8486 sysctl_hwcksum_dbg_mode SYSCTL_HANDLER_ARGS
8488 #pragma unused(arg1, arg2)
8492 i
= hwcksum_dbg_mode
;
8494 err
= sysctl_handle_int(oidp
, &i
, 0, req
);
8495 if (err
!= 0 || req
->newptr
== USER_ADDR_NULL
)
8498 if (hwcksum_dbg
== 0)
8501 if ((i
& ~HWCKSUM_DBG_MASK
) != 0)
8504 hwcksum_dbg_mode
= (i
& HWCKSUM_DBG_MASK
);
8510 sysctl_hwcksum_dbg_partial_rxoff_forced SYSCTL_HANDLER_ARGS
8512 #pragma unused(arg1, arg2)
8516 i
= hwcksum_dbg_partial_rxoff_forced
;
8518 err
= sysctl_handle_int(oidp
, &i
, 0, req
);
8519 if (err
!= 0 || req
->newptr
== USER_ADDR_NULL
)
8522 if (!(hwcksum_dbg_mode
& HWCKSUM_DBG_PARTIAL_FORCED
))
8525 hwcksum_dbg_partial_rxoff_forced
= i
;
8531 sysctl_hwcksum_dbg_partial_rxoff_adj SYSCTL_HANDLER_ARGS
8533 #pragma unused(arg1, arg2)
8537 i
= hwcksum_dbg_partial_rxoff_adj
;
8539 err
= sysctl_handle_int(oidp
, &i
, 0, req
);
8540 if (err
!= 0 || req
->newptr
== USER_ADDR_NULL
)
8543 if (!(hwcksum_dbg_mode
& HWCKSUM_DBG_PARTIAL_RXOFF_ADJ
))
8546 hwcksum_dbg_partial_rxoff_adj
= i
;
8552 sysctl_tx_chain_len_stats SYSCTL_HANDLER_ARGS
8554 #pragma unused(oidp, arg1, arg2)
8557 if (req
->oldptr
== USER_ADDR_NULL
) {
8560 if (req
->newptr
!= USER_ADDR_NULL
) {
8563 err
= SYSCTL_OUT(req
, &tx_chain_len_stats
,
8564 sizeof(struct chain_len_stats
));
#if DEBUG || DEVELOPMENT
/* Blob for sum16 verification */
static uint8_t sumdata[] = {
	0x1f, 0x8b, 0x08, 0x08, 0x4c, 0xe5, 0x9a, 0x4f, 0x00, 0x03,
	0x5f, 0x00, 0x5d, 0x91, 0x41, 0x4e, 0xc4, 0x30, 0x0c, 0x45,
	0xf7, 0x9c, 0xc2, 0x07, 0x18, 0xf5, 0x0e, 0xb0, 0xe2, 0x00,
	0x48, 0x88, 0xa5, 0xdb, 0xba, 0x49, 0x34, 0x69, 0xdc, 0x71,
	0x92, 0xa9, 0xc2, 0x8a, 0x6b, 0x70, 0x3d, 0x4e, 0x82, 0x93,
	0xb4, 0x08, 0xd8, 0xc5, 0xb1, 0xfd, 0xff, 0xb3, 0xfd, 0x4c,
	0x42, 0x5f, 0x1f, 0x9f, 0x11, 0x12, 0x43, 0xb2, 0x04, 0x93,
	0xe0, 0x7b, 0x01, 0x0e, 0x14, 0x07, 0x78, 0xd1, 0x78, 0x75,
	0x71, 0x71, 0xe9, 0x08, 0x84, 0x46, 0xf2, 0xc7, 0x3b, 0x09,
	0xe7, 0xd1, 0xd3, 0x8a, 0x57, 0x92, 0x33, 0xcd, 0x39, 0xcc,
	0xb0, 0x91, 0x89, 0xe0, 0x42, 0x53, 0x8b, 0xb7, 0x8c, 0x42,
	0x60, 0xd9, 0x9f, 0x7a, 0x55, 0x19, 0x76, 0xcb, 0x10, 0x49,
	0x35, 0xac, 0x0b, 0x5a, 0x3c, 0xbb, 0x65, 0x51, 0x8c, 0x90,
	0x7c, 0x69, 0x45, 0x45, 0x81, 0xb4, 0x2b, 0x70, 0x82, 0x85,
	0x55, 0x91, 0x17, 0x90, 0xdc, 0x14, 0x1e, 0x35, 0x52, 0xdd,
	0x02, 0x16, 0xef, 0xb5, 0x40, 0x89, 0xe2, 0x46, 0x53, 0xad,
	0x93, 0x6e, 0x98, 0x30, 0xe5, 0x08, 0xb7, 0xcc, 0x03, 0xbc,
	0x71, 0x86, 0x09, 0x43, 0x0d, 0x52, 0xf5, 0xa2, 0xf5, 0xa2,
	0x56, 0x11, 0x8d, 0xa8, 0xf5, 0xee, 0x92, 0x3d, 0xfe, 0x8c,
	0x67, 0x71, 0x8b, 0x0e, 0x2d, 0x70, 0x77, 0xbe, 0xbe, 0xea,
	0xbf, 0x9a, 0x8d, 0x9c, 0x53, 0x53, 0xe5, 0xe0, 0x4b, 0x87,
	0x85, 0xd2, 0x45, 0x95, 0x30, 0xc1, 0xcc, 0xe0, 0x74, 0x54,
	0x13, 0x58, 0xe8, 0xe8, 0x79, 0xa2, 0x09, 0x73, 0xa4, 0x0e,
	0x39, 0x59, 0x0c, 0xe6, 0x9c, 0xb2, 0x4f, 0x06, 0x5b, 0x8e,
	0xcd, 0x17, 0x6c, 0x5e, 0x95, 0x4d, 0x70, 0xa2, 0x0a, 0xbf,
	0xa3, 0xcc, 0x03, 0xbc, 0x5a, 0xe7, 0x75, 0x06, 0x5e, 0x75,
	0xef, 0x58, 0x8e, 0x15, 0xd1, 0x0a, 0x18, 0xff, 0xdd, 0xe6,
	0x02, 0x3b, 0xb5, 0xb4, 0xa1, 0xe0, 0x72, 0xfc, 0xe3, 0xab,
	0x07, 0xe0, 0x4d, 0x65, 0xea, 0x92, 0xeb, 0xf2, 0x7b, 0x17,
	0x05, 0xce, 0xc6, 0xf6, 0x2b, 0xbb, 0x70, 0x3d, 0x00, 0x95,
	0xe0, 0x07, 0x52, 0x3b, 0x58, 0xfc, 0x7c, 0x69, 0x4d, 0xe9,
	0xf7, 0xa9, 0x66, 0x1e, 0x1e, 0xbe, 0x01, 0x69, 0x98, 0xfe,
	0xc8, 0x28, 0x02, 0x00, 0x00
};

/* Precomputed 16-bit 1's complement sums for various spans of the above data */
static struct {
	boolean_t	init;
	uint16_t	len;
	uint16_t	sumr;		/* reference */
	uint16_t	sumrp;		/* reference, precomputed */
} sumtbl[] = {
	{ FALSE, 0, 0, 0x0000 },
	{ FALSE, 1, 0, 0x001f },
	{ FALSE, 2, 0, 0x8b1f },
	{ FALSE, 3, 0, 0x8b27 },
	{ FALSE, 7, 0, 0x790e },
	{ FALSE, 11, 0, 0xcb6d },
	{ FALSE, 20, 0, 0x20dd },
	{ FALSE, 27, 0, 0xbabd },
	{ FALSE, 32, 0, 0xf3e8 },
	{ FALSE, 37, 0, 0x197d },
	{ FALSE, 43, 0, 0x9eae },
	{ FALSE, 64, 0, 0x4678 },
	{ FALSE, 127, 0, 0x9399 },
	{ FALSE, 256, 0, 0xd147 },
	{ FALSE, 325, 0, 0x0358 },
};
#define	SUMTBL_MAX	((int)sizeof (sumtbl) / (int)sizeof (sumtbl[0]))

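/*
 * Each sumrp entry above matches the 16-bit 1's complement sum of the
 * first `len' bytes of sumdata, taken as little-endian 16-bit words
 * (e.g. len=2 covers bytes 0x1f, 0x8b and yields 0x8b1f; len=3 adds word
 * 0x0008 giving 0x8b27).  A plain reference implementation of that sum,
 * given here only as an illustrative sketch (b_sum16() below is the
 * optimized in-kernel routine being verified), would be:
 *
 *	static uint16_t
 *	ref_sum16(const uint8_t *b, int len)
 *	{
 *		uint32_t s = 0;
 *		int i;
 *
 *		for (i = 0; i < (len & ~1); i += 2)
 *			s += (uint32_t)(b[i] | (b[i + 1] << 8));
 *		if (len & 1)
 *			s += (uint32_t)b[len - 1];
 *		while (s > 0xffff)
 *			s = (s & 0xffff) + (s >> 16);
 *		return ((uint16_t)s);
 *	}
 */
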
static void
dlil_verify_sum16(void)
{
	struct mbuf *m;
	uint8_t *buf;
	int n;

	/* Make sure test data plus extra room for alignment fits in cluster */
	_CASSERT((sizeof (sumdata) + (sizeof (uint64_t) * 2)) <= MCLBYTES);

	kprintf("DLIL: running SUM16 self-tests ... ");

	m = m_getcl(M_WAITOK, MT_DATA, M_PKTHDR);
	MH_ALIGN(m, sizeof (uint32_t));		/* 32-bit starting alignment */
	buf = mtod(m, uint8_t *);		/* base address */

	for (n = 0; n < SUMTBL_MAX; n++) {
		uint16_t len = sumtbl[n].len;
		int i;

		/* Verify for all possible alignments */
		for (i = 0; i < (int)sizeof (uint64_t); i++) {
			uint16_t sum, sumr;
			uint8_t *c;

			/* Copy over test data to mbuf */
			VERIFY(len <= sizeof (sumdata));
			c = buf + i;
			bcopy(sumdata, c, len);

			/* Zero-offset test (align by data pointer) */
			m->m_data = (caddr_t)c;
			m->m_pkthdr.len = m->m_len = len;
			sum = m_sum16(m, 0, len);

			if (!sumtbl[n].init) {
				sumr = in_cksum_mbuf_ref(m, len, 0, 0);
				sumtbl[n].sumr = sumr;
				sumtbl[n].init = TRUE;
			} else {
				sumr = sumtbl[n].sumr;
			}

			/* Something is horribly broken; stop now */
			if (sumr != sumtbl[n].sumrp) {
				panic_plain("\n%s: broken in_cksum_mbuf_ref() "
				    "for len=%d align=%d sum=0x%04x "
				    "[expected=0x%04x]\n", __func__,
				    len, i, sum, sumr);
				/* NOTREACHED */
			} else if (sum != sumr) {
				panic_plain("\n%s: broken m_sum16() for len=%d "
				    "align=%d sum=0x%04x [expected=0x%04x]\n",
				    __func__, len, i, sum, sumr);
				/* NOTREACHED */
			}

			/* Alignment test by offset (fixed data pointer) */
			m->m_data = (caddr_t)buf;
			m->m_pkthdr.len = m->m_len = (i + len);
			sum = m_sum16(m, i, len);

			/* Something is horribly broken; stop now */
			if (sum != sumr) {
				panic_plain("\n%s: broken m_sum16() for len=%d "
				    "offset=%d sum=0x%04x [expected=0x%04x]\n",
				    __func__, len, i, sum, sumr);
				/* NOTREACHED */
			}

			/* Simple sum16 contiguous buffer test by alignment */
			sum = b_sum16(c, len);

			/* Something is horribly broken; stop now */
			if (sum != sumr) {
				panic_plain("\n%s: broken b_sum16() for len=%d "
				    "align=%d sum=0x%04x [expected=0x%04x]\n",
				    __func__, len, i, sum, sumr);
				/* NOTREACHED */
			}
		}
	}
	m_freem(m);

	kprintf("PASSED\n");
}
#endif /* DEBUG || DEVELOPMENT */

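/*
 * dlil_verify_sum16() exercises three layers that must all agree with
 * in_cksum_mbuf_ref() and the precomputed constants: m_sum16() at offset 0,
 * m_sum16() at a byte offset, and b_sum16() over a flat buffer.  It is
 * presumably invoked once during early DLIL initialization on
 * DEBUG/DEVELOPMENT kernels, e.g. (sketch only, call site assumed):
 *
 *	#if DEBUG || DEVELOPMENT
 *		dlil_verify_sum16();
 *	#endif
 */
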
#define	CASE_STRINGIFY(x) case x: return #x

__private_extern__ const char *
dlil_kev_dl_code_str(u_int32_t event_code)
{
	switch (event_code) {
	CASE_STRINGIFY(KEV_DL_SIFFLAGS);
	CASE_STRINGIFY(KEV_DL_SIFMETRICS);
	CASE_STRINGIFY(KEV_DL_SIFMTU);
	CASE_STRINGIFY(KEV_DL_SIFPHYS);
	CASE_STRINGIFY(KEV_DL_SIFMEDIA);
	CASE_STRINGIFY(KEV_DL_SIFGENERIC);
	CASE_STRINGIFY(KEV_DL_ADDMULTI);
	CASE_STRINGIFY(KEV_DL_DELMULTI);
	CASE_STRINGIFY(KEV_DL_IF_ATTACHED);
	CASE_STRINGIFY(KEV_DL_IF_DETACHING);
	CASE_STRINGIFY(KEV_DL_IF_DETACHED);
	CASE_STRINGIFY(KEV_DL_LINK_OFF);
	CASE_STRINGIFY(KEV_DL_LINK_ON);
	CASE_STRINGIFY(KEV_DL_PROTO_ATTACHED);
	CASE_STRINGIFY(KEV_DL_PROTO_DETACHED);
	CASE_STRINGIFY(KEV_DL_LINK_ADDRESS_CHANGED);
	CASE_STRINGIFY(KEV_DL_WAKEFLAGS_CHANGED);
	CASE_STRINGIFY(KEV_DL_IF_IDLE_ROUTE_REFCNT);
	CASE_STRINGIFY(KEV_DL_IFCAP_CHANGED);
	CASE_STRINGIFY(KEV_DL_LINK_QUALITY_METRIC_CHANGED);
	CASE_STRINGIFY(KEV_DL_NODE_PRESENCE);
	CASE_STRINGIFY(KEV_DL_NODE_ABSENCE);
	CASE_STRINGIFY(KEV_DL_MASTER_ELECTED);
	CASE_STRINGIFY(KEV_DL_ISSUES);
	CASE_STRINGIFY(KEV_DL_IFDELEGATE_CHANGED);
	default:
		break;
	}
	return ("");
}

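/*
 * A minimal usage sketch for the helper above, e.g. when logging a
 * data-link kernel event (the `kev' variable here is hypothetical and
 * stands for a received event message carrying a KEV_DL_* code):
 *
 *	printf("%s: event %s\n", __func__,
 *	    dlil_kev_dl_code_str(kev->event_code));
 *
 * Codes without a CASE_STRINGIFY entry fall through to the empty string.
 */
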
static void
dlil_dt_tcall_fn(thread_call_param_t arg0, thread_call_param_t arg1)
{
#pragma unused(arg1)
	struct ifnet *ifp = arg0;

	if (ifnet_is_attached(ifp, 1)) {
		nstat_ifnet_threshold_reached(ifp->if_index);
		ifnet_decr_iorefcnt(ifp);
	}
}

void
ifnet_notify_data_threshold(struct ifnet *ifp)
{
	uint64_t bytes = (ifp->if_ibytes + ifp->if_obytes);
	uint64_t oldbytes = ifp->if_dt_bytes;

	ASSERT(ifp->if_dt_tcall != NULL);

	/*
	 * If we went over the threshold, notify NetworkStatistics.
	 * We rate-limit it based on the threshold interval value.
	 */
	if (threshold_notify && (bytes - oldbytes) > ifp->if_data_threshold &&
	    OSCompareAndSwap64(oldbytes, bytes, &ifp->if_dt_bytes) &&
	    !thread_call_isactive(ifp->if_dt_tcall)) {
		uint64_t tival = (threshold_interval * NSEC_PER_SEC);
		uint64_t now = mach_absolute_time(), deadline = now;
		uint64_t ival;

		if (tival != 0) {
			nanoseconds_to_absolutetime(tival, &ival);
			clock_deadline_for_periodic_event(ival, now, &deadline);
			(void) thread_call_enter_delayed(ifp->if_dt_tcall,
			    deadline);
		} else {
			(void) thread_call_enter(ifp->if_dt_tcall);
		}
	}
}

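/*
 * A worked example of the rate limiting above, with threshold_interval
 * taken to be in seconds (it is multiplied by NSEC_PER_SEC).  For
 * threshold_interval = 2, tival = 2,000,000,000 ns;
 * nanoseconds_to_absolutetime() converts that into mach absolute-time
 * units and clock_deadline_for_periodic_event() advances the deadline to
 * the next interval boundary after `now', so NetworkStatistics is notified
 * at most once per interval.  A zero interval degenerates to an immediate
 * thread_call_enter().
 */
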
#if (DEVELOPMENT || DEBUG)
/*
 * The sysctl variable name contains the input parameters of
 * ifnet_get_keepalive_offload_frames()
 *	ifp (interface index): name[0]
 *	frames_array_count: name[1]
 *	frame_data_offset: name[2]
 * The return length gives used_frames_count
 */
static int
sysctl_get_kao_frames SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp)
	int *name = (int *)arg1;
	u_int namelen = arg2;
	int idx;
	ifnet_t ifp = NULL;
	u_int32_t frames_array_count;
	size_t frame_data_offset;
	u_int32_t used_frames_count;
	struct ifnet_keepalive_offload_frame *frames_array = NULL;
	int error = 0;
	u_int32_t i;

	/*
	 * Only root can look at other people's TCP frames
	 */
	error = proc_suser(current_proc());
	if (error != 0)
		goto done;
	/*
	 * Validate the input parameters
	 */
	if (req->newptr != USER_ADDR_NULL) {
		error = EPERM;
		goto done;
	}
	if (namelen != 3) {
		error = EINVAL;
		goto done;
	}
	if (req->oldptr == USER_ADDR_NULL) {
		error = EINVAL;
		goto done;
	}
	if (req->oldlen == 0) {
		error = EINVAL;
		goto done;
	}
	idx = name[0];
	frames_array_count = name[1];
	frame_data_offset = name[2];

	/* Make sure the passed buffer is large enough */
	if (frames_array_count * sizeof(struct ifnet_keepalive_offload_frame) >
	    req->oldlen) {
		error = ENOMEM;
		goto done;
	}

	ifnet_head_lock_shared();
	if (!IF_INDEX_IN_RANGE(idx)) {
		ifnet_head_done();
		error = ENOENT;
		goto done;
	}
	ifp = ifindex2ifnet[idx];
	ifnet_head_done();

	frames_array = _MALLOC(frames_array_count *
	    sizeof(struct ifnet_keepalive_offload_frame), M_TEMP, M_WAITOK);
	if (frames_array == NULL) {
		error = ENOMEM;
		goto done;
	}

	error = ifnet_get_keepalive_offload_frames(ifp, frames_array,
	    frames_array_count, frame_data_offset, &used_frames_count);
	if (error != 0) {
		printf("%s: ifnet_get_keepalive_offload_frames error %d\n",
		    __func__, error);
		goto done;
	}

	for (i = 0; i < used_frames_count; i++) {
		error = SYSCTL_OUT(req, frames_array + i,
		    sizeof(struct ifnet_keepalive_offload_frame));
		if (error != 0)
			goto done;
	}
done:
	if (frames_array != NULL)
		_FREE(frames_array, M_TEMP);
	return (error);
}
#endif /* DEVELOPMENT || DEBUG */

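/*
 * A user-space sketch of driving the handler above through sysctl(3).
 * The OID string and its registration are assumptions (the handler is
 * reached through a sysctl node declared elsewhere); the three trailing
 * name components carry the inputs documented in the comment above:
 *
 *	int mib[CTL_MAXNAME];
 *	size_t miblen = CTL_MAXNAME;
 *	struct ifnet_keepalive_offload_frame frames[8];
 *	size_t len = sizeof (frames);
 *
 *	if (sysctlnametomib("net.link.generic.system.get_kao_frames",
 *	    mib, &miblen) == 0) {
 *		mib[miblen] = if_nametoindex("en0");	name[0]: ifp index
 *		mib[miblen + 1] = 8;			name[1]: frames_array_count
 *		mib[miblen + 2] = 0;			name[2]: frame_data_offset
 *		if (sysctl(mib, (u_int)(miblen + 3), frames, &len,
 *		    NULL, 0) == 0)
 *			printf("%zu frames\n", len / sizeof (frames[0]));
 *	}
 *
 * Root privileges are required (proc_suser() check above), and the output
 * buffer must hold at least frames_array_count frames or ENOMEM is
 * returned.
 */
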
void
ifnet_update_stats_per_flow(struct ifnet_stats_per_flow *ifs,
    struct ifnet *ifp)
{
	tcp_update_stats_per_flow(ifs, ifp);
}

static void
dlil_mit_tcall_fn(thread_call_param_t arg0, thread_call_param_t arg1)
{
#pragma unused(arg1)
	struct ifnet *ifp = (struct ifnet *)arg0;
	struct dlil_threading_info *inp = ifp->if_inp;

	ifnet_lock_shared(ifp);
	if (!IF_FULLY_ATTACHED(ifp) || inp == NULL) {
		ifnet_lock_done(ifp);
		return;
	}

	lck_mtx_lock_spin(&inp->input_lck);
	inp->input_waiting |= DLIL_INPUT_WAITING;
	if (!(inp->input_waiting & DLIL_INPUT_RUNNING) ||
	    !qempty(&inp->rcvq_pkts)) {
		inp->wtot++;
		wakeup_one((caddr_t)&inp->input_waiting);
	}
	lck_mtx_unlock(&inp->input_lck);
	ifnet_lock_done(ifp);
}