/*
 * Copyright (c) 1999-2018 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
 * support for mandatory and extensible security protections. This notice
 * is included in support of clause 2.2 (b) of the Apple Public License,
 * Version 2.0.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/socket.h>
#include <sys/domain.h>
#include <sys/random.h>
#include <sys/socketvar.h>
#include <net/if_dl.h>
#include <net/route.h>
#include <net/if_var.h>
#include <net/if_arp.h>
#include <net/iptap.h>
#include <net/pktap.h>
#include <sys/kern_event.h>
#include <sys/kdebug.h>
#include <sys/mcache.h>
#include <sys/syslog.h>
#include <sys/protosw.h>
#include <kern/assert.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/sched_prim.h>
#include <kern/locks.h>
#include <kern/zalloc.h>
#include <net/kpi_protocol.h>
#include <net/if_types.h>
#include <net/if_llreach.h>
#include <net/kpi_interfacefilter.h>
#include <net/classq/classq.h>
#include <net/classq/classq_sfb.h>
#include <net/flowhash.h>
#include <net/ntstat.h>
#include <net/if_llatbl.h>
#include <net/net_api_stats.h>
#include <net/if_ports_used.h>
#include <netinet/in_var.h>
#include <netinet/igmp_var.h>
#include <netinet/ip_var.h>
#include <netinet/tcp.h>
#include <netinet/tcp_var.h>
#include <netinet/udp.h>
#include <netinet/udp_var.h>
#include <netinet/if_ether.h>
#include <netinet/in_pcb.h>
#include <netinet/in_tclass.h>
#include <netinet6/in6_var.h>
#include <netinet6/nd6.h>
#include <netinet6/mld6_var.h>
#include <netinet6/scope6_var.h>
#include <libkern/OSAtomic.h>
#include <libkern/tree.h>
#include <dev/random/randomdev.h>
#include <machine/machine_routines.h>
#include <mach/thread_act.h>
#include <mach/sdt.h>
#include <sys/kauth.h>
#include <security/mac_framework.h>
#include <net/ethernet.h>
#include <net/firewire.h>
#include <net/pfvar.h>
#include <net/pktsched/pktsched.h>
#include <net/necp.h>
#define DBG_LAYER_BEG           DLILDBG_CODE(DBG_DLIL_STATIC, 0)
#define DBG_LAYER_END           DLILDBG_CODE(DBG_DLIL_STATIC, 2)
#define DBG_FNC_DLIL_INPUT      DLILDBG_CODE(DBG_DLIL_STATIC, (1 << 8))
#define DBG_FNC_DLIL_OUTPUT     DLILDBG_CODE(DBG_DLIL_STATIC, (2 << 8))
#define DBG_FNC_DLIL_IFOUT      DLILDBG_CODE(DBG_DLIL_STATIC, (3 << 8))

#define MAX_FRAME_TYPE_SIZE     4 /* LONGWORDS */
#define MAX_LINKADDR            4 /* LONGWORDS */
#define M_NKE                   M_IFADDR
#if 1
#define DLIL_PRINTF     printf
#else
#define DLIL_PRINTF     kprintf
#endif
#define IF_DATA_REQUIRE_ALIGNED_64(f)   \
    _CASSERT(!(offsetof(struct if_data_internal, f) % sizeof (u_int64_t)))

#define IFNET_IF_DATA_REQUIRE_ALIGNED_64(f)     \
    _CASSERT(!(offsetof(struct ifnet, if_data.f) % sizeof (u_int64_t)))
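/*
 * Illustrative note (not part of the build): the two REQUIRE_ALIGNED_64
 * macros expand to a compile-time assertion, so a 64-bit statistics field
 * that is not 8-byte aligned is caught when this file compiles rather than
 * failing atomically at runtime.  A minimal standalone equivalent using the
 * C11 keyword, with a hypothetical field name, would look like:
 *
 *      _Static_assert(offsetof(struct if_data_internal, ifi_ibytes) %
 *          sizeof (u_int64_t) == 0, "ifi_ibytes must be 64-bit aligned");
 *
 * On targets where 64-bit integers only require 4-byte alignment (32-bit
 * ABIs), an odd number of preceding 32-bit fields can leave such a counter
 * at offset % 8 == 4, which is exactly the layout mistake these checks are
 * meant to flag for the ifi_* fields asserted in dlil_init().
 */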
/*
 * List of if_proto structures in if_proto_hash[] is protected by
 * the ifnet lock.  The rest of the fields are initialized at protocol
 * attach time and never change, thus no lock required as long as
 * a reference to it is valid, via if_proto_ref().
 */
struct if_proto {
    SLIST_ENTRY(if_proto)       next_hash;
    u_int32_t                   refcount;
    u_int32_t                   detached;
    struct ifnet                *ifp;
    protocol_family_t           protocol_family;
    int                         proto_kpi;
    union {
        struct {
            proto_media_input           input;
            proto_media_preout          pre_output;
            proto_media_event           event;
            proto_media_ioctl           ioctl;
            proto_media_detached        detached;
            proto_media_resolve_multi   resolve_multi;
            proto_media_send_arp        send_arp;
        } v1;
        struct {
            proto_media_input_v2        input;
            proto_media_preout          pre_output;
            proto_media_event           event;
            proto_media_ioctl           ioctl;
            proto_media_detached        detached;
            proto_media_resolve_multi   resolve_multi;
            proto_media_send_arp        send_arp;
        } v2;
    } kpi;
};

SLIST_HEAD(proto_hash_entry, if_proto);
#define DLIL_SDLDATALEN \
    (DLIL_SDLMAXLEN - offsetof(struct sockaddr_dl, sdl_data[0]))

struct dlil_ifnet {
    struct ifnet    dl_if;                  /* public ifnet */
    /*
     * DLIL private fields, protected by dl_if_lock
     */
    decl_lck_mtx_data(, dl_if_lock);
    TAILQ_ENTRY(dlil_ifnet) dl_if_link;     /* dlil_ifnet link */
    u_int32_t dl_if_flags;                  /* flags (below) */
    u_int32_t dl_if_refcnt;                 /* refcnt */
    void (*dl_if_trace)(struct dlil_ifnet *, int); /* ref trace callback */
    void    *dl_if_uniqueid;                /* unique interface id */
    size_t  dl_if_uniqueid_len;             /* length of the unique id */
    char    dl_if_namestorage[IFNAMSIZ];    /* interface name storage */
    char    dl_if_xnamestorage[IFXNAMSIZ];  /* external name storage */
    struct ifaddr   ifa;                    /* lladdr ifa */
    u_int8_t        asdl[DLIL_SDLMAXLEN];   /* addr storage */
    u_int8_t        msdl[DLIL_SDLMAXLEN];   /* mask storage */
    u_int8_t dl_if_descstorage[IF_DESCSIZE]; /* desc storage */
    struct dlil_threading_info dl_if_inpstorage; /* input thread storage */
    ctrace_t        dl_if_attach;           /* attach PC stacktrace */
    ctrace_t        dl_if_detach;           /* detach PC stacktrace */
};
/* Values for dl_if_flags (private to DLIL) */
#define DLIF_INUSE      0x1     /* DLIL ifnet recycler, ifnet in use */
#define DLIF_REUSE      0x2     /* DLIL ifnet recycles, ifnet is not new */
#define DLIF_DEBUG      0x4     /* has debugging info */

#define IF_REF_TRACE_HIST_SIZE  8       /* size of ref trace history */

__private_extern__ unsigned int if_ref_trace_hist_size = IF_REF_TRACE_HIST_SIZE;
struct dlil_ifnet_dbg {
    struct dlil_ifnet       dldbg_dlif;             /* dlil_ifnet */
    u_int16_t               dldbg_if_refhold_cnt;   /* # ifnet references */
    u_int16_t               dldbg_if_refrele_cnt;   /* # ifnet releases */
    /*
     * Circular lists of ifnet_{reference,release} callers.
     */
    ctrace_t                dldbg_if_refhold[IF_REF_TRACE_HIST_SIZE];
    ctrace_t                dldbg_if_refrele[IF_REF_TRACE_HIST_SIZE];
};

#define DLIL_TO_IFP(s)  (&s->dl_if)
#define IFP_TO_DLIL(s)  ((struct dlil_ifnet *)s)
struct ifnet_filter {
    TAILQ_ENTRY(ifnet_filter)   filt_next;
    u_int32_t                   filt_flags;
    ifnet_t                     filt_ifp;
    const char                  *filt_name;
    void                        *filt_cookie;
    protocol_family_t           filt_protocol;
    iff_input_func              filt_input;
    iff_output_func             filt_output;
    iff_event_func              filt_event;
    iff_ioctl_func              filt_ioctl;
    iff_detached_func           filt_detached;
};

struct proto_input_entry;
static TAILQ_HEAD(, dlil_ifnet) dlil_ifnet_head;
static lck_grp_t *dlil_lock_group;
lck_grp_t *ifnet_lock_group;
static lck_grp_t *ifnet_head_lock_group;
static lck_grp_t *ifnet_snd_lock_group;
static lck_grp_t *ifnet_rcv_lock_group;
lck_attr_t *ifnet_lock_attr;
decl_lck_rw_data(static, ifnet_head_lock);
decl_lck_mtx_data(static, dlil_ifnet_lock);
u_int32_t dlil_filter_disable_tso_count = 0;
#if IFNET_DEBUG
static unsigned int ifnet_debug = 1;    /* debugging (enabled) */
#else
static unsigned int ifnet_debug;        /* debugging (disabled) */
#endif /* !IFNET_DEBUG */
static unsigned int dlif_size;          /* size of dlil_ifnet to allocate */
static unsigned int dlif_bufsize;       /* size of dlif_size + headroom */
static struct zone *dlif_zone;          /* zone for dlil_ifnet */

#define DLIF_ZONE_MAX           64              /* maximum elements in zone */
#define DLIF_ZONE_NAME          "ifnet"         /* zone name */

static unsigned int dlif_filt_size;     /* size of ifnet_filter */
static struct zone *dlif_filt_zone;     /* zone for ifnet_filter */

#define DLIF_FILT_ZONE_MAX      8               /* maximum elements in zone */
#define DLIF_FILT_ZONE_NAME     "ifnet_filter"  /* zone name */

static unsigned int dlif_phash_size;    /* size of ifnet proto hash table */
static struct zone *dlif_phash_zone;    /* zone for ifnet proto hash table */

#define DLIF_PHASH_ZONE_MAX     DLIF_ZONE_MAX   /* maximum elements in zone */
#define DLIF_PHASH_ZONE_NAME    "ifnet_proto_hash" /* zone name */

static unsigned int dlif_proto_size;    /* size of if_proto */
static struct zone *dlif_proto_zone;    /* zone for if_proto */

#define DLIF_PROTO_ZONE_MAX     (DLIF_ZONE_MAX*2) /* maximum elements in zone */
#define DLIF_PROTO_ZONE_NAME    "ifnet_proto"   /* zone name */

static unsigned int dlif_tcpstat_size;    /* size of tcpstat_local to allocate */
static unsigned int dlif_tcpstat_bufsize; /* size of dlif_tcpstat_size + headroom */
static struct zone *dlif_tcpstat_zone;    /* zone for tcpstat_local */

#define DLIF_TCPSTAT_ZONE_MAX   1               /* maximum elements in zone */
#define DLIF_TCPSTAT_ZONE_NAME  "ifnet_tcpstat" /* zone name */

static unsigned int dlif_udpstat_size;    /* size of udpstat_local to allocate */
static unsigned int dlif_udpstat_bufsize; /* size of dlif_udpstat_size + headroom */
static struct zone *dlif_udpstat_zone;    /* zone for udpstat_local */

#define DLIF_UDPSTAT_ZONE_MAX   1               /* maximum elements in zone */
#define DLIF_UDPSTAT_ZONE_NAME  "ifnet_udpstat" /* zone name */
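/*
 * Note (illustrative): the *_ZONE_MAX / *_ZONE_NAME constants above feed the
 * zinit() calls made from dlil_init() later in this file, e.g.:
 *
 *      dlif_zone = zinit(dlif_bufsize, DLIF_ZONE_MAX * dlif_bufsize,
 *          0, DLIF_ZONE_NAME);
 *
 * i.e. each zone is sized for a small fixed number of elements of the
 * (headroom-padded) element size, and the name is what appears in zone
 * accounting tools such as zprint(1).
 */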
static u_int32_t net_rtref;

static struct dlil_main_threading_info dlil_main_input_thread_info;
__private_extern__ struct dlil_threading_info *dlil_main_input_thread =
    (struct dlil_threading_info *)&dlil_main_input_thread_info;
static int dlil_event_internal(struct ifnet *ifp, struct kev_msg *msg,
    bool update_generation);
static int dlil_detach_filter_internal(interface_filter_t filter, int detached);
static void dlil_if_trace(struct dlil_ifnet *, int);
static void if_proto_ref(struct if_proto *);
static void if_proto_free(struct if_proto *);
static struct if_proto *find_attached_proto(struct ifnet *, u_int32_t);
static u_int32_t dlil_ifp_protolist(struct ifnet *ifp, protocol_family_t *list,
    u_int32_t list_count);
static void if_flt_monitor_busy(struct ifnet *);
static void if_flt_monitor_unbusy(struct ifnet *);
static void if_flt_monitor_enter(struct ifnet *);
static void if_flt_monitor_leave(struct ifnet *);
static int dlil_interface_filters_input(struct ifnet *, struct mbuf **,
    char **, protocol_family_t);
static int dlil_interface_filters_output(struct ifnet *, struct mbuf **,
    protocol_family_t);
static struct ifaddr *dlil_alloc_lladdr(struct ifnet *,
    const struct sockaddr_dl *);
static int ifnet_lookup(struct ifnet *);
static void if_purgeaddrs(struct ifnet *);

static errno_t ifproto_media_input_v1(struct ifnet *, protocol_family_t,
    struct mbuf *, char *);
static errno_t ifproto_media_input_v2(struct ifnet *, protocol_family_t,
    struct mbuf *);
static errno_t ifproto_media_preout(struct ifnet *, protocol_family_t,
    mbuf_t *, const struct sockaddr *, void *, char *, char *);
static void ifproto_media_event(struct ifnet *, protocol_family_t,
    const struct kev_msg *);
static errno_t ifproto_media_ioctl(struct ifnet *, protocol_family_t,
    unsigned long, void *);
static errno_t ifproto_media_resolve_multi(ifnet_t, const struct sockaddr *,
    struct sockaddr_dl *, size_t);
static errno_t ifproto_media_send_arp(struct ifnet *, u_short,
    const struct sockaddr_dl *, const struct sockaddr *,
    const struct sockaddr_dl *, const struct sockaddr *);

static errno_t ifp_if_input(struct ifnet *ifp, struct mbuf *m_head,
    struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
    boolean_t poll, struct thread *tp);
static void ifp_if_input_poll(struct ifnet *, u_int32_t, u_int32_t,
    struct mbuf **, struct mbuf **, u_int32_t *, u_int32_t *);
static errno_t ifp_if_ctl(struct ifnet *, ifnet_ctl_cmd_t, u_int32_t, void *);
static errno_t ifp_if_demux(struct ifnet *, struct mbuf *, char *,
    protocol_family_t *);
static errno_t ifp_if_add_proto(struct ifnet *, protocol_family_t,
    const struct ifnet_demux_desc *, u_int32_t);
static errno_t ifp_if_del_proto(struct ifnet *, protocol_family_t);
static errno_t ifp_if_check_multi(struct ifnet *, const struct sockaddr *);
#if CONFIG_EMBEDDED
static errno_t ifp_if_framer(struct ifnet *, struct mbuf **,
    const struct sockaddr *, const char *, const char *,
    u_int32_t *, u_int32_t *);
#else
static errno_t ifp_if_framer(struct ifnet *, struct mbuf **,
    const struct sockaddr *, const char *, const char *);
#endif /* CONFIG_EMBEDDED */
static errno_t ifp_if_framer_extended(struct ifnet *, struct mbuf **,
    const struct sockaddr *, const char *, const char *,
    u_int32_t *, u_int32_t *);
static errno_t ifp_if_set_bpf_tap(struct ifnet *, bpf_tap_mode, bpf_packet_func);
static void ifp_if_free(struct ifnet *);
static void ifp_if_event(struct ifnet *, const struct kev_msg *);
static __inline void ifp_inc_traffic_class_in(struct ifnet *, struct mbuf *);
static __inline void ifp_inc_traffic_class_out(struct ifnet *, struct mbuf *);

static void dlil_main_input_thread_func(void *, wait_result_t);
static void dlil_input_thread_func(void *, wait_result_t);
static void dlil_rxpoll_input_thread_func(void *, wait_result_t);
static int dlil_create_input_thread(ifnet_t, struct dlil_threading_info *);
static void dlil_terminate_input_thread(struct dlil_threading_info *);
static void dlil_input_stats_add(const struct ifnet_stat_increment_param *,
    struct dlil_threading_info *, boolean_t);
static void dlil_input_stats_sync(struct ifnet *, struct dlil_threading_info *);
static void dlil_input_packet_list_common(struct ifnet *, struct mbuf *,
    u_int32_t, ifnet_model_t, boolean_t);
static errno_t ifnet_input_common(struct ifnet *, struct mbuf *, struct mbuf *,
    const struct ifnet_stat_increment_param *, boolean_t, boolean_t);
#if DEBUG || DEVELOPMENT
static void dlil_verify_sum16(void);
#endif /* DEBUG || DEVELOPMENT */
static void dlil_output_cksum_dbg(struct ifnet *, struct mbuf *, uint32_t,
    protocol_family_t);
static void dlil_input_cksum_dbg(struct ifnet *, struct mbuf *, char *,
    protocol_family_t);

static void ifnet_detacher_thread_func(void *, wait_result_t);
static int ifnet_detacher_thread_cont(int);
static void ifnet_detach_final(struct ifnet *);
static void ifnet_detaching_enqueue(struct ifnet *);
static struct ifnet *ifnet_detaching_dequeue(void);

static void ifnet_start_thread_fn(void *, wait_result_t);
static void ifnet_poll_thread_fn(void *, wait_result_t);
static void ifnet_poll(struct ifnet *);
static errno_t ifnet_enqueue_common(struct ifnet *, void *,
    classq_pkt_type_t, boolean_t, boolean_t *);

static void ifp_src_route_copyout(struct ifnet *, struct route *);
static void ifp_src_route_copyin(struct ifnet *, struct route *);
static void ifp_src_route6_copyout(struct ifnet *, struct route_in6 *);
static void ifp_src_route6_copyin(struct ifnet *, struct route_in6 *);

static int sysctl_rxpoll SYSCTL_HANDLER_ARGS;
static int sysctl_rxpoll_mode_holdtime SYSCTL_HANDLER_ARGS;
static int sysctl_rxpoll_sample_holdtime SYSCTL_HANDLER_ARGS;
static int sysctl_rxpoll_interval_time SYSCTL_HANDLER_ARGS;
static int sysctl_rxpoll_wlowat SYSCTL_HANDLER_ARGS;
static int sysctl_rxpoll_whiwat SYSCTL_HANDLER_ARGS;
static int sysctl_sndq_maxlen SYSCTL_HANDLER_ARGS;
static int sysctl_rcvq_maxlen SYSCTL_HANDLER_ARGS;
static int sysctl_hwcksum_dbg_mode SYSCTL_HANDLER_ARGS;
static int sysctl_hwcksum_dbg_partial_rxoff_forced SYSCTL_HANDLER_ARGS;
static int sysctl_hwcksum_dbg_partial_rxoff_adj SYSCTL_HANDLER_ARGS;

struct chain_len_stats tx_chain_len_stats;
static int sysctl_tx_chain_len_stats SYSCTL_HANDLER_ARGS;

#if TEST_INPUT_THREAD_TERMINATION
static int sysctl_input_thread_termination_spin SYSCTL_HANDLER_ARGS;
#endif /* TEST_INPUT_THREAD_TERMINATION */
/* The following are protected by dlil_ifnet_lock */
static TAILQ_HEAD(, ifnet) ifnet_detaching_head;
static u_int32_t ifnet_detaching_cnt;
static void *ifnet_delayed_run; /* wait channel for detaching thread */

decl_lck_mtx_data(static, ifnet_fc_lock);

static uint32_t ifnet_flowhash_seed;

struct ifnet_flowhash_key {
    char        ifk_name[IFNAMSIZ];
    uint32_t    ifk_capabilities;
    uint32_t    ifk_capenable;
    uint32_t    ifk_output_sched_model;
};

/* Flow control entry per interface */
struct ifnet_fc_entry {
    RB_ENTRY(ifnet_fc_entry) ifce_entry;
    u_int32_t       ifce_flowhash;
    struct ifnet    *ifce_ifp;
};

static uint32_t ifnet_calc_flowhash(struct ifnet *);
static int ifce_cmp(const struct ifnet_fc_entry *,
    const struct ifnet_fc_entry *);
static int ifnet_fc_add(struct ifnet *);
static struct ifnet_fc_entry *ifnet_fc_get(u_int32_t);
static void ifnet_fc_entry_free(struct ifnet_fc_entry *);

/* protected by ifnet_fc_lock */
RB_HEAD(ifnet_fc_tree, ifnet_fc_entry) ifnet_fc_tree;
RB_PROTOTYPE(ifnet_fc_tree, ifnet_fc_entry, ifce_entry, ifce_cmp);
RB_GENERATE(ifnet_fc_tree, ifnet_fc_entry, ifce_entry, ifce_cmp);

static unsigned int ifnet_fc_zone_size; /* sizeof ifnet_fc_entry */
static struct zone *ifnet_fc_zone;      /* ifnet_fc_entry zone */

#define IFNET_FC_ZONE_NAME      "ifnet_fc_zone"
#define IFNET_FC_ZONE_MAX       32
extern void bpfdetach(struct ifnet *);
extern void proto_input_run(void);

extern uint32_t udp_count_opportunistic(unsigned int ifindex,
    u_int32_t flags);
extern uint32_t tcp_count_opportunistic(unsigned int ifindex,
    u_int32_t flags);

__private_extern__ void link_rtrequest(int, struct rtentry *,
    struct sockaddr *);

#ifdef CONFIG_EMBEDDED
int dlil_lladdr_ckreq = 1;
#else
int dlil_lladdr_ckreq = 0;
#endif

#if IFNET_DEBUG
int dlil_verbose = 1;
#else
int dlil_verbose = 0;
#endif /* IFNET_DEBUG */

#if IFNET_INPUT_SANITY_CHK
/* sanity checking of input packet lists received */
static u_int32_t dlil_input_sanity_check = 0;
#endif /* IFNET_INPUT_SANITY_CHK */

/* rate limit debug messages */
struct timespec dlil_dbgrate = { 1, 0 };
SYSCTL_DECL(_net_link_generic_system);

SYSCTL_INT(_net_link_generic_system, OID_AUTO, dlil_verbose,
    CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_verbose, 0, "Log DLIL error messages");

#define IF_SNDQ_MINLEN  32
u_int32_t if_sndq_maxlen = IFQ_MAXLEN;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, sndq_maxlen,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_sndq_maxlen, IFQ_MAXLEN,
    sysctl_sndq_maxlen, "I", "Default transmit queue max length");

#define IF_RCVQ_MINLEN  32
#define IF_RCVQ_MAXLEN  256
u_int32_t if_rcvq_maxlen = IF_RCVQ_MAXLEN;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rcvq_maxlen,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rcvq_maxlen, IFQ_MAXLEN,
    sysctl_rcvq_maxlen, "I", "Default receive queue max length");

#define IF_RXPOLL_DECAY 2       /* ilog2 of EWMA decay rate (4) */
static u_int32_t if_rxpoll_decay = IF_RXPOLL_DECAY;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_decay,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_decay, IF_RXPOLL_DECAY,
    "ilog2 of EWMA decay rate of avg inbound packets");

#define IF_RXPOLL_MODE_HOLDTIME_MIN     (10ULL * 1000 * 1000)   /* 10 ms */
#define IF_RXPOLL_MODE_HOLDTIME         (1000ULL * 1000 * 1000) /* 1 sec */
static u_int64_t if_rxpoll_mode_holdtime = IF_RXPOLL_MODE_HOLDTIME;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_freeze_time,
    CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_mode_holdtime,
    IF_RXPOLL_MODE_HOLDTIME, sysctl_rxpoll_mode_holdtime,
    "Q", "input poll mode freeze time");

#define IF_RXPOLL_SAMPLETIME_MIN        (1ULL * 1000 * 1000)    /* 1 ms */
#define IF_RXPOLL_SAMPLETIME            (10ULL * 1000 * 1000)   /* 10 ms */
static u_int64_t if_rxpoll_sample_holdtime = IF_RXPOLL_SAMPLETIME;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_sample_time,
    CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_sample_holdtime,
    IF_RXPOLL_SAMPLETIME, sysctl_rxpoll_sample_holdtime,
    "Q", "input poll sampling time");

#define IF_RXPOLL_INTERVALTIME_MIN      (1ULL * 1000)           /* 1 us */
#define IF_RXPOLL_INTERVALTIME          (1ULL * 1000 * 1000)    /* 1 ms */
static u_int64_t if_rxpoll_interval_time = IF_RXPOLL_INTERVALTIME;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_interval_time,
    CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_interval_time,
    IF_RXPOLL_INTERVALTIME, sysctl_rxpoll_interval_time,
    "Q", "input poll interval (time)");

#define IF_RXPOLL_INTERVAL_PKTS 0       /* 0 (disabled) */
static u_int32_t if_rxpoll_interval_pkts = IF_RXPOLL_INTERVAL_PKTS;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_interval_pkts,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_interval_pkts,
    IF_RXPOLL_INTERVAL_PKTS, "input poll interval (packets)");

#define IF_RXPOLL_WLOWAT        10
static u_int32_t if_rxpoll_wlowat = IF_RXPOLL_WLOWAT;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_wakeups_lowat,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_wlowat,
    IF_RXPOLL_WLOWAT, sysctl_rxpoll_wlowat,
    "I", "input poll wakeup low watermark");

#define IF_RXPOLL_WHIWAT        100
static u_int32_t if_rxpoll_whiwat = IF_RXPOLL_WHIWAT;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_wakeups_hiwat,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_whiwat,
    IF_RXPOLL_WHIWAT, sysctl_rxpoll_whiwat,
    "I", "input poll wakeup high watermark");

static u_int32_t if_rxpoll_max = 0;     /* 0 (automatic) */
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_max,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_max, 0,
    "max packets per poll call");

static u_int32_t if_rxpoll = 1;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll, 0,
    sysctl_rxpoll, "I", "enable opportunistic input polling");
#if TEST_INPUT_THREAD_TERMINATION
static u_int32_t if_input_thread_termination_spin = 0;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, input_thread_termination_spin,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_input_thread_termination_spin, 0,
    sysctl_input_thread_termination_spin,
    "I", "input thread termination spin limit");
#endif /* TEST_INPUT_THREAD_TERMINATION */

static u_int32_t cur_dlil_input_threads = 0;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, dlil_input_threads,
    CTLFLAG_RD | CTLFLAG_LOCKED, &cur_dlil_input_threads, 0,
    "Current number of DLIL input threads");

#if IFNET_INPUT_SANITY_CHK
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, dlil_input_sanity_check,
    CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_input_sanity_check, 0,
    "Turn on sanity checking in DLIL input");
#endif /* IFNET_INPUT_SANITY_CHK */

static u_int32_t if_flowadv = 1;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, flow_advisory,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_flowadv, 1,
    "enable flow-advisory mechanism");

static u_int32_t if_delaybased_queue = 1;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, delaybased_queue,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_delaybased_queue, 1,
    "enable delay based dynamic queue sizing");

static uint64_t hwcksum_in_invalidated = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_in_invalidated, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_in_invalidated, "inbound packets with invalidated hardware cksum");

uint32_t hwcksum_dbg = 0;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_dbg,
    CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg, 0,
    "enable hardware cksum debugging");

u_int32_t ifnet_start_delayed = 0;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, start_delayed,
    CTLFLAG_RW | CTLFLAG_LOCKED, &ifnet_start_delayed, 0,
    "number of times start was delayed");

u_int32_t ifnet_delay_start_disabled = 0;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, start_delay_disabled,
    CTLFLAG_RW | CTLFLAG_LOCKED, &ifnet_delay_start_disabled, 0,
    "number of times start was delayed");

#define HWCKSUM_DBG_PARTIAL_FORCED      0x1     /* forced partial checksum */
#define HWCKSUM_DBG_PARTIAL_RXOFF_ADJ   0x2     /* adjust start offset */
#define HWCKSUM_DBG_FINALIZE_FORCED     0x10    /* forced finalize */
#define HWCKSUM_DBG_MASK \
    (HWCKSUM_DBG_PARTIAL_FORCED | HWCKSUM_DBG_PARTIAL_RXOFF_ADJ | \
    HWCKSUM_DBG_FINALIZE_FORCED)

static uint32_t hwcksum_dbg_mode = 0;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, hwcksum_dbg_mode,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg_mode,
    0, sysctl_hwcksum_dbg_mode, "I", "hardware cksum debugging mode");

static uint64_t hwcksum_dbg_partial_forced = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_partial_forced, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_partial_forced, "packets forced using partial cksum");

static uint64_t hwcksum_dbg_partial_forced_bytes = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_partial_forced_bytes, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_partial_forced_bytes, "bytes forced using partial cksum");

static uint32_t hwcksum_dbg_partial_rxoff_forced = 0;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_partial_rxoff_forced, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    &hwcksum_dbg_partial_rxoff_forced, 0,
    sysctl_hwcksum_dbg_partial_rxoff_forced, "I",
    "forced partial cksum rx offset");

static uint32_t hwcksum_dbg_partial_rxoff_adj = 0;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, hwcksum_dbg_partial_rxoff_adj,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg_partial_rxoff_adj,
    0, sysctl_hwcksum_dbg_partial_rxoff_adj, "I",
    "adjusted partial cksum rx offset");

static uint64_t hwcksum_dbg_verified = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_verified, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_verified, "packets verified for having good checksum");
static uint64_t hwcksum_dbg_bad_cksum = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_bad_cksum, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_bad_cksum, "packets with bad hardware calculated checksum");

static uint64_t hwcksum_dbg_bad_rxoff = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_bad_rxoff, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_bad_rxoff, "packets with invalid rxoff");

static uint64_t hwcksum_dbg_adjusted = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_adjusted, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_adjusted, "packets with rxoff adjusted");

static uint64_t hwcksum_dbg_finalized_hdr = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_finalized_hdr, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_finalized_hdr, "finalized headers");

static uint64_t hwcksum_dbg_finalized_data = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_finalized_data, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_finalized_data, "finalized payloads");

uint32_t hwcksum_tx = 1;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_tx,
    CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_tx, 0,
    "enable transmit hardware checksum offload");

uint32_t hwcksum_rx = 1;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_rx,
    CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_rx, 0,
    "enable receive hardware checksum offload");

SYSCTL_PROC(_net_link_generic_system, OID_AUTO, tx_chain_len_stats,
    CTLFLAG_RD | CTLFLAG_LOCKED, 0, 9,
    sysctl_tx_chain_len_stats, "S", "");

uint32_t tx_chain_len_count = 0;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, tx_chain_len_count,
    CTLFLAG_RW | CTLFLAG_LOCKED, &tx_chain_len_count, 0, "");

static uint32_t threshold_notify = 1;           /* enable/disable */
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, threshold_notify,
    CTLFLAG_RW | CTLFLAG_LOCKED, &threshold_notify, 0, "");

static uint32_t threshold_interval = 2;         /* in seconds */
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, threshold_interval,
    CTLFLAG_RW | CTLFLAG_LOCKED, &threshold_interval, 0, "");

#if (DEVELOPMENT || DEBUG)
static int sysctl_get_kao_frames SYSCTL_HANDLER_ARGS;
SYSCTL_NODE(_net_link_generic_system, OID_AUTO, get_kao_frames,
    CTLFLAG_RD | CTLFLAG_LOCKED, sysctl_get_kao_frames, "");
#endif /* DEVELOPMENT || DEBUG */

struct net_api_stats net_api_stats;
SYSCTL_STRUCT(_net, OID_AUTO, api_stats, CTLFLAG_RD|CTLFLAG_LOCKED,
    &net_api_stats, net_api_stats, "");

unsigned int net_rxpoll = 1;
unsigned int net_affinity = 1;
static kern_return_t dlil_affinity_set(struct thread *, u_int32_t);

extern u_int32_t inject_buckets;

static lck_grp_attr_t *dlil_grp_attributes = NULL;
static lck_attr_t *dlil_lck_attributes = NULL;

/* DLIL data threshold thread call */
static void dlil_dt_tcall_fn(thread_call_param_t, thread_call_param_t);

static void dlil_mit_tcall_fn(thread_call_param_t, thread_call_param_t);

uint32_t dlil_rcv_mit_pkts_min = 5;
uint32_t dlil_rcv_mit_pkts_max = 64;
uint32_t dlil_rcv_mit_interval = (500 * 1000);

#if (DEVELOPMENT || DEBUG)
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rcv_mit_pkts_min,
    CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_rcv_mit_pkts_min, 0, "");
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rcv_mit_pkts_max,
    CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_rcv_mit_pkts_max, 0, "");
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rcv_mit_interval,
    CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_rcv_mit_interval, 0, "");
#endif /* DEVELOPMENT || DEBUG */
#define DLIL_INPUT_CHECK(m, ifp) {                                      \
    struct ifnet *_rcvif = mbuf_pkthdr_rcvif(m);                        \
    if (_rcvif == NULL || (ifp != lo_ifp && _rcvif != ifp) ||           \
        !(mbuf_flags(m) & MBUF_PKTHDR)) {                               \
        panic_plain("%s: invalid mbuf %p\n", __func__, m);              \
        /* NOTREACHED */                                                \
    }                                                                   \
}

#define DLIL_EWMA(old, new, decay) do {                                 \
    u_int32_t _avg;                                                     \
    if ((_avg = (old)) > 0)                                             \
        _avg = (((_avg << (decay)) - _avg) + (new)) >> (decay);         \
    else                                                                \
        _avg = (new);                                                   \
    (old) = _avg;                                                       \
} while (0)
798 #define GBPS (MBPS * 1000)
800 struct rxpoll_time_tbl
{
801 u_int64_t speed
; /* downlink speed */
802 u_int32_t plowat
; /* packets low watermark */
803 u_int32_t phiwat
; /* packets high watermark */
804 u_int32_t blowat
; /* bytes low watermark */
805 u_int32_t bhiwat
; /* bytes high watermark */
808 static struct rxpoll_time_tbl rxpoll_tbl
[] = {
809 { 10 * MBPS
, 2, 8, (1 * 1024), (6 * 1024) },
810 { 100 * MBPS
, 10, 40, (4 * 1024), (64 * 1024) },
811 { 1 * GBPS
, 10, 40, (4 * 1024), (64 * 1024) },
812 { 10 * GBPS
, 10, 40, (4 * 1024), (64 * 1024) },
813 { 100 * GBPS
, 10, 40, (4 * 1024), (64 * 1024) },
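/*
 * Illustrative sketch (hypothetical helper, not part of this file): the
 * rxpoll parameter setup picks per-interface poll watermarks from
 * rxpoll_tbl[] based on the downlink rate.  Under the assumption that the
 * first row whose speed covers the link is used, a minimal lookup could be:
 *
 *      static const struct rxpoll_time_tbl *
 *      rxpoll_tbl_lookup(u_int64_t link_speed)
 *      {
 *              int i;
 *
 *              // rows are ordered by increasing speed; stop at sentinel
 *              for (i = 0; rxpoll_tbl[i].speed != 0; i++) {
 *                      if (link_speed <= rxpoll_tbl[i].speed)
 *                              return (&rxpoll_tbl[i]);
 *              }
 *              // faster than the last row: fall back to the highest entry
 *              return (&rxpoll_tbl[i - 1]);
 *      }
 *
 * e.g. a 1 Gbps link would resolve to { plowat 10, phiwat 40, blowat 4 KB,
 * bhiwat 64 KB }, the thresholds used when deciding whether to stay in
 * interrupt-driven mode or switch to polled input.
 */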
proto_hash_value(u_int32_t protocol_family)
{
    /*
     * dlil_proto_unplumb_all() depends on the mapping between
     * the hash bucket index and the protocol family defined
     * here; future changes must be applied there as well.
     */
    switch (protocol_family) {
/*
 * Caller must already be holding ifnet lock.
 */
static struct if_proto *
find_attached_proto(struct ifnet *ifp, u_int32_t protocol_family)
{
    struct if_proto *proto = NULL;
    u_int32_t i = proto_hash_value(protocol_family);

    ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);

    if (ifp->if_proto_hash != NULL)
        proto = SLIST_FIRST(&ifp->if_proto_hash[i]);

    while (proto != NULL && proto->protocol_family != protocol_family)
        proto = SLIST_NEXT(proto, next_hash);

    return (proto);
}
static void
if_proto_ref(struct if_proto *proto)
{
    atomic_add_32(&proto->refcount, 1);
}

extern void if_rtproto_del(struct ifnet *ifp, int protocol);
static void
if_proto_free(struct if_proto *proto)
{
    u_int32_t oldval;
    struct ifnet *ifp = proto->ifp;
    u_int32_t proto_family = proto->protocol_family;
    struct kev_dl_proto_data ev_pr_data;

    oldval = atomic_add_32_ov(&proto->refcount, -1);
    if (oldval > 1)
        return;

    /* No more reference on this, protocol must have been detached */
    VERIFY(proto->detached);

    if (proto->proto_kpi == kProtoKPI_v1) {
        if (proto->kpi.v1.detached)
            proto->kpi.v1.detached(ifp, proto->protocol_family);
    }
    if (proto->proto_kpi == kProtoKPI_v2) {
        if (proto->kpi.v2.detached)
            proto->kpi.v2.detached(ifp, proto->protocol_family);
    }

    /*
     * Cleanup routes that may still be in the routing table for that
     * interface/protocol pair.
     */
    if_rtproto_del(ifp, proto_family);

    /*
     * The reserved field carries the number of protocol still attached
     * (subject to change)
     */
    ifnet_lock_shared(ifp);
    ev_pr_data.proto_family = proto_family;
    ev_pr_data.proto_remaining_count = dlil_ifp_protolist(ifp, NULL, 0);
    ifnet_lock_done(ifp);

    dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_DETACHED,
        (struct net_event_data *)&ev_pr_data,
        sizeof (struct kev_dl_proto_data));

    if (ev_pr_data.proto_remaining_count == 0) {
        /*
         * The protocol count has gone to zero, mark the interface down.
         * This used to be done by configd.KernelEventMonitor, but that
         * is inherently prone to races (rdar://problem/30810208).
         */
        (void) ifnet_set_flags(ifp, 0, IFF_UP);
        (void) ifnet_ioctl(ifp, 0, SIOCSIFFLAGS, NULL);
        dlil_post_sifflags_msg(ifp);
    }

    zfree(dlif_proto_zone, proto);
}
__private_extern__ void
ifnet_lock_assert(struct ifnet *ifp, ifnet_lock_assert_t what)
{
    unsigned int type = 0;

    switch (what) {
    case IFNET_LCK_ASSERT_EXCLUSIVE:
        type = LCK_RW_ASSERT_EXCLUSIVE;
        break;

    case IFNET_LCK_ASSERT_SHARED:
        type = LCK_RW_ASSERT_SHARED;
        break;

    case IFNET_LCK_ASSERT_OWNED:
        type = LCK_RW_ASSERT_HELD;
        break;

    case IFNET_LCK_ASSERT_NOTOWNED:
        /* nothing to do here for RW lock; bypass assert */
        return;

    default:
        panic("bad ifnet assert type: %d", what);
        /* NOTREACHED */
    }
    LCK_RW_ASSERT(&ifp->if_lock, type);
}
__private_extern__ void
ifnet_lock_shared(struct ifnet *ifp)
{
    lck_rw_lock_shared(&ifp->if_lock);
}

__private_extern__ void
ifnet_lock_exclusive(struct ifnet *ifp)
{
    lck_rw_lock_exclusive(&ifp->if_lock);
}

__private_extern__ void
ifnet_lock_done(struct ifnet *ifp)
{
    lck_rw_done(&ifp->if_lock);
}

__private_extern__ void
if_inetdata_lock_shared(struct ifnet *ifp)
{
    lck_rw_lock_shared(&ifp->if_inetdata_lock);
}

__private_extern__ void
if_inetdata_lock_exclusive(struct ifnet *ifp)
{
    lck_rw_lock_exclusive(&ifp->if_inetdata_lock);
}

__private_extern__ void
if_inetdata_lock_done(struct ifnet *ifp)
{
    lck_rw_done(&ifp->if_inetdata_lock);
}

__private_extern__ void
if_inet6data_lock_shared(struct ifnet *ifp)
{
    lck_rw_lock_shared(&ifp->if_inet6data_lock);
}

__private_extern__ void
if_inet6data_lock_exclusive(struct ifnet *ifp)
{
    lck_rw_lock_exclusive(&ifp->if_inet6data_lock);
}

__private_extern__ void
if_inet6data_lock_done(struct ifnet *ifp)
{
    lck_rw_done(&ifp->if_inet6data_lock);
}

__private_extern__ void
ifnet_head_lock_shared(void)
{
    lck_rw_lock_shared(&ifnet_head_lock);
}

__private_extern__ void
ifnet_head_lock_exclusive(void)
{
    lck_rw_lock_exclusive(&ifnet_head_lock);
}

__private_extern__ void
ifnet_head_done(void)
{
    lck_rw_done(&ifnet_head_lock);
}

__private_extern__ void
ifnet_head_assert_exclusive(void)
{
    LCK_RW_ASSERT(&ifnet_head_lock, LCK_RW_ASSERT_EXCLUSIVE);
}
/*
 * dlil_ifp_protolist
 * - get the list of protocols attached to the interface, or just the number
 *   of attached protocols
 * - if the number returned is greater than 'list_count', truncation occurred
 *
 * Note:
 * - caller must already be holding ifnet lock.
 */
static u_int32_t
dlil_ifp_protolist(struct ifnet *ifp, protocol_family_t *list,
    u_int32_t list_count)
{
    u_int32_t count = 0;
    int i;

    ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);

    if (ifp->if_proto_hash == NULL)
        goto done;

    for (i = 0; i < PROTO_HASH_SLOTS; i++) {
        struct if_proto *proto;
        SLIST_FOREACH(proto, &ifp->if_proto_hash[i], next_hash) {
            if (list != NULL && count < list_count) {
                list[count] = proto->protocol_family;
            }
            count++;
        }
    }
done:
    return (count);
}
__private_extern__ u_int32_t
if_get_protolist(struct ifnet *ifp, u_int32_t *protolist, u_int32_t count)
{
    ifnet_lock_shared(ifp);
    count = dlil_ifp_protolist(ifp, protolist, count);
    ifnet_lock_done(ifp);
    return (count);
}

__private_extern__ void
if_free_protolist(u_int32_t *list)
{
    _FREE(list, M_TEMP);
}
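/*
 * Illustrative usage sketch (hypothetical caller, not part of this file):
 * the list/count pair behaves like a classic two-call API.  A first call
 * with a NULL list returns the number of attached protocols; the caller
 * then sizes a buffer and calls again.  A count larger than the buffer on
 * the second call means protocols attached in between (truncation).
 *
 *      u_int32_t n = if_get_protolist(ifp, NULL, 0);
 *      u_int32_t *plist;
 *
 *      MALLOC(plist, u_int32_t *, n * sizeof (*plist), M_TEMP, M_WAITOK);
 *      if (plist != NULL) {
 *              n = if_get_protolist(ifp, plist, n);
 *              // ... inspect plist[0..n-1], e.g. look for PF_INET ...
 *              if_free_protolist(plist);       // frees with M_TEMP
 *      }
 */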
__private_extern__ void
dlil_post_msg(struct ifnet *ifp, u_int32_t event_subclass,
    u_int32_t event_code, struct net_event_data *event_data,
    u_int32_t event_data_len)
{
    struct net_event_data ev_data;
    struct kev_msg ev_msg;

    bzero(&ev_msg, sizeof (ev_msg));
    bzero(&ev_data, sizeof (ev_data));
    /*
     * A net event always starts with a net_event_data structure,
     * but the caller can generate a simple net event or
     * provide a longer event structure to post.
     */
    ev_msg.vendor_code = KEV_VENDOR_APPLE;
    ev_msg.kev_class = KEV_NETWORK_CLASS;
    ev_msg.kev_subclass = event_subclass;
    ev_msg.event_code = event_code;

    if (event_data == NULL) {
        event_data = &ev_data;
        event_data_len = sizeof (struct net_event_data);
    }

    strlcpy(&event_data->if_name[0], ifp->if_name, IFNAMSIZ);
    event_data->if_family = ifp->if_family;
    event_data->if_unit = (u_int32_t)ifp->if_unit;

    ev_msg.dv[0].data_length = event_data_len;
    ev_msg.dv[0].data_ptr = event_data;
    ev_msg.dv[1].data_length = 0;

    /* Don't update interface generation for quality and RRC state changes */
    bool update_generation = (event_subclass != KEV_DL_SUBCLASS ||
        (event_code != KEV_DL_LINK_QUALITY_METRIC_CHANGED &&
        event_code != KEV_DL_RRC_STATE_CHANGED));

    dlil_event_internal(ifp, &ev_msg, update_generation);
}
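/*
 * Usage sketch: callers post a subclass/code pair and let dlil_post_msg()
 * fill in the interface identity.  A caller with no extra payload simply
 * passes NULL, as in this hypothetical link-up notification:
 *
 *      dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_LINK_ON, NULL, 0);
 *
 * Callers with a larger event structure embed a struct net_event_data as
 * its first member and pass the whole thing, as if_proto_free() does above
 * with struct kev_dl_proto_data and KEV_DL_PROTO_DETACHED.
 */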
__private_extern__ int
dlil_alloc_local_stats(struct ifnet *ifp)
{
    int ret = EINVAL;
    void *buf, *base, **pbuf;

    if (ifp == NULL)
        goto end;

    if (ifp->if_tcp_stat == NULL && ifp->if_udp_stat == NULL) {
        /* allocate tcpstat_local structure */
        buf = zalloc(dlif_tcpstat_zone);
        if (buf == NULL) {
            ret = ENOBUFS;
            goto end;
        }
        bzero(buf, dlif_tcpstat_bufsize);

        /* Get the 64-bit aligned base address for this object */
        base = (void *)P2ROUNDUP((intptr_t)buf + sizeof (u_int64_t),
            sizeof (u_int64_t));
        VERIFY(((intptr_t)base + dlif_tcpstat_size) <=
            ((intptr_t)buf + dlif_tcpstat_bufsize));

        /*
         * Wind back a pointer size from the aligned base and
         * save the original address so we can free it later.
         */
        pbuf = (void **)((intptr_t)base - sizeof (void *));
        *pbuf = buf;
        ifp->if_tcp_stat = base;

        /* allocate udpstat_local structure */
        buf = zalloc(dlif_udpstat_zone);
        if (buf == NULL) {
            ret = ENOBUFS;
            goto end;
        }
        bzero(buf, dlif_udpstat_bufsize);

        /* Get the 64-bit aligned base address for this object */
        base = (void *)P2ROUNDUP((intptr_t)buf + sizeof (u_int64_t),
            sizeof (u_int64_t));
        VERIFY(((intptr_t)base + dlif_udpstat_size) <=
            ((intptr_t)buf + dlif_udpstat_bufsize));

        /*
         * Wind back a pointer size from the aligned base and
         * save the original address so we can free it later.
         */
        pbuf = (void **)((intptr_t)base - sizeof (void *));
        *pbuf = buf;
        ifp->if_udp_stat = base;

        VERIFY(IS_P2ALIGNED(ifp->if_tcp_stat, sizeof (u_int64_t)) &&
            IS_P2ALIGNED(ifp->if_udp_stat, sizeof (u_int64_t)));

        ret = 0;
    }

    if (ifp->if_ipv4_stat == NULL) {
        MALLOC(ifp->if_ipv4_stat, struct if_tcp_ecn_stat *,
            sizeof (struct if_tcp_ecn_stat), M_TEMP, M_WAITOK|M_ZERO);
        if (ifp->if_ipv4_stat == NULL) {
            ret = ENOMEM;
            goto end;
        }
    }

    if (ifp->if_ipv6_stat == NULL) {
        MALLOC(ifp->if_ipv6_stat, struct if_tcp_ecn_stat *,
            sizeof (struct if_tcp_ecn_stat), M_TEMP, M_WAITOK|M_ZERO);
        if (ifp->if_ipv6_stat == NULL) {
            ret = ENOMEM;
            goto end;
        }
    }
end:
    if (ret != 0) {
        if (ifp->if_tcp_stat != NULL) {
            pbuf = (void **)
                ((intptr_t)ifp->if_tcp_stat - sizeof (void *));
            zfree(dlif_tcpstat_zone, *pbuf);
            ifp->if_tcp_stat = NULL;
        }
        if (ifp->if_udp_stat != NULL) {
            pbuf = (void **)
                ((intptr_t)ifp->if_udp_stat - sizeof (void *));
            zfree(dlif_udpstat_zone, *pbuf);
            ifp->if_udp_stat = NULL;
        }
        if (ifp->if_ipv4_stat != NULL) {
            FREE(ifp->if_ipv4_stat, M_TEMP);
            ifp->if_ipv4_stat = NULL;
        }
        if (ifp->if_ipv6_stat != NULL) {
            FREE(ifp->if_ipv6_stat, M_TEMP);
            ifp->if_ipv6_stat = NULL;
        }
    }

    return (ret);
}
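/*
 * Illustrative note on the alignment trick above (numbers are an example,
 * not from a real allocation): the zone element is oversized by
 * sizeof (void *) + sizeof (u_int64_t) so that a 64-bit aligned base can
 * always be carved out of it and the raw zalloc() pointer stashed just
 * below that base for the later zfree().
 *
 *      // suppose zalloc() returns buf = 0x...1004 (only 4-byte aligned)
 *      // base = P2ROUNDUP(0x1004 + 8, 8) = 0x1010  -> 8-byte aligned
 *      // pbuf = (void **)(0x1010 - 8) = 0x1008     -> holds 0x...1004
 *      // stats live at base; on free: zfree(zone, *pbuf)
 *
 * Adding sizeof (u_int64_t) before rounding guarantees there is always at
 * least a pointer's worth of headroom below the aligned base, even when
 * buf itself happens to be 64-bit aligned already.
 */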
1238 dlil_create_input_thread(ifnet_t ifp
, struct dlil_threading_info
*inp
)
1240 thread_continue_t func
;
1244 /* NULL ifp indicates the main input thread, called at dlil_init time */
1246 func
= dlil_main_input_thread_func
;
1247 VERIFY(inp
== dlil_main_input_thread
);
1248 (void) strlcat(inp
->input_name
,
1249 "main_input", DLIL_THREADNAME_LEN
);
1250 } else if (net_rxpoll
&& (ifp
->if_eflags
& IFEF_RXPOLL
)) {
1251 func
= dlil_rxpoll_input_thread_func
;
1252 VERIFY(inp
!= dlil_main_input_thread
);
1253 (void) snprintf(inp
->input_name
, DLIL_THREADNAME_LEN
,
1254 "%s_input_poll", if_name(ifp
));
1256 func
= dlil_input_thread_func
;
1257 VERIFY(inp
!= dlil_main_input_thread
);
1258 (void) snprintf(inp
->input_name
, DLIL_THREADNAME_LEN
,
1259 "%s_input", if_name(ifp
));
1261 VERIFY(inp
->input_thr
== THREAD_NULL
);
1263 inp
->lck_grp
= lck_grp_alloc_init(inp
->input_name
, dlil_grp_attributes
);
1264 lck_mtx_init(&inp
->input_lck
, inp
->lck_grp
, dlil_lck_attributes
);
1266 inp
->mode
= IFNET_MODEL_INPUT_POLL_OFF
;
1267 inp
->ifp
= ifp
; /* NULL for main input thread */
1269 net_timerclear(&inp
->mode_holdtime
);
1270 net_timerclear(&inp
->mode_lasttime
);
1271 net_timerclear(&inp
->sample_holdtime
);
1272 net_timerclear(&inp
->sample_lasttime
);
1273 net_timerclear(&inp
->dbg_lasttime
);
1276 * For interfaces that support opportunistic polling, set the
1277 * low and high watermarks for outstanding inbound packets/bytes.
1278 * Also define freeze times for transitioning between modes
1279 * and updating the average.
1281 if (ifp
!= NULL
&& net_rxpoll
&& (ifp
->if_eflags
& IFEF_RXPOLL
)) {
1282 limit
= MAX(if_rcvq_maxlen
, IF_RCVQ_MINLEN
);
1283 (void) dlil_rxpoll_set_params(ifp
, NULL
, FALSE
);
1285 limit
= (u_int32_t
)-1;
1288 _qinit(&inp
->rcvq_pkts
, Q_DROPTAIL
, limit
, QP_MBUF
);
1289 if (inp
== dlil_main_input_thread
) {
1290 struct dlil_main_threading_info
*inpm
=
1291 (struct dlil_main_threading_info
*)inp
;
1292 _qinit(&inpm
->lo_rcvq_pkts
, Q_DROPTAIL
, limit
, QP_MBUF
);
1295 error
= kernel_thread_start(func
, inp
, &inp
->input_thr
);
1296 if (error
== KERN_SUCCESS
) {
1297 ml_thread_policy(inp
->input_thr
, MACHINE_GROUP
,
1298 (MACHINE_NETWORK_GROUP
|MACHINE_NETWORK_NETISR
));
1300 * We create an affinity set so that the matching workloop
1301 * thread or the starter thread (for loopback) can be
1302 * scheduled on the same processor set as the input thread.
1305 struct thread
*tp
= inp
->input_thr
;
1308 * Randomize to reduce the probability
1309 * of affinity tag namespace collision.
1311 read_frandom(&tag
, sizeof (tag
));
1312 if (dlil_affinity_set(tp
, tag
) == KERN_SUCCESS
) {
1313 thread_reference(tp
);
1315 inp
->net_affinity
= TRUE
;
1318 } else if (inp
== dlil_main_input_thread
) {
1319 panic_plain("%s: couldn't create main input thread", __func__
);
1322 panic_plain("%s: couldn't create %s input thread", __func__
,
1326 OSAddAtomic(1, &cur_dlil_input_threads
);
1331 #if TEST_INPUT_THREAD_TERMINATION
1333 sysctl_input_thread_termination_spin SYSCTL_HANDLER_ARGS
1335 #pragma unused(arg1, arg2)
1339 i
= if_input_thread_termination_spin
;
1341 err
= sysctl_handle_int(oidp
, &i
, 0, req
);
1342 if (err
!= 0 || req
->newptr
== USER_ADDR_NULL
)
1345 if (net_rxpoll
== 0)
1348 if_input_thread_termination_spin
= i
;
1351 #endif /* TEST_INPUT_THREAD_TERMINATION */
1354 dlil_clean_threading_info(struct dlil_threading_info
*inp
)
1356 lck_mtx_destroy(&inp
->input_lck
, inp
->lck_grp
);
1357 lck_grp_free(inp
->lck_grp
);
1359 inp
->input_waiting
= 0;
1361 bzero(inp
->input_name
, sizeof (inp
->input_name
));
1363 VERIFY(qhead(&inp
->rcvq_pkts
) == NULL
&& qempty(&inp
->rcvq_pkts
));
1364 qlimit(&inp
->rcvq_pkts
) = 0;
1365 bzero(&inp
->stats
, sizeof (inp
->stats
));
1367 VERIFY(!inp
->net_affinity
);
1368 inp
->input_thr
= THREAD_NULL
;
1369 VERIFY(inp
->wloop_thr
== THREAD_NULL
);
1370 VERIFY(inp
->poll_thr
== THREAD_NULL
);
1371 VERIFY(inp
->tag
== 0);
1373 inp
->mode
= IFNET_MODEL_INPUT_POLL_OFF
;
1374 bzero(&inp
->tstats
, sizeof (inp
->tstats
));
1375 bzero(&inp
->pstats
, sizeof (inp
->pstats
));
1376 bzero(&inp
->sstats
, sizeof (inp
->sstats
));
1378 net_timerclear(&inp
->mode_holdtime
);
1379 net_timerclear(&inp
->mode_lasttime
);
1380 net_timerclear(&inp
->sample_holdtime
);
1381 net_timerclear(&inp
->sample_lasttime
);
1382 net_timerclear(&inp
->dbg_lasttime
);
1384 #if IFNET_INPUT_SANITY_CHK
1385 inp
->input_mbuf_cnt
= 0;
1386 #endif /* IFNET_INPUT_SANITY_CHK */
1390 dlil_terminate_input_thread(struct dlil_threading_info
*inp
)
1392 struct ifnet
*ifp
= inp
->ifp
;
1394 VERIFY(current_thread() == inp
->input_thr
);
1395 VERIFY(inp
!= dlil_main_input_thread
);
1397 OSAddAtomic(-1, &cur_dlil_input_threads
);
1399 #if TEST_INPUT_THREAD_TERMINATION
1400 { /* do something useless that won't get optimized away */
1402 for (uint32_t i
= 0;
1403 i
< if_input_thread_termination_spin
;
1407 printf("the value is %d\n", v
);
1409 #endif /* TEST_INPUT_THREAD_TERMINATION */
1411 lck_mtx_lock_spin(&inp
->input_lck
);
1412 VERIFY((inp
->input_waiting
& DLIL_INPUT_TERMINATE
) != 0);
1413 inp
->input_waiting
|= DLIL_INPUT_TERMINATE_COMPLETE
;
1414 wakeup_one((caddr_t
)&inp
->input_waiting
);
1415 lck_mtx_unlock(&inp
->input_lck
);
1417 /* for the extra refcnt from kernel_thread_start() */
1418 thread_deallocate(current_thread());
1421 printf("%s: input thread terminated\n",
1425 /* this is the end */
1426 thread_terminate(current_thread());
1430 static kern_return_t
1431 dlil_affinity_set(struct thread
*tp
, u_int32_t tag
)
1433 thread_affinity_policy_data_t policy
;
1435 bzero(&policy
, sizeof (policy
));
1436 policy
.affinity_tag
= tag
;
1437 return (thread_policy_set(tp
, THREAD_AFFINITY_POLICY
,
1438 (thread_policy_t
)&policy
, THREAD_AFFINITY_POLICY_COUNT
));
1444 thread_t thread
= THREAD_NULL
;
1447 * The following fields must be 64-bit aligned for atomic operations.
1449 IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets
);
1450 IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors
);
1451 IF_DATA_REQUIRE_ALIGNED_64(ifi_opackets
);
1452 IF_DATA_REQUIRE_ALIGNED_64(ifi_oerrors
);
1453 IF_DATA_REQUIRE_ALIGNED_64(ifi_collisions
);
1454 IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes
);
1455 IF_DATA_REQUIRE_ALIGNED_64(ifi_obytes
);
1456 IF_DATA_REQUIRE_ALIGNED_64(ifi_imcasts
);
1457 IF_DATA_REQUIRE_ALIGNED_64(ifi_omcasts
);
1458 IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops
);
1459 IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto
);
1460 IF_DATA_REQUIRE_ALIGNED_64(ifi_alignerrs
);
1461 IF_DATA_REQUIRE_ALIGNED_64(ifi_dt_bytes
);
1462 IF_DATA_REQUIRE_ALIGNED_64(ifi_fpackets
);
1463 IF_DATA_REQUIRE_ALIGNED_64(ifi_fbytes
);
1465 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets
);
1466 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors
);
1467 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_opackets
);
1468 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_oerrors
);
1469 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_collisions
);
1470 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes
);
1471 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_obytes
);
1472 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_imcasts
);
1473 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_omcasts
);
1474 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops
);
1475 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto
);
1476 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_alignerrs
);
1477 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_dt_bytes
);
1478 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_fpackets
);
1479 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_fbytes
);
1482 * These IF_HWASSIST_ flags must be equal to their IFNET_* counterparts.
1484 _CASSERT(IF_HWASSIST_CSUM_IP
== IFNET_CSUM_IP
);
1485 _CASSERT(IF_HWASSIST_CSUM_TCP
== IFNET_CSUM_TCP
);
1486 _CASSERT(IF_HWASSIST_CSUM_UDP
== IFNET_CSUM_UDP
);
1487 _CASSERT(IF_HWASSIST_CSUM_IP_FRAGS
== IFNET_CSUM_FRAGMENT
);
1488 _CASSERT(IF_HWASSIST_CSUM_FRAGMENT
== IFNET_IP_FRAGMENT
);
1489 _CASSERT(IF_HWASSIST_CSUM_TCPIPV6
== IFNET_CSUM_TCPIPV6
);
1490 _CASSERT(IF_HWASSIST_CSUM_UDPIPV6
== IFNET_CSUM_UDPIPV6
);
1491 _CASSERT(IF_HWASSIST_CSUM_FRAGMENT_IPV6
== IFNET_IPV6_FRAGMENT
);
1492 _CASSERT(IF_HWASSIST_CSUM_PARTIAL
== IFNET_CSUM_PARTIAL
);
1493 _CASSERT(IF_HWASSIST_CSUM_ZERO_INVERT
== IFNET_CSUM_ZERO_INVERT
);
1494 _CASSERT(IF_HWASSIST_VLAN_TAGGING
== IFNET_VLAN_TAGGING
);
1495 _CASSERT(IF_HWASSIST_VLAN_MTU
== IFNET_VLAN_MTU
);
1496 _CASSERT(IF_HWASSIST_TSO_V4
== IFNET_TSO_IPV4
);
1497 _CASSERT(IF_HWASSIST_TSO_V6
== IFNET_TSO_IPV6
);
1500 * ... as well as the mbuf checksum flags counterparts.
1502 _CASSERT(CSUM_IP
== IF_HWASSIST_CSUM_IP
);
1503 _CASSERT(CSUM_TCP
== IF_HWASSIST_CSUM_TCP
);
1504 _CASSERT(CSUM_UDP
== IF_HWASSIST_CSUM_UDP
);
1505 _CASSERT(CSUM_IP_FRAGS
== IF_HWASSIST_CSUM_IP_FRAGS
);
1506 _CASSERT(CSUM_FRAGMENT
== IF_HWASSIST_CSUM_FRAGMENT
);
1507 _CASSERT(CSUM_TCPIPV6
== IF_HWASSIST_CSUM_TCPIPV6
);
1508 _CASSERT(CSUM_UDPIPV6
== IF_HWASSIST_CSUM_UDPIPV6
);
1509 _CASSERT(CSUM_FRAGMENT_IPV6
== IF_HWASSIST_CSUM_FRAGMENT_IPV6
);
1510 _CASSERT(CSUM_PARTIAL
== IF_HWASSIST_CSUM_PARTIAL
);
1511 _CASSERT(CSUM_ZERO_INVERT
== IF_HWASSIST_CSUM_ZERO_INVERT
);
1512 _CASSERT(CSUM_VLAN_TAG_VALID
== IF_HWASSIST_VLAN_TAGGING
);
1515 * Make sure we have at least IF_LLREACH_MAXLEN in the llreach info.
1517 _CASSERT(IF_LLREACH_MAXLEN
<= IF_LLREACHINFO_ADDRLEN
);
1518 _CASSERT(IFNET_LLREACHINFO_ADDRLEN
== IF_LLREACHINFO_ADDRLEN
);
1520 _CASSERT(IFRLOGF_DLIL
== IFNET_LOGF_DLIL
);
1521 _CASSERT(IFRLOGF_FAMILY
== IFNET_LOGF_FAMILY
);
1522 _CASSERT(IFRLOGF_DRIVER
== IFNET_LOGF_DRIVER
);
1523 _CASSERT(IFRLOGF_FIRMWARE
== IFNET_LOGF_FIRMWARE
);
1525 _CASSERT(IFRLOGCAT_CONNECTIVITY
== IFNET_LOGCAT_CONNECTIVITY
);
1526 _CASSERT(IFRLOGCAT_QUALITY
== IFNET_LOGCAT_QUALITY
);
1527 _CASSERT(IFRLOGCAT_PERFORMANCE
== IFNET_LOGCAT_PERFORMANCE
);
1529 _CASSERT(IFRTYPE_FAMILY_ANY
== IFNET_FAMILY_ANY
);
1530 _CASSERT(IFRTYPE_FAMILY_LOOPBACK
== IFNET_FAMILY_LOOPBACK
);
1531 _CASSERT(IFRTYPE_FAMILY_ETHERNET
== IFNET_FAMILY_ETHERNET
);
1532 _CASSERT(IFRTYPE_FAMILY_SLIP
== IFNET_FAMILY_SLIP
);
1533 _CASSERT(IFRTYPE_FAMILY_TUN
== IFNET_FAMILY_TUN
);
1534 _CASSERT(IFRTYPE_FAMILY_VLAN
== IFNET_FAMILY_VLAN
);
1535 _CASSERT(IFRTYPE_FAMILY_PPP
== IFNET_FAMILY_PPP
);
1536 _CASSERT(IFRTYPE_FAMILY_PVC
== IFNET_FAMILY_PVC
);
1537 _CASSERT(IFRTYPE_FAMILY_DISC
== IFNET_FAMILY_DISC
);
1538 _CASSERT(IFRTYPE_FAMILY_MDECAP
== IFNET_FAMILY_MDECAP
);
1539 _CASSERT(IFRTYPE_FAMILY_GIF
== IFNET_FAMILY_GIF
);
1540 _CASSERT(IFRTYPE_FAMILY_FAITH
== IFNET_FAMILY_FAITH
);
1541 _CASSERT(IFRTYPE_FAMILY_STF
== IFNET_FAMILY_STF
);
1542 _CASSERT(IFRTYPE_FAMILY_FIREWIRE
== IFNET_FAMILY_FIREWIRE
);
1543 _CASSERT(IFRTYPE_FAMILY_BOND
== IFNET_FAMILY_BOND
);
1544 _CASSERT(IFRTYPE_FAMILY_CELLULAR
== IFNET_FAMILY_CELLULAR
);
1546 _CASSERT(IFRTYPE_SUBFAMILY_ANY
== IFNET_SUBFAMILY_ANY
);
1547 _CASSERT(IFRTYPE_SUBFAMILY_USB
== IFNET_SUBFAMILY_USB
);
1548 _CASSERT(IFRTYPE_SUBFAMILY_BLUETOOTH
== IFNET_SUBFAMILY_BLUETOOTH
);
1549 _CASSERT(IFRTYPE_SUBFAMILY_WIFI
== IFNET_SUBFAMILY_WIFI
);
1550 _CASSERT(IFRTYPE_SUBFAMILY_THUNDERBOLT
== IFNET_SUBFAMILY_THUNDERBOLT
);
1551 _CASSERT(IFRTYPE_SUBFAMILY_RESERVED
== IFNET_SUBFAMILY_RESERVED
);
1552 _CASSERT(IFRTYPE_SUBFAMILY_INTCOPROC
== IFNET_SUBFAMILY_INTCOPROC
);
1554 _CASSERT(DLIL_MODIDLEN
== IFNET_MODIDLEN
);
1555 _CASSERT(DLIL_MODARGLEN
== IFNET_MODARGLEN
);
1557 PE_parse_boot_argn("net_affinity", &net_affinity
,
1558 sizeof (net_affinity
));
1560 PE_parse_boot_argn("net_rxpoll", &net_rxpoll
, sizeof (net_rxpoll
));
1562 PE_parse_boot_argn("net_rtref", &net_rtref
, sizeof (net_rtref
));
1564 PE_parse_boot_argn("ifnet_debug", &ifnet_debug
, sizeof (ifnet_debug
));
1566 dlif_size
= (ifnet_debug
== 0) ? sizeof (struct dlil_ifnet
) :
1567 sizeof (struct dlil_ifnet_dbg
);
1568 /* Enforce 64-bit alignment for dlil_ifnet structure */
1569 dlif_bufsize
= dlif_size
+ sizeof (void *) + sizeof (u_int64_t
);
1570 dlif_bufsize
= P2ROUNDUP(dlif_bufsize
, sizeof (u_int64_t
));
1571 dlif_zone
= zinit(dlif_bufsize
, DLIF_ZONE_MAX
* dlif_bufsize
,
1573 if (dlif_zone
== NULL
) {
1574 panic_plain("%s: failed allocating %s", __func__
,
1578 zone_change(dlif_zone
, Z_EXPAND
, TRUE
);
1579 zone_change(dlif_zone
, Z_CALLERACCT, FALSE);

	dlif_filt_size = sizeof (struct ifnet_filter);
	dlif_filt_zone = zinit(dlif_filt_size,
	    DLIF_FILT_ZONE_MAX * dlif_filt_size, 0, DLIF_FILT_ZONE_NAME);
	if (dlif_filt_zone == NULL) {
		panic_plain("%s: failed allocating %s", __func__,
		    DLIF_FILT_ZONE_NAME);
		/* NOTREACHED */
	}
	zone_change(dlif_filt_zone, Z_EXPAND, TRUE);
	zone_change(dlif_filt_zone, Z_CALLERACCT, FALSE);

	dlif_phash_size = sizeof (struct proto_hash_entry) * PROTO_HASH_SLOTS;
	dlif_phash_zone = zinit(dlif_phash_size,
	    DLIF_PHASH_ZONE_MAX * dlif_phash_size, 0, DLIF_PHASH_ZONE_NAME);
	if (dlif_phash_zone == NULL) {
		panic_plain("%s: failed allocating %s", __func__,
		    DLIF_PHASH_ZONE_NAME);
		/* NOTREACHED */
	}
	zone_change(dlif_phash_zone, Z_EXPAND, TRUE);
	zone_change(dlif_phash_zone, Z_CALLERACCT, FALSE);

	dlif_proto_size = sizeof (struct if_proto);
	dlif_proto_zone = zinit(dlif_proto_size,
	    DLIF_PROTO_ZONE_MAX * dlif_proto_size, 0, DLIF_PROTO_ZONE_NAME);
	if (dlif_proto_zone == NULL) {
		panic_plain("%s: failed allocating %s", __func__,
		    DLIF_PROTO_ZONE_NAME);
		/* NOTREACHED */
	}
	zone_change(dlif_proto_zone, Z_EXPAND, TRUE);
	zone_change(dlif_proto_zone, Z_CALLERACCT, FALSE);

	dlif_tcpstat_size = sizeof (struct tcpstat_local);
	/* Enforce 64-bit alignment for tcpstat_local structure */
	dlif_tcpstat_bufsize =
	    dlif_tcpstat_size + sizeof (void *) + sizeof (u_int64_t);
	dlif_tcpstat_bufsize =
	    P2ROUNDUP(dlif_tcpstat_bufsize, sizeof (u_int64_t));
	dlif_tcpstat_zone = zinit(dlif_tcpstat_bufsize,
	    DLIF_TCPSTAT_ZONE_MAX * dlif_tcpstat_bufsize, 0,
	    DLIF_TCPSTAT_ZONE_NAME);
	if (dlif_tcpstat_zone == NULL) {
		panic_plain("%s: failed allocating %s", __func__,
		    DLIF_TCPSTAT_ZONE_NAME);
		/* NOTREACHED */
	}
	zone_change(dlif_tcpstat_zone, Z_EXPAND, TRUE);
	zone_change(dlif_tcpstat_zone, Z_CALLERACCT, FALSE);

	dlif_udpstat_size = sizeof (struct udpstat_local);
	/* Enforce 64-bit alignment for udpstat_local structure */
	dlif_udpstat_bufsize =
	    dlif_udpstat_size + sizeof (void *) + sizeof (u_int64_t);
	dlif_udpstat_bufsize =
	    P2ROUNDUP(dlif_udpstat_bufsize, sizeof (u_int64_t));
	dlif_udpstat_zone = zinit(dlif_udpstat_bufsize,
	    DLIF_TCPSTAT_ZONE_MAX * dlif_udpstat_bufsize, 0,
	    DLIF_UDPSTAT_ZONE_NAME);
	if (dlif_udpstat_zone == NULL) {
		panic_plain("%s: failed allocating %s", __func__,
		    DLIF_UDPSTAT_ZONE_NAME);
		/* NOTREACHED */
	}
	zone_change(dlif_udpstat_zone, Z_EXPAND, TRUE);
	zone_change(dlif_udpstat_zone, Z_CALLERACCT, FALSE);

	ifnet_llreach_init();
	eventhandler_lists_ctxt_init(&ifnet_evhdlr_ctxt);

	TAILQ_INIT(&dlil_ifnet_head);
	TAILQ_INIT(&ifnet_head);
	TAILQ_INIT(&ifnet_detaching_head);
	TAILQ_INIT(&ifnet_ordered_head);

	/* Setup the lock groups we will use */
	dlil_grp_attributes = lck_grp_attr_alloc_init();

	dlil_lock_group = lck_grp_alloc_init("DLIL internal locks",
	    dlil_grp_attributes);
	ifnet_lock_group = lck_grp_alloc_init("ifnet locks",
	    dlil_grp_attributes);
	ifnet_head_lock_group = lck_grp_alloc_init("ifnet head lock",
	    dlil_grp_attributes);
	ifnet_rcv_lock_group = lck_grp_alloc_init("ifnet rcv locks",
	    dlil_grp_attributes);
	ifnet_snd_lock_group = lck_grp_alloc_init("ifnet snd locks",
	    dlil_grp_attributes);

	/* Setup the lock attributes we will use */
	dlil_lck_attributes = lck_attr_alloc_init();

	ifnet_lock_attr = lck_attr_alloc_init();

	lck_rw_init(&ifnet_head_lock, ifnet_head_lock_group,
	    dlil_lck_attributes);
	lck_mtx_init(&dlil_ifnet_lock, dlil_lock_group, dlil_lck_attributes);

	/* Setup interface flow control related items */
	lck_mtx_init(&ifnet_fc_lock, dlil_lock_group, dlil_lck_attributes);

	ifnet_fc_zone_size = sizeof (struct ifnet_fc_entry);
	ifnet_fc_zone = zinit(ifnet_fc_zone_size,
	    IFNET_FC_ZONE_MAX * ifnet_fc_zone_size, 0, IFNET_FC_ZONE_NAME);
	if (ifnet_fc_zone == NULL) {
		panic_plain("%s: failed allocating %s", __func__,
		    IFNET_FC_ZONE_NAME);
		/* NOTREACHED */
	}
	zone_change(ifnet_fc_zone, Z_EXPAND, TRUE);
	zone_change(ifnet_fc_zone, Z_CALLERACCT, FALSE);

	/* Initialize interface address subsystem */
	/* Initialize the packet filter */
	/* Initialize queue algorithms */
	/* Initialize packet schedulers */
	/* Initialize flow advisory subsystem */
	/* Initialize the pktap virtual interface */
	/* Initialize the service class to dscp map */

	/* Initialize the interface port list */
	if_ports_used_init();

#if DEBUG || DEVELOPMENT
	/* Run self-tests */
	dlil_verify_sum16();
#endif /* DEBUG || DEVELOPMENT */

	/* Initialize link layer table */
	lltable_glbl_init();

	/*
	 * Create and start up the main DLIL input thread and the interface
	 * detacher threads once everything is initialized.
	 */
	dlil_create_input_thread(NULL, dlil_main_input_thread);

	if (kernel_thread_start(ifnet_detacher_thread_func,
	    NULL, &thread) != KERN_SUCCESS) {
		panic_plain("%s: couldn't create detacher thread", __func__);
		/* NOTREACHED */
	}
	thread_deallocate(thread);
}
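
/*
 * Worked example (illustrative only, not part of the original source):
 * the per-interface stat buffers above reserve room for one pointer and
 * then round the total up to an 8-byte boundary so the structure can be
 * realigned at runtime.  Assuming, hypothetically, that
 * sizeof (struct tcpstat_local) were 196 bytes on LP64:
 *
 *     bufsize = 196 + sizeof (void *) + sizeof (u_int64_t)  = 212
 *     bufsize = P2ROUNDUP(212, sizeof (u_int64_t))          = 216
 *
 * i.e. P2ROUNDUP(x, 8) behaves like ((x + 7) & ~7), so every element
 * handed out by dlif_tcpstat_zone carries at least 8 spare bytes for
 * alignment of the embedded stats structure.
 */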
static void
if_flt_monitor_busy(struct ifnet *ifp)
{
	LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);

	++ifp->if_flt_busy;
	VERIFY(ifp->if_flt_busy != 0);
}

static void
if_flt_monitor_unbusy(struct ifnet *ifp)
{
	if_flt_monitor_leave(ifp);
}

static void
if_flt_monitor_enter(struct ifnet *ifp)
{
	LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);

	while (ifp->if_flt_busy) {
		++ifp->if_flt_waiters;
		(void) msleep(&ifp->if_flt_head, &ifp->if_flt_lock,
		    (PZERO - 1), "if_flt_monitor", NULL);
	}
	if_flt_monitor_busy(ifp);
}

static void
if_flt_monitor_leave(struct ifnet *ifp)
{
	LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);

	VERIFY(ifp->if_flt_busy != 0);
	--ifp->if_flt_busy;

	if (ifp->if_flt_busy == 0 && ifp->if_flt_waiters > 0) {
		ifp->if_flt_waiters = 0;
		wakeup(&ifp->if_flt_head);
	}
}
__private_extern__ int
dlil_attach_filter(struct ifnet *ifp, const struct iff_filter *if_filter,
    interface_filter_t *filter_ref, u_int32_t flags)
{
	int retval = 0;
	struct ifnet_filter *filter = NULL;

	ifnet_head_lock_shared();
	/* Check that the interface is in the global list */
	if (!ifnet_lookup(ifp)) {
		retval = ENXIO;
		goto done;
	}

	filter = zalloc(dlif_filt_zone);
	if (filter == NULL) {
		retval = ENOMEM;
		goto done;
	}
	bzero(filter, dlif_filt_size);

	/* refcnt held above during lookup */
	filter->filt_flags = flags;
	filter->filt_ifp = ifp;
	filter->filt_cookie = if_filter->iff_cookie;
	filter->filt_name = if_filter->iff_name;
	filter->filt_protocol = if_filter->iff_protocol;
	/*
	 * Do not install filter callbacks for internal coproc interface
	 */
	if (!IFNET_IS_INTCOPROC(ifp)) {
		filter->filt_input = if_filter->iff_input;
		filter->filt_output = if_filter->iff_output;
		filter->filt_event = if_filter->iff_event;
		filter->filt_ioctl = if_filter->iff_ioctl;
	}
	filter->filt_detached = if_filter->iff_detached;

	lck_mtx_lock(&ifp->if_flt_lock);
	if_flt_monitor_enter(ifp);

	LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
	TAILQ_INSERT_TAIL(&ifp->if_flt_head, filter, filt_next);

	if_flt_monitor_leave(ifp);
	lck_mtx_unlock(&ifp->if_flt_lock);

	*filter_ref = filter;

	/*
	 * Bump filter count and route_generation ID to let TCP
	 * know it shouldn't do TSO on this connection
	 */
	if ((filter->filt_flags & DLIL_IFF_TSO) == 0) {
		OSAddAtomic(1, &dlil_filter_disable_tso_count);
		routegenid_update();
	}
	OSIncrementAtomic64(&net_api_stats.nas_iflt_attach_count);
	INC_ATOMIC_INT64_LIM(net_api_stats.nas_iflt_attach_total);
	if ((filter->filt_flags & DLIL_IFF_INTERNAL)) {
		INC_ATOMIC_INT64_LIM(net_api_stats.nas_iflt_attach_os_total);
	}
	printf("%s: %s filter attached\n", if_name(ifp),
	    if_filter->iff_name);

done:
	ifnet_head_done();
	if (retval != 0 && ifp != NULL) {
		DLIL_PRINTF("%s: failed to attach %s (err=%d)\n",
		    if_name(ifp), if_filter->iff_name, retval);
	}
	if (retval != 0 && filter != NULL)
		zfree(dlif_filt_zone, filter);

	return (retval);
}
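
/*
 * Illustrative sketch (not part of the original source): a kext would
 * normally reach dlil_attach_filter() through the iflt_attach() KPI.
 * The callback and context names below are hypothetical; only the
 * iff_* fields mirror the structure consumed above.
 *
 *     static errno_t my_iff_input(void *cookie, ifnet_t ifp,
 *         protocol_family_t proto, mbuf_t *data, char **frame_ptr)
 *     {
 *         return (0);             // 0 = pass packet along unmodified
 *     }
 *
 *     struct iff_filter flt = {
 *         .iff_cookie   = &my_state,        // hypothetical context
 *         .iff_name     = "com.example.filter",
 *         .iff_protocol = 0,                // 0 = all protocols
 *         .iff_input    = my_iff_input,
 *         .iff_detached = my_iff_detached,  // hypothetical
 *     };
 *     interface_filter_t ref;
 *     errno_t err = iflt_attach(ifp, &flt, &ref);
 */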
static int
dlil_detach_filter_internal(interface_filter_t filter, int detached)
{
	int retval = 0;

	if (detached == 0) {
		struct ifnet *ifp = NULL;

		ifnet_head_lock_shared();
		TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
			interface_filter_t entry = NULL;

			lck_mtx_lock(&ifp->if_flt_lock);
			TAILQ_FOREACH(entry, &ifp->if_flt_head, filt_next) {
				if (entry != filter || entry->filt_skip)
					continue;
				/*
				 * We've found a match; since it's possible
				 * that the thread gets blocked in the monitor,
				 * we do the lock dance.  Interface should
				 * not be detached since we still have a use
				 * count held during filter attach.
				 */
				entry->filt_skip = 1;	/* skip input/output */
				lck_mtx_unlock(&ifp->if_flt_lock);
				ifnet_head_done();

				lck_mtx_lock(&ifp->if_flt_lock);
				if_flt_monitor_enter(ifp);
				LCK_MTX_ASSERT(&ifp->if_flt_lock,
				    LCK_MTX_ASSERT_OWNED);

				/* Remove the filter from the list */
				TAILQ_REMOVE(&ifp->if_flt_head, filter,
				    filt_next);

				if_flt_monitor_leave(ifp);
				lck_mtx_unlock(&ifp->if_flt_lock);
				printf("%s: %s filter detached\n",
				    if_name(ifp), filter->filt_name);
				goto destroy;
			}
			lck_mtx_unlock(&ifp->if_flt_lock);
		}
		ifnet_head_done();

		/* filter parameter is not a valid filter ref */
		retval = EINVAL;
		goto done;
	}

	printf("%s filter detached\n", filter->filt_name);

destroy:
	/* Call the detached function if there is one */
	if (filter->filt_detached)
		filter->filt_detached(filter->filt_cookie, filter->filt_ifp);

	/*
	 * Decrease filter count and route_generation ID to let TCP
	 * know it should reevalute doing TSO or not
	 */
	if ((filter->filt_flags & DLIL_IFF_TSO) == 0) {
		OSAddAtomic(-1, &dlil_filter_disable_tso_count);
		routegenid_update();
	}

	VERIFY(OSDecrementAtomic64(&net_api_stats.nas_iflt_attach_count) > 0);

	/* Free the filter */
	zfree(dlif_filt_zone, filter);

done:
	if (retval != 0 && filter != NULL) {
		DLIL_PRINTF("failed to detach %s filter (err=%d)\n",
		    filter->filt_name, retval);
	}

	return (retval);
}

__private_extern__ void
dlil_detach_filter(interface_filter_t filter)
{
	if (filter == NULL)
		return;
	dlil_detach_filter_internal(filter, 0);
}
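
/*
 * Illustrative sketch (not part of the original source): the filter
 * reference returned at attach time is what a kext later hands back,
 * typically through the iflt_detach() KPI, which funnels into
 * dlil_detach_filter() above:
 *
 *     iflt_detach(ref);      // ref obtained from iflt_attach()
 *
 * The filt_detached callback, if any, runs before the filter memory is
 * returned to dlif_filt_zone.
 */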
/*
 * Main input thread:
 *
 *   a) handles all inbound packets for lo0
 *   b) handles all inbound packets for interfaces with no dedicated
 *      input thread (e.g. anything but Ethernet/PDP or those that support
 *      opportunistic polling.)
 *   c) protocol registrations
 *   d) packet injections
 */
__attribute__((noreturn))
static void
dlil_main_input_thread_func(void *v, wait_result_t w)
{
#pragma unused(w)
	struct dlil_main_threading_info *inpm = v;
	struct dlil_threading_info *inp = v;

	VERIFY(inp == dlil_main_input_thread);
	VERIFY(inp->ifp == NULL);
	VERIFY(inp->mode == IFNET_MODEL_INPUT_POLL_OFF);

	while (1) {
		struct mbuf *m = NULL, *m_loop = NULL;
		u_int32_t m_cnt, m_cnt_loop;
		boolean_t proto_req;

		lck_mtx_lock_spin(&inp->input_lck);

		/* Wait until there is work to be done */
		while (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) {
			inp->input_waiting &= ~DLIL_INPUT_RUNNING;
			(void) msleep(&inp->input_waiting, &inp->input_lck,
			    (PZERO - 1) | PSPIN, inp->input_name, NULL);
		}

		inp->input_waiting |= DLIL_INPUT_RUNNING;
		inp->input_waiting &= ~DLIL_INPUT_WAITING;

		/* Main input thread cannot be terminated */
		VERIFY(!(inp->input_waiting & DLIL_INPUT_TERMINATE));

		proto_req = (inp->input_waiting &
		    (DLIL_PROTO_WAITING | DLIL_PROTO_REGISTER));

		/* Packets for non-dedicated interfaces other than lo0 */
		m_cnt = qlen(&inp->rcvq_pkts);
		m = _getq_all(&inp->rcvq_pkts, NULL, NULL, NULL);

		/* Packets exclusive to lo0 */
		m_cnt_loop = qlen(&inpm->lo_rcvq_pkts);
		m_loop = _getq_all(&inpm->lo_rcvq_pkts, NULL, NULL, NULL);

		lck_mtx_unlock(&inp->input_lck);

		/*
		 * NOTE warning %%% attention !!!!
		 * We should think about putting some thread starvation
		 * safeguards if we deal with long chains of packets.
		 */
		if (m_loop != NULL)
			dlil_input_packet_list_extended(lo_ifp, m_loop,
			    m_cnt_loop, inp->mode);

		if (m != NULL)
			dlil_input_packet_list_extended(NULL, m,
			    m_cnt, inp->mode);

		if (proto_req)
			proto_input_run();
	}

	/* NOTREACHED */
	VERIFY(0);	/* we should never get here */
}
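
/*
 * Illustrative sketch (not part of the original source): the producer
 * side of the handshake above.  After queueing packets onto rcvq_pkts
 * (see dlil_input_handler() later in this file), a caller holding
 * input_lck signals the input thread roughly like this:
 *
 *     inp->input_waiting |= DLIL_INPUT_WAITING;
 *     if (!(inp->input_waiting & DLIL_INPUT_RUNNING))
 *         wakeup_one((caddr_t)&inp->input_waiting);
 *
 * The RUNNING bit keeps redundant wakeups from being posted while the
 * thread is already draining its queues.
 */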
/*
 * Input thread for interfaces with legacy input model.
 */
static void
dlil_input_thread_func(void *v, wait_result_t w)
{
#pragma unused(w)
	char thread_name[MAXTHREADNAMESIZE];
	struct dlil_threading_info *inp = v;
	struct ifnet *ifp = inp->ifp;

	/* Construct the name for this thread, and then apply it. */
	bzero(thread_name, sizeof(thread_name));
	snprintf(thread_name, sizeof(thread_name), "dlil_input_%s", ifp->if_xname);
	thread_set_thread_name(inp->input_thr, thread_name);

	VERIFY(inp != dlil_main_input_thread);
	VERIFY(ifp != NULL);
	VERIFY(!(ifp->if_eflags & IFEF_RXPOLL) || !net_rxpoll);
	VERIFY(inp->mode == IFNET_MODEL_INPUT_POLL_OFF);

	while (1) {
		struct mbuf *m = NULL;
		u_int32_t m_cnt;

		lck_mtx_lock_spin(&inp->input_lck);

		/* Wait until there is work to be done */
		while (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) {
			inp->input_waiting &= ~DLIL_INPUT_RUNNING;
			(void) msleep(&inp->input_waiting, &inp->input_lck,
			    (PZERO - 1) | PSPIN, inp->input_name, NULL);
		}

		inp->input_waiting |= DLIL_INPUT_RUNNING;
		inp->input_waiting &= ~DLIL_INPUT_WAITING;

		/*
		 * Protocol registration and injection must always use
		 * the main input thread; in theory the latter can utilize
		 * the corresponding input thread where the packet arrived
		 * on, but that requires our knowing the interface in advance
		 * (and the benefits might not worth the trouble.)
		 */
		VERIFY(!(inp->input_waiting &
		    (DLIL_PROTO_WAITING|DLIL_PROTO_REGISTER)));

		/* Packets for this interface */
		m_cnt = qlen(&inp->rcvq_pkts);
		m = _getq_all(&inp->rcvq_pkts, NULL, NULL, NULL);

		if (inp->input_waiting & DLIL_INPUT_TERMINATE) {
			lck_mtx_unlock(&inp->input_lck);

			/* Free up pending packets */
			if (m != NULL)
				mbuf_freem_list(m);

			dlil_terminate_input_thread(inp);
			/* NOTREACHED */
			return;
		}

		dlil_input_stats_sync(ifp, inp);

		lck_mtx_unlock(&inp->input_lck);

		/*
		 * NOTE warning %%% attention !!!!
		 * We should think about putting some thread starvation
		 * safeguards if we deal with long chains of packets.
		 */
		if (m != NULL)
			dlil_input_packet_list_extended(NULL, m,
			    m_cnt, inp->mode);
	}

	/* NOTREACHED */
	VERIFY(0);	/* we should never get here */
}
2117 * Input thread for interfaces with opportunistic polling input model.
2120 dlil_rxpoll_input_thread_func(void *v
, wait_result_t w
)
2123 struct dlil_threading_info
*inp
= v
;
2124 struct ifnet
*ifp
= inp
->ifp
;
2127 VERIFY(inp
!= dlil_main_input_thread
);
2128 VERIFY(ifp
!= NULL
&& (ifp
->if_eflags
& IFEF_RXPOLL
));
2131 struct mbuf
*m
= NULL
;
2132 u_int32_t m_cnt
, m_size
, poll_req
= 0;
2134 struct timespec now
, delta
;
2137 lck_mtx_lock_spin(&inp
->input_lck
);
2139 if ((ival
= inp
->rxpoll_ival
) < IF_RXPOLL_INTERVALTIME_MIN
)
2140 ival
= IF_RXPOLL_INTERVALTIME_MIN
;
2142 /* Link parameters changed? */
2143 if (ifp
->if_poll_update
!= 0) {
2144 ifp
->if_poll_update
= 0;
2145 (void) dlil_rxpoll_set_params(ifp
, NULL
, TRUE
);
2148 /* Current operating mode */
2151 /* Wait until there is work to be done */
2152 while (!(inp
->input_waiting
& ~DLIL_INPUT_RUNNING
)) {
2153 inp
->input_waiting
&= ~DLIL_INPUT_RUNNING
;
2154 (void) msleep(&inp
->input_waiting
, &inp
->input_lck
,
2155 (PZERO
- 1) | PSPIN
, inp
->input_name
, NULL
);
2158 inp
->input_waiting
|= DLIL_INPUT_RUNNING
;
2159 inp
->input_waiting
&= ~DLIL_INPUT_WAITING
;
2162 * Protocol registration and injection must always use
2163 * the main input thread; in theory the latter can utilize
2164 * the corresponding input thread where the packet arrived
2165 * on, but that requires our knowing the interface in advance
2166 * (and the benefits might not worth the trouble.)
2168 VERIFY(!(inp
->input_waiting
&
2169 (DLIL_PROTO_WAITING
|DLIL_PROTO_REGISTER
)));
2171 if (inp
->input_waiting
& DLIL_INPUT_TERMINATE
) {
2172 /* Free up pending packets */
2173 lck_mtx_convert_spin(&inp
->input_lck
);
2174 _flushq(&inp
->rcvq_pkts
);
2175 if (inp
->input_mit_tcall
!= NULL
) {
2176 if (thread_call_isactive(inp
->input_mit_tcall
))
2177 thread_call_cancel(inp
->input_mit_tcall
);
2179 lck_mtx_unlock(&inp
->input_lck
);
2181 dlil_terminate_input_thread(inp
);
2186 /* Total count of all packets */
2187 m_cnt
= qlen(&inp
->rcvq_pkts
);
2189 /* Total bytes of all packets */
2190 m_size
= qsize(&inp
->rcvq_pkts
);
2192 /* Packets for this interface */
2193 m
= _getq_all(&inp
->rcvq_pkts
, NULL
, NULL
, NULL
);
2194 VERIFY(m
!= NULL
|| m_cnt
== 0);
2197 if (!net_timerisset(&inp
->sample_lasttime
))
2198 *(&inp
->sample_lasttime
) = *(&now
);
2200 net_timersub(&now
, &inp
->sample_lasttime
, &delta
);
2201 if (if_rxpoll
&& net_timerisset(&inp
->sample_holdtime
)) {
2202 u_int32_t ptot
, btot
;
2204 /* Accumulate statistics for current sampling */
2205 PKTCNTR_ADD(&inp
->sstats
, m_cnt
, m_size
);
2207 if (net_timercmp(&delta
, &inp
->sample_holdtime
, <))
2210 *(&inp
->sample_lasttime
) = *(&now
);
2212 /* Calculate min/max of inbound bytes */
2213 btot
= (u_int32_t
)inp
->sstats
.bytes
;
2214 if (inp
->rxpoll_bmin
== 0 || inp
->rxpoll_bmin
> btot
)
2215 inp
->rxpoll_bmin
= btot
;
2216 if (btot
> inp
->rxpoll_bmax
)
2217 inp
->rxpoll_bmax
= btot
;
2219 /* Calculate EWMA of inbound bytes */
2220 DLIL_EWMA(inp
->rxpoll_bavg
, btot
, if_rxpoll_decay
);
2222 /* Calculate min/max of inbound packets */
2223 ptot
= (u_int32_t
)inp
->sstats
.packets
;
2224 if (inp
->rxpoll_pmin
== 0 || inp
->rxpoll_pmin
> ptot
)
2225 inp
->rxpoll_pmin
= ptot
;
2226 if (ptot
> inp
->rxpoll_pmax
)
2227 inp
->rxpoll_pmax
= ptot
;
2229 /* Calculate EWMA of inbound packets */
2230 DLIL_EWMA(inp
->rxpoll_pavg
, ptot
, if_rxpoll_decay
);
2232 /* Reset sampling statistics */
2233 PKTCNTR_CLEAR(&inp
->sstats
);
2235 /* Calculate EWMA of wakeup requests */
2236 DLIL_EWMA(inp
->rxpoll_wavg
, inp
->wtot
, if_rxpoll_decay
);
2240 if (!net_timerisset(&inp
->dbg_lasttime
))
2241 *(&inp
->dbg_lasttime
) = *(&now
);
2242 net_timersub(&now
, &inp
->dbg_lasttime
, &delta
);
2243 if (net_timercmp(&delta
, &dlil_dbgrate
, >=)) {
2244 *(&inp
->dbg_lasttime
) = *(&now
);
2245 printf("%s: [%s] pkts avg %d max %d "
2246 "limits [%d/%d], wreq avg %d "
2247 "limits [%d/%d], bytes avg %d "
2248 "limits [%d/%d]\n", if_name(ifp
),
2250 IFNET_MODEL_INPUT_POLL_ON
) ?
2251 "ON" : "OFF", inp
->rxpoll_pavg
,
2260 inp
->rxpoll_bhiwat
);
2264 /* Perform mode transition, if necessary */
2265 if (!net_timerisset(&inp
->mode_lasttime
))
2266 *(&inp
->mode_lasttime
) = *(&now
);
2268 net_timersub(&now
, &inp
->mode_lasttime
, &delta
);
2269 if (net_timercmp(&delta
, &inp
->mode_holdtime
, <))
2272 if (inp
->rxpoll_pavg
<= inp
->rxpoll_plowat
&&
2273 inp
->rxpoll_bavg
<= inp
->rxpoll_blowat
&&
2274 inp
->mode
!= IFNET_MODEL_INPUT_POLL_OFF
) {
2275 mode
= IFNET_MODEL_INPUT_POLL_OFF
;
2276 } else if (inp
->rxpoll_pavg
>= inp
->rxpoll_phiwat
&&
2277 (inp
->rxpoll_bavg
>= inp
->rxpoll_bhiwat
||
2278 inp
->rxpoll_wavg
>= inp
->rxpoll_whiwat
) &&
2279 inp
->mode
!= IFNET_MODEL_INPUT_POLL_ON
) {
2280 mode
= IFNET_MODEL_INPUT_POLL_ON
;
2283 if (mode
!= inp
->mode
) {
2285 *(&inp
->mode_lasttime
) = *(&now
);
2290 dlil_input_stats_sync(ifp
, inp
);
2292 lck_mtx_unlock(&inp
->input_lck
);
2295 * If there's a mode change and interface is still attached,
2296 * perform a downcall to the driver for the new mode. Also
2297 * hold an IO refcnt on the interface to prevent it from
2298 * being detached (will be release below.)
2300 if (poll_req
!= 0 && ifnet_is_attached(ifp
, 1)) {
2301 struct ifnet_model_params p
= { mode
, { 0 } };
2305 printf("%s: polling is now %s, "
2306 "pkts avg %d max %d limits [%d/%d], "
2307 "wreq avg %d limits [%d/%d], "
2308 "bytes avg %d limits [%d/%d]\n",
2310 (mode
== IFNET_MODEL_INPUT_POLL_ON
) ?
2311 "ON" : "OFF", inp
->rxpoll_pavg
,
2312 inp
->rxpoll_pmax
, inp
->rxpoll_plowat
,
2313 inp
->rxpoll_phiwat
, inp
->rxpoll_wavg
,
2314 inp
->rxpoll_wlowat
, inp
->rxpoll_whiwat
,
2315 inp
->rxpoll_bavg
, inp
->rxpoll_blowat
,
2316 inp
->rxpoll_bhiwat
);
2319 if ((err
= ((*ifp
->if_input_ctl
)(ifp
,
2320 IFNET_CTL_SET_INPUT_MODEL
, sizeof (p
), &p
))) != 0) {
2321 printf("%s: error setting polling mode "
2322 "to %s (%d)\n", if_name(ifp
),
2323 (mode
== IFNET_MODEL_INPUT_POLL_ON
) ?
2328 case IFNET_MODEL_INPUT_POLL_OFF
:
2329 ifnet_set_poll_cycle(ifp
, NULL
);
2330 inp
->rxpoll_offreq
++;
2332 inp
->rxpoll_offerr
++;
2335 case IFNET_MODEL_INPUT_POLL_ON
:
2336 net_nsectimer(&ival
, &ts
);
2337 ifnet_set_poll_cycle(ifp
, &ts
);
2339 inp
->rxpoll_onreq
++;
2341 inp
->rxpoll_onerr
++;
2349 /* Release the IO refcnt */
2350 ifnet_decr_iorefcnt(ifp
);
2354 * NOTE warning %%% attention !!!!
2355 * We should think about putting some thread starvation
2356 * safeguards if we deal with long chains of packets.
2359 dlil_input_packet_list_extended(NULL
, m
, m_cnt
, mode
);
2363 VERIFY(0); /* we should never get here */
2367 * Must be called on an attached ifnet (caller is expected to check.)
2368 * Caller may pass NULL for poll parameters to indicate "auto-tuning."
2371 dlil_rxpoll_set_params(struct ifnet
*ifp
, struct ifnet_poll_params
*p
,
2374 struct dlil_threading_info
*inp
;
2375 u_int64_t sample_holdtime
, inbw
;
2377 VERIFY(ifp
!= NULL
);
2378 if (!(ifp
->if_eflags
& IFEF_RXPOLL
) || (inp
= ifp
->if_inp
) == NULL
)
2382 if ((p
->packets_lowat
== 0 && p
->packets_hiwat
!= 0) ||
2383 (p
->packets_lowat
!= 0 && p
->packets_hiwat
== 0))
2385 if (p
->packets_lowat
!= 0 && /* hiwat must be non-zero */
2386 p
->packets_lowat
>= p
->packets_hiwat
)
2388 if ((p
->bytes_lowat
== 0 && p
->bytes_hiwat
!= 0) ||
2389 (p
->bytes_lowat
!= 0 && p
->bytes_hiwat
== 0))
2391 if (p
->bytes_lowat
!= 0 && /* hiwat must be non-zero */
2392 p
->bytes_lowat
>= p
->bytes_hiwat
)
2394 if (p
->interval_time
!= 0 &&
2395 p
->interval_time
< IF_RXPOLL_INTERVALTIME_MIN
)
2396 p
->interval_time
= IF_RXPOLL_INTERVALTIME_MIN
;
2400 lck_mtx_lock(&inp
->input_lck
);
2402 LCK_MTX_ASSERT(&inp
->input_lck
, LCK_MTX_ASSERT_OWNED
);
2405 * Normally, we'd reset the parameters to the auto-tuned values
2406 * if the the input thread detects a change in link rate. If the
2407 * driver provides its own parameters right after a link rate
2408 * changes, but before the input thread gets to run, we want to
2409 * make sure to keep the driver's values. Clearing if_poll_update
2410 * will achieve that.
2412 if (p
!= NULL
&& !locked
&& ifp
->if_poll_update
!= 0)
2413 ifp
->if_poll_update
= 0;
2415 if ((inbw
= ifnet_input_linkrate(ifp
)) == 0 && p
== NULL
) {
2416 sample_holdtime
= 0; /* polling is disabled */
2417 inp
->rxpoll_wlowat
= inp
->rxpoll_plowat
=
2418 inp
->rxpoll_blowat
= 0;
2419 inp
->rxpoll_whiwat
= inp
->rxpoll_phiwat
=
2420 inp
->rxpoll_bhiwat
= (u_int32_t
)-1;
2421 inp
->rxpoll_plim
= 0;
2422 inp
->rxpoll_ival
= IF_RXPOLL_INTERVALTIME_MIN
;
2424 u_int32_t plowat
, phiwat
, blowat
, bhiwat
, plim
;
2428 for (n
= 0, i
= 0; rxpoll_tbl
[i
].speed
!= 0; i
++) {
2429 if (inbw
< rxpoll_tbl
[i
].speed
)
2433 /* auto-tune if caller didn't specify a value */
2434 plowat
= ((p
== NULL
|| p
->packets_lowat
== 0) ?
2435 rxpoll_tbl
[n
].plowat
: p
->packets_lowat
);
2436 phiwat
= ((p
== NULL
|| p
->packets_hiwat
== 0) ?
2437 rxpoll_tbl
[n
].phiwat
: p
->packets_hiwat
);
2438 blowat
= ((p
== NULL
|| p
->bytes_lowat
== 0) ?
2439 rxpoll_tbl
[n
].blowat
: p
->bytes_lowat
);
2440 bhiwat
= ((p
== NULL
|| p
->bytes_hiwat
== 0) ?
2441 rxpoll_tbl
[n
].bhiwat
: p
->bytes_hiwat
);
2442 plim
= ((p
== NULL
|| p
->packets_limit
== 0) ?
2443 if_rxpoll_max
: p
->packets_limit
);
2444 ival
= ((p
== NULL
|| p
->interval_time
== 0) ?
2445 if_rxpoll_interval_time
: p
->interval_time
);
2447 VERIFY(plowat
!= 0 && phiwat
!= 0);
2448 VERIFY(blowat
!= 0 && bhiwat
!= 0);
2449 VERIFY(ival
>= IF_RXPOLL_INTERVALTIME_MIN
);
2451 sample_holdtime
= if_rxpoll_sample_holdtime
;
2452 inp
->rxpoll_wlowat
= if_rxpoll_wlowat
;
2453 inp
->rxpoll_whiwat
= if_rxpoll_whiwat
;
2454 inp
->rxpoll_plowat
= plowat
;
2455 inp
->rxpoll_phiwat
= phiwat
;
2456 inp
->rxpoll_blowat
= blowat
;
2457 inp
->rxpoll_bhiwat
= bhiwat
;
2458 inp
->rxpoll_plim
= plim
;
2459 inp
->rxpoll_ival
= ival
;
2462 net_nsectimer(&if_rxpoll_mode_holdtime
, &inp
->mode_holdtime
);
2463 net_nsectimer(&sample_holdtime
, &inp
->sample_holdtime
);
2466 printf("%s: speed %llu bps, sample per %llu nsec, "
2467 "poll interval %llu nsec, pkts per poll %u, "
2468 "pkt limits [%u/%u], wreq limits [%u/%u], "
2469 "bytes limits [%u/%u]\n", if_name(ifp
),
2470 inbw
, sample_holdtime
, inp
->rxpoll_ival
, inp
->rxpoll_plim
,
2471 inp
->rxpoll_plowat
, inp
->rxpoll_phiwat
, inp
->rxpoll_wlowat
,
2472 inp
->rxpoll_whiwat
, inp
->rxpoll_blowat
, inp
->rxpoll_bhiwat
);
2476 lck_mtx_unlock(&inp
->input_lck
);
/*
 * Must be called on an attached ifnet (caller is expected to check.)
 */
errno_t
dlil_rxpoll_get_params(struct ifnet *ifp, struct ifnet_poll_params *p)
{
	struct dlil_threading_info *inp;

	VERIFY(ifp != NULL && p != NULL);
	if (!(ifp->if_eflags & IFEF_RXPOLL) || (inp = ifp->if_inp) == NULL)
		return (ENXIO);

	bzero(p, sizeof (*p));

	lck_mtx_lock(&inp->input_lck);
	p->packets_limit = inp->rxpoll_plim;
	p->packets_lowat = inp->rxpoll_plowat;
	p->packets_hiwat = inp->rxpoll_phiwat;
	p->bytes_lowat = inp->rxpoll_blowat;
	p->bytes_hiwat = inp->rxpoll_bhiwat;
	p->interval_time = inp->rxpoll_ival;
	lck_mtx_unlock(&inp->input_lck);

	return (0);
}
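
/*
 * Illustrative sketch (not part of the original source): a driver that
 * supports opportunistic polling can override the auto-tuned thresholds
 * read back above.  Zeroed fields mean "keep auto-tuning"; the wrapper
 * KPI named here is an assumption about how drivers reach
 * dlil_rxpoll_set_params().
 *
 *     struct ifnet_poll_params p = {
 *         .packets_lowat = 8,        // fall back to interrupts below this
 *         .packets_hiwat = 64,       // switch to polling above this
 *         .interval_time = 1000000,  // 1 ms poll cycle, in nanoseconds
 *     };
 *     (void) ifnet_set_poll_params(ifp, &p);
 */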
errno_t
ifnet_input(struct ifnet *ifp, struct mbuf *m_head,
    const struct ifnet_stat_increment_param *s)
{
	return (ifnet_input_common(ifp, m_head, NULL, s, FALSE, FALSE));
}

errno_t
ifnet_input_extended(struct ifnet *ifp, struct mbuf *m_head,
    struct mbuf *m_tail, const struct ifnet_stat_increment_param *s)
{
	return (ifnet_input_common(ifp, m_head, m_tail, s, TRUE, FALSE));
}

static errno_t
ifnet_input_common(struct ifnet *ifp, struct mbuf *m_head, struct mbuf *m_tail,
    const struct ifnet_stat_increment_param *s, boolean_t ext, boolean_t poll)
{
	dlil_input_func input_func;
	struct ifnet_stat_increment_param _s;
	u_int32_t m_cnt = 0, m_size = 0;
	struct mbuf *last;
	errno_t err = 0;

	if ((m_head == NULL && !poll) || (s == NULL && ext)) {
		if (m_head != NULL)
			mbuf_freem_list(m_head);
		return (EINVAL);
	}

	VERIFY(m_head != NULL || (s == NULL && m_tail == NULL && !ext && poll));
	VERIFY(m_tail == NULL || ext);
	VERIFY(s != NULL || !ext);

	/*
	 * Drop the packet(s) if the parameters are invalid, or if the
	 * interface is no longer attached; else hold an IO refcnt to
	 * prevent it from being detached (will be released below.)
	 */
	if (ifp == NULL || (ifp != lo_ifp && !ifnet_is_attached(ifp, 1))) {
		if (m_head != NULL)
			mbuf_freem_list(m_head);
		return (EINVAL);
	}

	input_func = ifp->if_input_dlil;
	VERIFY(input_func != NULL);

	if (m_tail == NULL) {
		last = m_head;
		while (m_head != NULL) {
#if IFNET_INPUT_SANITY_CHK
			if (dlil_input_sanity_check != 0)
				DLIL_INPUT_CHECK(last, ifp);
#endif /* IFNET_INPUT_SANITY_CHK */
			m_cnt++;
			m_size += m_length(last);
			if (mbuf_nextpkt(last) == NULL)
				break;
			last = mbuf_nextpkt(last);
		}
		m_tail = last;
	} else {
#if IFNET_INPUT_SANITY_CHK
		if (dlil_input_sanity_check != 0) {
			last = m_head;
			while (1) {
				DLIL_INPUT_CHECK(last, ifp);
				m_cnt++;
				m_size += m_length(last);
				if (mbuf_nextpkt(last) == NULL)
					break;
				last = mbuf_nextpkt(last);
			}
		} else {
			m_cnt = s->packets_in;
			m_size = s->bytes_in;
			last = m_tail;
		}
#else
		m_cnt = s->packets_in;
		m_size = s->bytes_in;
		last = m_tail;
#endif /* IFNET_INPUT_SANITY_CHK */
	}

	if (last != m_tail) {
		panic_plain("%s: invalid input packet chain for %s, "
		    "tail mbuf %p instead of %p\n", __func__, if_name(ifp),
		    m_tail, last);
	}

	/*
	 * Assert packet count only for the extended variant, for backwards
	 * compatibility, since this came directly from the device driver.
	 * Relax this assertion for input bytes, as the driver may have
	 * included the link-layer headers in the computation; hence
	 * m_size is just an approximation.
	 */
	if (ext && s->packets_in != m_cnt) {
		panic_plain("%s: input packet count mismatch for %s, "
		    "%d instead of %d\n", __func__, if_name(ifp),
		    s->packets_in, m_cnt);
	}

	if (s == NULL) {
		bzero(&_s, sizeof (_s));
		s = &_s;
	} else {
		_s = *s;
	}
	_s.packets_in = m_cnt;
	_s.bytes_in = m_size;

	err = (*input_func)(ifp, m_head, m_tail, s, poll, current_thread());

	if (ifp != lo_ifp) {
		/* Release the IO refcnt */
		ifnet_decr_iorefcnt(ifp);
	}

	return (err);
}
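
/*
 * Illustrative sketch (not part of the original source): a typical
 * driver RX completion path hands a chain of packets to the stack with
 * per-chain statistics.  Names other than the KPI itself are made up.
 *
 *     struct ifnet_stat_increment_param s;
 *     bzero(&s, sizeof (s));
 *     s.packets_in = chain_cnt;    // must match the chain length
 *     s.bytes_in   = chain_bytes;
 *     (void) ifnet_input_extended(ifp, chain_head, chain_tail, &s);
 *
 * ifnet_input() is the simpler form: no tail pointer, and the counts
 * are derived by walking the chain inside ifnet_input_common().
 */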
2633 dlil_output_handler(struct ifnet
*ifp
, struct mbuf
*m
)
2635 return (ifp
->if_output(ifp
, m
));
2639 dlil_input_handler(struct ifnet
*ifp
, struct mbuf
*m_head
,
2640 struct mbuf
*m_tail
, const struct ifnet_stat_increment_param
*s
,
2641 boolean_t poll
, struct thread
*tp
)
2643 struct dlil_threading_info
*inp
;
2644 u_int32_t m_cnt
= s
->packets_in
;
2645 u_int32_t m_size
= s
->bytes_in
;
2647 if ((inp
= ifp
->if_inp
) == NULL
)
2648 inp
= dlil_main_input_thread
;
2651 * If there is a matching DLIL input thread associated with an
2652 * affinity set, associate this thread with the same set. We
2653 * will only do this once.
2655 lck_mtx_lock_spin(&inp
->input_lck
);
2656 if (inp
!= dlil_main_input_thread
&& inp
->net_affinity
&& tp
!= NULL
&&
2657 ((!poll
&& inp
->wloop_thr
== THREAD_NULL
) ||
2658 (poll
&& inp
->poll_thr
== THREAD_NULL
))) {
2659 u_int32_t tag
= inp
->tag
;
2662 VERIFY(inp
->poll_thr
== THREAD_NULL
);
2665 VERIFY(inp
->wloop_thr
== THREAD_NULL
);
2666 inp
->wloop_thr
= tp
;
2668 lck_mtx_unlock(&inp
->input_lck
);
2670 /* Associate the current thread with the new affinity tag */
2671 (void) dlil_affinity_set(tp
, tag
);
2674 * Take a reference on the current thread; during detach,
2675 * we will need to refer to it in order to tear down its
2678 thread_reference(tp
);
2679 lck_mtx_lock_spin(&inp
->input_lck
);
2682 VERIFY(m_head
!= NULL
|| (m_tail
== NULL
&& m_cnt
== 0));
2685 * Because of loopbacked multicast we cannot stuff the ifp in
2686 * the rcvif of the packet header: loopback (lo0) packets use a
2687 * dedicated list so that we can later associate them with lo_ifp
2688 * on their way up the stack. Packets for other interfaces without
2689 * dedicated input threads go to the regular list.
2691 if (m_head
!= NULL
) {
2692 if (inp
== dlil_main_input_thread
&& ifp
== lo_ifp
) {
2693 struct dlil_main_threading_info
*inpm
=
2694 (struct dlil_main_threading_info
*)inp
;
2695 _addq_multi(&inpm
->lo_rcvq_pkts
, m_head
, m_tail
,
2698 _addq_multi(&inp
->rcvq_pkts
, m_head
, m_tail
,
2703 #if IFNET_INPUT_SANITY_CHK
2704 if (dlil_input_sanity_check
!= 0) {
2708 for (m0
= m_head
, count
= 0; m0
; m0
= mbuf_nextpkt(m0
))
2711 if (count
!= m_cnt
) {
2712 panic_plain("%s: invalid packet count %d "
2713 "(expected %d)\n", if_name(ifp
),
2718 inp
->input_mbuf_cnt
+= m_cnt
;
2720 #endif /* IFNET_INPUT_SANITY_CHK */
2722 dlil_input_stats_add(s
, inp
, poll
);
2724 * If we're using the main input thread, synchronize the
2725 * stats now since we have the interface context. All
2726 * other cases involving dedicated input threads will
2727 * have their stats synchronized there.
2729 if (inp
== dlil_main_input_thread
)
2730 dlil_input_stats_sync(ifp
, inp
);
2732 if (inp
->input_mit_tcall
&&
2733 qlen(&inp
->rcvq_pkts
) >= dlil_rcv_mit_pkts_min
&&
2734 qlen(&inp
->rcvq_pkts
) < dlil_rcv_mit_pkts_max
&&
2735 (ifp
->if_family
== IFNET_FAMILY_ETHERNET
||
2736 ifp
->if_type
== IFT_CELLULAR
)
2738 if (!thread_call_isactive(inp
->input_mit_tcall
)) {
2740 clock_interval_to_deadline(dlil_rcv_mit_interval
,
2742 (void) thread_call_enter_delayed(
2743 inp
->input_mit_tcall
, deadline
);
2746 inp
->input_waiting
|= DLIL_INPUT_WAITING
;
2747 if (!(inp
->input_waiting
& DLIL_INPUT_RUNNING
)) {
2749 wakeup_one((caddr_t
)&inp
->input_waiting
);
2752 lck_mtx_unlock(&inp
->input_lck
);
static void
ifnet_start_common(struct ifnet *ifp, boolean_t resetfc)
{
	if (!(ifp->if_eflags & IFEF_TXSTART))
		return;
	/*
	 * If the starter thread is inactive, signal it to do work,
	 * unless the interface is being flow controlled from below,
	 * e.g. a virtual interface being flow controlled by a real
	 * network interface beneath it, or it's been disabled via
	 * a call to ifnet_disable_output().
	 */
	lck_mtx_lock_spin(&ifp->if_start_lock);
	if (resetfc) {
		ifp->if_start_flags &= ~IFSF_FLOW_CONTROLLED;
	} else if (ifp->if_start_flags & IFSF_FLOW_CONTROLLED) {
		lck_mtx_unlock(&ifp->if_start_lock);
		return;
	}
	ifp->if_start_req++;
	if (!ifp->if_start_active && ifp->if_start_thread != THREAD_NULL &&
	    (resetfc || !(ifp->if_eflags & IFEF_ENQUEUE_MULTI) ||
	    IFCQ_LEN(&ifp->if_snd) >= ifp->if_start_delay_qlen ||
	    ifp->if_start_delayed == 0)) {
		(void) thread_wakeup_thread((caddr_t)&ifp->if_start_thread,
		    ifp->if_start_thread);
	}
	lck_mtx_unlock(&ifp->if_start_lock);
}

void
ifnet_start(struct ifnet *ifp)
{
	ifnet_start_common(ifp, FALSE);
}
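
/*
 * Illustrative sketch (not part of the original source): for an
 * IFEF_TXSTART interface the driver registers a start callback;
 * ifnet_start() above only pokes the starter thread, which in turn
 * invokes that callback.  A minimal callback would drain the send
 * queue with the dequeue KPIs defined later in this file (the callback
 * name below is hypothetical):
 *
 *     static void my_if_start(struct ifnet *ifp)
 *     {
 *         struct mbuf *m;
 *
 *         while (ifnet_dequeue(ifp, &m) == 0) {
 *             // hand m to the hardware TX ring here
 *         }
 *     }
 */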
2795 ifnet_start_thread_fn(void *v
, wait_result_t w
)
2798 struct ifnet
*ifp
= v
;
2799 char ifname
[IFNAMSIZ
+ 1];
2800 char thread_name
[MAXTHREADNAMESIZE
];
2801 struct timespec
*ts
= NULL
;
2802 struct ifclassq
*ifq
= &ifp
->if_snd
;
2803 struct timespec delay_start_ts
;
2805 /* Construct the name for this thread, and then apply it. */
2806 bzero(thread_name
, sizeof(thread_name
));
2807 (void) snprintf(thread_name
, sizeof (thread_name
),
2808 "ifnet_start_%s", ifp
->if_xname
);
2809 thread_set_thread_name(ifp
->if_start_thread
, thread_name
);
2812 * Treat the dedicated starter thread for lo0 as equivalent to
2813 * the driver workloop thread; if net_affinity is enabled for
2814 * the main input thread, associate this starter thread to it
2815 * by binding them with the same affinity tag. This is done
2816 * only once (as we only have one lo_ifp which never goes away.)
2818 if (ifp
== lo_ifp
) {
2819 struct dlil_threading_info
*inp
= dlil_main_input_thread
;
2820 struct thread
*tp
= current_thread();
2822 lck_mtx_lock(&inp
->input_lck
);
2823 if (inp
->net_affinity
) {
2824 u_int32_t tag
= inp
->tag
;
2826 VERIFY(inp
->wloop_thr
== THREAD_NULL
);
2827 VERIFY(inp
->poll_thr
== THREAD_NULL
);
2828 inp
->wloop_thr
= tp
;
2829 lck_mtx_unlock(&inp
->input_lck
);
2831 /* Associate this thread with the affinity tag */
2832 (void) dlil_affinity_set(tp
, tag
);
2834 lck_mtx_unlock(&inp
->input_lck
);
2838 (void) snprintf(ifname
, sizeof (ifname
), "%s_starter", if_name(ifp
));
2840 lck_mtx_lock_spin(&ifp
->if_start_lock
);
2843 if (ifp
->if_start_thread
!= NULL
) {
2844 (void) msleep(&ifp
->if_start_thread
,
2845 &ifp
->if_start_lock
,
2846 (PZERO
- 1) | PSPIN
, ifname
, ts
);
2848 /* interface is detached? */
2849 if (ifp
->if_start_thread
== THREAD_NULL
) {
2850 ifnet_set_start_cycle(ifp
, NULL
);
2851 lck_mtx_unlock(&ifp
->if_start_lock
);
2855 printf("%s: starter thread terminated\n",
2859 /* for the extra refcnt from kernel_thread_start() */
2860 thread_deallocate(current_thread());
2861 /* this is the end */
2862 thread_terminate(current_thread());
2867 ifp
->if_start_active
= 1;
2870 u_int32_t req
= ifp
->if_start_req
;
2871 if (!IFCQ_IS_EMPTY(ifq
) &&
2872 (ifp
->if_eflags
& IFEF_ENQUEUE_MULTI
) &&
2873 ifp
->if_start_delayed
== 0 &&
2874 IFCQ_LEN(ifq
) < ifp
->if_start_delay_qlen
&&
2875 (ifp
->if_eflags
& IFEF_DELAY_START
)) {
2876 ifp
->if_start_delayed
= 1;
2877 ifnet_start_delayed
++;
2880 ifp
->if_start_delayed
= 0;
2882 lck_mtx_unlock(&ifp
->if_start_lock
);
2885 * If no longer attached, don't call start because ifp
2886 * is being destroyed; else hold an IO refcnt to
2887 * prevent the interface from being detached (will be
2890 if (!ifnet_is_attached(ifp
, 1)) {
2891 lck_mtx_lock_spin(&ifp
->if_start_lock
);
2895 /* invoke the driver's start routine */
2896 ((*ifp
->if_start
)(ifp
));
2899 * Release the io ref count taken by ifnet_is_attached.
2901 ifnet_decr_iorefcnt(ifp
);
2903 lck_mtx_lock_spin(&ifp
->if_start_lock
);
2906 * If there's no pending request or if the
2907 * interface has been disabled, we're done.
2909 if (req
== ifp
->if_start_req
||
2910 (ifp
->if_start_flags
& IFSF_FLOW_CONTROLLED
)) {
2915 ifp
->if_start_req
= 0;
2916 ifp
->if_start_active
= 0;
2919 * Wakeup N ns from now if rate-controlled by TBR, and if
2920 * there are still packets in the send queue which haven't
2921 * been dequeued so far; else sleep indefinitely (ts = NULL)
2922 * until ifnet_start() is called again.
2924 ts
= ((IFCQ_TBR_IS_ENABLED(ifq
) && !IFCQ_IS_EMPTY(ifq
)) ?
2925 &ifp
->if_start_cycle
: NULL
);
2927 if (ts
== NULL
&& ifp
->if_start_delayed
== 1) {
2928 delay_start_ts
.tv_sec
= 0;
2929 delay_start_ts
.tv_nsec
= ifp
->if_start_delay_timeout
;
2930 ts
= &delay_start_ts
;
2933 if (ts
!= NULL
&& ts
->tv_sec
== 0 && ts
->tv_nsec
== 0)
2941 ifnet_set_start_cycle(struct ifnet
*ifp
, struct timespec
*ts
)
2944 bzero(&ifp
->if_start_cycle
, sizeof (ifp
->if_start_cycle
));
2946 *(&ifp
->if_start_cycle
) = *ts
;
2948 if (ts
!= NULL
&& ts
->tv_nsec
!= 0 && dlil_verbose
)
2949 printf("%s: restart interval set to %lu nsec\n",
2950 if_name(ifp
), ts
->tv_nsec
);
2954 ifnet_poll(struct ifnet
*ifp
)
2957 * If the poller thread is inactive, signal it to do work.
2959 lck_mtx_lock_spin(&ifp
->if_poll_lock
);
2961 if (!ifp
->if_poll_active
&& ifp
->if_poll_thread
!= THREAD_NULL
) {
2962 wakeup_one((caddr_t
)&ifp
->if_poll_thread
);
2964 lck_mtx_unlock(&ifp
->if_poll_lock
);
2968 ifnet_poll_thread_fn(void *v
, wait_result_t w
)
2971 struct dlil_threading_info
*inp
;
2972 struct ifnet
*ifp
= v
;
2973 char ifname
[IFNAMSIZ
+ 1];
2974 struct timespec
*ts
= NULL
;
2975 struct ifnet_stat_increment_param s
;
2977 snprintf(ifname
, sizeof (ifname
), "%s_poller",
2979 bzero(&s
, sizeof (s
));
2981 lck_mtx_lock_spin(&ifp
->if_poll_lock
);
2984 VERIFY(inp
!= NULL
);
2987 if (ifp
->if_poll_thread
!= THREAD_NULL
) {
2988 (void) msleep(&ifp
->if_poll_thread
, &ifp
->if_poll_lock
,
2989 (PZERO
- 1) | PSPIN
, ifname
, ts
);
2992 /* interface is detached (maybe while asleep)? */
2993 if (ifp
->if_poll_thread
== THREAD_NULL
) {
2994 ifnet_set_poll_cycle(ifp
, NULL
);
2995 lck_mtx_unlock(&ifp
->if_poll_lock
);
2998 printf("%s: poller thread terminated\n",
3002 /* for the extra refcnt from kernel_thread_start() */
3003 thread_deallocate(current_thread());
3004 /* this is the end */
3005 thread_terminate(current_thread());
3010 ifp
->if_poll_active
= 1;
3012 struct mbuf
*m_head
, *m_tail
;
3013 u_int32_t m_lim
, m_cnt
, m_totlen
;
3014 u_int16_t req
= ifp
->if_poll_req
;
3016 lck_mtx_unlock(&ifp
->if_poll_lock
);
3019 * If no longer attached, there's nothing to do;
3020 * else hold an IO refcnt to prevent the interface
3021 * from being detached (will be released below.)
3023 if (!ifnet_is_attached(ifp
, 1)) {
3024 lck_mtx_lock_spin(&ifp
->if_poll_lock
);
3028 m_lim
= (inp
->rxpoll_plim
!= 0) ? inp
->rxpoll_plim
:
3029 MAX((qlimit(&inp
->rcvq_pkts
)),
3030 (inp
->rxpoll_phiwat
<< 2));
3032 if (dlil_verbose
> 1) {
3033 printf("%s: polling up to %d pkts, "
3034 "pkts avg %d max %d, wreq avg %d, "
3036 if_name(ifp
), m_lim
,
3037 inp
->rxpoll_pavg
, inp
->rxpoll_pmax
,
3038 inp
->rxpoll_wavg
, inp
->rxpoll_bavg
);
3041 /* invoke the driver's input poll routine */
3042 ((*ifp
->if_input_poll
)(ifp
, 0, m_lim
, &m_head
, &m_tail
,
3043 &m_cnt
, &m_totlen
));
3045 if (m_head
!= NULL
) {
3046 VERIFY(m_tail
!= NULL
&& m_cnt
> 0);
3048 if (dlil_verbose
> 1) {
3049 printf("%s: polled %d pkts, "
3050 "pkts avg %d max %d, wreq avg %d, "
3052 if_name(ifp
), m_cnt
,
3053 inp
->rxpoll_pavg
, inp
->rxpoll_pmax
,
3054 inp
->rxpoll_wavg
, inp
->rxpoll_bavg
);
3057 /* stats are required for extended variant */
3058 s
.packets_in
= m_cnt
;
3059 s
.bytes_in
= m_totlen
;
3061 (void) ifnet_input_common(ifp
, m_head
, m_tail
,
3064 if (dlil_verbose
> 1) {
3065 printf("%s: no packets, "
3066 "pkts avg %d max %d, wreq avg %d, "
3068 if_name(ifp
), inp
->rxpoll_pavg
,
3069 inp
->rxpoll_pmax
, inp
->rxpoll_wavg
,
3073 (void) ifnet_input_common(ifp
, NULL
, NULL
,
3077 /* Release the io ref count */
3078 ifnet_decr_iorefcnt(ifp
);
3080 lck_mtx_lock_spin(&ifp
->if_poll_lock
);
3082 /* if there's no pending request, we're done */
3083 if (req
== ifp
->if_poll_req
) {
3087 ifp
->if_poll_req
= 0;
3088 ifp
->if_poll_active
= 0;
3091 * Wakeup N ns from now, else sleep indefinitely (ts = NULL)
3092 * until ifnet_poll() is called again.
3094 ts
= &ifp
->if_poll_cycle
;
3095 if (ts
->tv_sec
== 0 && ts
->tv_nsec
== 0)
3103 ifnet_set_poll_cycle(struct ifnet
*ifp
, struct timespec
*ts
)
3106 bzero(&ifp
->if_poll_cycle
, sizeof (ifp
->if_poll_cycle
));
3108 *(&ifp
->if_poll_cycle
) = *ts
;
3110 if (ts
!= NULL
&& ts
->tv_nsec
!= 0 && dlil_verbose
)
3111 printf("%s: poll interval set to %lu nsec\n",
3112 if_name(ifp
), ts
->tv_nsec
);
3116 ifnet_purge(struct ifnet
*ifp
)
3118 if (ifp
!= NULL
&& (ifp
->if_eflags
& IFEF_TXSTART
))
3123 ifnet_update_sndq(struct ifclassq
*ifq
, cqev_t ev
)
3125 IFCQ_LOCK_ASSERT_HELD(ifq
);
3127 if (!(IFCQ_IS_READY(ifq
)))
3130 if (IFCQ_TBR_IS_ENABLED(ifq
)) {
3131 struct tb_profile tb
= { ifq
->ifcq_tbr
.tbr_rate_raw
,
3132 ifq
->ifcq_tbr
.tbr_percent
, 0 };
3133 (void) ifclassq_tbr_set(ifq
, &tb
, FALSE
);
3136 ifclassq_update(ifq
, ev
);
3140 ifnet_update_rcv(struct ifnet
*ifp
, cqev_t ev
)
3143 case CLASSQ_EV_LINK_BANDWIDTH
:
3144 if (net_rxpoll
&& (ifp
->if_eflags
& IFEF_RXPOLL
))
3145 ifp
->if_poll_update
++;
3154 ifnet_set_output_sched_model(struct ifnet
*ifp
, u_int32_t model
)
3156 struct ifclassq
*ifq
;
3160 if (ifp
== NULL
|| model
>= IFNET_SCHED_MODEL_MAX
)
3162 else if (!(ifp
->if_eflags
& IFEF_TXSTART
))
3167 omodel
= ifp
->if_output_sched_model
;
3168 ifp
->if_output_sched_model
= model
;
3169 if ((err
= ifclassq_pktsched_setup(ifq
)) != 0)
3170 ifp
->if_output_sched_model
= omodel
;
3177 ifnet_set_sndq_maxlen(struct ifnet
*ifp
, u_int32_t maxqlen
)
3181 else if (!(ifp
->if_eflags
& IFEF_TXSTART
))
3184 ifclassq_set_maxlen(&ifp
->if_snd
, maxqlen
);
3190 ifnet_get_sndq_maxlen(struct ifnet
*ifp
, u_int32_t
*maxqlen
)
3192 if (ifp
== NULL
|| maxqlen
== NULL
)
3194 else if (!(ifp
->if_eflags
& IFEF_TXSTART
))
3197 *maxqlen
= ifclassq_get_maxlen(&ifp
->if_snd
);
3203 ifnet_get_sndq_len(struct ifnet
*ifp
, u_int32_t
*pkts
)
3207 if (ifp
== NULL
|| pkts
== NULL
)
3209 else if (!(ifp
->if_eflags
& IFEF_TXSTART
))
3212 err
= ifclassq_get_len(&ifp
->if_snd
, MBUF_SC_UNSPEC
,
3219 ifnet_get_service_class_sndq_len(struct ifnet
*ifp
, mbuf_svc_class_t sc
,
3220 u_int32_t
*pkts
, u_int32_t
*bytes
)
3224 if (ifp
== NULL
|| !MBUF_VALID_SC(sc
) ||
3225 (pkts
== NULL
&& bytes
== NULL
))
3227 else if (!(ifp
->if_eflags
& IFEF_TXSTART
))
3230 err
= ifclassq_get_len(&ifp
->if_snd
, sc
, pkts
, bytes
);
3236 ifnet_set_rcvq_maxlen(struct ifnet
*ifp
, u_int32_t maxqlen
)
3238 struct dlil_threading_info
*inp
;
3242 else if (!(ifp
->if_eflags
& IFEF_RXPOLL
) || ifp
->if_inp
== NULL
)
3246 maxqlen
= if_rcvq_maxlen
;
3247 else if (maxqlen
< IF_RCVQ_MINLEN
)
3248 maxqlen
= IF_RCVQ_MINLEN
;
3251 lck_mtx_lock(&inp
->input_lck
);
3252 qlimit(&inp
->rcvq_pkts
) = maxqlen
;
3253 lck_mtx_unlock(&inp
->input_lck
);
3259 ifnet_get_rcvq_maxlen(struct ifnet
*ifp
, u_int32_t
*maxqlen
)
3261 struct dlil_threading_info
*inp
;
3263 if (ifp
== NULL
|| maxqlen
== NULL
)
3265 else if (!(ifp
->if_eflags
& IFEF_RXPOLL
) || ifp
->if_inp
== NULL
)
3269 lck_mtx_lock(&inp
->input_lck
);
3270 *maxqlen
= qlimit(&inp
->rcvq_pkts
);
3271 lck_mtx_unlock(&inp
->input_lck
);
static void
ifnet_enqueue_multi_setup(struct ifnet *ifp, uint16_t delay_qlen,
    uint16_t delay_timeout)
{
	if (delay_qlen > 0 && delay_timeout > 0) {
		ifp->if_eflags |= IFEF_ENQUEUE_MULTI;
		ifp->if_start_delay_qlen = min(100, delay_qlen);
		ifp->if_start_delay_timeout = min(20000, delay_timeout);
		/* convert timeout to nanoseconds */
		ifp->if_start_delay_timeout *= 1000;
		kprintf("%s: forced IFEF_ENQUEUE_MULTI qlen %u timeout %u\n",
		    ifp->if_xname, (uint32_t)delay_qlen,
		    (uint32_t)delay_timeout);
	} else {
		ifp->if_eflags &= ~IFEF_ENQUEUE_MULTI;
	}
}
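
/*
 * Worked example (illustrative only, not part of the original source):
 * a caller passing delay_qlen = 16 and delay_timeout = 100 (microseconds)
 * ends up with
 *
 *     if_start_delay_qlen    = min(100, 16)    = 16 packets
 *     if_start_delay_timeout = min(20000, 100) = 100, then * 1000
 *                            = 100000 nsec (0.1 ms)
 *
 * so the starter thread may coalesce up to 16 enqueued packets or wait
 * at most 0.1 ms before kicking the driver's start routine.
 */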
3293 static inline errno_t
3294 ifnet_enqueue_common(struct ifnet
*ifp
, void *p
, classq_pkt_type_t ptype
,
3295 boolean_t flush
, boolean_t
*pdrop
)
3297 volatile uint64_t *fg_ts
= NULL
;
3298 volatile uint64_t *rt_ts
= NULL
;
3300 struct timespec now
;
3301 u_int64_t now_nsec
= 0;
3304 ASSERT(ifp
->if_eflags
& IFEF_TXSTART
);
3307 * If packet already carries a timestamp, either from dlil_output()
3308 * or from flowswitch, use it here. Otherwise, record timestamp.
3309 * PKTF_TS_VALID is always cleared prior to entering classq, i.e.
3310 * the timestamp value is used internally there.
3314 ASSERT(m
->m_flags
& M_PKTHDR
);
3315 ASSERT(m
->m_nextpkt
== NULL
);
3317 if (!(m
->m_pkthdr
.pkt_flags
& PKTF_TS_VALID
) ||
3318 m
->m_pkthdr
.pkt_timestamp
== 0) {
3320 net_timernsec(&now
, &now_nsec
);
3321 m
->m_pkthdr
.pkt_timestamp
= now_nsec
;
3323 m
->m_pkthdr
.pkt_flags
&= ~PKTF_TS_VALID
;
3325 * If the packet service class is not background,
3326 * update the timestamp to indicate recent activity
3327 * on a foreground socket.
3329 if ((m
->m_pkthdr
.pkt_flags
& PKTF_FLOW_ID
) &&
3330 m
->m_pkthdr
.pkt_flowsrc
== FLOWSRC_INPCB
) {
3331 if (!(m
->m_pkthdr
.pkt_flags
& PKTF_SO_BACKGROUND
)) {
3332 ifp
->if_fg_sendts
= _net_uptime
;
3334 *fg_ts
= _net_uptime
;
3336 if (m
->m_pkthdr
.pkt_flags
& PKTF_SO_REALTIME
) {
3337 ifp
->if_rt_sendts
= _net_uptime
;
3339 *rt_ts
= _net_uptime
;
3350 if (ifp
->if_eflags
& IFEF_ENQUEUE_MULTI
) {
3351 if (now_nsec
== 0) {
3353 net_timernsec(&now
, &now_nsec
);
3356 * If the driver chose to delay start callback for
3357 * coalescing multiple packets, Then use the following
3358 * heuristics to make sure that start callback will
3359 * be delayed only when bulk data transfer is detected.
3360 * 1. number of packets enqueued in (delay_win * 2) is
3361 * greater than or equal to the delay qlen.
3362 * 2. If delay_start is enabled it will stay enabled for
3363 * another 10 idle windows. This is to take into account
3364 * variable RTT and burst traffic.
3365 * 3. If the time elapsed since last enqueue is more
3366 * than 200ms we disable delaying start callback. This is
3367 * is to take idle time into account.
3369 u_int64_t dwin
= (ifp
->if_start_delay_timeout
<< 1);
3370 if (ifp
->if_start_delay_swin
> 0) {
3371 if ((ifp
->if_start_delay_swin
+ dwin
) > now_nsec
) {
3372 ifp
->if_start_delay_cnt
++;
3373 } else if ((now_nsec
- ifp
->if_start_delay_swin
)
3374 >= (200 * 1000 * 1000)) {
3375 ifp
->if_start_delay_swin
= now_nsec
;
3376 ifp
->if_start_delay_cnt
= 1;
3377 ifp
->if_start_delay_idle
= 0;
3378 if (ifp
->if_eflags
& IFEF_DELAY_START
) {
3380 ~(IFEF_DELAY_START
);
3381 ifnet_delay_start_disabled
++;
3384 if (ifp
->if_start_delay_cnt
>=
3385 ifp
->if_start_delay_qlen
) {
3386 ifp
->if_eflags
|= IFEF_DELAY_START
;
3387 ifp
->if_start_delay_idle
= 0;
3389 if (ifp
->if_start_delay_idle
>= 10) {
3390 ifp
->if_eflags
&= ~(IFEF_DELAY_START
);
3391 ifnet_delay_start_disabled
++;
3393 ifp
->if_start_delay_idle
++;
3396 ifp
->if_start_delay_swin
= now_nsec
;
3397 ifp
->if_start_delay_cnt
= 1;
3400 ifp
->if_start_delay_swin
= now_nsec
;
3401 ifp
->if_start_delay_cnt
= 1;
3402 ifp
->if_start_delay_idle
= 0;
3403 ifp
->if_eflags
&= ~(IFEF_DELAY_START
);
3406 ifp
->if_eflags
&= ~(IFEF_DELAY_START
);
3411 /* enqueue the packet (caller consumes object) */
3412 error
= ifclassq_enqueue(&ifp
->if_snd
, m
, QP_MBUF
, pdrop
);
3422 * Tell the driver to start dequeueing; do this even when the queue
3423 * for the packet is suspended (EQSUSPENDED), as the driver could still
3424 * be dequeueing from other unsuspended queues.
3426 if (!(ifp
->if_eflags
& IFEF_ENQUEUE_MULTI
) &&
3427 ((error
== 0 && flush
) || error
== EQFULL
|| error
== EQSUSPENDED
))
errno_t
ifnet_enqueue(struct ifnet *ifp, struct mbuf *m)
{
	boolean_t pdrop;

	return (ifnet_enqueue_mbuf(ifp, m, TRUE, &pdrop));
}

errno_t
ifnet_enqueue_mbuf(struct ifnet *ifp, struct mbuf *m, boolean_t flush,
    boolean_t *pdrop)
{
	if (ifp == NULL || m == NULL || !(m->m_flags & M_PKTHDR) ||
	    m->m_nextpkt != NULL) {
		if (m != NULL)
			m_freem_list(m);
		if (pdrop != NULL)
			*pdrop = TRUE;
		return (EINVAL);
	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    !IF_FULLY_ATTACHED(ifp)) {
		/* flag tested without lock for performance */
		m_freem(m);
		*pdrop = TRUE;
		return (ENXIO);
	} else if (!(ifp->if_flags & IFF_UP)) {
		m_freem(m);
		*pdrop = TRUE;
		return (ENETDOWN);
	}

	return (ifnet_enqueue_common(ifp, m, QP_MBUF, flush, pdrop));
}
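
/*
 * Illustrative sketch (not part of the original source): a virtual
 * interface feeding a pre-enqueued (IFEF_TXSTART) interface would use
 * the mbuf variant directly; roughly speaking, the mbuf is owned by the
 * send queue on return unless pdrop comes back TRUE:
 *
 *     boolean_t pdrop = FALSE;
 *     errno_t err = ifnet_enqueue_mbuf(ifp, m, TRUE, &pdrop);
 *     if (err != 0 && pdrop) {
 *         // packet was dropped and already freed by the queue
 *     }
 */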
3468 ifnet_dequeue(struct ifnet
*ifp
, struct mbuf
**mp
)
3471 classq_pkt_type_t ptype
;
3472 if (ifp
== NULL
|| mp
== NULL
)
3474 else if (!(ifp
->if_eflags
& IFEF_TXSTART
) ||
3475 ifp
->if_output_sched_model
>= IFNET_SCHED_MODEL_MAX
)
3477 if (!ifnet_is_attached(ifp
, 1))
3480 rc
= ifclassq_dequeue(&ifp
->if_snd
, 1, CLASSQ_DEQUEUE_MAX_BYTE_LIMIT
,
3481 (void **)mp
, NULL
, NULL
, NULL
, &ptype
);
3482 VERIFY((*mp
== NULL
) || (ptype
== QP_MBUF
));
3483 ifnet_decr_iorefcnt(ifp
);
3489 ifnet_dequeue_service_class(struct ifnet
*ifp
, mbuf_svc_class_t sc
,
3493 classq_pkt_type_t ptype
;
3494 if (ifp
== NULL
|| mp
== NULL
|| !MBUF_VALID_SC(sc
))
3496 else if (!(ifp
->if_eflags
& IFEF_TXSTART
) ||
3497 ifp
->if_output_sched_model
>= IFNET_SCHED_MODEL_MAX
)
3499 if (!ifnet_is_attached(ifp
, 1))
3502 rc
= ifclassq_dequeue_sc(&ifp
->if_snd
, sc
, 1,
3503 CLASSQ_DEQUEUE_MAX_BYTE_LIMIT
, (void **)mp
, NULL
, NULL
,
3505 VERIFY((*mp
== NULL
) || (ptype
== QP_MBUF
));
3506 ifnet_decr_iorefcnt(ifp
);
3511 ifnet_dequeue_multi(struct ifnet
*ifp
, u_int32_t pkt_limit
,
3512 struct mbuf
**head
, struct mbuf
**tail
, u_int32_t
*cnt
, u_int32_t
*len
)
3515 classq_pkt_type_t ptype
;
3516 if (ifp
== NULL
|| head
== NULL
|| pkt_limit
< 1)
3518 else if (!(ifp
->if_eflags
& IFEF_TXSTART
) ||
3519 ifp
->if_output_sched_model
>= IFNET_SCHED_MODEL_MAX
)
3521 if (!ifnet_is_attached(ifp
, 1))
3524 rc
= ifclassq_dequeue(&ifp
->if_snd
, pkt_limit
,
3525 CLASSQ_DEQUEUE_MAX_BYTE_LIMIT
, (void **)head
, (void **)tail
, cnt
,
3527 VERIFY((*head
== NULL
) || (ptype
== QP_MBUF
));
3528 ifnet_decr_iorefcnt(ifp
);
3533 ifnet_dequeue_multi_bytes(struct ifnet
*ifp
, u_int32_t byte_limit
,
3534 struct mbuf
**head
, struct mbuf
**tail
, u_int32_t
*cnt
, u_int32_t
*len
)
3537 classq_pkt_type_t ptype
;
3538 if (ifp
== NULL
|| head
== NULL
|| byte_limit
< 1)
3540 else if (!(ifp
->if_eflags
& IFEF_TXSTART
) ||
3541 ifp
->if_output_sched_model
>= IFNET_SCHED_MODEL_MAX
)
3543 if (!ifnet_is_attached(ifp
, 1))
3546 rc
= ifclassq_dequeue(&ifp
->if_snd
, CLASSQ_DEQUEUE_MAX_PKT_LIMIT
,
3547 byte_limit
, (void **)head
, (void **)tail
, cnt
, len
, &ptype
);
3548 VERIFY((*head
== NULL
) || (ptype
== QP_MBUF
));
3549 ifnet_decr_iorefcnt(ifp
);
3554 ifnet_dequeue_service_class_multi(struct ifnet
*ifp
, mbuf_svc_class_t sc
,
3555 u_int32_t pkt_limit
, struct mbuf
**head
, struct mbuf
**tail
, u_int32_t
*cnt
,
3559 classq_pkt_type_t ptype
;
3560 if (ifp
== NULL
|| head
== NULL
|| pkt_limit
< 1 ||
3563 else if (!(ifp
->if_eflags
& IFEF_TXSTART
) ||
3564 ifp
->if_output_sched_model
>= IFNET_SCHED_MODEL_MAX
)
3566 if (!ifnet_is_attached(ifp
, 1))
3569 rc
= ifclassq_dequeue_sc(&ifp
->if_snd
, sc
, pkt_limit
,
3570 CLASSQ_DEQUEUE_MAX_BYTE_LIMIT
, (void **)head
,
3571 (void **)tail
, cnt
, len
, &ptype
);
3572 VERIFY((*head
== NULL
) || (ptype
== QP_MBUF
));
3573 ifnet_decr_iorefcnt(ifp
);
#if !CONFIG_EMBEDDED
static errno_t
ifnet_framer_stub(struct ifnet *ifp, struct mbuf **m,
    const struct sockaddr *dest, const char *dest_linkaddr,
    const char *frame_type, u_int32_t *pre, u_int32_t *post)
{
	if (pre != NULL)
		*pre = 0;
	if (post != NULL)
		*post = 0;

	return (ifp->if_framer_legacy(ifp, m, dest, dest_linkaddr, frame_type));
}
#endif /* !CONFIG_EMBEDDED */
static int
dlil_interface_filters_input(struct ifnet *ifp, struct mbuf **m_p,
    char **frame_header_p, protocol_family_t protocol_family)
{
	struct ifnet_filter *filter;

	/*
	 * Pass the inbound packet to the interface filters
	 */
	lck_mtx_lock_spin(&ifp->if_flt_lock);
	/* prevent filter list from changing in case we drop the lock */
	if_flt_monitor_busy(ifp);
	TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
		int result;

		if (!filter->filt_skip && filter->filt_input != NULL &&
		    (filter->filt_protocol == 0 ||
		    filter->filt_protocol == protocol_family)) {
			lck_mtx_unlock(&ifp->if_flt_lock);

			result = (*filter->filt_input)(filter->filt_cookie,
			    ifp, protocol_family, m_p, frame_header_p);

			lck_mtx_lock_spin(&ifp->if_flt_lock);
			if (result != 0) {
				/* we're done with the filter list */
				if_flt_monitor_unbusy(ifp);
				lck_mtx_unlock(&ifp->if_flt_lock);
				return (result);
			}
		}
	}
	/* we're done with the filter list */
	if_flt_monitor_unbusy(ifp);
	lck_mtx_unlock(&ifp->if_flt_lock);

	/*
	 * Strip away M_PROTO1 bit prior to sending packet up the stack as
	 * it is meant to be local to a subsystem -- if_bridge for M_PROTO1
	 */
	if (*m_p != NULL)
		(*m_p)->m_flags &= ~M_PROTO1;

	return (0);
}

static int
dlil_interface_filters_output(struct ifnet *ifp, struct mbuf **m_p,
    protocol_family_t protocol_family)
{
	struct ifnet_filter *filter;

	/*
	 * Pass the outbound packet to the interface filters
	 */
	lck_mtx_lock_spin(&ifp->if_flt_lock);
	/* prevent filter list from changing in case we drop the lock */
	if_flt_monitor_busy(ifp);
	TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
		int result;

		if (!filter->filt_skip && filter->filt_output != NULL &&
		    (filter->filt_protocol == 0 ||
		    filter->filt_protocol == protocol_family)) {
			lck_mtx_unlock(&ifp->if_flt_lock);

			result = filter->filt_output(filter->filt_cookie, ifp,
			    protocol_family, m_p);

			lck_mtx_lock_spin(&ifp->if_flt_lock);
			if (result != 0) {
				/* we're done with the filter list */
				if_flt_monitor_unbusy(ifp);
				lck_mtx_unlock(&ifp->if_flt_lock);
				return (result);
			}
		}
	}
	/* we're done with the filter list */
	if_flt_monitor_unbusy(ifp);
	lck_mtx_unlock(&ifp->if_flt_lock);

	return (0);
}
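
/*
 * Illustrative sketch (not part of the original source): the loops
 * above give each filter a chance to consume, modify, or pass the
 * packet.  A filter callback therefore follows this contract (names
 * other than the iff_* types are hypothetical):
 *
 *     static errno_t my_iff_output(void *cookie, ifnet_t ifp,
 *         protocol_family_t proto, mbuf_t *data)
 *     {
 *         if (should_drop(*data)) {      // hypothetical policy check
 *             mbuf_freem(*data);
 *             *data = NULL;
 *             return (EJUSTRETURN);      // consumed, stop processing
 *         }
 *         return (0);                    // pass to the next filter
 *     }
 */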
static void
dlil_ifproto_input(struct if_proto * ifproto, mbuf_t m)
{
	int error;

	if (ifproto->proto_kpi == kProtoKPI_v1) {
		/* Version 1 protocols get one packet at a time */
		while (m != NULL) {
			char *frame_header;
			mbuf_t next_packet;

			next_packet = m->m_nextpkt;
			m->m_nextpkt = NULL;
			frame_header = m->m_pkthdr.pkt_hdr;
			m->m_pkthdr.pkt_hdr = NULL;
			error = (*ifproto->kpi.v1.input)(ifproto->ifp,
			    ifproto->protocol_family, m, frame_header);
			if (error != 0 && error != EJUSTRETURN)
				m_freem(m);
			m = next_packet;
		}
	} else if (ifproto->proto_kpi == kProtoKPI_v2) {
		/* Version 2 protocols support packet lists */
		error = (*ifproto->kpi.v2.input)(ifproto->ifp,
		    ifproto->protocol_family, m);
		if (error != 0 && error != EJUSTRETURN)
			m_freem_list(m);
	}
}
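
/*
 * Illustrative sketch (not part of the original source): the two
 * protocol KPI generations differ only in chain handling, which is why
 * the v1 path above unlinks each packet first.  A v2-style handler can
 * take the whole chain itself (the handler name below is hypothetical):
 *
 *     static errno_t my_proto_input_v2(ifnet_t ifp,
 *         protocol_family_t proto, mbuf_t packet_list)
 *     {
 *         mbuf_t next;
 *
 *         for (; packet_list != NULL; packet_list = next) {
 *             next = mbuf_nextpkt(packet_list);
 *             mbuf_setnextpkt(packet_list, NULL);
 *             // process one packet here
 *         }
 *         return (0);
 *     }
 */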
3708 dlil_input_stats_add(const struct ifnet_stat_increment_param
*s
,
3709 struct dlil_threading_info
*inp
, boolean_t poll
)
3711 struct ifnet_stat_increment_param
*d
= &inp
->stats
;
3713 if (s
->packets_in
!= 0)
3714 d
->packets_in
+= s
->packets_in
;
3715 if (s
->bytes_in
!= 0)
3716 d
->bytes_in
+= s
->bytes_in
;
3717 if (s
->errors_in
!= 0)
3718 d
->errors_in
+= s
->errors_in
;
3720 if (s
->packets_out
!= 0)
3721 d
->packets_out
+= s
->packets_out
;
3722 if (s
->bytes_out
!= 0)
3723 d
->bytes_out
+= s
->bytes_out
;
3724 if (s
->errors_out
!= 0)
3725 d
->errors_out
+= s
->errors_out
;
3727 if (s
->collisions
!= 0)
3728 d
->collisions
+= s
->collisions
;
3729 if (s
->dropped
!= 0)
3730 d
->dropped
+= s
->dropped
;
3733 PKTCNTR_ADD(&inp
->tstats
, s
->packets_in
, s
->bytes_in
);
3737 dlil_input_stats_sync(struct ifnet
*ifp
, struct dlil_threading_info
*inp
)
3739 struct ifnet_stat_increment_param
*s
= &inp
->stats
;
3742 * Use of atomic operations is unavoidable here because
3743 * these stats may also be incremented elsewhere via KPIs.
3745 if (s
->packets_in
!= 0) {
3746 atomic_add_64(&ifp
->if_data
.ifi_ipackets
, s
->packets_in
);
3749 if (s
->bytes_in
!= 0) {
3750 atomic_add_64(&ifp
->if_data
.ifi_ibytes
, s
->bytes_in
);
3753 if (s
->errors_in
!= 0) {
3754 atomic_add_64(&ifp
->if_data
.ifi_ierrors
, s
->errors_in
);
3758 if (s
->packets_out
!= 0) {
3759 atomic_add_64(&ifp
->if_data
.ifi_opackets
, s
->packets_out
);
3762 if (s
->bytes_out
!= 0) {
3763 atomic_add_64(&ifp
->if_data
.ifi_obytes
, s
->bytes_out
);
3766 if (s
->errors_out
!= 0) {
3767 atomic_add_64(&ifp
->if_data
.ifi_oerrors
, s
->errors_out
);
3771 if (s
->collisions
!= 0) {
3772 atomic_add_64(&ifp
->if_data
.ifi_collisions
, s
->collisions
);
3775 if (s
->dropped
!= 0) {
3776 atomic_add_64(&ifp
->if_data
.ifi_iqdrops
, s
->dropped
);
3780 if (ifp
->if_data_threshold
!= 0) {
3781 lck_mtx_convert_spin(&inp
->input_lck
);
3782 ifnet_notify_data_threshold(ifp
);
3786 * No need for atomic operations as they are modified here
3787 * only from within the DLIL input thread context.
3789 if (inp
->tstats
.packets
!= 0) {
3790 inp
->pstats
.ifi_poll_packets
+= inp
->tstats
.packets
;
3791 inp
->tstats
.packets
= 0;
3793 if (inp
->tstats
.bytes
!= 0) {
3794 inp
->pstats
.ifi_poll_bytes
+= inp
->tstats
.bytes
;
3795 inp
->tstats
.bytes
= 0;
3799 __private_extern__
void
3800 dlil_input_packet_list(struct ifnet
*ifp
, struct mbuf
*m
)
3802 return (dlil_input_packet_list_common(ifp
, m
, 0,
3803 IFNET_MODEL_INPUT_POLL_OFF
, FALSE
));
3806 __private_extern__
void
3807 dlil_input_packet_list_extended(struct ifnet
*ifp
, struct mbuf
*m
,
3808 u_int32_t cnt
, ifnet_model_t mode
)
3810 return (dlil_input_packet_list_common(ifp
, m
, cnt
, mode
, TRUE
));
dlil_input_packet_list_common(struct ifnet *ifp_param, struct mbuf *m,
    u_int32_t cnt, ifnet_model_t mode, boolean_t ext)
{
    protocol_family_t protocol_family;
    ifnet_t ifp = ifp_param;
    char *frame_header;
    struct if_proto *last_ifproto = NULL;
    mbuf_t pkt_first = NULL;
    mbuf_t *pkt_next = NULL;
    u_int32_t poll_thresh = 0, poll_ival = 0;

    KERNEL_DEBUG(DBG_FNC_DLIL_INPUT | DBG_FUNC_START, 0, 0, 0, 0, 0);

    if (ext && mode == IFNET_MODEL_INPUT_POLL_ON && cnt > 1 &&
        (poll_ival = if_rxpoll_interval_pkts) > 0)

        struct if_proto *ifproto = NULL;
        uint32_t pktf_mask;     /* pkt flags to preserve */

        if (ifp_param == NULL)
            ifp = m->m_pkthdr.rcvif;

        if ((ifp->if_eflags & IFEF_RXPOLL) && poll_thresh != 0 &&
            poll_ival > 0 && (--poll_thresh % poll_ival) == 0)

        /* Check if this mbuf looks valid */
        MBUF_INPUT_CHECK(m, ifp);

        next_packet = m->m_nextpkt;
        m->m_nextpkt = NULL;
        frame_header = m->m_pkthdr.pkt_hdr;
        m->m_pkthdr.pkt_hdr = NULL;

        /*
         * Get an IO reference count if the interface is not
         * loopback (lo0) and it is attached; lo0 never goes
         * away, so optimize for that.
         */
        if (ifp != lo_ifp) {
            if (!ifnet_is_attached(ifp, 1)) {

            /*
             * Preserve the time stamp if it was set.
             */
            pktf_mask = PKTF_TS_VALID;

            /*
             * If this arrived on lo0, preserve interface addr
             * info to allow for connectivity between loopback
             * and local interface addresses.
             */
            pktf_mask = (PKTF_LOOP|PKTF_IFAINFO);

        /* make sure packet comes in clean */
        m_classifier_init(m, pktf_mask);

        ifp_inc_traffic_class_in(ifp, m);

        /* find which protocol family this packet is for */
        ifnet_lock_shared(ifp);
        error = (*ifp->if_demux)(ifp, m, frame_header,
        ifnet_lock_done(ifp);

        if (error == EJUSTRETURN)
            protocol_family = 0;

        if (hwcksum_dbg != 0 && !(ifp->if_flags & IFF_LOOPBACK) &&
            !(m->m_pkthdr.pkt_flags & PKTF_LOOP))
            dlil_input_cksum_dbg(ifp, m, frame_header,

        /*
         * For partial checksum offload, we expect the driver to
         * set the start offset indicating the start of the span
         * that is covered by the hardware-computed checksum;
         * adjust this start offset accordingly because the data
         * pointer has been advanced beyond the link-layer header.
         *
         * Don't adjust if the interface is a bridge member, as
         * the adjustment will occur from the context of the
         * bridge interface during input.
         */
        if (ifp->if_bridge == NULL && (m->m_pkthdr.csum_flags &
            (CSUM_DATA_VALID | CSUM_PARTIAL)) ==
            (CSUM_DATA_VALID | CSUM_PARTIAL)) {
            if (frame_header == NULL ||
                frame_header < (char *)mbuf_datastart(m) ||
                frame_header > (char *)m->m_data ||
                (adj = (m->m_data - frame_header)) >
                m->m_pkthdr.csum_rx_start) {
                m->m_pkthdr.csum_data = 0;
                m->m_pkthdr.csum_flags &= ~CSUM_DATA_VALID;
                hwcksum_in_invalidated++;
                m->m_pkthdr.csum_rx_start -= adj;

        pktap_input(ifp, protocol_family, m, frame_header);

        if (m->m_flags & (M_BCAST|M_MCAST))
            atomic_add_64(&ifp->if_imcasts, 1);

        /* run interface filters, exclude VLAN packets PR-3586856 */
        if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) == 0) {
            error = dlil_interface_filters_input(ifp, &m,
                &frame_header, protocol_family);
            if (error != EJUSTRETURN)

        if (error != 0 || ((m->m_flags & M_PROMISC) != 0)) {

        /* Lookup the protocol attachment to this interface */
        if (protocol_family == 0) {
        } else if (last_ifproto != NULL && last_ifproto->ifp == ifp &&
            (last_ifproto->protocol_family == protocol_family)) {
            VERIFY(ifproto == NULL);
            ifproto = last_ifproto;
            if_proto_ref(last_ifproto);
            VERIFY(ifproto == NULL);
            ifnet_lock_shared(ifp);
            /* callee holds a proto refcnt upon success */
            ifproto = find_attached_proto(ifp, protocol_family);
            ifnet_lock_done(ifp);
        if (ifproto == NULL) {
            /* no protocol for this packet, discard */
        if (ifproto != last_ifproto) {
            if (last_ifproto != NULL) {
                /* pass up the list for the previous protocol */
                dlil_ifproto_input(last_ifproto, pkt_first);
                if_proto_free(last_ifproto);
            last_ifproto = ifproto;
            if_proto_ref(ifproto);
        /* extend the list */
        m->m_pkthdr.pkt_hdr = frame_header;
        if (pkt_first == NULL) {
        pkt_next = &m->m_nextpkt;
        if (next_packet == NULL && last_ifproto != NULL) {
            /* pass up the last list of packets */
            dlil_ifproto_input(last_ifproto, pkt_first);
            if_proto_free(last_ifproto);
            last_ifproto = NULL;
        if (ifproto != NULL) {
            if_proto_free(ifproto);

    /* update the driver's multicast filter, if needed */
    if (ifp->if_updatemcasts > 0 && if_mcasts_update(ifp) == 0)
        ifp->if_updatemcasts = 0;
    ifnet_decr_iorefcnt(ifp);

    KERNEL_DEBUG(DBG_FNC_DLIL_INPUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
}
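/*
 * Illustrative sketch (not part of the original source): a hypothetical
 * driver receive handler handing a chain of mbufs to DLIL.  Packets queued
 * through ifnet_input() are eventually delivered to the demux/protocol
 * dispatch loop above.  The driver function name and the way the chain is
 * built are assumptions for illustration only.
 */
#if 0
static void
hypothetical_driver_rx_complete(ifnet_t ifp, mbuf_t pkt_chain,
    u_int32_t pkt_count, u_int32_t byte_count)
{
    struct ifnet_stat_increment_param stats;

    bzero(&stats, sizeof (stats));
    stats.packets_in = pkt_count;
    stats.bytes_in = byte_count;

    /* hand the whole chain to DLIL; it takes ownership of the mbufs */
    (void) ifnet_input(ifp, pkt_chain, &stats);
}
#endif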
if_mcasts_update(struct ifnet *ifp)
{
    err = ifnet_ioctl(ifp, 0, SIOCADDMULTI, NULL);
    if (err == EAFNOSUPPORT)

    printf("%s: %s %d suspended link-layer multicast membership(s) "
        "(err=%d)\n", if_name(ifp),
        (err == 0 ? "successfully restored" : "failed to restore"),
        ifp->if_updatemcasts, err);

    /* just return success */
}
/* If ifp is set, we will increment the generation for the interface */
dlil_post_complete_msg(struct ifnet *ifp, struct kev_msg *event)
{
        ifnet_increment_generation(ifp);
    necp_update_all_clients();

    return (kev_post_msg(event));
}
__private_extern__ void
dlil_post_sifflags_msg(struct ifnet *ifp)
{
    struct kev_msg ev_msg;
    struct net_event_data ev_data;

    bzero(&ev_data, sizeof (ev_data));
    bzero(&ev_msg, sizeof (ev_msg));
    ev_msg.vendor_code = KEV_VENDOR_APPLE;
    ev_msg.kev_class = KEV_NETWORK_CLASS;
    ev_msg.kev_subclass = KEV_DL_SUBCLASS;
    ev_msg.event_code = KEV_DL_SIFFLAGS;
    strlcpy(&ev_data.if_name[0], ifp->if_name, IFNAMSIZ);
    ev_data.if_family = ifp->if_family;
    ev_data.if_unit = (u_int32_t) ifp->if_unit;
    ev_msg.dv[0].data_length = sizeof(struct net_event_data);
    ev_msg.dv[0].data_ptr = &ev_data;
    ev_msg.dv[1].data_length = 0;
    dlil_post_complete_msg(ifp, &ev_msg);
}
#define TMP_IF_PROTO_ARR_SIZE 10
dlil_event_internal(struct ifnet *ifp, struct kev_msg *event, bool update_generation)
{
    struct ifnet_filter *filter = NULL;
    struct if_proto *proto = NULL;
    int if_proto_count = 0;
    struct if_proto **tmp_ifproto_arr = NULL;
    struct if_proto *tmp_ifproto_stack_arr[TMP_IF_PROTO_ARR_SIZE] = {NULL};
    int tmp_ifproto_arr_idx = 0;
    bool tmp_malloc = false;

    /*
     * Pass the event to the interface filters
     */
    lck_mtx_lock_spin(&ifp->if_flt_lock);
    /* prevent filter list from changing in case we drop the lock */
    if_flt_monitor_busy(ifp);
    TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
        if (filter->filt_event != NULL) {
            lck_mtx_unlock(&ifp->if_flt_lock);

            filter->filt_event(filter->filt_cookie, ifp,
                filter->filt_protocol, event);

            lck_mtx_lock_spin(&ifp->if_flt_lock);

    /* we're done with the filter list */
    if_flt_monitor_unbusy(ifp);
    lck_mtx_unlock(&ifp->if_flt_lock);

    /* Get an io ref count if the interface is attached */
    if (!ifnet_is_attached(ifp, 1))

    /*
     * An embedded tmp_list_entry in if_proto may still get
     * over-written by another thread after giving up ifnet lock,
     * therefore we are avoiding embedded pointers here.
     */
    ifnet_lock_shared(ifp);
    if_proto_count = dlil_ifp_protolist(ifp, NULL, 0);
    if (if_proto_count) {
        VERIFY(ifp->if_proto_hash != NULL);
        if (if_proto_count <= TMP_IF_PROTO_ARR_SIZE) {
            tmp_ifproto_arr = tmp_ifproto_stack_arr;
            MALLOC(tmp_ifproto_arr, struct if_proto **,
                sizeof (*tmp_ifproto_arr) * if_proto_count,
            if (tmp_ifproto_arr == NULL) {
                ifnet_lock_done(ifp);

        for (i = 0; i < PROTO_HASH_SLOTS; i++) {
            SLIST_FOREACH(proto, &ifp->if_proto_hash[i],
                if_proto_ref(proto);
                tmp_ifproto_arr[tmp_ifproto_arr_idx] = proto;
                tmp_ifproto_arr_idx++;
        VERIFY(if_proto_count == tmp_ifproto_arr_idx);

    ifnet_lock_done(ifp);

    for (tmp_ifproto_arr_idx = 0; tmp_ifproto_arr_idx < if_proto_count;
        tmp_ifproto_arr_idx++) {
        proto = tmp_ifproto_arr[tmp_ifproto_arr_idx];
        VERIFY(proto != NULL);
        proto_media_event eventp =
            (proto->proto_kpi == kProtoKPI_v1 ?
            proto->kpi.v1.event :
            proto->kpi.v2.event);

        if (eventp != NULL) {
            eventp(ifp, proto->protocol_family,
        if_proto_free(proto);

        FREE(tmp_ifproto_arr, M_TEMP);

    /* Pass the event to the interface */
    if (ifp->if_event != NULL)
        ifp->if_event(ifp, event);

    /* Release the io ref count */
    ifnet_decr_iorefcnt(ifp);

    return (dlil_post_complete_msg(update_generation ? ifp : NULL, event));
}
ifnet_event(ifnet_t ifp, struct kern_event_msg *event)
{
    struct kev_msg kev_msg;

    if (ifp == NULL || event == NULL)

    bzero(&kev_msg, sizeof (kev_msg));
    kev_msg.vendor_code = event->vendor_code;
    kev_msg.kev_class = event->kev_class;
    kev_msg.kev_subclass = event->kev_subclass;
    kev_msg.event_code = event->event_code;
    kev_msg.dv[0].data_ptr = &event->event_data[0];
    kev_msg.dv[0].data_length = event->total_size - KEV_MSG_HEADER_SIZE;
    kev_msg.dv[1].data_length = 0;

    result = dlil_event_internal(ifp, &kev_msg, TRUE);
}
#include <netinet/ip6.h>
#include <netinet/ip.h>

dlil_get_socket_type(struct mbuf **mp, int family, int raw)
{
    struct ip6_hdr *ip6;
    int type = SOCK_RAW;

        m = m_pullup(*mp, sizeof(struct ip));
        ip = mtod(m, struct ip *);
        if (ip->ip_p == IPPROTO_TCP)
        else if (ip->ip_p == IPPROTO_UDP)

        m = m_pullup(*mp, sizeof(struct ip6_hdr));
        ip6 = mtod(m, struct ip6_hdr *);
        if (ip6->ip6_nxt == IPPROTO_TCP)
        else if (ip6->ip6_nxt == IPPROTO_UDP)
}
dlil_count_chain_len(mbuf_t m, struct chain_len_stats *cls)
{
        atomic_add_64(&cls->cls_one, 1);
        atomic_add_64(&cls->cls_two, 1);
        atomic_add_64(&cls->cls_three, 1);
        atomic_add_64(&cls->cls_four, 1);
        atomic_add_64(&cls->cls_five_or_more, 1);
}
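/*
 * Illustrative sketch (an assumption, not the original body): the counters
 * above are bucketed by the length of the outbound mbuf packet chain; the
 * chain walk and switch that pick a bucket did not survive extraction.  A
 * walk of roughly this shape is assumed.
 */
#if 0
static void
example_count_chain_len(mbuf_t m, struct chain_len_stats *cls)
{
    mbuf_t n = m;
    int chainlen = 0;

    /* count the packets linked through m_nextpkt */
    while (n != NULL) {
        chainlen++;
        n = n->m_nextpkt;
    }
    switch (chainlen) {
    case 1:
        atomic_add_64(&cls->cls_one, 1);
        break;
    case 2:
        atomic_add_64(&cls->cls_two, 1);
        break;
    case 3:
        atomic_add_64(&cls->cls_three, 1);
        break;
    case 4:
        atomic_add_64(&cls->cls_four, 1);
        break;
    default:
        atomic_add_64(&cls->cls_five_or_more, 1);
        break;
    }
}
#endif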
/*
 * Caller should have a lock on the protocol domain if the protocol
 * doesn't support finer grained locking. In most cases, the lock
 * will be held from the socket layer and won't be released until
 * we return back to the socket layer.
 *
 * This does mean that we must take a protocol lock before we take
 * an interface lock if we're going to take both. This makes sense
 * because a protocol is likely to interact with an ifp while it
 * is under the protocol lock.
 *
 * An advisory code will be returned if adv is not null. This
 * can be used to provide feedback about interface queues to the
 */
dlil_output(ifnet_t ifp, protocol_family_t proto_family, mbuf_t packetlist,
    void *route, const struct sockaddr *dest, int raw, struct flowadv *adv)
{
    char *frame_type = NULL;
    char *dst_linkaddr = NULL;
    char frame_type_buffer[MAX_FRAME_TYPE_SIZE * 4];
    char dst_linkaddr_buffer[MAX_LINKADDR * 4];
    struct if_proto *proto = NULL;
    mbuf_t send_head = NULL;
    mbuf_t *send_tail = &send_head;
    u_int32_t pre = 0, post = 0;
    u_int32_t fpkts = 0, fbytes = 0;
    struct timespec now;

    KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_START, 0, 0, 0, 0, 0);

    /*
     * Get an io refcnt if the interface is attached to prevent ifnet_detach
     * from happening while this operation is in progress
     */
    if (!ifnet_is_attached(ifp, 1)) {

    VERIFY(ifp->if_output_dlil != NULL);

    /* update the driver's multicast filter, if needed */
    if (ifp->if_updatemcasts > 0 && if_mcasts_update(ifp) == 0)
        ifp->if_updatemcasts = 0;

    frame_type = frame_type_buffer;
    dst_linkaddr = dst_linkaddr_buffer;

    ifnet_lock_shared(ifp);
    /* callee holds a proto refcnt upon success */
    proto = find_attached_proto(ifp, proto_family);
    if (proto == NULL) {
        ifnet_lock_done(ifp);
    ifnet_lock_done(ifp);

    if (packetlist == NULL)

    packetlist = packetlist->m_nextpkt;
    m->m_nextpkt = NULL;

        proto_media_preout preoutp = (proto->proto_kpi == kProtoKPI_v1 ?
            proto->kpi.v1.pre_output : proto->kpi.v2.pre_output);
        if (preoutp != NULL) {
            retval = preoutp(ifp, proto_family, &m, dest, route,
                frame_type, dst_linkaddr);

            if (retval == EJUSTRETURN)

        retval = mac_ifnet_check_transmit(ifp, m, proto_family,
            dlil_get_socket_type(&m, proto_family, raw));

        if (!raw && proto_family == PF_INET) {
            struct ip *ip = mtod(m, struct ip *);
            DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL,
                struct ip *, ip, struct ifnet *, ifp,
                struct ip *, ip, struct ip6_hdr *, NULL);

        } else if (!raw && proto_family == PF_INET6) {
            struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
            DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL,
                struct ip6_hdr *, ip6, struct ifnet *, ifp,
                struct ip *, NULL, struct ip6_hdr *, ip6);
#endif /* CONFIG_DTRACE */

        if (raw == 0 && ifp->if_framer != NULL) {
            /*
             * If this is a broadcast packet that needs to be
             * looped back into the system, set the inbound ifp
             * to that of the outbound ifp. This will allow
             * us to determine that it is a legitimate packet
             * for the system. Only set the ifp if it's not
             * already set, just to be safe.
             */
            if ((m->m_flags & (M_BCAST | M_LOOP)) &&
                m->m_pkthdr.rcvif == NULL) {
                m->m_pkthdr.rcvif = ifp;

            retval = ifp->if_framer(ifp, &m, dest, dst_linkaddr,
                frame_type, &pre, &post);
            if (retval != EJUSTRETURN)

            /*
             * For partial checksum offload, adjust the start
             * and stuff offsets based on the prepended header.
             */
            if ((m->m_pkthdr.csum_flags &
                (CSUM_DATA_VALID | CSUM_PARTIAL)) ==
                (CSUM_DATA_VALID | CSUM_PARTIAL)) {
                m->m_pkthdr.csum_tx_stuff += pre;
                m->m_pkthdr.csum_tx_start += pre;

            if (hwcksum_dbg != 0 && !(ifp->if_flags & IFF_LOOPBACK))
                dlil_output_cksum_dbg(ifp, m, pre,

            /*
             * Clear the ifp if it was set above, and to be
             * safe, only if it is still the same as the
             * outbound ifp we have in context. If it was
             * looped back, then a copy of it was sent to the
             * loopback interface with the rcvif set, and we
             * are clearing the one that will go down to the
             */
            if (rcvif_set && m->m_pkthdr.rcvif == ifp)
                m->m_pkthdr.rcvif = NULL;

        /*
         * Let interface filters (if any) do their thing ...
         */
        /* Do not pass VLAN tagged packets to filters PR-3586856 */
        if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) == 0) {
            retval = dlil_interface_filters_output(ifp,
            if (retval != EJUSTRETURN)

        /*
         * Strip away M_PROTO1 bit prior to sending packet
         * to the driver as this field may be used by the driver
         */
        m->m_flags &= ~M_PROTO1;

        /*
         * If the underlying interface is not capable of handling a
         * packet whose data portion spans across physically disjoint
         * pages, we need to "normalize" the packet so that we pass
         * down a chain of mbufs where each mbuf points to a span that
         * resides in the system page boundary. If the packet does
         * not cross page(s), the following is a no-op.
         */
        if (!(ifp->if_hwassist & IFNET_MULTIPAGES)) {
            if ((m = m_normalize(m)) == NULL)

        /*
         * If this is a TSO packet, make sure the interface still
         * advertises TSO capability.
         */
        if (TSO_IPV4_NOTOK(ifp, m) || TSO_IPV6_NOTOK(ifp, m)) {

        ifp_inc_traffic_class_out(ifp, m);
        pktap_output(ifp, proto_family, m, pre, post);

        /*
         * Count the number of elements in the mbuf chain
         */
        if (tx_chain_len_count) {
            dlil_count_chain_len(m, &tx_chain_len_stats);

        /*
         * Record timestamp; ifnet_enqueue() will use this info
         * rather than redoing the work. An optimization could
         * involve doing this just once at the top, if there are
         * no interface filters attached, but that's probably
         */
        net_timernsec(&now, &now_nsec);
        (void) mbuf_set_timestamp(m, now_nsec, TRUE);

        /*
         * Discard partial sum information if this packet originated
         * from another interface; the packet would already have the
         * final checksum and we shouldn't recompute it.
         */
        if ((m->m_pkthdr.pkt_flags & PKTF_FORWARDED) &&
            (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID|CSUM_PARTIAL)) ==
            (CSUM_DATA_VALID|CSUM_PARTIAL)) {
            m->m_pkthdr.csum_flags &= ~CSUM_TX_FLAGS;
            m->m_pkthdr.csum_data = 0;

        /*
         * Finally, call the driver.
         */
        if (ifp->if_eflags & (IFEF_SENDLIST | IFEF_ENQUEUE_MULTI)) {
            if (m->m_pkthdr.pkt_flags & PKTF_FORWARDED) {
                flen += (m_pktlen(m) - (pre + post));
                m->m_pkthdr.pkt_flags &= ~PKTF_FORWARDED;
            send_tail = &m->m_nextpkt;
            if (m->m_pkthdr.pkt_flags & PKTF_FORWARDED) {
                flen = (m_pktlen(m) - (pre + post));
                m->m_pkthdr.pkt_flags &= ~PKTF_FORWARDED;

            KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START,
            retval = (*ifp->if_output_dlil)(ifp, m);
            if (retval == EQFULL || retval == EQSUSPENDED) {
                if (adv != NULL && adv->code == FADV_SUCCESS) {
                    adv->code = (retval == EQFULL ?
                        FADV_FLOW_CONTROLLED :
            if (retval == 0 && flen > 0) {
            if (retval != 0 && dlil_verbose) {
                printf("%s: output error on %s retval = %d\n",
                    __func__, if_name(ifp),
            KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END,
        KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0, 0, 0, 0, 0);

        packetlist = packetlist->m_nextpkt;
        m->m_nextpkt = NULL;
    } while (m != NULL);

    if (send_head != NULL) {
        KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START,
        if (ifp->if_eflags & IFEF_SENDLIST) {
            retval = (*ifp->if_output_dlil)(ifp, send_head);
            if (retval == EQFULL || retval == EQSUSPENDED) {
                adv->code = (retval == EQFULL ?
                    FADV_FLOW_CONTROLLED :
            if (retval == 0 && flen > 0) {
            if (retval != 0 && dlil_verbose) {
                printf("%s: output error on %s retval = %d\n",
                    __func__, if_name(ifp), retval);
            struct mbuf *send_m;
            VERIFY(ifp->if_eflags & IFEF_ENQUEUE_MULTI);
            while (send_head != NULL) {
                send_head = send_m->m_nextpkt;
                send_m->m_nextpkt = NULL;
                retval = (*ifp->if_output_dlil)(ifp, send_m);
                if (retval == EQFULL || retval == EQSUSPENDED) {
                    adv->code = (retval == EQFULL ?
                        FADV_FLOW_CONTROLLED :
                if (retval != 0 && dlil_verbose) {
                    printf("%s: output error on %s "
                        __func__, if_name(ifp), retval);
        KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0, 0, 0, 0, 0);

    KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_END, 0, 0, 0, 0, 0);

    ifp->if_fbytes += fbytes;
    ifp->if_fpackets += fpkts;
    if_proto_free(proto);
    if (packetlist) /* if any packets are left, clean up */
        mbuf_freem_list(packetlist);
    if (retval == EJUSTRETURN)
    ifnet_decr_iorefcnt(ifp);
}
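/*
 * Illustrative sketch (not from the original source): how a caller can use
 * the advisory code filled in by dlil_output().  When the driver queue
 * reports EQFULL or EQSUSPENDED, the advisory lets the caller apply flow
 * control rather than treat the condition as a hard error.  The function
 * name is hypothetical.
 */
#if 0
static errno_t
example_output_with_flowadv(ifnet_t ifp, protocol_family_t pf, mbuf_t m,
    const struct sockaddr *dest)
{
    struct flowadv adv;
    errno_t err;

    adv.code = FADV_SUCCESS;
    err = dlil_output(ifp, pf, m, NULL, dest, 0, &adv);
    if (err == 0 && adv.code == FADV_FLOW_CONTROLLED) {
        /* back off: the interface queue asked for flow control */
    }
    return (err);
}
#endif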
ifnet_ioctl(ifnet_t ifp, protocol_family_t proto_fam, u_long ioctl_code,
    struct ifnet_filter *filter;
    int retval = EOPNOTSUPP;

    if (ifp == NULL || ioctl_code == 0)

    /* Get an io ref count if the interface is attached */
    if (!ifnet_is_attached(ifp, 1))
        return (EOPNOTSUPP);

    /*
     * Run the interface filters first.
     * We want to run all filters before calling the protocol,
     * interface family, or interface.
     */
    lck_mtx_lock_spin(&ifp->if_flt_lock);
    /* prevent filter list from changing in case we drop the lock */
    if_flt_monitor_busy(ifp);
    TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
        if (filter->filt_ioctl != NULL && (filter->filt_protocol == 0 ||
            filter->filt_protocol == proto_fam)) {
            lck_mtx_unlock(&ifp->if_flt_lock);

            result = filter->filt_ioctl(filter->filt_cookie, ifp,
                proto_fam, ioctl_code, ioctl_arg);

            lck_mtx_lock_spin(&ifp->if_flt_lock);

            /* Only update retval if no one has handled the ioctl */
            if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
                if (result == ENOTSUP)
                    result = EOPNOTSUPP;

            if (retval != 0 && retval != EOPNOTSUPP) {
                /* we're done with the filter list */
                if_flt_monitor_unbusy(ifp);
                lck_mtx_unlock(&ifp->if_flt_lock);

    /* we're done with the filter list */
    if_flt_monitor_unbusy(ifp);
    lck_mtx_unlock(&ifp->if_flt_lock);

    /* Allow the protocol to handle the ioctl */
    if (proto_fam != 0) {
        struct if_proto *proto;

        /* callee holds a proto refcnt upon success */
        ifnet_lock_shared(ifp);
        proto = find_attached_proto(ifp, proto_fam);
        ifnet_lock_done(ifp);
        if (proto != NULL) {
            proto_media_ioctl ioctlp =
                (proto->proto_kpi == kProtoKPI_v1 ?
                proto->kpi.v1.ioctl : proto->kpi.v2.ioctl);
            result = EOPNOTSUPP;
                result = ioctlp(ifp, proto_fam, ioctl_code,
            if_proto_free(proto);

            /* Only update retval if no one has handled the ioctl */
            if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
                if (result == ENOTSUP)
                    result = EOPNOTSUPP;

            if (retval && retval != EOPNOTSUPP)

    /* retval is either 0 or EOPNOTSUPP */

    /*
     * Let the interface handle this ioctl.
     * If it returns EOPNOTSUPP, ignore that, we may have
     * already handled this in the protocol or family.
     */
        result = (*ifp->if_ioctl)(ifp, ioctl_code, ioctl_arg);

    /* Only update retval if no one has handled the ioctl */
    if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
        if (result == ENOTSUP)
            result = EOPNOTSUPP;

    if (retval && retval != EOPNOTSUPP) {

    if (retval == EJUSTRETURN)

    ifnet_decr_iorefcnt(ifp);
}
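/*
 * Illustrative sketch (not from the original source): because ifnet_ioctl()
 * runs the interface filters, then the attached protocol, then the interface
 * itself, a single call is enough to refresh interface flags after a change.
 * This mirrors the SIOCSIFFLAGS sequence used elsewhere in this file; the
 * wrapper name is hypothetical.
 */
#if 0
static void
example_refresh_ifflags(ifnet_t ifp)
{
    (void) ifnet_set_flags(ifp, IFF_UP, IFF_UP);
    (void) ifnet_ioctl(ifp, 0, SIOCSIFFLAGS, NULL);
    dlil_post_sifflags_msg(ifp);
}
#endif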
__private_extern__ errno_t
dlil_set_bpf_tap(ifnet_t ifp, bpf_tap_mode mode, bpf_packet_func callback)
{
    if (ifp->if_set_bpf_tap) {
        /* Get an io reference on the interface if it is attached */
        if (!ifnet_is_attached(ifp, 1))
        error = ifp->if_set_bpf_tap(ifp, mode, callback);
        ifnet_decr_iorefcnt(ifp);
}
dlil_resolve_multi(struct ifnet *ifp, const struct sockaddr *proto_addr,
    struct sockaddr *ll_addr, size_t ll_len)
{
    errno_t result = EOPNOTSUPP;
    struct if_proto *proto;
    const struct sockaddr *verify;
    proto_media_resolve_multi resolvep;

    if (!ifnet_is_attached(ifp, 1))

    bzero(ll_addr, ll_len);

    /* Call the protocol first; callee holds a proto refcnt upon success */
    ifnet_lock_shared(ifp);
    proto = find_attached_proto(ifp, proto_addr->sa_family);
    ifnet_lock_done(ifp);
    if (proto != NULL) {
        resolvep = (proto->proto_kpi == kProtoKPI_v1 ?
            proto->kpi.v1.resolve_multi : proto->kpi.v2.resolve_multi);
        if (resolvep != NULL)
            result = resolvep(ifp, proto_addr,
                (struct sockaddr_dl *)(void *)ll_addr, ll_len);
        if_proto_free(proto);

    /* Let the interface verify the multicast address */
    if ((result == EOPNOTSUPP || result == 0) && ifp->if_check_multi) {
        verify = proto_addr;
        result = ifp->if_check_multi(ifp, verify);

    ifnet_decr_iorefcnt(ifp);
}
__private_extern__ errno_t
dlil_send_arp_internal(ifnet_t ifp, u_short arpop,
    const struct sockaddr_dl *sender_hw, const struct sockaddr *sender_proto,
    const struct sockaddr_dl *target_hw, const struct sockaddr *target_proto)
{
    struct if_proto *proto;

    /* callee holds a proto refcnt upon success */
    ifnet_lock_shared(ifp);
    proto = find_attached_proto(ifp, target_proto->sa_family);
    ifnet_lock_done(ifp);
    if (proto == NULL) {
        proto_media_send_arp arpp;
        arpp = (proto->proto_kpi == kProtoKPI_v1 ?
            proto->kpi.v1.send_arp : proto->kpi.v2.send_arp);

            arpstat.txrequests++;
            if (target_hw != NULL)
                arpstat.txurequests++;
            arpstat.txreplies++;
        result = arpp(ifp, arpop, sender_hw, sender_proto,
            target_hw, target_proto);

        if_proto_free(proto);
}
struct net_thread_marks { };
static const struct net_thread_marks net_thread_marks_base = { };

__private_extern__ const net_thread_marks_t net_thread_marks_none =
    &net_thread_marks_base;

__private_extern__ net_thread_marks_t
net_thread_marks_push(u_int32_t push)
{
    static const char *const base = (const void*)&net_thread_marks_base;

        struct uthread *uth = get_bsdthread_info(current_thread());

        pop = push & ~uth->uu_network_marks;
            uth->uu_network_marks |= pop;

    return ((net_thread_marks_t)&base[pop]);
}

__private_extern__ net_thread_marks_t
net_thread_unmarks_push(u_int32_t unpush)
{
    static const char *const base = (const void*)&net_thread_marks_base;
    u_int32_t unpop = 0;

        struct uthread *uth = get_bsdthread_info(current_thread());

        unpop = unpush & uth->uu_network_marks;
            uth->uu_network_marks &= ~unpop;

    return ((net_thread_marks_t)&base[unpop]);
}

__private_extern__ void
net_thread_marks_pop(net_thread_marks_t popx)
{
    static const char *const base = (const void*)&net_thread_marks_base;
    const ptrdiff_t pop = (const char *)popx - (const char *)base;

        static const ptrdiff_t ones = (ptrdiff_t)(u_int32_t)~0U;
        struct uthread *uth = get_bsdthread_info(current_thread());

        VERIFY((pop & ones) == pop);
        VERIFY((ptrdiff_t)(uth->uu_network_marks & pop) == pop);
        uth->uu_network_marks &= ~pop;
}

__private_extern__ void
net_thread_unmarks_pop(net_thread_marks_t unpopx)
{
    static const char *const base = (const void*)&net_thread_marks_base;
    ptrdiff_t unpop = (const char *)unpopx - (const char *)base;

        static const ptrdiff_t ones = (ptrdiff_t)(u_int32_t)~0U;
        struct uthread *uth = get_bsdthread_info(current_thread());

        VERIFY((unpop & ones) == unpop);
        VERIFY((ptrdiff_t)(uth->uu_network_marks & unpop) == 0);
        uth->uu_network_marks |= unpop;
}

__private_extern__ u_int32_t
net_thread_is_marked(u_int32_t check)
{
        struct uthread *uth = get_bsdthread_info(current_thread());
        return (uth->uu_network_marks & check);
}

__private_extern__ u_int32_t
net_thread_is_unmarked(u_int32_t check)
{
        struct uthread *uth = get_bsdthread_info(current_thread());
        return (~uth->uu_network_marks & check);
}
static __inline__ int
_is_announcement(const struct sockaddr_in * sender_sin,
    const struct sockaddr_in * target_sin)
{
    if (sender_sin == NULL) {

    return (sender_sin->sin_addr.s_addr == target_sin->sin_addr.s_addr);
}
__private_extern__ errno_t
dlil_send_arp(ifnet_t ifp, u_short arpop, const struct sockaddr_dl *sender_hw,
    const struct sockaddr *sender_proto, const struct sockaddr_dl *target_hw,
    const struct sockaddr *target_proto0, u_int32_t rtflags)
{
    const struct sockaddr_in * sender_sin;
    const struct sockaddr_in * target_sin;
    struct sockaddr_inarp target_proto_sinarp;
    struct sockaddr *target_proto = (void *)(uintptr_t)target_proto0;

    if (target_proto == NULL || (sender_proto != NULL &&
        sender_proto->sa_family != target_proto->sa_family))

    /*
     * If the target is a (default) router, provide that
     * information to the send_arp callback routine.
     */
    if (rtflags & RTF_ROUTER) {
        bcopy(target_proto, &target_proto_sinarp,
            sizeof (struct sockaddr_in));
        target_proto_sinarp.sin_other |= SIN_ROUTER;
        target_proto = (struct sockaddr *)&target_proto_sinarp;

    /*
     * If this is an ARP request and the target IP is IPv4LL,
     * send the request on all interfaces. The exception is
     * an announcement, which must only appear on the specific
     */
    sender_sin = (struct sockaddr_in *)(void *)(uintptr_t)sender_proto;
    target_sin = (struct sockaddr_in *)(void *)(uintptr_t)target_proto;
    if (target_proto->sa_family == AF_INET &&
        IN_LINKLOCAL(ntohl(target_sin->sin_addr.s_addr)) &&
        ipv4_ll_arp_aware != 0 && arpop == ARPOP_REQUEST &&
        !_is_announcement(target_sin, sender_sin)) {

        if (ifnet_list_get(IFNET_FAMILY_ANY, &ifp_list, &count) == 0) {
            for (ifp_on = 0; ifp_on < count; ifp_on++) {
                ifaddr_t source_hw = NULL;
                ifaddr_t source_ip = NULL;
                struct sockaddr_in source_ip_copy;
                struct ifnet *cur_ifp = ifp_list[ifp_on];

                /*
                 * Only arp on interfaces marked for IPv4LL
                 * ARPing. This may mean that we don't ARP on
                 * the interface the subnet route points to.
                 */
                if (!(cur_ifp->if_eflags & IFEF_ARPLL))

                /* Find the source IP address */
                ifnet_lock_shared(cur_ifp);
                source_hw = cur_ifp->if_lladdr;
                TAILQ_FOREACH(source_ip, &cur_ifp->if_addrhead,
                    IFA_LOCK(source_ip);
                    if (source_ip->ifa_addr != NULL &&
                        source_ip->ifa_addr->sa_family ==
                        /* Copy the source IP address */
                            *(struct sockaddr_in *)
                            (void *)source_ip->ifa_addr;
                        IFA_UNLOCK(source_ip);
                    IFA_UNLOCK(source_ip);

                /* No IP Source, don't arp */
                if (source_ip == NULL) {
                    ifnet_lock_done(cur_ifp);

                IFA_ADDREF(source_hw);
                ifnet_lock_done(cur_ifp);

                new_result = dlil_send_arp_internal(cur_ifp,
                    arpop, (struct sockaddr_dl *)(void *)
                    source_hw->ifa_addr,
                    (struct sockaddr *)&source_ip_copy, NULL,

                IFA_REMREF(source_hw);
                if (result == ENOTSUP) {
                    result = new_result;

            ifnet_list_free(ifp_list);
        result = dlil_send_arp_internal(ifp, arpop, sender_hw,
            sender_proto, target_hw, target_proto);
}
/*
 * Caller must hold ifnet head lock.
 */
ifnet_lookup(struct ifnet *ifp)
{
    LCK_RW_ASSERT(&ifnet_head_lock, LCK_RW_ASSERT_HELD);
    TAILQ_FOREACH(_ifp, &ifnet_head, if_link) {

    return (_ifp != NULL);
}
/*
 * Caller has to pass a non-zero refio argument to get an
 * IO reference count. This will prevent ifnet_detach from
 * being called when there are outstanding io reference counts.
 */
ifnet_is_attached(struct ifnet *ifp, int refio)
{
    lck_mtx_lock_spin(&ifp->if_ref_lock);
    if ((ret = IF_FULLY_ATTACHED(ifp))) {
    lck_mtx_unlock(&ifp->if_ref_lock);
}
/*
 * Caller must ensure the interface is attached; the assumption is that
 * there is at least an outstanding IO reference count held already.
 * Most callers would call ifnet_is_attached() instead.
 */
ifnet_incr_iorefcnt(struct ifnet *ifp)
{
    lck_mtx_lock_spin(&ifp->if_ref_lock);
    VERIFY(IF_FULLY_ATTACHED(ifp));
    VERIFY(ifp->if_refio > 0);
    lck_mtx_unlock(&ifp->if_ref_lock);
}
ifnet_decr_iorefcnt(struct ifnet *ifp)
{
    lck_mtx_lock_spin(&ifp->if_ref_lock);
    VERIFY(ifp->if_refio > 0);
    VERIFY(ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING));

    /*
     * if there are no more outstanding io references, wakeup the
     * ifnet_detach thread if detaching flag is set.
     */
    if (ifp->if_refio == 0 && (ifp->if_refflags & IFRF_DETACHING))
        wakeup(&(ifp->if_refio));

    lck_mtx_unlock(&ifp->if_ref_lock);
}
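/*
 * Illustrative sketch (not from the original source): the usual pattern for
 * the I/O reference count seen throughout this file.  Passing a non-zero
 * refio argument to ifnet_is_attached() takes a reference that keeps
 * ifnet_detach() from completing until the matching ifnet_decr_iorefcnt().
 * The wrapper name and ENXIO choice are illustrative.
 */
#if 0
static errno_t
example_with_io_ref(struct ifnet *ifp)
{
    errno_t err = 0;

    if (!ifnet_is_attached(ifp, 1))
        return (ENXIO);

    /* ... safe to use the interface here; detach is held off ... */

    ifnet_decr_iorefcnt(ifp);
    return (err);
}
#endif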
dlil_if_trace(struct dlil_ifnet *dl_if, int refhold)
{
    struct dlil_ifnet_dbg *dl_if_dbg = (struct dlil_ifnet_dbg *)dl_if;

    if (!(dl_if->dl_if_flags & DLIF_DEBUG)) {
        panic("%s: dl_if %p has no debug structure", __func__, dl_if);

        cnt = &dl_if_dbg->dldbg_if_refhold_cnt;
        tr = dl_if_dbg->dldbg_if_refhold;
        cnt = &dl_if_dbg->dldbg_if_refrele_cnt;
        tr = dl_if_dbg->dldbg_if_refrele;

    idx = atomic_add_16_ov(cnt, 1) % IF_REF_TRACE_HIST_SIZE;
    ctrace_record(&tr[idx]);
}
dlil_if_ref(struct ifnet *ifp)
{
    struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;

    lck_mtx_lock_spin(&dl_if->dl_if_lock);
    ++dl_if->dl_if_refcnt;
    if (dl_if->dl_if_refcnt == 0) {
        panic("%s: wraparound refcnt for ifp=%p", __func__, ifp);
    if (dl_if->dl_if_trace != NULL)
        (*dl_if->dl_if_trace)(dl_if, TRUE);
    lck_mtx_unlock(&dl_if->dl_if_lock);
}
dlil_if_free(struct ifnet *ifp)
{
    struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
    bool need_release = FALSE;

    lck_mtx_lock_spin(&dl_if->dl_if_lock);
    switch (dl_if->dl_if_refcnt) {
        panic("%s: negative refcnt for ifp=%p", __func__, ifp);
        if ((ifp->if_refflags & IFRF_EMBRYONIC) != 0) {
            need_release = TRUE;
    --dl_if->dl_if_refcnt;
    if (dl_if->dl_if_trace != NULL)
        (*dl_if->dl_if_trace)(dl_if, FALSE);
    lck_mtx_unlock(&dl_if->dl_if_lock);
        dlil_if_release(ifp);
}
dlil_attach_protocol_internal(struct if_proto *proto,
    const struct ifnet_demux_desc *demux_list, u_int32_t demux_count,
    uint32_t * proto_count)
{
    struct kev_dl_proto_data ev_pr_data;
    struct ifnet *ifp = proto->ifp;
    u_int32_t hash_value = proto_hash_value(proto->protocol_family);
    struct if_proto *prev_proto;
    struct if_proto *_proto;

    /* callee holds a proto refcnt upon success */
    ifnet_lock_exclusive(ifp);
    _proto = find_attached_proto(ifp, proto->protocol_family);
    if (_proto != NULL) {
        ifnet_lock_done(ifp);
        if_proto_free(_proto);

    /*
     * Call family module add_proto routine so it can refine the
     * demux descriptors as it wishes.
     */
    retval = ifp->if_add_proto(ifp, proto->protocol_family, demux_list,
        ifnet_lock_done(ifp);

    /*
     * Insert the protocol in the hash
     */
    prev_proto = SLIST_FIRST(&ifp->if_proto_hash[hash_value]);
    while (prev_proto != NULL && SLIST_NEXT(prev_proto, next_hash) != NULL)
        prev_proto = SLIST_NEXT(prev_proto, next_hash);
        SLIST_INSERT_AFTER(prev_proto, proto, next_hash);
        SLIST_INSERT_HEAD(&ifp->if_proto_hash[hash_value],

    /* hold a proto refcnt for attach */
    if_proto_ref(proto);

    /*
     * The reserved field carries the number of protocols still attached
     * (subject to change)
     */
    ev_pr_data.proto_family = proto->protocol_family;
    ev_pr_data.proto_remaining_count = dlil_ifp_protolist(ifp, NULL, 0);
    ifnet_lock_done(ifp);

    dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_ATTACHED,
        (struct net_event_data *)&ev_pr_data,
        sizeof (struct kev_dl_proto_data));
    if (proto_count != NULL) {
        *proto_count = ev_pr_data.proto_remaining_count;
}
ifnet_attach_protocol(ifnet_t ifp, protocol_family_t protocol,
    const struct ifnet_attach_proto_param *proto_details)
{
    struct if_proto *ifproto = NULL;
    uint32_t proto_count = 0;

    ifnet_head_lock_shared();
    if (ifp == NULL || protocol == 0 || proto_details == NULL) {

    /* Check that the interface is in the global list */
    if (!ifnet_lookup(ifp)) {

    ifproto = zalloc(dlif_proto_zone);
    if (ifproto == NULL) {
    bzero(ifproto, dlif_proto_size);

    /* refcnt held above during lookup */

    ifproto->protocol_family = protocol;
    ifproto->proto_kpi = kProtoKPI_v1;
    ifproto->kpi.v1.input = proto_details->input;
    ifproto->kpi.v1.pre_output = proto_details->pre_output;
    ifproto->kpi.v1.event = proto_details->event;
    ifproto->kpi.v1.ioctl = proto_details->ioctl;
    ifproto->kpi.v1.detached = proto_details->detached;
    ifproto->kpi.v1.resolve_multi = proto_details->resolve;
    ifproto->kpi.v1.send_arp = proto_details->send_arp;

    retval = dlil_attach_protocol_internal(ifproto,
        proto_details->demux_list, proto_details->demux_count,

    if (retval != 0 && retval != EEXIST && ifp != NULL) {
        DLIL_PRINTF("%s: failed to attach v1 protocol %d (err=%d)\n",
            if_name(ifp), protocol, retval);
        printf("%s: attached v1 protocol %d (count = %d)\n",
            protocol, proto_count);

        /*
         * A protocol has been attached, mark the interface up.
         * This used to be done by configd.KernelEventMonitor, but that
         * is inherently prone to races (rdar://problem/30810208).
         */
        (void) ifnet_set_flags(ifp, IFF_UP, IFF_UP);
        (void) ifnet_ioctl(ifp, 0, SIOCSIFFLAGS, NULL);
        dlil_post_sifflags_msg(ifp);
    } else if (ifproto != NULL) {
        zfree(dlif_proto_zone, ifproto);
}
ifnet_attach_protocol_v2(ifnet_t ifp, protocol_family_t protocol,
    const struct ifnet_attach_proto_param_v2 *proto_details)
{
    struct if_proto *ifproto = NULL;
    uint32_t proto_count = 0;

    ifnet_head_lock_shared();
    if (ifp == NULL || protocol == 0 || proto_details == NULL) {

    /* Check that the interface is in the global list */
    if (!ifnet_lookup(ifp)) {

    ifproto = zalloc(dlif_proto_zone);
    if (ifproto == NULL) {
    bzero(ifproto, sizeof(*ifproto));

    /* refcnt held above during lookup */

    ifproto->protocol_family = protocol;
    ifproto->proto_kpi = kProtoKPI_v2;
    ifproto->kpi.v2.input = proto_details->input;
    ifproto->kpi.v2.pre_output = proto_details->pre_output;
    ifproto->kpi.v2.event = proto_details->event;
    ifproto->kpi.v2.ioctl = proto_details->ioctl;
    ifproto->kpi.v2.detached = proto_details->detached;
    ifproto->kpi.v2.resolve_multi = proto_details->resolve;
    ifproto->kpi.v2.send_arp = proto_details->send_arp;

    retval = dlil_attach_protocol_internal(ifproto,
        proto_details->demux_list, proto_details->demux_count,

    if (retval != 0 && retval != EEXIST && ifp != NULL) {
        DLIL_PRINTF("%s: failed to attach v2 protocol %d (err=%d)\n",
            if_name(ifp), protocol, retval);
        printf("%s: attached v2 protocol %d (count = %d)\n",
            protocol, proto_count);

        /*
         * A protocol has been attached, mark the interface up.
         * This used to be done by configd.KernelEventMonitor, but that
         * is inherently prone to races (rdar://problem/30810208).
         */
        (void) ifnet_set_flags(ifp, IFF_UP, IFF_UP);
        (void) ifnet_ioctl(ifp, 0, SIOCSIFFLAGS, NULL);
        dlil_post_sifflags_msg(ifp);
    } else if (ifproto != NULL) {
        zfree(dlif_proto_zone, ifproto);
}
ifnet_detach_protocol(ifnet_t ifp, protocol_family_t proto_family)
{
    struct if_proto *proto = NULL;

    if (ifp == NULL || proto_family == 0) {

    ifnet_lock_exclusive(ifp);
    /* callee holds a proto refcnt upon success */
    proto = find_attached_proto(ifp, proto_family);
    if (proto == NULL) {
        ifnet_lock_done(ifp);

    /* call family module del_proto */
    if (ifp->if_del_proto)
        ifp->if_del_proto(ifp, proto->protocol_family);

    SLIST_REMOVE(&ifp->if_proto_hash[proto_hash_value(proto_family)],
        proto, if_proto, next_hash);

    if (proto->proto_kpi == kProtoKPI_v1) {
        proto->kpi.v1.input = ifproto_media_input_v1;
        proto->kpi.v1.pre_output = ifproto_media_preout;
        proto->kpi.v1.event = ifproto_media_event;
        proto->kpi.v1.ioctl = ifproto_media_ioctl;
        proto->kpi.v1.resolve_multi = ifproto_media_resolve_multi;
        proto->kpi.v1.send_arp = ifproto_media_send_arp;
        proto->kpi.v2.input = ifproto_media_input_v2;
        proto->kpi.v2.pre_output = ifproto_media_preout;
        proto->kpi.v2.event = ifproto_media_event;
        proto->kpi.v2.ioctl = ifproto_media_ioctl;
        proto->kpi.v2.resolve_multi = ifproto_media_resolve_multi;
        proto->kpi.v2.send_arp = ifproto_media_send_arp;
    proto->detached = 1;
    ifnet_lock_done(ifp);

        printf("%s: detached %s protocol %d\n", if_name(ifp),
            (proto->proto_kpi == kProtoKPI_v1) ?
            "v1" : "v2", proto_family);

    /* release proto refcnt held during protocol attach */
    if_proto_free(proto);

    /*
     * Release proto refcnt held during lookup; the rest of
     * protocol detach steps will happen when the last proto
     * reference is released.
     */
    if_proto_free(proto);
}
ifproto_media_input_v1(struct ifnet *ifp, protocol_family_t protocol,
    struct mbuf *packet, char *header)
{
#pragma unused(ifp, protocol, packet, header)
}

ifproto_media_input_v2(struct ifnet *ifp, protocol_family_t protocol,
    struct mbuf *packet)
{
#pragma unused(ifp, protocol, packet)
}

ifproto_media_preout(struct ifnet *ifp, protocol_family_t protocol,
    mbuf_t *packet, const struct sockaddr *dest, void *route, char *frame_type,
    char *link_layer_dest)
{
#pragma unused(ifp, protocol, packet, dest, route, frame_type, link_layer_dest)
}

ifproto_media_event(struct ifnet *ifp, protocol_family_t protocol,
    const struct kev_msg *event)
{
#pragma unused(ifp, protocol, event)
}

ifproto_media_ioctl(struct ifnet *ifp, protocol_family_t protocol,
    unsigned long command, void *argument)
{
#pragma unused(ifp, protocol, command, argument)
}

ifproto_media_resolve_multi(ifnet_t ifp, const struct sockaddr *proto_addr,
    struct sockaddr_dl *out_ll, size_t ll_len)
{
#pragma unused(ifp, proto_addr, out_ll, ll_len)
}

ifproto_media_send_arp(struct ifnet *ifp, u_short arpop,
    const struct sockaddr_dl *sender_hw, const struct sockaddr *sender_proto,
    const struct sockaddr_dl *target_hw, const struct sockaddr *target_proto)
{
#pragma unused(ifp, arpop, sender_hw, sender_proto, target_hw, target_proto)
}
extern int if_next_index(void);
extern int tcp_ecn_outbound;
ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr)
{
    struct ifnet *tmp_if;
    struct if_data_internal if_data_saved;
    struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
    struct dlil_threading_info *dl_inp;
    u_int32_t sflags = 0;

    /*
     * Serialize ifnet attach using dlil_ifnet_lock, in order to
     * prevent the interface from being configured while it is
     * embryonic, as ifnet_head_lock is dropped and reacquired
     * below prior to marking the ifnet with IFRF_ATTACHED.
     */
    ifnet_head_lock_exclusive();
    /* Verify we aren't already on the list */
    TAILQ_FOREACH(tmp_if, &ifnet_head, if_link) {
        if (tmp_if == ifp) {

    lck_mtx_lock_spin(&ifp->if_ref_lock);
    if (!(ifp->if_refflags & IFRF_EMBRYONIC)) {
        panic_plain("%s: flags mismatch (embryonic not set) ifp=%p",
    lck_mtx_unlock(&ifp->if_ref_lock);

    ifnet_lock_exclusive(ifp);

    VERIFY(ifp->if_detaching_link.tqe_next == NULL);
    VERIFY(ifp->if_detaching_link.tqe_prev == NULL);

    if (ll_addr != NULL) {
        if (ifp->if_addrlen == 0) {
            ifp->if_addrlen = ll_addr->sdl_alen;
        } else if (ll_addr->sdl_alen != ifp->if_addrlen) {
            ifnet_lock_done(ifp);

    /*
     * Allow interfaces without protocol families to attach
     * only if they have the necessary fields filled out.
     */
    if (ifp->if_add_proto == NULL || ifp->if_del_proto == NULL) {
        DLIL_PRINTF("%s: Attempt to attach interface without "
            "family module - %d\n", __func__, ifp->if_family);
        ifnet_lock_done(ifp);

    /* Allocate protocol hash table */
    VERIFY(ifp->if_proto_hash == NULL);
    ifp->if_proto_hash = zalloc(dlif_phash_zone);
    if (ifp->if_proto_hash == NULL) {
        ifnet_lock_done(ifp);
    bzero(ifp->if_proto_hash, dlif_phash_size);

    lck_mtx_lock_spin(&ifp->if_flt_lock);
    VERIFY(TAILQ_EMPTY(&ifp->if_flt_head));
    TAILQ_INIT(&ifp->if_flt_head);
    VERIFY(ifp->if_flt_busy == 0);
    VERIFY(ifp->if_flt_waiters == 0);
    lck_mtx_unlock(&ifp->if_flt_lock);

    if (!(dl_if->dl_if_flags & DLIF_REUSE)) {
        VERIFY(LIST_EMPTY(&ifp->if_multiaddrs));
        LIST_INIT(&ifp->if_multiaddrs);

    VERIFY(ifp->if_allhostsinm == NULL);
    VERIFY(TAILQ_EMPTY(&ifp->if_addrhead));
    TAILQ_INIT(&ifp->if_addrhead);

    if (ifp->if_index == 0) {
        int idx = if_next_index();
            ifnet_lock_done(ifp);
        ifp->if_index = idx;

    /* There should not be anything occupying this slot */
    VERIFY(ifindex2ifnet[ifp->if_index] == NULL);

    /* allocate (if needed) and initialize a link address */
    ifa = dlil_alloc_lladdr(ifp, ll_addr);
        ifnet_lock_done(ifp);

    VERIFY(ifnet_addrs[ifp->if_index - 1] == NULL);
    ifnet_addrs[ifp->if_index - 1] = ifa;

    /* make this address the first on the list */
    /* hold a reference for ifnet_addrs[] */
    IFA_ADDREF_LOCKED(ifa);
    /* if_attach_link_ifa() holds a reference for ifa_link */
    if_attach_link_ifa(ifp, ifa);

    mac_ifnet_label_associate(ifp);

    TAILQ_INSERT_TAIL(&ifnet_head, ifp, if_link);
    ifindex2ifnet[ifp->if_index] = ifp;

    /* Hold a reference to the underlying dlil_ifnet */
    ifnet_reference(ifp);

    /* Clear stats (save and restore other fields that we care) */
    if_data_saved = ifp->if_data;
    bzero(&ifp->if_data, sizeof (ifp->if_data));
    ifp->if_data.ifi_type = if_data_saved.ifi_type;
    ifp->if_data.ifi_typelen = if_data_saved.ifi_typelen;
    ifp->if_data.ifi_physical = if_data_saved.ifi_physical;
    ifp->if_data.ifi_addrlen = if_data_saved.ifi_addrlen;
    ifp->if_data.ifi_hdrlen = if_data_saved.ifi_hdrlen;
    ifp->if_data.ifi_mtu = if_data_saved.ifi_mtu;
    ifp->if_data.ifi_baudrate = if_data_saved.ifi_baudrate;
    ifp->if_data.ifi_hwassist = if_data_saved.ifi_hwassist;
    ifp->if_data.ifi_tso_v4_mtu = if_data_saved.ifi_tso_v4_mtu;
    ifp->if_data.ifi_tso_v6_mtu = if_data_saved.ifi_tso_v6_mtu;
    ifnet_touch_lastchange(ifp);

    VERIFY(ifp->if_output_sched_model == IFNET_SCHED_MODEL_NORMAL ||
        ifp->if_output_sched_model == IFNET_SCHED_MODEL_DRIVER_MANAGED ||
        ifp->if_output_sched_model == IFNET_SCHED_MODEL_FQ_CODEL);

    /* By default, use SFB and enable flow advisory */
    sflags = PKTSCHEDF_QALG_SFB;
        sflags |= PKTSCHEDF_QALG_FLOWCTL;

    if (if_delaybased_queue)
        sflags |= PKTSCHEDF_QALG_DELAYBASED;

    if (ifp->if_output_sched_model ==
        IFNET_SCHED_MODEL_DRIVER_MANAGED)
        sflags |= PKTSCHEDF_QALG_DRIVER_MANAGED;

    /* Initialize transmit queue(s) */
    err = ifclassq_setup(ifp, sflags, (dl_if->dl_if_flags & DLIF_REUSE));
        panic_plain("%s: ifp=%p couldn't initialize transmit queue; "
            "err=%d", __func__, ifp, err);

    /* Sanity checks on the input thread storage */
    dl_inp = &dl_if->dl_if_inpstorage;
    bzero(&dl_inp->stats, sizeof (dl_inp->stats));
    VERIFY(dl_inp->input_waiting == 0);
    VERIFY(dl_inp->wtot == 0);
    VERIFY(dl_inp->ifp == NULL);
    VERIFY(qhead(&dl_inp->rcvq_pkts) == NULL && qempty(&dl_inp->rcvq_pkts));
    VERIFY(qlimit(&dl_inp->rcvq_pkts) == 0);
    VERIFY(!dl_inp->net_affinity);
    VERIFY(ifp->if_inp == NULL);
    VERIFY(dl_inp->input_thr == THREAD_NULL);
    VERIFY(dl_inp->wloop_thr == THREAD_NULL);
    VERIFY(dl_inp->poll_thr == THREAD_NULL);
    VERIFY(dl_inp->tag == 0);
    VERIFY(dl_inp->mode == IFNET_MODEL_INPUT_POLL_OFF);
    bzero(&dl_inp->tstats, sizeof (dl_inp->tstats));
    bzero(&dl_inp->pstats, sizeof (dl_inp->pstats));
    bzero(&dl_inp->sstats, sizeof (dl_inp->sstats));
#if IFNET_INPUT_SANITY_CHK
    VERIFY(dl_inp->input_mbuf_cnt == 0);
#endif /* IFNET_INPUT_SANITY_CHK */

    /*
     * A specific DLIL input thread is created per Ethernet/cellular
     * interface or for an interface which supports opportunistic
     * input polling. Pseudo interfaces or other types of interfaces
     * use the main input thread instead.
     */
    if ((net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) ||
        ifp->if_type == IFT_ETHER || ifp->if_type == IFT_CELLULAR) {
        ifp->if_inp = dl_inp;
        err = dlil_create_input_thread(ifp, ifp->if_inp);
            panic_plain("%s: ifp=%p couldn't get an input thread; "
                "err=%d", __func__, ifp, err);

    if (ifp->if_inp != NULL && ifp->if_inp->input_mit_tcall == NULL) {
        ifp->if_inp->input_mit_tcall =
            thread_call_allocate_with_priority(dlil_mit_tcall_fn,
            ifp, THREAD_CALL_PRIORITY_KERNEL);

    /*
     * If the driver supports the new transmit model, calculate flow hash
     * and create a workloop starter thread to invoke the if_start callback
     * where the packets may be dequeued and transmitted.
     */
    if (ifp->if_eflags & IFEF_TXSTART) {
        ifp->if_flowhash = ifnet_calc_flowhash(ifp);
        VERIFY(ifp->if_flowhash != 0);
        VERIFY(ifp->if_start_thread == THREAD_NULL);

        ifnet_set_start_cycle(ifp, NULL);
        ifp->if_start_active = 0;
        ifp->if_start_req = 0;
        ifp->if_start_flags = 0;
        VERIFY(ifp->if_start != NULL);
        if ((err = kernel_thread_start(ifnet_start_thread_fn,
            ifp, &ifp->if_start_thread)) != KERN_SUCCESS) {
                "ifp=%p couldn't get a start thread; "
                "err=%d", __func__, ifp, err);
        ml_thread_policy(ifp->if_start_thread, MACHINE_GROUP,
            (MACHINE_NETWORK_GROUP|MACHINE_NETWORK_WORKLOOP));
        ifp->if_flowhash = 0;

    /*
     * If the driver supports the new receive model, create a poller
     * thread to invoke if_input_poll callback where the packets may
     * be dequeued from the driver and processed for reception.
     */
    if (ifp->if_eflags & IFEF_RXPOLL) {
        VERIFY(ifp->if_input_poll != NULL);
        VERIFY(ifp->if_input_ctl != NULL);
        VERIFY(ifp->if_poll_thread == THREAD_NULL);

        ifnet_set_poll_cycle(ifp, NULL);
        ifp->if_poll_update = 0;
        ifp->if_poll_active = 0;
        ifp->if_poll_req = 0;
        if ((err = kernel_thread_start(ifnet_poll_thread_fn, ifp,
            &ifp->if_poll_thread)) != KERN_SUCCESS) {
            panic_plain("%s: ifp=%p couldn't get a poll thread; "
                "err=%d", __func__, ifp, err);
        ml_thread_policy(ifp->if_poll_thread, MACHINE_GROUP,
            (MACHINE_NETWORK_GROUP|MACHINE_NETWORK_WORKLOOP));

    VERIFY(ifp->if_desc.ifd_maxlen == IF_DESCSIZE);
    VERIFY(ifp->if_desc.ifd_len == 0);
    VERIFY(ifp->if_desc.ifd_desc != NULL);

    /* Record attach PC stacktrace */
    ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_attach);

    ifp->if_updatemcasts = 0;
    if (!LIST_EMPTY(&ifp->if_multiaddrs)) {
        struct ifmultiaddr *ifma;
        LIST_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
            if (ifma->ifma_addr->sa_family == AF_LINK ||
                ifma->ifma_addr->sa_family == AF_UNSPEC)
                ifp->if_updatemcasts++;

        printf("%s: attached with %d suspended link-layer multicast "
            "membership(s)\n", if_name(ifp),
            ifp->if_updatemcasts);

    /* Clear logging parameters */
    bzero(&ifp->if_log, sizeof (ifp->if_log));

    /* Clear foreground/realtime activity timestamps */
    ifp->if_fg_sendts = 0;
    ifp->if_rt_sendts = 0;

    VERIFY(ifp->if_delegated.ifp == NULL);
    VERIFY(ifp->if_delegated.type == 0);
    VERIFY(ifp->if_delegated.family == 0);
    VERIFY(ifp->if_delegated.subfamily == 0);
    VERIFY(ifp->if_delegated.expensive == 0);

    VERIFY(ifp->if_agentids == NULL);
    VERIFY(ifp->if_agentcount == 0);

    /* Reset interface state */
    bzero(&ifp->if_interface_state, sizeof(ifp->if_interface_state));
    ifp->if_interface_state.valid_bitmask |=
        IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
    ifp->if_interface_state.interface_availability =
        IF_INTERFACE_STATE_INTERFACE_AVAILABLE;

    /* Initialize Link Quality Metric (loopback [lo0] is always good) */
    if (ifp == lo_ifp) {
        ifp->if_interface_state.lqm_state = IFNET_LQM_THRESH_GOOD;
        ifp->if_interface_state.valid_bitmask |=
            IF_INTERFACE_STATE_LQM_STATE_VALID;
        ifp->if_interface_state.lqm_state = IFNET_LQM_THRESH_UNKNOWN;

    /*
     * Enable ECN capability on this interface depending on the
     * value of ECN global setting
     */
    if (tcp_ecn_outbound == 2 && !IFNET_IS_CELLULAR(ifp)) {
        ifp->if_eflags |= IFEF_ECN_ENABLE;
        ifp->if_eflags &= ~IFEF_ECN_DISABLE;

    /*
     * Built-in Cyclops always on policy for WiFi infra
     */
    if (IFNET_IS_WIFI_INFRA(ifp) && net_qos_policy_wifi_enabled != 0) {
        error = if_set_qosmarking_mode(ifp,
            IFRTYPE_QOSMARKING_FASTLANE);
            printf("%s if_set_qosmarking_mode(%s) error %d\n",
                __func__, ifp->if_xname, error);
            ifp->if_eflags |= IFEF_QOSMARKING_ENABLED;
#if (DEVELOPMENT || DEBUG)
            printf("%s fastlane enabled on %s\n",
                __func__, ifp->if_xname);
#endif /* (DEVELOPMENT || DEBUG) */

    ifnet_lock_done(ifp);

    lck_mtx_lock(&ifp->if_cached_route_lock);
    /* Enable forwarding cached route */
    ifp->if_fwd_cacheok = 1;
    /* Clean up any existing cached routes */
    ROUTE_RELEASE(&ifp->if_fwd_route);
    bzero(&ifp->if_fwd_route, sizeof (ifp->if_fwd_route));
    ROUTE_RELEASE(&ifp->if_src_route);
    bzero(&ifp->if_src_route, sizeof (ifp->if_src_route));
    ROUTE_RELEASE(&ifp->if_src_route6);
    bzero(&ifp->if_src_route6, sizeof (ifp->if_src_route6));
    lck_mtx_unlock(&ifp->if_cached_route_lock);

    ifnet_llreach_ifattach(ifp, (dl_if->dl_if_flags & DLIF_REUSE));

    /*
     * Allocate and attach IGMPv3/MLDv2 interface specific variables
     * and trees; do this before the ifnet is marked as attached.
     * The ifnet keeps the reference to the info structures even after
     * the ifnet is detached, since the network-layer records still
     * refer to the info structures even after that. This also
     * makes it possible for them to still function after the ifnet
     * is recycled or reattached.
     */
    if (IGMP_IFINFO(ifp) == NULL) {
        IGMP_IFINFO(ifp) = igmp_domifattach(ifp, M_WAITOK);
        VERIFY(IGMP_IFINFO(ifp) != NULL);
        VERIFY(IGMP_IFINFO(ifp)->igi_ifp == ifp);
        igmp_domifreattach(IGMP_IFINFO(ifp));

    if (MLD_IFINFO(ifp) == NULL) {
        MLD_IFINFO(ifp) = mld_domifattach(ifp, M_WAITOK);
        VERIFY(MLD_IFINFO(ifp) != NULL);
        VERIFY(MLD_IFINFO(ifp)->mli_ifp == ifp);
        mld_domifreattach(MLD_IFINFO(ifp));

    VERIFY(ifp->if_data_threshold == 0);
    VERIFY(ifp->if_dt_tcall != NULL);

    /*
     * Finally, mark this ifnet as attached.
     */
    lck_mtx_lock(rnh_lock);
    ifnet_lock_exclusive(ifp);
    lck_mtx_lock_spin(&ifp->if_ref_lock);
    ifp->if_refflags = IFRF_ATTACHED;   /* clears embryonic */
    lck_mtx_unlock(&ifp->if_ref_lock);

    /* boot-args override; enable idle notification */
    (void) ifnet_set_idle_flags_locked(ifp, IFRF_IDLE_NOTIFY,

    /* apply previous request(s) to set the idle flags, if any */
    (void) ifnet_set_idle_flags_locked(ifp, ifp->if_idle_new_flags,
        ifp->if_idle_new_flags_mask);

    ifnet_lock_done(ifp);
    lck_mtx_unlock(rnh_lock);

    /*
     * Attach packet filter to this interface, if enabled.
     */
    pf_ifnet_hook(ifp, 1);

    dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_ATTACHED, NULL, 0);

        printf("%s: attached%s\n", if_name(ifp),
            (dl_if->dl_if_flags & DLIF_REUSE) ? " (recycled)" : "");
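/*
 * Illustrative sketch (not from the original source): the driver-side
 * counterpart of ifnet_attach().  A driver typically allocates the ifnet
 * with ifnet_allocate(), fills in the mandatory callbacks, and then calls
 * ifnet_attach() with its link-layer address.  Field and callback types
 * follow <net/kpi_interface.h>; the interface name and the driver-specific
 * callbacks shown here are hypothetical.
 */
#if 0
static errno_t
example_driver_attach(ifnet_t *out_ifp, const struct sockaddr_dl *lladdr)
{
    struct ifnet_init_params init;
    ifnet_t ifp;
    errno_t err;

    bzero(&init, sizeof (init));
    init.name = "hypo";                     /* hypothetical interface name */
    init.unit = 0;
    init.family = IFNET_FAMILY_ETHERNET;
    init.type = IFT_ETHER;
    init.output = example_driver_output;    /* hypothetical callbacks */
    init.demux = example_driver_demux;
    init.add_proto = example_driver_add_proto;
    init.del_proto = example_driver_del_proto;

    err = ifnet_allocate(&init, &ifp);
    if (err != 0)
        return (err);

    err = ifnet_attach(ifp, lladdr);
    if (err == 0)
        *out_ifp = ifp;
    return (err);
}
#endif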
/*
 * Prepare the storage for the first/permanent link address, which must
 * have the same lifetime as the ifnet itself. Although the link
 * address gets removed from if_addrhead and ifnet_addrs[] at detach time,
 * its location in memory must never change as it may still be referred
 * to by some parts of the system afterwards (unfortunate implementation
 * artifacts inherited from BSD.)
 *
 * Caller must hold ifnet lock as writer.
 */
static struct ifaddr *
dlil_alloc_lladdr(struct ifnet *ifp, const struct sockaddr_dl *ll_addr)
{
    struct ifaddr *ifa, *oifa;
    struct sockaddr_dl *asdl, *msdl;
    char workbuf[IFNAMSIZ*2];
    int namelen, masklen, socksize;
    struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;

    ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_EXCLUSIVE);
    VERIFY(ll_addr == NULL || ll_addr->sdl_alen == ifp->if_addrlen);

    namelen = snprintf(workbuf, sizeof (workbuf), "%s",
    masklen = offsetof(struct sockaddr_dl, sdl_data[0])
        + ((namelen > 0) ? namelen : 0);
    socksize = masklen + ifp->if_addrlen;
#define ROUNDUP(a) (1 + (((a) - 1) | (sizeof (u_int32_t) - 1)))
    if ((u_int32_t)socksize < sizeof (struct sockaddr_dl))
        socksize = sizeof(struct sockaddr_dl);
    socksize = ROUNDUP(socksize);

    ifa = ifp->if_lladdr;
    if (socksize > DLIL_SDLMAXLEN ||
        (ifa != NULL && ifa != &dl_if->dl_if_lladdr.ifa)) {
        /*
         * Rare, but in the event that the link address requires
         * more storage space than DLIL_SDLMAXLEN, allocate the
         * largest possible storages for address and mask, such
         * that we can reuse the same space when if_addrlen grows.
         * This same space will be used when if_addrlen shrinks.
         */
        if (ifa == NULL || ifa == &dl_if->dl_if_lladdr.ifa) {
            int ifasize = sizeof (*ifa) + 2 * SOCK_MAXADDRLEN;
            ifa = _MALLOC(ifasize, M_IFADDR, M_WAITOK | M_ZERO);

            /* Don't set IFD_ALLOC, as this is permanent */
            ifa->ifa_debug = IFD_LINK;

        /* address and mask sockaddr_dl locations */
        asdl = (struct sockaddr_dl *)(ifa + 1);
        bzero(asdl, SOCK_MAXADDRLEN);
        msdl = (struct sockaddr_dl *)(void *)
            ((char *)asdl + SOCK_MAXADDRLEN);
        bzero(msdl, SOCK_MAXADDRLEN);

        VERIFY(ifa == NULL || ifa == &dl_if->dl_if_lladdr.ifa);
        /*
         * Use the storage areas for address and mask within the
         * dlil_ifnet structure. This is the most common case.
         */
            ifa = &dl_if->dl_if_lladdr.ifa;

            /* Don't set IFD_ALLOC, as this is permanent */
            ifa->ifa_debug = IFD_LINK;

        /* address and mask sockaddr_dl locations */
        asdl = (struct sockaddr_dl *)(void *)&dl_if->dl_if_lladdr.asdl;
        bzero(asdl, sizeof (dl_if->dl_if_lladdr.asdl));
        msdl = (struct sockaddr_dl *)(void *)&dl_if->dl_if_lladdr.msdl;
        bzero(msdl, sizeof (dl_if->dl_if_lladdr.msdl));

    /* hold a permanent reference for the ifnet itself */
    IFA_ADDREF_LOCKED(ifa);
    oifa = ifp->if_lladdr;
    ifp->if_lladdr = ifa;

    VERIFY(ifa->ifa_debug == IFD_LINK);
    ifa->ifa_rtrequest = link_rtrequest;
    ifa->ifa_addr = (struct sockaddr *)asdl;
    asdl->sdl_len = socksize;
    asdl->sdl_family = AF_LINK;
        bcopy(workbuf, asdl->sdl_data, min(namelen,
            sizeof (asdl->sdl_data)));
        asdl->sdl_nlen = namelen;
    asdl->sdl_index = ifp->if_index;
    asdl->sdl_type = ifp->if_type;
    if (ll_addr != NULL) {
        asdl->sdl_alen = ll_addr->sdl_alen;
        bcopy(CONST_LLADDR(ll_addr), LLADDR(asdl), asdl->sdl_alen);
    ifa->ifa_netmask = (struct sockaddr *)msdl;
    msdl->sdl_len = masklen;
        msdl->sdl_data[--namelen] = 0xff;
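/*
 * Worked example (illustrative, assuming the usual struct sockaddr_dl
 * layout where sdl_data starts at offset 8): for an interface named "en0"
 * with a 6-byte link-layer address, namelen = 3, masklen = 8 + 3 = 11 and
 * socksize = 11 + 6 = 17; since 17 is smaller than sizeof (struct
 * sockaddr_dl), socksize is bumped to that size and then rounded up to a
 * 4-byte boundary by ROUNDUP() before being compared against
 * DLIL_SDLMAXLEN.
 */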
if_purgeaddrs(struct ifnet *ifp)
{
    in6_purgeaddrs(ifp);
}
errno_t
ifnet_detach(ifnet_t ifp)
{
	struct ifnet *delegated_ifp;
	struct nd_ifinfo *ndi = NULL;

	if (ifp == NULL)
		return (EINVAL);

	ndi = ND_IFINFO(ifp);
	if (NULL != ndi)
		ndi->cga_initialized = FALSE;

	lck_mtx_lock(rnh_lock);
	ifnet_head_lock_exclusive();
	ifnet_lock_exclusive(ifp);

	/*
	 * Check to see if this interface has previously triggered
	 * aggressive protocol draining; if so, decrement the global
	 * refcnt and clear PR_AGGDRAIN on the route domain if
	 * there are no more of such an interface around.
	 */
	(void) ifnet_set_idle_flags_locked(ifp, 0, ~0);

	lck_mtx_lock_spin(&ifp->if_ref_lock);
	if (!(ifp->if_refflags & IFRF_ATTACHED)) {
		lck_mtx_unlock(&ifp->if_ref_lock);
		ifnet_lock_done(ifp);
		ifnet_head_done();
		lck_mtx_unlock(rnh_lock);
		return (EINVAL);
	} else if (ifp->if_refflags & IFRF_DETACHING) {
		/* Interface has already been detached */
		lck_mtx_unlock(&ifp->if_ref_lock);
		ifnet_lock_done(ifp);
		ifnet_head_done();
		lck_mtx_unlock(rnh_lock);
		return (ENXIO);
	}
	VERIFY(!(ifp->if_refflags & IFRF_EMBRYONIC));
	/* Indicate this interface is being detached */
	ifp->if_refflags &= ~IFRF_ATTACHED;
	ifp->if_refflags |= IFRF_DETACHING;
	lck_mtx_unlock(&ifp->if_ref_lock);

	if (dlil_verbose)
		printf("%s: detaching\n", if_name(ifp));

	/* clean up flow control entry object if there's any */
	if (ifp->if_eflags & IFEF_TXSTART) {
		ifnet_flowadv(ifp->if_flowhash);
	}

	/* Reset ECN enable/disable flags */
	ifp->if_eflags &= ~IFEF_ECN_DISABLE;
	ifp->if_eflags &= ~IFEF_ECN_ENABLE;

	/*
	 * Remove ifnet from the ifnet_head, ifindex2ifnet[]; it will
	 * no longer be visible during lookups from this point.
	 */
	VERIFY(ifindex2ifnet[ifp->if_index] == ifp);
	TAILQ_REMOVE(&ifnet_head, ifp, if_link);
	ifp->if_link.tqe_next = NULL;
	ifp->if_link.tqe_prev = NULL;
	if (ifp->if_ordered_link.tqe_next != NULL ||
	    ifp->if_ordered_link.tqe_prev != NULL) {
		ifnet_remove_from_ordered_list(ifp);
	}
	ifindex2ifnet[ifp->if_index] = NULL;

	/* 18717626 - reset IFEF_IPV4_ROUTER and IFEF_IPV6_ROUTER */
	ifp->if_eflags &= ~(IFEF_IPV4_ROUTER | IFEF_IPV6_ROUTER);

	/* Record detach PC stacktrace */
	ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_detach);

	/* Clear logging parameters */
	bzero(&ifp->if_log, sizeof (ifp->if_log));

	/* Clear delegated interface info (reference released below) */
	delegated_ifp = ifp->if_delegated.ifp;
	bzero(&ifp->if_delegated, sizeof (ifp->if_delegated));

	/* Reset interface state */
	bzero(&ifp->if_interface_state, sizeof(ifp->if_interface_state));

	ifnet_lock_done(ifp);
	ifnet_head_done();
	lck_mtx_unlock(rnh_lock);

	/* Release reference held on the delegated interface */
	if (delegated_ifp != NULL)
		ifnet_release(delegated_ifp);

	/* Reset Link Quality Metric (unless loopback [lo0]) */
	if (ifp != lo_ifp)
		if_lqm_update(ifp, IFNET_LQM_THRESH_OFF, 0);

	/* Reset TCP local statistics */
	if (ifp->if_tcp_stat != NULL)
		bzero(ifp->if_tcp_stat, sizeof(*ifp->if_tcp_stat));

	/* Reset UDP local statistics */
	if (ifp->if_udp_stat != NULL)
		bzero(ifp->if_udp_stat, sizeof(*ifp->if_udp_stat));

	/* Reset ifnet IPv4 stats */
	if (ifp->if_ipv4_stat != NULL)
		bzero(ifp->if_ipv4_stat, sizeof(*ifp->if_ipv4_stat));

	/* Reset ifnet IPv6 stats */
	if (ifp->if_ipv6_stat != NULL)
		bzero(ifp->if_ipv6_stat, sizeof(*ifp->if_ipv6_stat));

	/* Release memory held for interface link status report */
	if (ifp->if_link_status != NULL) {
		FREE(ifp->if_link_status, M_TEMP);
		ifp->if_link_status = NULL;
	}

	/* Clear agent IDs */
	if (ifp->if_agentids != NULL) {
		FREE(ifp->if_agentids, M_NETAGENT);
		ifp->if_agentids = NULL;
	}
	ifp->if_agentcount = 0;

	/* Let BPF know we're detaching */
	bpfdetach(ifp);

	/* Mark the interface as DOWN */
	if_down(ifp);

	/* Disable forwarding cached route */
	lck_mtx_lock(&ifp->if_cached_route_lock);
	ifp->if_fwd_cacheok = 0;
	lck_mtx_unlock(&ifp->if_cached_route_lock);

	/* Disable data threshold and wait for any pending event posting */
	ifp->if_data_threshold = 0;
	VERIFY(ifp->if_dt_tcall != NULL);
	(void) thread_call_cancel_wait(ifp->if_dt_tcall);

	/*
	 * Drain any deferred IGMPv3/MLDv2 query responses, but keep the
	 * references to the info structures and leave them attached to
	 * this ifnet.
	 */
#if INET
	igmp_domifdetach(ifp);
#endif /* INET */
#if INET6
	mld_domifdetach(ifp);
#endif /* INET6 */

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHING, NULL, 0);

	/* Let worker thread take care of the rest, to avoid reentrancy */
	dlil_if_lock();
	ifnet_detaching_enqueue(ifp);
	dlil_if_unlock();

	return (0);
}
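
/*
 * The detaching queue below is protected by the dlil_ifnet lock; callers
 * enqueue an ifnet and wake up the detacher thread, which drains the
 * queue one interface at a time.
 */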
static void
ifnet_detaching_enqueue(struct ifnet *ifp)
{
	dlil_if_lock_assert();

	++ifnet_detaching_cnt;
	VERIFY(ifnet_detaching_cnt != 0);
	TAILQ_INSERT_TAIL(&ifnet_detaching_head, ifp, if_detaching_link);
	wakeup((caddr_t)&ifnet_delayed_run);
}
static struct ifnet *
ifnet_detaching_dequeue(void)
{
	struct ifnet *ifp;

	dlil_if_lock_assert();

	ifp = TAILQ_FIRST(&ifnet_detaching_head);
	VERIFY(ifnet_detaching_cnt != 0 || ifp == NULL);
	if (ifp != NULL) {
		VERIFY(ifnet_detaching_cnt != 0);
		--ifnet_detaching_cnt;
		TAILQ_REMOVE(&ifnet_detaching_head, ifp, if_detaching_link);
		ifp->if_detaching_link.tqe_next = NULL;
		ifp->if_detaching_link.tqe_prev = NULL;
	}
	return (ifp);
}
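
/*
 * The detacher thread sleeps on ifnet_delayed_run using msleep0() with a
 * continuation, so no kernel stack is held across the wait; when woken it
 * re-enters ifnet_detacher_thread_cont() and drains the detaching queue.
 */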
static void
ifnet_detacher_thread_cont(int err)
{
#pragma unused(err)
	struct ifnet *ifp;

	for (;;) {
		dlil_if_lock_assert();
		while (ifnet_detaching_cnt == 0) {
			(void) msleep0(&ifnet_delayed_run, &dlil_ifnet_lock,
			    (PZERO - 1), "ifnet_detacher_cont", 0,
			    ifnet_detacher_thread_cont);
			/* NOTREACHED */
		}

		VERIFY(TAILQ_FIRST(&ifnet_detaching_head) != NULL);

		/* Take care of detaching ifnet */
		ifp = ifnet_detaching_dequeue();
		if (ifp != NULL) {
			dlil_if_unlock();
			ifnet_detach_final(ifp);
			dlil_if_lock();
		}
	}
}
static void
ifnet_detacher_thread_func(void *v, wait_result_t w)
{
#pragma unused(v, w)
	dlil_if_lock();
	(void) msleep0(&ifnet_delayed_run, &dlil_ifnet_lock,
	    (PZERO - 1), "ifnet_detacher", 0, ifnet_detacher_thread_cont);
	/*
	 * msleep0() shouldn't have returned as PCATCH was not set;
	 * therefore assert in this case.
	 */
	dlil_if_unlock();
	VERIFY(0);
}
static void
ifnet_detach_final(struct ifnet *ifp)
{
	struct ifnet_filter *filter, *filter_next;
	struct ifnet_filter_head fhead;
	struct dlil_threading_info *inp;
	struct ifaddr *ifa;
	ifnet_detached_func if_free;
	int i;

	lck_mtx_lock(&ifp->if_ref_lock);
	if (!(ifp->if_refflags & IFRF_DETACHING)) {
		panic("%s: flags mismatch (detaching not set) ifp=%p",
		    __func__, ifp);
		/* NOTREACHED */
	}

	/*
	 * Wait until the existing IO references get released
	 * before we proceed with ifnet_detach.  This is not a
	 * common case, so block without using a continuation.
	 */
	while (ifp->if_refio > 0) {
		printf("%s: Waiting for IO references on %s interface "
		    "to be released\n", __func__, if_name(ifp));
		(void) msleep(&(ifp->if_refio), &ifp->if_ref_lock,
		    (PZERO - 1), "ifnet_ioref_wait", NULL);
	}
	lck_mtx_unlock(&ifp->if_ref_lock);

	/* Drain and destroy send queue */
	ifclassq_teardown(ifp);

	/* Detach interface filters */
	lck_mtx_lock(&ifp->if_flt_lock);
	if_flt_monitor_enter(ifp);

	LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
	fhead = ifp->if_flt_head;
	TAILQ_INIT(&ifp->if_flt_head);

	for (filter = TAILQ_FIRST(&fhead); filter; filter = filter_next) {
		filter_next = TAILQ_NEXT(filter, filt_next);
		lck_mtx_unlock(&ifp->if_flt_lock);

		dlil_detach_filter_internal(filter, 1);
		lck_mtx_lock(&ifp->if_flt_lock);
	}
	if_flt_monitor_leave(ifp);
	lck_mtx_unlock(&ifp->if_flt_lock);

	/* Tell upper layers to drop their network addresses */
	if_purgeaddrs(ifp);

	ifnet_lock_exclusive(ifp);

	/* Unplumb all protocols */
	for (i = 0; i < PROTO_HASH_SLOTS; i++) {
		struct if_proto *proto;

		proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
		while (proto != NULL) {
			protocol_family_t family = proto->protocol_family;
			ifnet_lock_done(ifp);
			proto_unplumb(family, ifp);
			ifnet_lock_exclusive(ifp);
			proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
		}
		/* There should not be any protocols left */
		VERIFY(SLIST_EMPTY(&ifp->if_proto_hash[i]));
	}
	zfree(dlif_phash_zone, ifp->if_proto_hash);
	ifp->if_proto_hash = NULL;

	/* Detach (permanent) link address from if_addrhead */
	ifa = TAILQ_FIRST(&ifp->if_addrhead);
	VERIFY(ifnet_addrs[ifp->if_index - 1] == ifa);
	if_detach_link_ifa(ifp, ifa);

	/* Remove (permanent) link address from ifnet_addrs[] */
	ifnet_addrs[ifp->if_index - 1] = NULL;

	/* This interface should not be on {ifnet_head,detaching} */
	VERIFY(ifp->if_link.tqe_next == NULL);
	VERIFY(ifp->if_link.tqe_prev == NULL);
	VERIFY(ifp->if_detaching_link.tqe_next == NULL);
	VERIFY(ifp->if_detaching_link.tqe_prev == NULL);
	VERIFY(ifp->if_ordered_link.tqe_next == NULL);
	VERIFY(ifp->if_ordered_link.tqe_prev == NULL);

	/* The slot should have been emptied */
	VERIFY(ifindex2ifnet[ifp->if_index] == NULL);

	/* There should not be any addresses left */
	VERIFY(TAILQ_EMPTY(&ifp->if_addrhead));

	/*
	 * Signal the starter thread to terminate itself.
	 */
	if (ifp->if_start_thread != THREAD_NULL) {
		lck_mtx_lock_spin(&ifp->if_start_lock);
		ifp->if_start_flags = 0;
		ifp->if_start_thread = THREAD_NULL;
		wakeup_one((caddr_t)&ifp->if_start_thread);
		lck_mtx_unlock(&ifp->if_start_lock);
	}

	/*
	 * Signal the poller thread to terminate itself.
	 */
	if (ifp->if_poll_thread != THREAD_NULL) {
		lck_mtx_lock_spin(&ifp->if_poll_lock);
		ifp->if_poll_thread = THREAD_NULL;
		wakeup_one((caddr_t)&ifp->if_poll_thread);
		lck_mtx_unlock(&ifp->if_poll_lock);
	}

	/*
	 * If thread affinity was set for the workloop thread, we will need
	 * to tear down the affinity and release the extra reference count
	 * taken at attach time.  Does not apply to lo0 or other interfaces
	 * without dedicated input threads.
	 */
	if ((inp = ifp->if_inp) != NULL) {
		VERIFY(inp != dlil_main_input_thread);

		if (inp->net_affinity) {
			struct thread *tp, *wtp, *ptp;

			lck_mtx_lock_spin(&inp->input_lck);
			wtp = inp->wloop_thr;
			inp->wloop_thr = THREAD_NULL;
			ptp = inp->poll_thr;
			inp->poll_thr = THREAD_NULL;
			tp = inp->input_thr;	/* don't nullify now */
			inp->net_affinity = FALSE;
			lck_mtx_unlock(&inp->input_lck);

			/* Tear down poll thread affinity */
			if (ptp != NULL) {
				VERIFY(ifp->if_eflags & IFEF_RXPOLL);
				(void) dlil_affinity_set(ptp,
				    THREAD_AFFINITY_TAG_NULL);
				thread_deallocate(ptp);
			}

			/* Tear down workloop thread affinity */
			if (wtp != NULL) {
				(void) dlil_affinity_set(wtp,
				    THREAD_AFFINITY_TAG_NULL);
				thread_deallocate(wtp);
			}

			/* Tear down DLIL input thread affinity */
			(void) dlil_affinity_set(tp, THREAD_AFFINITY_TAG_NULL);
			thread_deallocate(tp);
		}

		/* disassociate ifp DLIL input thread */
		ifp->if_inp = NULL;

		/* tell the input thread to terminate */
		lck_mtx_lock_spin(&inp->input_lck);
		inp->input_waiting |= DLIL_INPUT_TERMINATE;
		if (!(inp->input_waiting & DLIL_INPUT_RUNNING)) {
			wakeup_one((caddr_t)&inp->input_waiting);
		}
		lck_mtx_unlock(&inp->input_lck);
		ifnet_lock_done(ifp);

		/* wait for the input thread to terminate */
		lck_mtx_lock_spin(&inp->input_lck);
		while ((inp->input_waiting & DLIL_INPUT_TERMINATE_COMPLETE)
		    == 0) {
			(void) msleep(&inp->input_waiting, &inp->input_lck,
			    (PZERO - 1) | PSPIN, inp->input_name, NULL);
		}
		lck_mtx_unlock(&inp->input_lck);
		ifnet_lock_exclusive(ifp);

		/* clean-up input thread state */
		dlil_clean_threading_info(inp);
	}

	/* The driver might unload, so point these to ourselves */
	if_free = ifp->if_free;
	ifp->if_output_dlil = ifp_if_output;
	ifp->if_output = ifp_if_output;
	ifp->if_pre_enqueue = ifp_if_output;
	ifp->if_start = ifp_if_start;
	ifp->if_output_ctl = ifp_if_ctl;
	ifp->if_input_dlil = ifp_if_input;
	ifp->if_input_poll = ifp_if_input_poll;
	ifp->if_input_ctl = ifp_if_ctl;
	ifp->if_ioctl = ifp_if_ioctl;
	ifp->if_set_bpf_tap = ifp_if_set_bpf_tap;
	ifp->if_free = ifp_if_free;
	ifp->if_demux = ifp_if_demux;
	ifp->if_event = ifp_if_event;
	ifp->if_framer_legacy = ifp_if_framer;
	ifp->if_framer = ifp_if_framer_extended;
	ifp->if_add_proto = ifp_if_add_proto;
	ifp->if_del_proto = ifp_if_del_proto;
	ifp->if_check_multi = ifp_if_check_multi;

	/* wipe out interface description */
	VERIFY(ifp->if_desc.ifd_maxlen == IF_DESCSIZE);
	ifp->if_desc.ifd_len = 0;
	VERIFY(ifp->if_desc.ifd_desc != NULL);
	bzero(ifp->if_desc.ifd_desc, IF_DESCSIZE);

	/* there shouldn't be any delegation by now */
	VERIFY(ifp->if_delegated.ifp == NULL);
	VERIFY(ifp->if_delegated.type == 0);
	VERIFY(ifp->if_delegated.family == 0);
	VERIFY(ifp->if_delegated.subfamily == 0);
	VERIFY(ifp->if_delegated.expensive == 0);

	/* QoS marking get cleared */
	ifp->if_eflags &= ~IFEF_QOSMARKING_ENABLED;
	if_set_qosmarking_mode(ifp, IFRTYPE_QOSMARKING_MODE_NONE);

	ifnet_lock_done(ifp);

#if PF
	/*
	 * Detach this interface from packet filter, if enabled.
	 */
	pf_ifnet_hook(ifp, 0);
#endif /* PF */

	/* Filter list should be empty */
	lck_mtx_lock_spin(&ifp->if_flt_lock);
	VERIFY(TAILQ_EMPTY(&ifp->if_flt_head));
	VERIFY(ifp->if_flt_busy == 0);
	VERIFY(ifp->if_flt_waiters == 0);
	lck_mtx_unlock(&ifp->if_flt_lock);

	/* Last chance to drain send queue */
	if_qflush(ifp, 0);

	/* Last chance to cleanup any cached route */
	lck_mtx_lock(&ifp->if_cached_route_lock);
	VERIFY(!ifp->if_fwd_cacheok);
	ROUTE_RELEASE(&ifp->if_fwd_route);
	bzero(&ifp->if_fwd_route, sizeof (ifp->if_fwd_route));
	ROUTE_RELEASE(&ifp->if_src_route);
	bzero(&ifp->if_src_route, sizeof (ifp->if_src_route));
	ROUTE_RELEASE(&ifp->if_src_route6);
	bzero(&ifp->if_src_route6, sizeof (ifp->if_src_route6));
	lck_mtx_unlock(&ifp->if_cached_route_lock);

	VERIFY(ifp->if_data_threshold == 0);
	VERIFY(ifp->if_dt_tcall != NULL);
	VERIFY(!thread_call_isactive(ifp->if_dt_tcall));

	ifnet_llreach_ifdetach(ifp);

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHED, NULL, 0);

	/*
	 * Finally, mark this ifnet as detached.
	 */
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	if (!(ifp->if_refflags & IFRF_DETACHING)) {
		panic("%s: flags mismatch (detaching not set) ifp=%p",
		    __func__, ifp);
		/* NOTREACHED */
	}
	ifp->if_refflags &= ~IFRF_DETACHING;
	lck_mtx_unlock(&ifp->if_ref_lock);
	if (if_free != NULL)
		if_free(ifp);

	if (dlil_verbose)
		printf("%s: detached\n", if_name(ifp));

	/* Release reference held during ifnet attach */
	ifnet_release(ifp);
}
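
/*
 * The ifp_if_* routines below are inert stand-ins.  ifnet_detach_final()
 * points a detached interface's callbacks at them so that any straggling
 * callers fail safely even if the driver that backed the ifnet unloads.
 */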
static errno_t
ifp_if_output(struct ifnet *ifp, struct mbuf *m)
{
#pragma unused(ifp)
	m_freem_list(m);
	return (0);
}

static void
ifp_if_start(struct ifnet *ifp)
{
#pragma unused(ifp)
}

static errno_t
ifp_if_input(struct ifnet *ifp, struct mbuf *m_head,
    struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
    boolean_t poll, struct thread *tp)
{
#pragma unused(ifp, m_tail, s, poll, tp)
	m_freem_list(m_head);
	return (ENXIO);
}

static void
ifp_if_input_poll(struct ifnet *ifp, u_int32_t flags, u_int32_t max_cnt,
    struct mbuf **m_head, struct mbuf **m_tail, u_int32_t *cnt, u_int32_t *len)
{
#pragma unused(ifp, flags, max_cnt)
	if (m_head != NULL)
		*m_head = NULL;
	if (m_tail != NULL)
		*m_tail = NULL;
	if (cnt != NULL)
		*cnt = 0;
	if (len != NULL)
		*len = 0;
}

static errno_t
ifp_if_ctl(struct ifnet *ifp, ifnet_ctl_cmd_t cmd, u_int32_t arglen, void *arg)
{
#pragma unused(ifp, cmd, arglen, arg)
	return (EOPNOTSUPP);
}

static errno_t
ifp_if_demux(struct ifnet *ifp, struct mbuf *m, char *fh, protocol_family_t *pf)
{
#pragma unused(ifp, fh, pf)
	m_freem(m);
	return (EJUSTRETURN);
}

static errno_t
ifp_if_add_proto(struct ifnet *ifp, protocol_family_t pf,
    const struct ifnet_demux_desc *da, u_int32_t dc)
{
#pragma unused(ifp, pf, da, dc)
	return (EINVAL);
}

static errno_t
ifp_if_del_proto(struct ifnet *ifp, protocol_family_t pf)
{
#pragma unused(ifp, pf)
	return (EINVAL);
}

static errno_t
ifp_if_check_multi(struct ifnet *ifp, const struct sockaddr *sa)
{
#pragma unused(ifp, sa)
	return (EOPNOTSUPP);
}

#if !CONFIG_EMBEDDED
static errno_t
ifp_if_framer(struct ifnet *ifp, struct mbuf **m,
    const struct sockaddr *sa, const char *ll, const char *t,
    u_int32_t *pre, u_int32_t *post)
#else
static errno_t
ifp_if_framer(struct ifnet *ifp, struct mbuf **m,
    const struct sockaddr *sa, const char *ll, const char *t)
#endif /* !CONFIG_EMBEDDED */
{
#pragma unused(ifp, m, sa, ll, t)
#if !CONFIG_EMBEDDED
	return (ifp_if_framer_extended(ifp, m, sa, ll, t, pre, post));
#else
	return (ifp_if_framer_extended(ifp, m, sa, ll, t, NULL, NULL));
#endif /* !CONFIG_EMBEDDED */
}

static errno_t
ifp_if_framer_extended(struct ifnet *ifp, struct mbuf **m,
    const struct sockaddr *sa, const char *ll, const char *t,
    u_int32_t *pre, u_int32_t *post)
{
#pragma unused(ifp, sa, ll, t)
	m_freem(*m);
	*m = NULL;

	if (pre != NULL)
		*pre = 0;
	if (post != NULL)
		*post = 0;

	return (EJUSTRETURN);
}

static errno_t
ifp_if_ioctl(struct ifnet *ifp, unsigned long cmd, void *arg)
{
#pragma unused(ifp, cmd, arg)
	return (EOPNOTSUPP);
}

static errno_t
ifp_if_set_bpf_tap(struct ifnet *ifp, bpf_tap_mode tm, bpf_packet_func f)
{
#pragma unused(ifp, tm, f)
	/* XXX not sure what to do here */
	return (ENODEV);
}

static void
ifp_if_free(struct ifnet *ifp)
{
#pragma unused(ifp)
}

static void
ifp_if_event(struct ifnet *ifp, const struct kev_msg *e)
{
#pragma unused(ifp, e)
}
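
/*
 * dlil_if_acquire() either recycles an existing dlil_ifnet that matches
 * the caller's unique id (and is not in use), or allocates a fresh one
 * from dlif_zone; the full list is always walked so that name collisions
 * with in-use interfaces are reported as EBUSY.
 */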
int dlil_if_acquire(u_int32_t family, const void *uniqueid,
    size_t uniqueid_len, const char *ifxname, struct ifnet **ifp)
{
	struct ifnet *ifp1 = NULL;
	struct dlil_ifnet *dlifp1 = NULL;
	void *buf, *base, **pbuf;
	int ret = 0;

	VERIFY(*ifp == NULL);
	dlil_if_lock();
	/*
	 * We absolutely can't have an interface with the same name
	 * in in-use state.
	 * To make sure of that list has to be traversed completely
	 */
	TAILQ_FOREACH(dlifp1, &dlil_ifnet_head, dl_if_link) {
		ifp1 = (struct ifnet *)dlifp1;

		if (ifp1->if_family != family)
			continue;

		/*
		 * If interface is in use, return EBUSY if either unique id
		 * or interface extended names are the same
		 */
		lck_mtx_lock(&dlifp1->dl_if_lock);
		if (strncmp(ifxname, ifp1->if_xname, IFXNAMSIZ) == 0) {
			if (dlifp1->dl_if_flags & DLIF_INUSE) {
				lck_mtx_unlock(&dlifp1->dl_if_lock);
				ret = EBUSY;
				goto end;
			}
		}

		if (uniqueid_len == dlifp1->dl_if_uniqueid_len &&
		    bcmp(uniqueid, dlifp1->dl_if_uniqueid, uniqueid_len) == 0) {
			if (dlifp1->dl_if_flags & DLIF_INUSE) {
				lck_mtx_unlock(&dlifp1->dl_if_lock);
				ret = EBUSY;
				goto end;
			} else {
				dlifp1->dl_if_flags |= (DLIF_INUSE|DLIF_REUSE);
				/* Cache the first interface that can be recycled */
				if (*ifp == NULL)
					*ifp = ifp1;
				/*
				 * XXX Do not break or jump to end as we have to traverse
				 * the whole list to ensure there are no name collisions
				 */
			}
		}
		lck_mtx_unlock(&dlifp1->dl_if_lock);
	}

	/* If there's an interface that can be recycled, use that */
	if (*ifp != NULL)
		goto end;

	/* no interface found, allocate a new one */
	buf = zalloc(dlif_zone);
	if (buf == NULL) {
		ret = ENOMEM;
		goto end;
	}
	bzero(buf, dlif_bufsize);

	/* Get the 64-bit aligned base address for this object */
	base = (void *)P2ROUNDUP((intptr_t)buf + sizeof (u_int64_t),
	    sizeof (u_int64_t));
	VERIFY(((intptr_t)base + dlif_size) <= ((intptr_t)buf + dlif_bufsize));

	/*
	 * Wind back a pointer size from the aligned base and
	 * save the original address so we can free it later.
	 */
	pbuf = (void **)((intptr_t)base - sizeof (void *));
	*pbuf = buf;
	dlifp1 = base;

	if (uniqueid_len) {
		MALLOC(dlifp1->dl_if_uniqueid, void *, uniqueid_len,
		    M_NKE, M_WAITOK);
		if (dlifp1->dl_if_uniqueid == NULL) {
			zfree(dlif_zone, buf);
			ret = ENOMEM;
			goto end;
		}
		bcopy(uniqueid, dlifp1->dl_if_uniqueid, uniqueid_len);
		dlifp1->dl_if_uniqueid_len = uniqueid_len;
	}

	ifp1 = (struct ifnet *)dlifp1;
	dlifp1->dl_if_flags = DLIF_INUSE;
	if (ifnet_debug) {
		dlifp1->dl_if_flags |= DLIF_DEBUG;
		dlifp1->dl_if_trace = dlil_if_trace;
	}
	ifp1->if_name = dlifp1->dl_if_namestorage;
	ifp1->if_xname = dlifp1->dl_if_xnamestorage;

	/* initialize interface description */
	ifp1->if_desc.ifd_maxlen = IF_DESCSIZE;
	ifp1->if_desc.ifd_len = 0;
	ifp1->if_desc.ifd_desc = dlifp1->dl_if_descstorage;

#if CONFIG_MACF_NET
	mac_ifnet_label_init(ifp1);
#endif

	if ((ret = dlil_alloc_local_stats(ifp1)) != 0) {
		DLIL_PRINTF("%s: failed to allocate if local stats, "
		    "error: %d\n", __func__, ret);
		/* This probably shouldn't be fatal */
		ret = 0;
	}

	lck_mtx_init(&dlifp1->dl_if_lock, ifnet_lock_group, ifnet_lock_attr);
	lck_rw_init(&ifp1->if_lock, ifnet_lock_group, ifnet_lock_attr);
	lck_mtx_init(&ifp1->if_ref_lock, ifnet_lock_group, ifnet_lock_attr);
	lck_mtx_init(&ifp1->if_flt_lock, ifnet_lock_group, ifnet_lock_attr);
	lck_mtx_init(&ifp1->if_addrconfig_lock, ifnet_lock_group,
	    ifnet_lock_attr);
	lck_rw_init(&ifp1->if_llreach_lock, ifnet_lock_group, ifnet_lock_attr);
#if INET
	lck_rw_init(&ifp1->if_inetdata_lock, ifnet_lock_group,
	    ifnet_lock_attr);
	ifp1->if_inetdata = NULL;
#endif
#if INET6
	lck_rw_init(&ifp1->if_inet6data_lock, ifnet_lock_group,
	    ifnet_lock_attr);
	ifp1->if_inet6data = NULL;
#endif
	lck_rw_init(&ifp1->if_link_status_lock, ifnet_lock_group,
	    ifnet_lock_attr);
	ifp1->if_link_status = NULL;

	/* for send data paths */
	lck_mtx_init(&ifp1->if_start_lock, ifnet_snd_lock_group,
	    ifnet_lock_attr);
	lck_mtx_init(&ifp1->if_cached_route_lock, ifnet_snd_lock_group,
	    ifnet_lock_attr);
	lck_mtx_init(&ifp1->if_snd.ifcq_lock, ifnet_snd_lock_group,
	    ifnet_lock_attr);

	/* for receive data paths */
	lck_mtx_init(&ifp1->if_poll_lock, ifnet_rcv_lock_group,
	    ifnet_lock_attr);

	/* thread call allocation is done with sleeping zalloc */
	ifp1->if_dt_tcall = thread_call_allocate_with_options(dlil_dt_tcall_fn,
	    ifp1, THREAD_CALL_PRIORITY_KERNEL, THREAD_CALL_OPTIONS_ONCE);
	if (ifp1->if_dt_tcall == NULL) {
		panic_plain("%s: couldn't create if_dt_tcall", __func__);
		/* NOTREACHED */
	}

	TAILQ_INSERT_TAIL(&dlil_ifnet_head, dlifp1, dl_if_link);

	*ifp = ifp1;

end:
	dlil_if_unlock();

	VERIFY(dlifp1 == NULL || (IS_P2ALIGNED(dlifp1, sizeof (u_int64_t)) &&
	    IS_P2ALIGNED(&ifp1->if_data, sizeof (u_int64_t))));

	return (ret);
}
__private_extern__ void
dlil_if_release(ifnet_t ifp)
{
	struct dlil_ifnet *dlifp = (struct dlil_ifnet *)ifp;

	VERIFY(OSDecrementAtomic64(&net_api_stats.nas_ifnet_alloc_count) > 0);
	if (!(ifp->if_xflags & IFXF_ALLOC_KPI)) {
		VERIFY(OSDecrementAtomic64(&net_api_stats.nas_ifnet_alloc_os_count) > 0);
	}

	ifnet_lock_exclusive(ifp);
	lck_mtx_lock(&dlifp->dl_if_lock);
	dlifp->dl_if_flags &= ~DLIF_INUSE;
	strlcpy(dlifp->dl_if_namestorage, ifp->if_name, IFNAMSIZ);
	ifp->if_name = dlifp->dl_if_namestorage;
	/* Reset external name (name + unit) */
	ifp->if_xname = dlifp->dl_if_xnamestorage;
	snprintf(__DECONST(char *, ifp->if_xname), IFXNAMSIZ,
	    "%s?", ifp->if_name);
	lck_mtx_unlock(&dlifp->dl_if_lock);
#if CONFIG_MACF_NET
	/*
	 * We can either recycle the MAC label here or in dlil_if_acquire().
	 * It seems logical to do it here but this means that anything that
	 * still has a handle on ifp will now see it as unlabeled.
	 * Since the interface is "dead" that may be OK.  Revisit later.
	 */
	mac_ifnet_label_recycle(ifp);
#endif
	ifnet_lock_done(ifp);
}
__private_extern__ void
dlil_if_lock(void)
{
	lck_mtx_lock(&dlil_ifnet_lock);
}

__private_extern__ void
dlil_if_unlock(void)
{
	lck_mtx_unlock(&dlil_ifnet_lock);
}

__private_extern__ void
dlil_if_lock_assert(void)
{
	LCK_MTX_ASSERT(&dlil_ifnet_lock, LCK_MTX_ASSERT_OWNED);
}
__private_extern__ void
dlil_proto_unplumb_all(struct ifnet *ifp)
{
	/*
	 * if_proto_hash[0-2] are for PF_INET, PF_INET6 and PF_VLAN, where
	 * each bucket contains exactly one entry; PF_VLAN does not need an
	 * explicit unplumb.
	 *
	 * if_proto_hash[3] is for other protocols; we expect anything
	 * in this bucket to respond to the DETACHING event (which would
	 * have happened by now) and do the unplumb then.
	 */
	(void) proto_unplumb(PF_INET, ifp);
#if INET6
	(void) proto_unplumb(PF_INET6, ifp);
#endif /* INET6 */
}
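
/*
 * Cached source routes are accessed with a copyout/copyin pair: the
 * caller works on a private copy while if_cached_route_lock is dropped,
 * and copyin only stores the route back when if_fwd_cacheok is set.
 */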
static void
ifp_src_route_copyout(struct ifnet *ifp, struct route *dst)
{
	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
	lck_mtx_convert_spin(&ifp->if_cached_route_lock);

	route_copyout(dst, &ifp->if_src_route, sizeof (*dst));

	lck_mtx_unlock(&ifp->if_cached_route_lock);
}

static void
ifp_src_route_copyin(struct ifnet *ifp, struct route *src)
{
	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
	lck_mtx_convert_spin(&ifp->if_cached_route_lock);

	if (ifp->if_fwd_cacheok) {
		route_copyin(src, &ifp->if_src_route, sizeof (*src));
	} else {
		ROUTE_RELEASE(src);
	}
	lck_mtx_unlock(&ifp->if_cached_route_lock);
}

#if INET6
static void
ifp_src_route6_copyout(struct ifnet *ifp, struct route_in6 *dst)
{
	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
	lck_mtx_convert_spin(&ifp->if_cached_route_lock);

	route_copyout((struct route *)dst, (struct route *)&ifp->if_src_route6,
	    sizeof (*dst));

	lck_mtx_unlock(&ifp->if_cached_route_lock);
}

static void
ifp_src_route6_copyin(struct ifnet *ifp, struct route_in6 *src)
{
	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
	lck_mtx_convert_spin(&ifp->if_cached_route_lock);

	if (ifp->if_fwd_cacheok) {
		route_copyin((struct route *)src,
		    (struct route *)&ifp->if_src_route6, sizeof (*src));
	} else {
		ROUTE_RELEASE(src);
	}
	lck_mtx_unlock(&ifp->if_cached_route_lock);
}
#endif /* INET6 */
struct rtentry *
ifnet_cached_rtlookup_inet(struct ifnet *ifp, struct in_addr src_ip)
{
	struct route src_rt;
	struct sockaddr_in *dst;

	dst = (struct sockaddr_in *)(void *)(&src_rt.ro_dst);

	ifp_src_route_copyout(ifp, &src_rt);

	if (ROUTE_UNUSABLE(&src_rt) || src_ip.s_addr != dst->sin_addr.s_addr) {
		ROUTE_RELEASE(&src_rt);
		if (dst->sin_family != AF_INET) {
			bzero(&src_rt.ro_dst, sizeof (src_rt.ro_dst));
			dst->sin_len = sizeof (src_rt.ro_dst);
			dst->sin_family = AF_INET;
		}
		dst->sin_addr = src_ip;

		VERIFY(src_rt.ro_rt == NULL);
		src_rt.ro_rt = rtalloc1_scoped((struct sockaddr *)dst,
		    0, 0, ifp->if_index);

		if (src_rt.ro_rt != NULL) {
			/* retain a ref, copyin consumes one */
			struct rtentry *rte = src_rt.ro_rt;
			RT_ADDREF(rte);
			ifp_src_route_copyin(ifp, &src_rt);
			src_rt.ro_rt = rte;
		}
	}

	return (src_rt.ro_rt);
}

#if INET6
struct rtentry *
ifnet_cached_rtlookup_inet6(struct ifnet *ifp, struct in6_addr *src_ip6)
{
	struct route_in6 src_rt;

	ifp_src_route6_copyout(ifp, &src_rt);

	if (ROUTE_UNUSABLE(&src_rt) ||
	    !IN6_ARE_ADDR_EQUAL(src_ip6, &src_rt.ro_dst.sin6_addr)) {
		ROUTE_RELEASE(&src_rt);
		if (src_rt.ro_dst.sin6_family != AF_INET6) {
			bzero(&src_rt.ro_dst, sizeof (src_rt.ro_dst));
			src_rt.ro_dst.sin6_len = sizeof (src_rt.ro_dst);
			src_rt.ro_dst.sin6_family = AF_INET6;
		}
		src_rt.ro_dst.sin6_scope_id = in6_addr2scopeid(ifp, src_ip6);
		bcopy(src_ip6, &src_rt.ro_dst.sin6_addr,
		    sizeof (src_rt.ro_dst.sin6_addr));

		if (src_rt.ro_rt == NULL) {
			src_rt.ro_rt = rtalloc1_scoped(
			    (struct sockaddr *)&src_rt.ro_dst, 0, 0,
			    ifp->if_index);

			if (src_rt.ro_rt != NULL) {
				/* retain a ref, copyin consumes one */
				struct rtentry *rte = src_rt.ro_rt;
				RT_ADDREF(rte);
				ifp_src_route6_copyin(ifp, &src_rt);
				src_rt.ro_rt = rte;
			}
		}
	}

	return (src_rt.ro_rt);
}
#endif /* INET6 */
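
/*
 * Link quality updates are normalized to the nearest threshold edge
 * before being stored; a change is published to listeners via a
 * KEV_DL_LINK_QUALITY_METRIC_CHANGED kernel event with the lock dropped.
 */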
void
if_lqm_update(struct ifnet *ifp, int lqm, int locked)
{
	struct kev_dl_link_quality_metric_data ev_lqm_data;

	VERIFY(lqm >= IFNET_LQM_MIN && lqm <= IFNET_LQM_MAX);

	/* Normalize to edge */
	if (lqm >= 0 && lqm <= IFNET_LQM_THRESH_ABORT) {
		lqm = IFNET_LQM_THRESH_ABORT;
		atomic_bitset_32(&tcbinfo.ipi_flags,
		    INPCBINFO_HANDLE_LQM_ABORT);
		inpcb_timer_sched(&tcbinfo, INPCB_TIMER_FAST);
	} else if (lqm > IFNET_LQM_THRESH_ABORT &&
	    lqm <= IFNET_LQM_THRESH_MINIMALLY_VIABLE) {
		lqm = IFNET_LQM_THRESH_MINIMALLY_VIABLE;
	} else if (lqm > IFNET_LQM_THRESH_MINIMALLY_VIABLE &&
	    lqm <= IFNET_LQM_THRESH_POOR) {
		lqm = IFNET_LQM_THRESH_POOR;
	} else if (lqm > IFNET_LQM_THRESH_POOR &&
	    lqm <= IFNET_LQM_THRESH_GOOD) {
		lqm = IFNET_LQM_THRESH_GOOD;
	}

	/*
	 * Take the lock if needed
	 */
	if (!locked)
		ifnet_lock_exclusive(ifp);

	if (lqm == ifp->if_interface_state.lqm_state &&
	    (ifp->if_interface_state.valid_bitmask &
	    IF_INTERFACE_STATE_LQM_STATE_VALID)) {
		/*
		 * Release the lock if was not held by the caller
		 */
		if (!locked)
			ifnet_lock_done(ifp);
		return;		/* nothing to update */
	}
	ifp->if_interface_state.valid_bitmask |=
	    IF_INTERFACE_STATE_LQM_STATE_VALID;
	ifp->if_interface_state.lqm_state = lqm;

	/*
	 * Don't want to hold the lock when issuing kernel events
	 */
	ifnet_lock_done(ifp);

	bzero(&ev_lqm_data, sizeof (ev_lqm_data));
	ev_lqm_data.link_quality_metric = lqm;

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_LINK_QUALITY_METRIC_CHANGED,
	    (struct net_event_data *)&ev_lqm_data, sizeof (ev_lqm_data));

	/*
	 * Reacquire the lock for the caller
	 */
	if (locked)
		ifnet_lock_exclusive(ifp);
}
static void
if_rrc_state_update(struct ifnet *ifp, unsigned int rrc_state)
{
	struct kev_dl_rrc_state kev;

	if (rrc_state == ifp->if_interface_state.rrc_state &&
	    (ifp->if_interface_state.valid_bitmask &
	    IF_INTERFACE_STATE_RRC_STATE_VALID))
		return;

	ifp->if_interface_state.valid_bitmask |=
	    IF_INTERFACE_STATE_RRC_STATE_VALID;

	ifp->if_interface_state.rrc_state = rrc_state;

	/*
	 * Don't want to hold the lock when issuing kernel events
	 */
	ifnet_lock_done(ifp);

	bzero(&kev, sizeof(struct kev_dl_rrc_state));
	kev.rrc_state = rrc_state;

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_RRC_STATE_CHANGED,
	    (struct net_event_data *)&kev, sizeof(struct kev_dl_rrc_state));

	ifnet_lock_exclusive(ifp);
}
errno_t
if_state_update(struct ifnet *ifp,
    struct if_interface_state *if_interface_state)
{
	u_short if_index_available = 0;

	ifnet_lock_exclusive(ifp);

	if ((ifp->if_type != IFT_CELLULAR) &&
	    (if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_RRC_STATE_VALID)) {
		ifnet_lock_done(ifp);
		return (ENOTSUP);
	}
	if ((if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_LQM_STATE_VALID) &&
	    (if_interface_state->lqm_state < IFNET_LQM_MIN ||
	    if_interface_state->lqm_state > IFNET_LQM_MAX)) {
		ifnet_lock_done(ifp);
		return (EINVAL);
	}
	if ((if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_RRC_STATE_VALID) &&
	    if_interface_state->rrc_state !=
	    IF_INTERFACE_STATE_RRC_STATE_IDLE &&
	    if_interface_state->rrc_state !=
	    IF_INTERFACE_STATE_RRC_STATE_CONNECTED) {
		ifnet_lock_done(ifp);
		return (EINVAL);
	}

	if (if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_LQM_STATE_VALID) {
		if_lqm_update(ifp, if_interface_state->lqm_state, 1);
	}
	if (if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_RRC_STATE_VALID) {
		if_rrc_state_update(ifp, if_interface_state->rrc_state);
	}
	if (if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID) {
		ifp->if_interface_state.valid_bitmask |=
		    IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
		ifp->if_interface_state.interface_availability =
		    if_interface_state->interface_availability;

		if (ifp->if_interface_state.interface_availability ==
		    IF_INTERFACE_STATE_INTERFACE_AVAILABLE) {
			if_index_available = ifp->if_index;
		}
	}
	ifnet_lock_done(ifp);

	/*
	 * Check if the TCP connections going on this interface should be
	 * forced to send probe packets instead of waiting for TCP timers
	 * to fire. This will be done when there is an explicit
	 * notification that the interface became available.
	 */
	if (if_index_available > 0)
		tcp_interface_send_probe(if_index_available);

	return (0);
}
void
if_get_state(struct ifnet *ifp,
    struct if_interface_state *if_interface_state)
{
	ifnet_lock_shared(ifp);

	if_interface_state->valid_bitmask = 0;

	if (ifp->if_interface_state.valid_bitmask &
	    IF_INTERFACE_STATE_RRC_STATE_VALID) {
		if_interface_state->valid_bitmask |=
		    IF_INTERFACE_STATE_RRC_STATE_VALID;
		if_interface_state->rrc_state =
		    ifp->if_interface_state.rrc_state;
	}
	if (ifp->if_interface_state.valid_bitmask &
	    IF_INTERFACE_STATE_LQM_STATE_VALID) {
		if_interface_state->valid_bitmask |=
		    IF_INTERFACE_STATE_LQM_STATE_VALID;
		if_interface_state->lqm_state =
		    ifp->if_interface_state.lqm_state;
	}
	if (ifp->if_interface_state.valid_bitmask &
	    IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID) {
		if_interface_state->valid_bitmask |=
		    IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
		if_interface_state->interface_availability =
		    ifp->if_interface_state.interface_availability;
	}

	ifnet_lock_done(ifp);
}
errno_t
if_probe_connectivity(struct ifnet *ifp, u_int32_t conn_probe)
{
	ifnet_lock_exclusive(ifp);
	if (conn_probe > 1) {
		ifnet_lock_done(ifp);
		return (EINVAL);
	}
	if (conn_probe == 0)
		ifp->if_eflags &= ~IFEF_PROBE_CONNECTIVITY;
	else
		ifp->if_eflags |= IFEF_PROBE_CONNECTIVITY;
	ifnet_lock_done(ifp);

	necp_update_all_clients();

	tcp_probe_connectivity(ifp, conn_probe);
	return (0);
}
static int
uuid_get_ethernet(u_int8_t *node)
{
	struct ifnet *ifp;
	struct sockaddr_dl *sdl;

	ifnet_head_lock_shared();
	TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
		ifnet_lock_shared(ifp);
		IFA_LOCK_SPIN(ifp->if_lladdr);
		sdl = (struct sockaddr_dl *)(void *)ifp->if_lladdr->ifa_addr;
		if (sdl->sdl_type == IFT_ETHER) {
			memcpy(node, LLADDR(sdl), ETHER_ADDR_LEN);
			IFA_UNLOCK(ifp->if_lladdr);
			ifnet_lock_done(ifp);
			ifnet_head_done();
			return (0);
		}
		IFA_UNLOCK(ifp->if_lladdr);
		ifnet_lock_done(ifp);
	}
	ifnet_head_done();

	return (-1);
}
static int
sysctl_rxpoll SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	uint32_t i;
	int err;

	i = if_rxpoll;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (net_rxpoll == 0)
		return (ENXIO);

	if_rxpoll = i;
	return (err);
}

static int
sysctl_rxpoll_mode_holdtime SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	uint64_t q;
	int err;

	q = if_rxpoll_mode_holdtime;

	err = sysctl_handle_quad(oidp, &q, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (q < IF_RXPOLL_MODE_HOLDTIME_MIN)
		q = IF_RXPOLL_MODE_HOLDTIME_MIN;

	if_rxpoll_mode_holdtime = q;

	return (err);
}

static int
sysctl_rxpoll_sample_holdtime SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	uint64_t q;
	int err;

	q = if_rxpoll_sample_holdtime;

	err = sysctl_handle_quad(oidp, &q, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (q < IF_RXPOLL_SAMPLETIME_MIN)
		q = IF_RXPOLL_SAMPLETIME_MIN;

	if_rxpoll_sample_holdtime = q;

	return (err);
}

static int
sysctl_rxpoll_interval_time SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	uint64_t q;
	int err;

	q = if_rxpoll_interval_time;

	err = sysctl_handle_quad(oidp, &q, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (q < IF_RXPOLL_INTERVALTIME_MIN)
		q = IF_RXPOLL_INTERVALTIME_MIN;

	if_rxpoll_interval_time = q;

	return (err);
}

static int
sysctl_rxpoll_wlowat SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	uint32_t i;
	int err;

	i = if_rxpoll_wlowat;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (i == 0 || i >= if_rxpoll_whiwat)
		return (EINVAL);

	if_rxpoll_wlowat = i;
	return (err);
}

static int
sysctl_rxpoll_whiwat SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	uint32_t i;
	int err;

	i = if_rxpoll_whiwat;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (i <= if_rxpoll_wlowat)
		return (EINVAL);

	if_rxpoll_whiwat = i;
	return (err);
}

static int
sysctl_sndq_maxlen SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	int i, err;

	i = if_sndq_maxlen;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (i < IF_SNDQ_MINLEN)
		i = IF_SNDQ_MINLEN;

	if_sndq_maxlen = i;
	return (err);
}

static int
sysctl_rcvq_maxlen SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	int i, err;

	i = if_rcvq_maxlen;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (i < IF_RCVQ_MINLEN)
		i = IF_RCVQ_MINLEN;

	if_rcvq_maxlen = i;
	return (err);
}
void
dlil_node_present(struct ifnet *ifp, struct sockaddr *sa,
    int32_t rssi, int lqm, int npm, u_int8_t srvinfo[48])
{
	struct kev_dl_node_presence kev;
	struct sockaddr_dl *sdl;
	struct sockaddr_in6 *sin6;

	VERIFY(sa->sa_family == AF_LINK || sa->sa_family == AF_INET6);

	bzero(&kev, sizeof (kev));
	sin6 = &kev.sin6_node_address;
	sdl = &kev.sdl_node_address;
	nd6_alt_node_addr_decompose(ifp, sa, sdl, sin6);
	kev.rssi = rssi;
	kev.link_quality_metric = lqm;
	kev.node_proximity_metric = npm;
	bcopy(srvinfo, kev.node_service_info, sizeof (kev.node_service_info));

	nd6_alt_node_present(ifp, sin6, sdl, rssi, lqm, npm);
	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_PRESENCE,
	    &kev.link_data, sizeof (kev));
}

void
dlil_node_absent(struct ifnet *ifp, struct sockaddr *sa)
{
	struct kev_dl_node_absence kev;
	struct sockaddr_in6 *sin6;
	struct sockaddr_dl *sdl;

	VERIFY(sa->sa_family == AF_LINK || sa->sa_family == AF_INET6);

	bzero(&kev, sizeof (kev));
	sin6 = &kev.sin6_node_address;
	sdl = &kev.sdl_node_address;
	nd6_alt_node_addr_decompose(ifp, sa, sdl, sin6);

	nd6_alt_node_absent(ifp, sin6);
	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_ABSENCE,
	    &kev.link_data, sizeof (kev));
}
const void *
dlil_ifaddr_bytes(const struct sockaddr_dl *sdl, size_t *sizep,
    kauth_cred_t *credp)
{
	const u_int8_t *bytes;
	size_t size;

	bytes = CONST_LLADDR(sdl);
	size = sdl->sdl_alen;

#if CONFIG_MACF
	if (dlil_lladdr_ckreq) {
		switch (sdl->sdl_type) {
		case IFT_ETHER:
		case IFT_IEEE1394:
			break;
		default:
			credp = NULL;
			break;
		};

		if (credp && mac_system_check_info(*credp, "net.link.addr")) {
			static const u_int8_t unspec[FIREWIRE_EUI64_LEN] = {
				[0] = 2
			};

			bytes = unspec;
		}
	}
#else
#pragma unused(credp)
#endif

	if (sizep != NULL) *sizep = size;
	return (bytes);
}
void
dlil_report_issues(struct ifnet *ifp, u_int8_t modid[DLIL_MODIDLEN],
    u_int8_t info[DLIL_MODARGLEN])
{
	struct kev_dl_issues kev;
	struct timeval tv;

	VERIFY(ifp != NULL);
	VERIFY(modid != NULL);
	_CASSERT(sizeof (kev.modid) == DLIL_MODIDLEN);
	_CASSERT(sizeof (kev.info) == DLIL_MODARGLEN);

	bzero(&kev, sizeof (kev));

	microtime(&tv);
	kev.timestamp = tv.tv_sec;
	bcopy(modid, &kev.modid, DLIL_MODIDLEN);
	if (info != NULL)
		bcopy(info, &kev.info, DLIL_MODARGLEN);

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_ISSUES,
	    &kev.link_data, sizeof (kev));
}
errno_t
ifnet_getset_opportunistic(ifnet_t ifp, u_long cmd, struct ifreq *ifr,
    struct proc *p)
{
	u_int32_t level = IFNET_THROTTLE_OFF;
	errno_t result = 0;

	VERIFY(cmd == SIOCSIFOPPORTUNISTIC || cmd == SIOCGIFOPPORTUNISTIC);

	if (cmd == SIOCSIFOPPORTUNISTIC) {
		/*
		 * XXX: Use priv_check_cred() instead of root check?
		 */
		if ((result = proc_suser(p)) != 0)
			return (result);

		if (ifr->ifr_opportunistic.ifo_flags ==
		    IFRIFOF_BLOCK_OPPORTUNISTIC)
			level = IFNET_THROTTLE_OPPORTUNISTIC;
		else if (ifr->ifr_opportunistic.ifo_flags == 0)
			level = IFNET_THROTTLE_OFF;
		else
			result = EINVAL;

		if (result == 0)
			result = ifnet_set_throttle(ifp, level);
	} else if ((result = ifnet_get_throttle(ifp, &level)) == 0) {
		ifr->ifr_opportunistic.ifo_flags = 0;
		if (level == IFNET_THROTTLE_OPPORTUNISTIC) {
			ifr->ifr_opportunistic.ifo_flags |=
			    IFRIFOF_BLOCK_OPPORTUNISTIC;
		}
	}

	/*
	 * Return the count of current opportunistic connections
	 * over the interface.
	 */
	if (result == 0) {
		uint32_t flags = 0;
		flags |= (cmd == SIOCSIFOPPORTUNISTIC) ?
		    INPCB_OPPORTUNISTIC_SETCMD : 0;
		flags |= (level == IFNET_THROTTLE_OPPORTUNISTIC) ?
		    INPCB_OPPORTUNISTIC_THROTTLEON : 0;
		ifr->ifr_opportunistic.ifo_inuse =
		    udp_count_opportunistic(ifp->if_index, flags) +
		    tcp_count_opportunistic(ifp->if_index, flags);
	}

	if (result == EALREADY)
		result = 0;

	return (result);
}
int
ifnet_get_throttle(struct ifnet *ifp, u_int32_t *level)
{
	struct ifclassq *ifq;
	int err = 0;

	if (!(ifp->if_eflags & IFEF_TXSTART))
		return (ENXIO);

	*level = IFNET_THROTTLE_OFF;

	ifq = &ifp->if_snd;
	IFCQ_LOCK(ifq);
	/* Throttling works only for IFCQ, not ALTQ instances */
	if (IFCQ_IS_ENABLED(ifq))
		IFCQ_GET_THROTTLE(ifq, *level, err);
	IFCQ_UNLOCK(ifq);

	return (err);
}

int
ifnet_set_throttle(struct ifnet *ifp, u_int32_t level)
{
	struct ifclassq *ifq;
	int err = 0;

	if (!(ifp->if_eflags & IFEF_TXSTART))
		return (ENXIO);

	ifq = &ifp->if_snd;

	switch (level) {
	case IFNET_THROTTLE_OFF:
	case IFNET_THROTTLE_OPPORTUNISTIC:
		break;
	default:
		return (EINVAL);
	}

	IFCQ_LOCK(ifq);
	if (IFCQ_IS_ENABLED(ifq))
		IFCQ_SET_THROTTLE(ifq, level, err);
	IFCQ_UNLOCK(ifq);

	if (err == 0) {
		printf("%s: throttling level set to %d\n", if_name(ifp),
		    level);
		if (level == IFNET_THROTTLE_OFF)
			ifnet_start(ifp);
	}

	return (err);
}
errno_t
ifnet_getset_log(ifnet_t ifp, u_long cmd, struct ifreq *ifr,
    struct proc *p)
{
#pragma unused(p)
	errno_t result = 0;
	uint32_t flags;
	int level, category, subcategory;

	VERIFY(cmd == SIOCSIFLOG || cmd == SIOCGIFLOG);

	if (cmd == SIOCSIFLOG) {
		if ((result = priv_check_cred(kauth_cred_get(),
		    PRIV_NET_INTERFACE_CONTROL, 0)) != 0)
			return (result);

		level = ifr->ifr_log.ifl_level;
		if (level < IFNET_LOG_MIN || level > IFNET_LOG_MAX)
			result = EINVAL;

		flags = ifr->ifr_log.ifl_flags;
		if ((flags &= IFNET_LOGF_MASK) == 0)
			result = EINVAL;

		category = ifr->ifr_log.ifl_category;
		subcategory = ifr->ifr_log.ifl_subcategory;

		if (result == 0)
			result = ifnet_set_log(ifp, level, flags,
			    category, subcategory);
	} else {
		result = ifnet_get_log(ifp, &level, &flags, &category,
		    &subcategory);
		if (result == 0) {
			ifr->ifr_log.ifl_level = level;
			ifr->ifr_log.ifl_flags = flags;
			ifr->ifr_log.ifl_category = category;
			ifr->ifr_log.ifl_subcategory = subcategory;
		}
	}

	return (result);
}

int
ifnet_set_log(struct ifnet *ifp, int32_t level, uint32_t flags,
    int32_t category, int32_t subcategory)
{
	int err = 0;

	VERIFY(level >= IFNET_LOG_MIN && level <= IFNET_LOG_MAX);
	VERIFY(flags & IFNET_LOGF_MASK);

	/*
	 * The logging level applies to all facilities; make sure to
	 * update them all with the most current level.
	 */
	flags |= ifp->if_log.flags;

	if (ifp->if_output_ctl != NULL) {
		struct ifnet_log_params l;

		bzero(&l, sizeof (l));
		l.level = level;
		l.flags = flags;
		l.flags &= ~IFNET_LOGF_DLIL;
		l.category = category;
		l.subcategory = subcategory;

		/* Send this request to lower layers */
		if (l.flags != 0) {
			err = ifp->if_output_ctl(ifp, IFNET_CTL_SET_LOG,
			    sizeof (l), &l);
		}
	} else if ((flags & ~IFNET_LOGF_DLIL) && ifp->if_output_ctl == NULL) {
		/*
		 * If targeted to the lower layers without an output
		 * control callback registered on the interface, just
		 * silently ignore facilities other than ours.
		 */
		flags &= IFNET_LOGF_DLIL;
		if (flags == 0 && (!(ifp->if_log.flags & IFNET_LOGF_DLIL)))
			level = 0;
	}

	if (err == 0) {
		if ((ifp->if_log.level = level) == IFNET_LOG_DEFAULT)
			ifp->if_log.flags = 0;
		else
			ifp->if_log.flags |= flags;

		log(LOG_INFO, "%s: logging level set to %d flags=%b "
		    "arg=%b, category=%d subcategory=%d\n", if_name(ifp),
		    ifp->if_log.level, ifp->if_log.flags,
		    IFNET_LOGF_BITS, flags, IFNET_LOGF_BITS,
		    category, subcategory);
	}

	return (err);
}
int
ifnet_get_log(struct ifnet *ifp, int32_t *level, uint32_t *flags,
    int32_t *category, int32_t *subcategory)
{
	if (level != NULL)
		*level = ifp->if_log.level;
	if (flags != NULL)
		*flags = ifp->if_log.flags;
	if (category != NULL)
		*category = ifp->if_log.category;
	if (subcategory != NULL)
		*subcategory = ifp->if_log.subcategory;

	return (0);
}
int
ifnet_notify_address(struct ifnet *ifp, int af)
{
	struct ifnet_notify_address_params na;

#if PF
	(void) pf_ifaddr_hook(ifp);
#endif /* PF */

	if (ifp->if_output_ctl == NULL)
		return (EOPNOTSUPP);

	bzero(&na, sizeof (na));
	na.address_family = af;

	return (ifp->if_output_ctl(ifp, IFNET_CTL_NOTIFY_ADDRESS,
	    sizeof (na), &na));
}
errno_t
ifnet_flowid(struct ifnet *ifp, uint32_t *flowid)
{
	if (ifp == NULL || flowid == NULL) {
		return (EINVAL);
	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    !IF_FULLY_ATTACHED(ifp)) {
		return (ENXIO);
	}

	*flowid = ifp->if_flowhash;

	return (0);
}
errno_t
ifnet_disable_output(struct ifnet *ifp)
{
	int err;

	if (ifp == NULL) {
		return (EINVAL);
	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    !IF_FULLY_ATTACHED(ifp)) {
		return (ENXIO);
	}

	if ((err = ifnet_fc_add(ifp)) == 0) {
		lck_mtx_lock_spin(&ifp->if_start_lock);
		ifp->if_start_flags |= IFSF_FLOW_CONTROLLED;
		lck_mtx_unlock(&ifp->if_start_lock);
	}
	return (err);
}

errno_t
ifnet_enable_output(struct ifnet *ifp)
{
	if (ifp == NULL) {
		return (EINVAL);
	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    !IF_FULLY_ATTACHED(ifp)) {
		return (ENXIO);
	}

	ifnet_start_common(ifp, TRUE);
	return (0);
}
void
ifnet_flowadv(uint32_t flowhash)
{
	struct ifnet_fc_entry *ifce;
	struct ifnet *ifp;

	ifce = ifnet_fc_get(flowhash);
	if (ifce == NULL)
		return;

	VERIFY(ifce->ifce_ifp != NULL);
	ifp = ifce->ifce_ifp;

	/* flow hash gets recalculated per attach, so check */
	if (ifnet_is_attached(ifp, 1)) {
		if (ifp->if_flowhash == flowhash)
			(void) ifnet_enable_output(ifp);
		ifnet_decr_iorefcnt(ifp);
	}
	ifnet_fc_entry_free(ifce);
}
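
/*
 * Flow-controlled interfaces are tracked in ifnet_fc_tree, an RB tree of
 * ifnet_fc_entry structures keyed by the interface flow hash; a flow
 * advisory looks the entry up by hash and re-enables output if the hash
 * still belongs to the same attached interface.
 */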
/*
 * Function to compare ifnet_fc_entries in ifnet flow control tree
 */
static inline int
ifce_cmp(const struct ifnet_fc_entry *fc1, const struct ifnet_fc_entry *fc2)
{
	return (fc1->ifce_flowhash - fc2->ifce_flowhash);
}
static int
ifnet_fc_add(struct ifnet *ifp)
{
	struct ifnet_fc_entry keyfc, *ifce;
	uint32_t flowhash;

	VERIFY(ifp != NULL && (ifp->if_eflags & IFEF_TXSTART));
	VERIFY(ifp->if_flowhash != 0);
	flowhash = ifp->if_flowhash;

	bzero(&keyfc, sizeof (keyfc));
	keyfc.ifce_flowhash = flowhash;

	lck_mtx_lock_spin(&ifnet_fc_lock);
	ifce = RB_FIND(ifnet_fc_tree, &ifnet_fc_tree, &keyfc);
	if (ifce != NULL && ifce->ifce_ifp == ifp) {
		/* Entry is already in ifnet_fc_tree, return */
		lck_mtx_unlock(&ifnet_fc_lock);
		return (0);
	}

	if (ifce != NULL) {
		/*
		 * There is a different fc entry with the same flow hash
		 * but different ifp pointer.  There can be a collision
		 * on flow hash but the probability is low.  Let's just
		 * avoid adding a second one when there is a collision.
		 */
		lck_mtx_unlock(&ifnet_fc_lock);
		return (EAGAIN);
	}

	/* become regular mutex */
	lck_mtx_convert_spin(&ifnet_fc_lock);

	ifce = zalloc(ifnet_fc_zone);
	if (ifce == NULL) {
		/* memory allocation failed */
		lck_mtx_unlock(&ifnet_fc_lock);
		return (ENOMEM);
	}
	bzero(ifce, ifnet_fc_zone_size);

	ifce->ifce_flowhash = flowhash;
	ifce->ifce_ifp = ifp;

	RB_INSERT(ifnet_fc_tree, &ifnet_fc_tree, ifce);
	lck_mtx_unlock(&ifnet_fc_lock);
	return (0);
}
static struct ifnet_fc_entry *
ifnet_fc_get(uint32_t flowhash)
{
	struct ifnet_fc_entry keyfc, *ifce;
	struct ifnet *ifp;

	bzero(&keyfc, sizeof (keyfc));
	keyfc.ifce_flowhash = flowhash;

	lck_mtx_lock_spin(&ifnet_fc_lock);
	ifce = RB_FIND(ifnet_fc_tree, &ifnet_fc_tree, &keyfc);
	if (ifce == NULL) {
		/* Entry is not present in ifnet_fc_tree, return */
		lck_mtx_unlock(&ifnet_fc_lock);
		return (NULL);
	}

	RB_REMOVE(ifnet_fc_tree, &ifnet_fc_tree, ifce);

	VERIFY(ifce->ifce_ifp != NULL);
	ifp = ifce->ifce_ifp;

	/* become regular mutex */
	lck_mtx_convert_spin(&ifnet_fc_lock);

	if (!ifnet_is_attached(ifp, 0)) {
		/*
		 * This ifp is not attached or in the process of being
		 * detached; just don't process it.
		 */
		ifnet_fc_entry_free(ifce);
		ifce = NULL;
	}
	lck_mtx_unlock(&ifnet_fc_lock);

	return (ifce);
}
static void
ifnet_fc_entry_free(struct ifnet_fc_entry *ifce)
{
	zfree(ifnet_fc_zone, ifce);
}
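
/*
 * The interface flow hash mixes the interface name, unit, flags,
 * capabilities and two random words with a global seed; a zero result is
 * retried with a new seed so that a valid hash is never zero.
 */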
static uint32_t
ifnet_calc_flowhash(struct ifnet *ifp)
{
	struct ifnet_flowhash_key fh __attribute__((aligned(8)));
	uint32_t flowhash = 0;

	if (ifnet_flowhash_seed == 0)
		ifnet_flowhash_seed = RandomULong();

	bzero(&fh, sizeof (fh));

	(void) snprintf(fh.ifk_name, sizeof (fh.ifk_name), "%s", ifp->if_name);
	fh.ifk_unit = ifp->if_unit;
	fh.ifk_flags = ifp->if_flags;
	fh.ifk_eflags = ifp->if_eflags;
	fh.ifk_capabilities = ifp->if_capabilities;
	fh.ifk_capenable = ifp->if_capenable;
	fh.ifk_output_sched_model = ifp->if_output_sched_model;
	fh.ifk_rand1 = RandomULong();
	fh.ifk_rand2 = RandomULong();

try_again:
	flowhash = net_flowhash(&fh, sizeof (fh), ifnet_flowhash_seed);
	if (flowhash == 0) {
		/* try to get a non-zero flowhash */
		ifnet_flowhash_seed = RandomULong();
		goto try_again;
	}

	return (flowhash);
}
int
ifnet_set_netsignature(struct ifnet *ifp, uint8_t family, uint8_t len,
    uint16_t flags, uint8_t *data)
{
#pragma unused(flags)
	int error = 0;

	switch (family) {
	case AF_INET:
		if_inetdata_lock_exclusive(ifp);
		if (IN_IFEXTRA(ifp) != NULL) {
			if (len == 0) {
				/* Allow clearing the signature */
				IN_IFEXTRA(ifp)->netsig_len = 0;
				bzero(IN_IFEXTRA(ifp)->netsig,
				    sizeof (IN_IFEXTRA(ifp)->netsig));
				if_inetdata_lock_done(ifp);
				break;
			} else if (len > sizeof (IN_IFEXTRA(ifp)->netsig)) {
				error = EINVAL;
				if_inetdata_lock_done(ifp);
				break;
			}
			IN_IFEXTRA(ifp)->netsig_len = len;
			bcopy(data, IN_IFEXTRA(ifp)->netsig, len);
		} else {
			error = ENOMEM;
		}
		if_inetdata_lock_done(ifp);
		break;

	case AF_INET6:
		if_inet6data_lock_exclusive(ifp);
		if (IN6_IFEXTRA(ifp) != NULL) {
			if (len == 0) {
				/* Allow clearing the signature */
				IN6_IFEXTRA(ifp)->netsig_len = 0;
				bzero(IN6_IFEXTRA(ifp)->netsig,
				    sizeof (IN6_IFEXTRA(ifp)->netsig));
				if_inet6data_lock_done(ifp);
				break;
			} else if (len > sizeof (IN6_IFEXTRA(ifp)->netsig)) {
				error = EINVAL;
				if_inet6data_lock_done(ifp);
				break;
			}
			IN6_IFEXTRA(ifp)->netsig_len = len;
			bcopy(data, IN6_IFEXTRA(ifp)->netsig, len);
		} else {
			error = ENOMEM;
		}
		if_inet6data_lock_done(ifp);
		break;

	default:
		error = EINVAL;
		break;
	}

	return (error);
}
int
ifnet_get_netsignature(struct ifnet *ifp, uint8_t family, uint8_t *len,
    uint16_t *flags, uint8_t *data)
{
	int error = 0;

	if (ifp == NULL || len == NULL || data == NULL)
		return (EINVAL);

	switch (family) {
	case AF_INET:
		if_inetdata_lock_shared(ifp);
		if (IN_IFEXTRA(ifp) != NULL) {
			if (*len == 0 || *len < IN_IFEXTRA(ifp)->netsig_len) {
				error = EINVAL;
				if_inetdata_lock_done(ifp);
				break;
			}
			if ((*len = IN_IFEXTRA(ifp)->netsig_len) > 0)
				bcopy(IN_IFEXTRA(ifp)->netsig, data, *len);
			else
				error = ENOENT;
		} else {
			error = ENOMEM;
		}
		if_inetdata_lock_done(ifp);
		break;

	case AF_INET6:
		if_inet6data_lock_shared(ifp);
		if (IN6_IFEXTRA(ifp) != NULL) {
			if (*len == 0 || *len < IN6_IFEXTRA(ifp)->netsig_len) {
				error = EINVAL;
				if_inet6data_lock_done(ifp);
				break;
			}
			if ((*len = IN6_IFEXTRA(ifp)->netsig_len) > 0)
				bcopy(IN6_IFEXTRA(ifp)->netsig, data, *len);
			else
				error = ENOENT;
		} else {
			error = ENOMEM;
		}
		if_inet6data_lock_done(ifp);
		break;

	default:
		error = EINVAL;
		break;
	}

	if (error == 0 && flags != NULL)
		*flags = 0;

	return (error);
}
#if INET6
int
ifnet_set_nat64prefix(struct ifnet *ifp, struct ipv6_prefix *prefixes)
{
	int i, error = 0, one_set = 0;

	if_inet6data_lock_exclusive(ifp);

	if (IN6_IFEXTRA(ifp) == NULL) {
		error = ENOMEM;
		goto out;
	}

	for (i = 0; i < NAT64_MAX_NUM_PREFIXES; i++) {
		uint32_t prefix_len =
		    prefixes[i].prefix_len;
		struct in6_addr *prefix =
		    &prefixes[i].ipv6_prefix;

		if (prefix_len == 0) {
			/* Allow clearing the signature */
			IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len = 0;
			bzero(&IN6_IFEXTRA(ifp)->nat64_prefixes[i].ipv6_prefix,
			    sizeof(struct in6_addr));
			continue;
		} else if (prefix_len != NAT64_PREFIX_LEN_32 &&
		    prefix_len != NAT64_PREFIX_LEN_40 &&
		    prefix_len != NAT64_PREFIX_LEN_48 &&
		    prefix_len != NAT64_PREFIX_LEN_56 &&
		    prefix_len != NAT64_PREFIX_LEN_64 &&
		    prefix_len != NAT64_PREFIX_LEN_96) {
			error = EINVAL;
			goto out;
		}

		if (IN6_IS_SCOPE_EMBED(prefix)) {
			error = EINVAL;
			goto out;
		}

		IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len = prefix_len;
		bcopy(prefix, &IN6_IFEXTRA(ifp)->nat64_prefixes[i].ipv6_prefix,
		    sizeof(struct in6_addr));
		one_set = 1;
	}

out:
	if_inet6data_lock_done(ifp);

	if (error == 0 && one_set != 0)
		necp_update_all_clients();

	return (error);
}
int
ifnet_get_nat64prefix(struct ifnet *ifp, struct ipv6_prefix *prefixes)
{
	int i, found_one = 0, error = 0;

	if (ifp == NULL)
		return (EINVAL);

	if_inet6data_lock_shared(ifp);

	if (IN6_IFEXTRA(ifp) == NULL) {
		error = ENOMEM;
		goto out;
	}

	for (i = 0; i < NAT64_MAX_NUM_PREFIXES; i++) {
		if (IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len != 0)
			found_one = 1;
	}

	if (found_one == 0) {
		error = ENOENT;
		goto out;
	}

	if (prefixes)
		bcopy(IN6_IFEXTRA(ifp)->nat64_prefixes, prefixes,
		    sizeof(IN6_IFEXTRA(ifp)->nat64_prefixes));

out:
	if_inet6data_lock_done(ifp);

	return (error);
}
#endif /* INET6 */
static void
dlil_output_cksum_dbg(struct ifnet *ifp, struct mbuf *m, uint32_t hoff,
    protocol_family_t pf)
{
#pragma unused(ifp)
	uint32_t did_sw;

	if (!(hwcksum_dbg_mode & HWCKSUM_DBG_FINALIZE_FORCED) ||
	    (m->m_pkthdr.csum_flags & (CSUM_TSO_IPV4|CSUM_TSO_IPV6)))
		return;

	switch (pf) {
	case PF_INET:
		did_sw = in_finalize_cksum(m, hoff, m->m_pkthdr.csum_flags);
		if (did_sw & CSUM_DELAY_IP)
			hwcksum_dbg_finalized_hdr++;
		if (did_sw & CSUM_DELAY_DATA)
			hwcksum_dbg_finalized_data++;
		break;
#if INET6
	case PF_INET6:
		/*
		 * Checksum offload should not have been enabled when
		 * extension headers exist; that also means that we
		 * cannot force-finalize packets with extension headers.
		 * Indicate to the callee should it skip such case by
		 * setting optlen to -1.
		 */
		did_sw = in6_finalize_cksum(m, hoff, -1, -1,
		    m->m_pkthdr.csum_flags);
		if (did_sw & CSUM_DELAY_IPV6_DATA)
			hwcksum_dbg_finalized_data++;
		break;
#endif /* INET6 */
	default:
		return;
	}
}
static void
dlil_input_cksum_dbg(struct ifnet *ifp, struct mbuf *m, char *frame_header,
    protocol_family_t pf)
{
	uint16_t sum = 0;
	uint32_t hlen;

	if (frame_header == NULL ||
	    frame_header < (char *)mbuf_datastart(m) ||
	    frame_header > (char *)m->m_data) {
		printf("%s: frame header pointer 0x%llx out of range "
		    "[0x%llx,0x%llx] for mbuf 0x%llx\n", if_name(ifp),
		    (uint64_t)VM_KERNEL_ADDRPERM(frame_header),
		    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_datastart(m)),
		    (uint64_t)VM_KERNEL_ADDRPERM(m->m_data),
		    (uint64_t)VM_KERNEL_ADDRPERM(m));
		return;
	}
	hlen = (m->m_data - frame_header);

	switch (pf) {
	case PF_INET:
#if INET6
	case PF_INET6:
#endif /* INET6 */
		break;
	default:
		return;
	}

	/*
	 * Force partial checksum offload; useful to simulate cases
	 * where the hardware does not support partial checksum offload,
	 * in order to validate correctness throughout the layers above.
	 */
	if (hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED) {
		uint32_t foff = hwcksum_dbg_partial_rxoff_forced;

		if (foff > (uint32_t)m->m_pkthdr.len)
			return;

		m->m_pkthdr.csum_flags &= ~CSUM_RX_FLAGS;

		/* Compute 16-bit 1's complement sum from forced offset */
		sum = m_sum16(m, foff, (m->m_pkthdr.len - foff));

		m->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PARTIAL);
		m->m_pkthdr.csum_rx_val = sum;
		m->m_pkthdr.csum_rx_start = (foff + hlen);

		hwcksum_dbg_partial_forced++;
		hwcksum_dbg_partial_forced_bytes += m->m_pkthdr.len;
	}

	/*
	 * Partial checksum offload verification (and adjustment);
	 * useful to validate and test cases where the hardware
	 * supports partial checksum offload.
	 */
	if ((m->m_pkthdr.csum_flags &
	    (CSUM_DATA_VALID | CSUM_PARTIAL | CSUM_PSEUDO_HDR)) ==
	    (CSUM_DATA_VALID | CSUM_PARTIAL)) {
		uint32_t rxoff;

		/* Start offset must begin after frame header */
		rxoff = m->m_pkthdr.csum_rx_start;
		if (hlen > rxoff) {
			hwcksum_dbg_bad_rxoff++;
			if (dlil_verbose) {
				printf("%s: partial cksum start offset %d "
				    "is less than frame header length %d for "
				    "mbuf 0x%llx\n", if_name(ifp), rxoff, hlen,
				    (uint64_t)VM_KERNEL_ADDRPERM(m));
			}
			return;
		}
		rxoff -= hlen;

		if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED)) {
			/*
			 * Compute the expected 16-bit 1's complement sum;
			 * skip this if we've already computed it above
			 * when partial checksum offload is forced.
			 */
			sum = m_sum16(m, rxoff, (m->m_pkthdr.len - rxoff));

			/* Hardware or driver is buggy */
			if (sum != m->m_pkthdr.csum_rx_val) {
				hwcksum_dbg_bad_cksum++;
				if (dlil_verbose) {
					printf("%s: bad partial cksum value "
					    "0x%x (expected 0x%x) for mbuf "
					    "0x%llx [rx_start %d]\n",
					    if_name(ifp),
					    m->m_pkthdr.csum_rx_val, sum,
					    (uint64_t)VM_KERNEL_ADDRPERM(m),
					    m->m_pkthdr.csum_rx_start);
				}
				return;
			}
		}
		hwcksum_dbg_verified++;

		/*
		 * This code allows us to emulate various hardwares that
		 * perform 16-bit 1's complement sum beginning at various
		 * start offset values.
		 */
		if (hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_RXOFF_ADJ) {
			uint32_t aoff = hwcksum_dbg_partial_rxoff_adj;

			if (aoff == rxoff || aoff > (uint32_t)m->m_pkthdr.len)
				return;

			sum = m_adj_sum16(m, rxoff, aoff,
			    m_pktlen(m) - aoff, sum);

			m->m_pkthdr.csum_rx_val = sum;
			m->m_pkthdr.csum_rx_start = (aoff + hlen);

			hwcksum_dbg_adjusted++;
		}
	}
}
static int
sysctl_hwcksum_dbg_mode SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	u_int32_t i;
	int err;

	i = hwcksum_dbg_mode;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (hwcksum_dbg == 0)
		return (ENODEV);

	if ((i & ~HWCKSUM_DBG_MASK) != 0)
		return (EINVAL);

	hwcksum_dbg_mode = (i & HWCKSUM_DBG_MASK);

	return (err);
}

static int
sysctl_hwcksum_dbg_partial_rxoff_forced SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	u_int32_t i;
	int err;

	i = hwcksum_dbg_partial_rxoff_forced;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED))
		return (ENODEV);

	hwcksum_dbg_partial_rxoff_forced = i;

	return (err);
}

static int
sysctl_hwcksum_dbg_partial_rxoff_adj SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	u_int32_t i;
	int err;

	i = hwcksum_dbg_partial_rxoff_adj;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_RXOFF_ADJ))
		return (ENODEV);

	hwcksum_dbg_partial_rxoff_adj = i;

	return (err);
}

static int
sysctl_tx_chain_len_stats SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
	int err;

	if (req->oldptr == USER_ADDR_NULL) {

	}
	if (req->newptr != USER_ADDR_NULL) {
		return (EPERM);
	}
	err = SYSCTL_OUT(req, &tx_chain_len_stats,
	    sizeof(struct chain_len_stats));

	return (err);
}
#if DEBUG || DEVELOPMENT
/* Blob for sum16 verification */
static uint8_t sumdata[] = {
	0x1f, 0x8b, 0x08, 0x08, 0x4c, 0xe5, 0x9a, 0x4f, 0x00, 0x03,
	0x5f, 0x00, 0x5d, 0x91, 0x41, 0x4e, 0xc4, 0x30, 0x0c, 0x45,
	0xf7, 0x9c, 0xc2, 0x07, 0x18, 0xf5, 0x0e, 0xb0, 0xe2, 0x00,
	0x48, 0x88, 0xa5, 0xdb, 0xba, 0x49, 0x34, 0x69, 0xdc, 0x71,
	0x92, 0xa9, 0xc2, 0x8a, 0x6b, 0x70, 0x3d, 0x4e, 0x82, 0x93,
	0xb4, 0x08, 0xd8, 0xc5, 0xb1, 0xfd, 0xff, 0xb3, 0xfd, 0x4c,
	0x42, 0x5f, 0x1f, 0x9f, 0x11, 0x12, 0x43, 0xb2, 0x04, 0x93,
	0xe0, 0x7b, 0x01, 0x0e, 0x14, 0x07, 0x78, 0xd1, 0x78, 0x75,
	0x71, 0x71, 0xe9, 0x08, 0x84, 0x46, 0xf2, 0xc7, 0x3b, 0x09,
	0xe7, 0xd1, 0xd3, 0x8a, 0x57, 0x92, 0x33, 0xcd, 0x39, 0xcc,
	0xb0, 0x91, 0x89, 0xe0, 0x42, 0x53, 0x8b, 0xb7, 0x8c, 0x42,
	0x60, 0xd9, 0x9f, 0x7a, 0x55, 0x19, 0x76, 0xcb, 0x10, 0x49,
	0x35, 0xac, 0x0b, 0x5a, 0x3c, 0xbb, 0x65, 0x51, 0x8c, 0x90,
	0x7c, 0x69, 0x45, 0x45, 0x81, 0xb4, 0x2b, 0x70, 0x82, 0x85,
	0x55, 0x91, 0x17, 0x90, 0xdc, 0x14, 0x1e, 0x35, 0x52, 0xdd,
	0x02, 0x16, 0xef, 0xb5, 0x40, 0x89, 0xe2, 0x46, 0x53, 0xad,
	0x93, 0x6e, 0x98, 0x30, 0xe5, 0x08, 0xb7, 0xcc, 0x03, 0xbc,
	0x71, 0x86, 0x09, 0x43, 0x0d, 0x52, 0xf5, 0xa2, 0xf5, 0xa2,
	0x56, 0x11, 0x8d, 0xa8, 0xf5, 0xee, 0x92, 0x3d, 0xfe, 0x8c,
	0x67, 0x71, 0x8b, 0x0e, 0x2d, 0x70, 0x77, 0xbe, 0xbe, 0xea,
	0xbf, 0x9a, 0x8d, 0x9c, 0x53, 0x53, 0xe5, 0xe0, 0x4b, 0x87,
	0x85, 0xd2, 0x45, 0x95, 0x30, 0xc1, 0xcc, 0xe0, 0x74, 0x54,
	0x13, 0x58, 0xe8, 0xe8, 0x79, 0xa2, 0x09, 0x73, 0xa4, 0x0e,
	0x39, 0x59, 0x0c, 0xe6, 0x9c, 0xb2, 0x4f, 0x06, 0x5b, 0x8e,
	0xcd, 0x17, 0x6c, 0x5e, 0x95, 0x4d, 0x70, 0xa2, 0x0a, 0xbf,
	0xa3, 0xcc, 0x03, 0xbc, 0x5a, 0xe7, 0x75, 0x06, 0x5e, 0x75,
	0xef, 0x58, 0x8e, 0x15, 0xd1, 0x0a, 0x18, 0xff, 0xdd, 0xe6,
	0x02, 0x3b, 0xb5, 0xb4, 0xa1, 0xe0, 0x72, 0xfc, 0xe3, 0xab,
	0x07, 0xe0, 0x4d, 0x65, 0xea, 0x92, 0xeb, 0xf2, 0x7b, 0x17,
	0x05, 0xce, 0xc6, 0xf6, 0x2b, 0xbb, 0x70, 0x3d, 0x00, 0x95,
	0xe0, 0x07, 0x52, 0x3b, 0x58, 0xfc, 0x7c, 0x69, 0x4d, 0xe9,
	0xf7, 0xa9, 0x66, 0x1e, 0x1e, 0xbe, 0x01, 0x69, 0x98, 0xfe,
	0xc8, 0x28, 0x02, 0x00, 0x00
};
/*
 * Precomputed 16-bit 1's complement sums for various spans of the above
 * data; a worked example follows the SUMTBL_MAX definition below.
 */
static struct {
	boolean_t	init;		/* reference sum filled in yet? */
	uint16_t	len;		/* span length, in bytes */
	uint16_t	sumr;		/* reference */
	uint16_t	sumrp;		/* reference, precomputed */
} sumtbl[] = {
	{ FALSE, 0, 0, 0x0000 },
	{ FALSE, 1, 0, 0x001f },
	{ FALSE, 2, 0, 0x8b1f },
	{ FALSE, 3, 0, 0x8b27 },
	{ FALSE, 7, 0, 0x790e },
	{ FALSE, 11, 0, 0xcb6d },
	{ FALSE, 20, 0, 0x20dd },
	{ FALSE, 27, 0, 0xbabd },
	{ FALSE, 32, 0, 0xf3e8 },
	{ FALSE, 37, 0, 0x197d },
	{ FALSE, 43, 0, 0x9eae },
	{ FALSE, 64, 0, 0x4678 },
	{ FALSE, 127, 0, 0x9399 },
	{ FALSE, 256, 0, 0xd147 },
	{ FALSE, 325, 0, 0x0358 },
};

#define	SUMTBL_MAX	((int)sizeof (sumtbl) / (int)sizeof (sumtbl[0]))
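/*
 * Worked example (illustration, not from the original source): assuming the
 * sums are accumulated over host-order (little-endian) 16-bit words with
 * end-around carry, the precomputed values above follow directly from the
 * first bytes of sumdata[] (0x1f, 0x8b, 0x08, 0x08, 0x4c, 0xe5, 0x9a, ...):
 *
 *   len 1:  0x001f                                  (single byte, low half)
 *   len 2:  0x8b1f                                  (word 0x8b1f)
 *   len 3:  0x8b1f + 0x0008            = 0x8b27
 *   len 7:  0x8b1f + 0x0808 + 0xe54c   = 0x17873 -> fold -> 0x7874
 *           0x7874 + 0x009a            = 0x790e    (trailing odd byte)
 *
 * which matches the sumrp entries for those lengths.
 */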
static void
dlil_verify_sum16(void)
{
	struct mbuf *m;
	uint8_t *buf;
	int n;

	/* Make sure test data plus extra room for alignment fits in cluster */
	_CASSERT((sizeof (sumdata) + (sizeof (uint64_t) * 2)) <= MCLBYTES);

	kprintf("DLIL: running SUM16 self-tests ... ");

	m = m_getcl(M_WAITOK, MT_DATA, M_PKTHDR);
	MH_ALIGN(m, sizeof (uint32_t));		/* 32-bit starting alignment */
	buf = mtod(m, uint8_t *);		/* base address */

	for (n = 0; n < SUMTBL_MAX; n++) {
		uint16_t len = sumtbl[n].len;
		int i;

		/* Verify for all possible alignments */
		for (i = 0; i < (int)sizeof (uint64_t); i++) {
			uint16_t sum, sumr;
			uint8_t *c;

			/* Copy over test data to mbuf */
			VERIFY(len <= sizeof (sumdata));
			c = buf + i;
			bcopy(sumdata, c, len);

			/* Zero-offset test (align by data pointer) */
			m->m_data = (caddr_t)c;
			m->m_pkthdr.len = m->m_len = len;
			sum = m_sum16(m, 0, len);

			if (!sumtbl[n].init) {
				sumr = in_cksum_mbuf_ref(m, len, 0, 0);
				sumtbl[n].sumr = sumr;
				sumtbl[n].init = TRUE;
			} else {
				sumr = sumtbl[n].sumr;
			}

			/* Something is horribly broken; stop now */
			if (sumr != sumtbl[n].sumrp) {
				panic_plain("\n%s: broken in_cksum_mbuf_ref() "
				    "for len=%d align=%d sum=0x%04x "
				    "[expected=0x%04x]\n", __func__,
				    len, i, sumr, sumtbl[n].sumrp);
			} else if (sum != sumr) {
				panic_plain("\n%s: broken m_sum16() for len=%d "
				    "align=%d sum=0x%04x [expected=0x%04x]\n",
				    __func__, len, i, sum, sumr);
			}

			/* Alignment test by offset (fixed data pointer) */
			m->m_data = (caddr_t)buf;
			m->m_pkthdr.len = m->m_len = (i + len);
			sum = m_sum16(m, i, len);

			/* Something is horribly broken; stop now */
			if (sum != sumr) {
				panic_plain("\n%s: broken m_sum16() for len=%d "
				    "offset=%d sum=0x%04x [expected=0x%04x]\n",
				    __func__, len, i, sum, sumr);
			}

			/* Simple sum16 contiguous buffer test by alignment */
			sum = b_sum16(c, len);

			/* Something is horribly broken; stop now */
			if (sum != sumr) {
				panic_plain("\n%s: broken b_sum16() for len=%d "
				    "align=%d sum=0x%04x [expected=0x%04x]\n",
				    __func__, len, i, sum, sumr);
			}
		}
	}
	m_freem(m);

	kprintf("PASSED\n");
}
#endif /* DEBUG || DEVELOPMENT */
#define	CASE_STRINGIFY(x) case x: return #x

__private_extern__ const char *
dlil_kev_dl_code_str(u_int32_t event_code)
{
	switch (event_code) {
	CASE_STRINGIFY(KEV_DL_SIFFLAGS);
	CASE_STRINGIFY(KEV_DL_SIFMETRICS);
	CASE_STRINGIFY(KEV_DL_SIFMTU);
	CASE_STRINGIFY(KEV_DL_SIFPHYS);
	CASE_STRINGIFY(KEV_DL_SIFMEDIA);
	CASE_STRINGIFY(KEV_DL_SIFGENERIC);
	CASE_STRINGIFY(KEV_DL_ADDMULTI);
	CASE_STRINGIFY(KEV_DL_DELMULTI);
	CASE_STRINGIFY(KEV_DL_IF_ATTACHED);
	CASE_STRINGIFY(KEV_DL_IF_DETACHING);
	CASE_STRINGIFY(KEV_DL_IF_DETACHED);
	CASE_STRINGIFY(KEV_DL_LINK_OFF);
	CASE_STRINGIFY(KEV_DL_LINK_ON);
	CASE_STRINGIFY(KEV_DL_PROTO_ATTACHED);
	CASE_STRINGIFY(KEV_DL_PROTO_DETACHED);
	CASE_STRINGIFY(KEV_DL_LINK_ADDRESS_CHANGED);
	CASE_STRINGIFY(KEV_DL_WAKEFLAGS_CHANGED);
	CASE_STRINGIFY(KEV_DL_IF_IDLE_ROUTE_REFCNT);
	CASE_STRINGIFY(KEV_DL_IFCAP_CHANGED);
	CASE_STRINGIFY(KEV_DL_LINK_QUALITY_METRIC_CHANGED);
	CASE_STRINGIFY(KEV_DL_NODE_PRESENCE);
	CASE_STRINGIFY(KEV_DL_NODE_ABSENCE);
	CASE_STRINGIFY(KEV_DL_MASTER_ELECTED);
	CASE_STRINGIFY(KEV_DL_ISSUES);
	CASE_STRINGIFY(KEV_DL_IFDELEGATE_CHANGED);
	default:
		break;
	}
	return ("");
}
void
dlil_dt_tcall_fn(thread_call_param_t arg0, thread_call_param_t arg1)
{
#pragma unused(arg1)
	struct ifnet *ifp = arg0;

	if (ifnet_is_attached(ifp, 1)) {
		nstat_ifnet_threshold_reached(ifp->if_index);
		ifnet_decr_iorefcnt(ifp);
	}
}
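/*
 * Illustration (not compiled into the kernel; hypothetical placement): the
 * data-threshold thread call that targets the function above would be set up
 * elsewhere, e.g. at ifnet attach time, roughly along these lines -- the
 * actual attach path may use a different thread_call allocation variant.
 */
#if 0
	ifp->if_dt_tcall = thread_call_allocate(dlil_dt_tcall_fn,
	    (thread_call_param_t)ifp);
	VERIFY(ifp->if_dt_tcall != NULL);
#endif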
void
ifnet_notify_data_threshold(struct ifnet *ifp)
{
	uint64_t bytes = (ifp->if_ibytes + ifp->if_obytes);
	uint64_t oldbytes = ifp->if_dt_bytes;

	ASSERT(ifp->if_dt_tcall != NULL);

	/*
	 * If we went over the threshold, notify NetworkStatistics.
	 * We rate-limit it based on the threshold interval value.
	 */
	if (threshold_notify && (bytes - oldbytes) > ifp->if_data_threshold &&
	    OSCompareAndSwap64(oldbytes, bytes, &ifp->if_dt_bytes) &&
	    !thread_call_isactive(ifp->if_dt_tcall)) {
		uint64_t tival = (threshold_interval * NSEC_PER_SEC);
		uint64_t now = mach_absolute_time(), deadline = now;
		uint64_t ival;

		if (tival != 0) {
			nanoseconds_to_absolutetime(tival, &ival);
			clock_deadline_for_periodic_event(ival, now, &deadline);
			(void) thread_call_enter_delayed(ifp->if_dt_tcall,
			    deadline);
		} else {
			(void) thread_call_enter(ifp->if_dt_tcall);
		}
	}
}
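/*
 * Worked example (illustration, not from the original source): with
 * threshold_interval = 2, tival = 2 * NSEC_PER_SEC = 2,000,000,000 ns;
 * nanoseconds_to_absolutetime() converts that into mach absolute-time units,
 * and clock_deadline_for_periodic_event() advances "deadline" to the next
 * multiple of that interval past "now".  Combined with the
 * thread_call_isactive() check above, at most one nstat notification is
 * scheduled per threshold interval.
 */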
#if (DEVELOPMENT || DEBUG)
/*
 * The sysctl variable name contains the input parameters of
 * ifnet_get_keepalive_offload_frames()
 *  ifp (interface index): name[0]
 *  frames_array_count: name[1]
 *  frame_data_offset: name[2]
 * The return length gives used_frames_count
 */
static int
sysctl_get_kao_frames SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp)
	int *name = (int *)arg1;
	u_int namelen = arg2;
	int idx;
	ifnet_t ifp = NULL;
	u_int32_t frames_array_count;
	size_t frame_data_offset;
	u_int32_t used_frames_count;
	struct ifnet_keepalive_offload_frame *frames_array = NULL;
	int error = 0;
	u_int32_t i;

	/*
	 * Only root can look at other people's TCP frames
	 */
	error = proc_suser(current_proc());
	if (error != 0)
		goto done;
	/*
	 * Validate the input parameters
	 */
	if (req->newptr != USER_ADDR_NULL) {
		error = EPERM;
		goto done;
	}
	if (namelen != 3) {
		error = EINVAL;
		goto done;
	}
	if (req->oldptr == USER_ADDR_NULL) {
		error = EINVAL;
		goto done;
	}
	if (req->oldlen == 0) {
		error = EINVAL;
		goto done;
	}
	idx = name[0];
	frames_array_count = name[1];
	frame_data_offset = name[2];

	/* Make sure the passed buffer is large enough */
	if (frames_array_count * sizeof(struct ifnet_keepalive_offload_frame) >
	    req->oldlen) {
		error = ENOMEM;
		goto done;
	}

	ifnet_head_lock_shared();
	if (!IF_INDEX_IN_RANGE(idx)) {
		ifnet_head_done();
		error = ENOENT;
		goto done;
	}
	ifp = ifindex2ifnet[idx];
	ifnet_head_done();

	frames_array = _MALLOC(frames_array_count *
	    sizeof(struct ifnet_keepalive_offload_frame), M_TEMP, M_WAITOK);
	if (frames_array == NULL) {
		error = ENOMEM;
		goto done;
	}

	error = ifnet_get_keepalive_offload_frames(ifp, frames_array,
	    frames_array_count, frame_data_offset, &used_frames_count);
	if (error != 0) {
		printf("%s: ifnet_get_keepalive_offload_frames error %d\n",
		    __func__, error);
		goto done;
	}

	for (i = 0; i < used_frames_count; i++) {
		error = SYSCTL_OUT(req, frames_array + i,
		    sizeof(struct ifnet_keepalive_offload_frame));
		if (error != 0)
			goto done;
	}
done:
	if (frames_array != NULL)
		_FREE(frames_array, M_TEMP);
	return (error);
}
#endif /* DEVELOPMENT || DEBUG */
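/*
 * Illustration (not compiled into the kernel): a minimal user-space sketch of
 * querying the node handled above, assuming "mib[]" has already been resolved
 * to this node's OID (e.g. via sysctlnametomib()) and that the buffer layout
 * matches the comment above; the helper name and parameters are hypothetical.
 */
#if 0
#include <sys/types.h>
#include <sys/sysctl.h>

static ssize_t
get_kao_frames(int *mib, u_int miblen, int ifindex, int frames_array_count,
    int frame_data_offset, void *buf, size_t bufsz)
{
	size_t len = bufsz;

	mib[miblen + 0] = ifindex;		/* name[0] */
	mib[miblen + 1] = frames_array_count;	/* name[1] */
	mib[miblen + 2] = frame_data_offset;	/* name[2] */

	if (sysctl(mib, miblen + 3, buf, &len, NULL, 0) == -1)
		return (-1);

	/* returned length = used_frames_count * sizeof(offload frame) */
	return ((ssize_t)len);
}
#endif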
void
ifnet_update_stats_per_flow(struct ifnet_stats_per_flow *ifs,
    struct ifnet *ifp)
{
	tcp_update_stats_per_flow(ifs, ifp);
}
static void
dlil_mit_tcall_fn(thread_call_param_t arg0, thread_call_param_t arg1)
{
#pragma unused(arg1)
	struct ifnet *ifp = (struct ifnet *)arg0;
	struct dlil_threading_info *inp = ifp->if_inp;

	ifnet_lock_shared(ifp);
	if (!IF_FULLY_ATTACHED(ifp) || inp == NULL) {
		ifnet_lock_done(ifp);
		return;
	}

	lck_mtx_lock_spin(&inp->input_lck);
	inp->input_waiting |= DLIL_INPUT_WAITING;
	if (!(inp->input_waiting & DLIL_INPUT_RUNNING) ||
	    !qempty(&inp->rcvq_pkts)) {
		inp->wtot++;
		wakeup_one((caddr_t)&inp->input_waiting);
	}
	lck_mtx_unlock(&inp->input_lck);
	ifnet_lock_done(ifp);
}