2 * Copyright (c) 1999-2020 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
29 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
30 * support for mandatory and extensible security protections. This notice
31 * is included in support of clause 2.2 (b) of the Apple Public License,
37 #include <sys/param.h>
38 #include <sys/systm.h>
39 #include <sys/kernel.h>
40 #include <sys/malloc.h>
42 #include <sys/socket.h>
43 #include <sys/domain.h>
45 #include <sys/random.h>
46 #include <sys/socketvar.h>
47 #include <net/if_dl.h>
49 #include <net/route.h>
50 #include <net/if_var.h>
52 #include <net/if_arp.h>
53 #include <net/iptap.h>
54 #include <net/pktap.h>
55 #include <sys/kern_event.h>
56 #include <sys/kdebug.h>
57 #include <sys/mcache.h>
58 #include <sys/syslog.h>
59 #include <sys/protosw.h>
62 #include <kern/assert.h>
63 #include <kern/task.h>
64 #include <kern/thread.h>
65 #include <kern/sched_prim.h>
66 #include <kern/locks.h>
67 #include <kern/zalloc.h>
69 #include <net/kpi_protocol.h>
70 #include <net/if_types.h>
71 #include <net/if_ipsec.h>
72 #include <net/if_llreach.h>
73 #include <net/if_utun.h>
74 #include <net/kpi_interfacefilter.h>
75 #include <net/classq/classq.h>
76 #include <net/classq/classq_sfb.h>
77 #include <net/flowhash.h>
78 #include <net/ntstat.h>
79 #include <net/if_llatbl.h>
80 #include <net/net_api_stats.h>
81 #include <net/if_ports_used.h>
82 #include <net/if_vlan_var.h>
83 #include <netinet/in.h>
85 #include <netinet/in_var.h>
86 #include <netinet/igmp_var.h>
87 #include <netinet/ip_var.h>
88 #include <netinet/tcp.h>
89 #include <netinet/tcp_var.h>
90 #include <netinet/udp.h>
91 #include <netinet/udp_var.h>
92 #include <netinet/if_ether.h>
93 #include <netinet/in_pcb.h>
94 #include <netinet/in_tclass.h>
95 #include <netinet/ip.h>
96 #include <netinet/ip_icmp.h>
97 #include <netinet/icmp_var.h>
100 #include <net/nat464_utils.h>
101 #include <netinet6/in6_var.h>
102 #include <netinet6/nd6.h>
103 #include <netinet6/mld6_var.h>
104 #include <netinet6/scope6_var.h>
105 #include <netinet/ip6.h>
106 #include <netinet/icmp6.h>
107 #include <net/pf_pbuf.h>
108 #include <libkern/OSAtomic.h>
109 #include <libkern/tree.h>
111 #include <dev/random/randomdev.h>
112 #include <machine/machine_routines.h>
114 #include <mach/thread_act.h>
115 #include <mach/sdt.h>
118 #include <sys/kauth.h>
119 #include <security/mac_framework.h>
120 #include <net/ethernet.h>
121 #include <net/firewire.h>
125 #include <net/pfvar.h>
127 #include <net/pktsched/pktsched.h>
128 #include <net/pktsched/pktsched_netem.h>
131 #include <net/necp.h>
137 #define DBG_LAYER_BEG DLILDBG_CODE(DBG_DLIL_STATIC, 0)
138 #define DBG_LAYER_END DLILDBG_CODE(DBG_DLIL_STATIC, 2)
139 #define DBG_FNC_DLIL_INPUT DLILDBG_CODE(DBG_DLIL_STATIC, (1 << 8))
140 #define DBG_FNC_DLIL_OUTPUT DLILDBG_CODE(DBG_DLIL_STATIC, (2 << 8))
141 #define DBG_FNC_DLIL_IFOUT DLILDBG_CODE(DBG_DLIL_STATIC, (3 << 8))
143 #define MAX_FRAME_TYPE_SIZE 4 /* LONGWORDS */
144 #define MAX_LINKADDR 4 /* LONGWORDS */
145 #define M_NKE M_IFADDR
148 #define DLIL_PRINTF printf
150 #define DLIL_PRINTF kprintf
153 #define IF_DATA_REQUIRE_ALIGNED_64(f) \
154 _CASSERT(!(offsetof(struct if_data_internal, f) % sizeof (u_int64_t)))
156 #define IFNET_IF_DATA_REQUIRE_ALIGNED_64(f) \
157 _CASSERT(!(offsetof(struct ifnet, if_data.f) % sizeof (u_int64_t)))
165 * List of if_proto structures in if_proto_hash[] is protected by
166 * the ifnet lock. The rest of the fields are initialized at protocol
167 * attach time and never change, thus no lock required as long as
168 * a reference to it is valid, via if_proto_ref().
171 SLIST_ENTRY(if_proto
) next_hash
;
175 protocol_family_t protocol_family
;
179 proto_media_input input
;
180 proto_media_preout pre_output
;
181 proto_media_event event
;
182 proto_media_ioctl ioctl
;
183 proto_media_detached detached
;
184 proto_media_resolve_multi resolve_multi
;
185 proto_media_send_arp send_arp
;
188 proto_media_input_v2 input
;
189 proto_media_preout pre_output
;
190 proto_media_event event
;
191 proto_media_ioctl ioctl
;
192 proto_media_detached detached
;
193 proto_media_resolve_multi resolve_multi
;
194 proto_media_send_arp send_arp
;
199 SLIST_HEAD(proto_hash_entry
, if_proto
);
201 #define DLIL_SDLDATALEN \
202 (DLIL_SDLMAXLEN - offsetof(struct sockaddr_dl, sdl_data[0]))
205 struct ifnet dl_if
; /* public ifnet */
207 * DLIL private fields, protected by dl_if_lock
209 decl_lck_mtx_data(, dl_if_lock
);
210 TAILQ_ENTRY(dlil_ifnet
) dl_if_link
; /* dlil_ifnet link */
211 u_int32_t dl_if_flags
; /* flags (below) */
212 u_int32_t dl_if_refcnt
; /* refcnt */
213 void (*dl_if_trace
)(struct dlil_ifnet
*, int); /* ref trace callback */
214 void *dl_if_uniqueid
; /* unique interface id */
215 size_t dl_if_uniqueid_len
; /* length of the unique id */
216 char dl_if_namestorage
[IFNAMSIZ
]; /* interface name storage */
217 char dl_if_xnamestorage
[IFXNAMSIZ
]; /* external name storage */
219 struct ifaddr ifa
; /* lladdr ifa */
220 u_int8_t asdl
[DLIL_SDLMAXLEN
]; /* addr storage */
221 u_int8_t msdl
[DLIL_SDLMAXLEN
]; /* mask storage */
223 u_int8_t dl_if_descstorage
[IF_DESCSIZE
]; /* desc storage */
224 u_int8_t dl_if_permanent_ether
[ETHER_ADDR_LEN
]; /* permanent address */
225 u_int8_t dl_if_permanent_ether_is_set
;
226 u_int8_t dl_if_unused
;
227 struct dlil_threading_info dl_if_inpstorage
; /* input thread storage */
228 ctrace_t dl_if_attach
; /* attach PC stacktrace */
229 ctrace_t dl_if_detach
; /* detach PC stacktrace */
232 /* Values for dl_if_flags (private to DLIL) */
233 #define DLIF_INUSE 0x1 /* DLIL ifnet recycler, ifnet in use */
234 #define DLIF_REUSE 0x2 /* DLIL ifnet recycles, ifnet is not new */
235 #define DLIF_DEBUG 0x4 /* has debugging info */
237 #define IF_REF_TRACE_HIST_SIZE 8 /* size of ref trace history */
240 __private_extern__
unsigned int if_ref_trace_hist_size
= IF_REF_TRACE_HIST_SIZE
;
242 struct dlil_ifnet_dbg
{
243 struct dlil_ifnet dldbg_dlif
; /* dlil_ifnet */
244 u_int16_t dldbg_if_refhold_cnt
; /* # ifnet references */
245 u_int16_t dldbg_if_refrele_cnt
; /* # ifnet releases */
247 * Circular lists of ifnet_{reference,release} callers.
249 ctrace_t dldbg_if_refhold
[IF_REF_TRACE_HIST_SIZE
];
250 ctrace_t dldbg_if_refrele
[IF_REF_TRACE_HIST_SIZE
];
253 #define DLIL_TO_IFP(s) (&s->dl_if)
254 #define IFP_TO_DLIL(s) ((struct dlil_ifnet *)s)
256 struct ifnet_filter
{
257 TAILQ_ENTRY(ifnet_filter
) filt_next
;
259 u_int32_t filt_flags
;
261 const char *filt_name
;
263 protocol_family_t filt_protocol
;
264 iff_input_func filt_input
;
265 iff_output_func filt_output
;
266 iff_event_func filt_event
;
267 iff_ioctl_func filt_ioctl
;
268 iff_detached_func filt_detached
;
271 struct proto_input_entry
;
273 static TAILQ_HEAD(, dlil_ifnet
) dlil_ifnet_head
;
274 static lck_grp_t
*dlil_lock_group
;
275 lck_grp_t
*ifnet_lock_group
;
276 static lck_grp_t
*ifnet_head_lock_group
;
277 static lck_grp_t
*ifnet_snd_lock_group
;
278 static lck_grp_t
*ifnet_rcv_lock_group
;
279 lck_attr_t
*ifnet_lock_attr
;
280 decl_lck_rw_data(static, ifnet_head_lock
);
281 decl_lck_mtx_data(static, dlil_ifnet_lock
);
282 u_int32_t dlil_filter_disable_tso_count
= 0;
285 static unsigned int ifnet_debug
= 1; /* debugging (enabled) */
287 static unsigned int ifnet_debug
; /* debugging (disabled) */
289 static unsigned int dlif_size
; /* size of dlil_ifnet to allocate */
290 static unsigned int dlif_bufsize
; /* size of dlif_size + headroom */
291 static struct zone
*dlif_zone
; /* zone for dlil_ifnet */
292 #define DLIF_ZONE_NAME "ifnet" /* zone name */
294 static ZONE_DECLARE(dlif_filt_zone
, "ifnet_filter",
295 sizeof(struct ifnet_filter
), ZC_ZFREE_CLEARMEM
);
297 static ZONE_DECLARE(dlif_phash_zone
, "ifnet_proto_hash",
298 sizeof(struct proto_hash_entry
) * PROTO_HASH_SLOTS
, ZC_ZFREE_CLEARMEM
);
300 static ZONE_DECLARE(dlif_proto_zone
, "ifnet_proto",
301 sizeof(struct if_proto
), ZC_ZFREE_CLEARMEM
);
303 static unsigned int dlif_tcpstat_size
; /* size of tcpstat_local to allocate */
304 static unsigned int dlif_tcpstat_bufsize
; /* size of dlif_tcpstat_size + headroom */
305 static struct zone
*dlif_tcpstat_zone
; /* zone for tcpstat_local */
306 #define DLIF_TCPSTAT_ZONE_NAME "ifnet_tcpstat" /* zone name */
308 static unsigned int dlif_udpstat_size
; /* size of udpstat_local to allocate */
309 static unsigned int dlif_udpstat_bufsize
; /* size of dlif_udpstat_size + headroom */
310 static struct zone
*dlif_udpstat_zone
; /* zone for udpstat_local */
311 #define DLIF_UDPSTAT_ZONE_NAME "ifnet_udpstat" /* zone name */
313 static u_int32_t net_rtref
;
315 static struct dlil_main_threading_info dlil_main_input_thread_info
;
316 __private_extern__
struct dlil_threading_info
*dlil_main_input_thread
=
317 (struct dlil_threading_info
*)&dlil_main_input_thread_info
;
319 static int dlil_event_internal(struct ifnet
*ifp
, struct kev_msg
*msg
, bool update_generation
);
320 static int dlil_detach_filter_internal(interface_filter_t filter
, int detached
);
321 static void dlil_if_trace(struct dlil_ifnet
*, int);
322 static void if_proto_ref(struct if_proto
*);
323 static void if_proto_free(struct if_proto
*);
324 static struct if_proto
*find_attached_proto(struct ifnet
*, u_int32_t
);
325 static u_int32_t
dlil_ifp_protolist(struct ifnet
*ifp
, protocol_family_t
*list
,
326 u_int32_t list_count
);
327 static void if_flt_monitor_busy(struct ifnet
*);
328 static void if_flt_monitor_unbusy(struct ifnet
*);
329 static void if_flt_monitor_enter(struct ifnet
*);
330 static void if_flt_monitor_leave(struct ifnet
*);
331 static int dlil_interface_filters_input(struct ifnet
*, struct mbuf
**,
332 char **, protocol_family_t
);
333 static int dlil_interface_filters_output(struct ifnet
*, struct mbuf
**,
335 static struct ifaddr
*dlil_alloc_lladdr(struct ifnet
*,
336 const struct sockaddr_dl
*);
337 static int ifnet_lookup(struct ifnet
*);
338 static void if_purgeaddrs(struct ifnet
*);
340 static errno_t
ifproto_media_input_v1(struct ifnet
*, protocol_family_t
,
341 struct mbuf
*, char *);
342 static errno_t
ifproto_media_input_v2(struct ifnet
*, protocol_family_t
,
344 static errno_t
ifproto_media_preout(struct ifnet
*, protocol_family_t
,
345 mbuf_t
*, const struct sockaddr
*, void *, char *, char *);
346 static void ifproto_media_event(struct ifnet
*, protocol_family_t
,
347 const struct kev_msg
*);
348 static errno_t
ifproto_media_ioctl(struct ifnet
*, protocol_family_t
,
349 unsigned long, void *);
350 static errno_t
ifproto_media_resolve_multi(ifnet_t
, const struct sockaddr
*,
351 struct sockaddr_dl
*, size_t);
352 static errno_t
ifproto_media_send_arp(struct ifnet
*, u_short
,
353 const struct sockaddr_dl
*, const struct sockaddr
*,
354 const struct sockaddr_dl
*, const struct sockaddr
*);
356 static errno_t
ifp_if_input(struct ifnet
*ifp
, struct mbuf
*m_head
,
357 struct mbuf
*m_tail
, const struct ifnet_stat_increment_param
*s
,
358 boolean_t poll
, struct thread
*tp
);
359 static void ifp_if_input_poll(struct ifnet
*, u_int32_t
, u_int32_t
,
360 struct mbuf
**, struct mbuf
**, u_int32_t
*, u_int32_t
*);
361 static errno_t
ifp_if_ctl(struct ifnet
*, ifnet_ctl_cmd_t
, u_int32_t
, void *);
362 static errno_t
ifp_if_demux(struct ifnet
*, struct mbuf
*, char *,
363 protocol_family_t
*);
364 static errno_t
ifp_if_add_proto(struct ifnet
*, protocol_family_t
,
365 const struct ifnet_demux_desc
*, u_int32_t
);
366 static errno_t
ifp_if_del_proto(struct ifnet
*, protocol_family_t
);
367 static errno_t
ifp_if_check_multi(struct ifnet
*, const struct sockaddr
*);
368 #if !XNU_TARGET_OS_OSX
369 static errno_t
ifp_if_framer(struct ifnet
*, struct mbuf
**,
370 const struct sockaddr
*, const char *, const char *,
371 u_int32_t
*, u_int32_t
*);
372 #else /* XNU_TARGET_OS_OSX */
373 static errno_t
ifp_if_framer(struct ifnet
*, struct mbuf
**,
374 const struct sockaddr
*, const char *, const char *);
375 #endif /* XNU_TARGET_OS_OSX */
376 static errno_t
ifp_if_framer_extended(struct ifnet
*, struct mbuf
**,
377 const struct sockaddr
*, const char *, const char *,
378 u_int32_t
*, u_int32_t
*);
379 static errno_t
ifp_if_set_bpf_tap(struct ifnet
*, bpf_tap_mode
, bpf_packet_func
);
380 static void ifp_if_free(struct ifnet
*);
381 static void ifp_if_event(struct ifnet
*, const struct kev_msg
*);
382 static __inline
void ifp_inc_traffic_class_in(struct ifnet
*, struct mbuf
*);
383 static __inline
void ifp_inc_traffic_class_out(struct ifnet
*, struct mbuf
*);
385 static errno_t
dlil_input_async(struct dlil_threading_info
*, struct ifnet
*,
386 struct mbuf
*, struct mbuf
*, const struct ifnet_stat_increment_param
*,
387 boolean_t
, struct thread
*);
388 static errno_t
dlil_input_sync(struct dlil_threading_info
*, struct ifnet
*,
389 struct mbuf
*, struct mbuf
*, const struct ifnet_stat_increment_param
*,
390 boolean_t
, struct thread
*);
392 static void dlil_main_input_thread_func(void *, wait_result_t
);
393 static void dlil_main_input_thread_cont(void *, wait_result_t
);
395 static void dlil_input_thread_func(void *, wait_result_t
);
396 static void dlil_input_thread_cont(void *, wait_result_t
);
398 static void dlil_rxpoll_input_thread_func(void *, wait_result_t
);
399 static void dlil_rxpoll_input_thread_cont(void *, wait_result_t
);
401 static int dlil_create_input_thread(ifnet_t
, struct dlil_threading_info
*,
402 thread_continue_t
*);
403 static void dlil_terminate_input_thread(struct dlil_threading_info
*);
404 static void dlil_input_stats_add(const struct ifnet_stat_increment_param
*,
405 struct dlil_threading_info
*, struct ifnet
*, boolean_t
);
406 static boolean_t
dlil_input_stats_sync(struct ifnet
*,
407 struct dlil_threading_info
*);
408 static void dlil_input_packet_list_common(struct ifnet
*, struct mbuf
*,
409 u_int32_t
, ifnet_model_t
, boolean_t
);
410 static errno_t
ifnet_input_common(struct ifnet
*, struct mbuf
*, struct mbuf
*,
411 const struct ifnet_stat_increment_param
*, boolean_t
, boolean_t
);
412 static int dlil_is_clat_needed(protocol_family_t
, mbuf_t
);
413 static errno_t
dlil_clat46(ifnet_t
, protocol_family_t
*, mbuf_t
*);
414 static errno_t
dlil_clat64(ifnet_t
, protocol_family_t
*, mbuf_t
*);
415 #if DEBUG || DEVELOPMENT
416 static void dlil_verify_sum16(void);
417 #endif /* DEBUG || DEVELOPMENT */
418 static void dlil_output_cksum_dbg(struct ifnet
*, struct mbuf
*, uint32_t,
420 static void dlil_input_cksum_dbg(struct ifnet
*, struct mbuf
*, char *,
423 static void dlil_incr_pending_thread_count(void);
424 static void dlil_decr_pending_thread_count(void);
426 static void ifnet_detacher_thread_func(void *, wait_result_t
);
427 static void ifnet_detacher_thread_cont(void *, wait_result_t
);
428 static void ifnet_detach_final(struct ifnet
*);
429 static void ifnet_detaching_enqueue(struct ifnet
*);
430 static struct ifnet
*ifnet_detaching_dequeue(void);
432 static void ifnet_start_thread_func(void *, wait_result_t
);
433 static void ifnet_start_thread_cont(void *, wait_result_t
);
435 static void ifnet_poll_thread_func(void *, wait_result_t
);
436 static void ifnet_poll_thread_cont(void *, wait_result_t
);
438 static errno_t
ifnet_enqueue_common(struct ifnet
*, classq_pkt_t
*,
439 boolean_t
, boolean_t
*);
441 static void ifp_src_route_copyout(struct ifnet
*, struct route
*);
442 static void ifp_src_route_copyin(struct ifnet
*, struct route
*);
443 static void ifp_src_route6_copyout(struct ifnet
*, struct route_in6
*);
444 static void ifp_src_route6_copyin(struct ifnet
*, struct route_in6
*);
446 static int sysctl_rxpoll SYSCTL_HANDLER_ARGS
;
447 static int sysctl_rxpoll_mode_holdtime SYSCTL_HANDLER_ARGS
;
448 static int sysctl_rxpoll_sample_holdtime SYSCTL_HANDLER_ARGS
;
449 static int sysctl_rxpoll_interval_time SYSCTL_HANDLER_ARGS
;
450 static int sysctl_rxpoll_wlowat SYSCTL_HANDLER_ARGS
;
451 static int sysctl_rxpoll_whiwat SYSCTL_HANDLER_ARGS
;
452 static int sysctl_sndq_maxlen SYSCTL_HANDLER_ARGS
;
453 static int sysctl_rcvq_maxlen SYSCTL_HANDLER_ARGS
;
454 static int sysctl_hwcksum_dbg_mode SYSCTL_HANDLER_ARGS
;
455 static int sysctl_hwcksum_dbg_partial_rxoff_forced SYSCTL_HANDLER_ARGS
;
456 static int sysctl_hwcksum_dbg_partial_rxoff_adj SYSCTL_HANDLER_ARGS
;
458 struct chain_len_stats tx_chain_len_stats
;
459 static int sysctl_tx_chain_len_stats SYSCTL_HANDLER_ARGS
;
461 #if TEST_INPUT_THREAD_TERMINATION
462 static int sysctl_input_thread_termination_spin SYSCTL_HANDLER_ARGS
;
463 #endif /* TEST_INPUT_THREAD_TERMINATION */
465 /* The following are protected by dlil_ifnet_lock */
466 static TAILQ_HEAD(, ifnet
) ifnet_detaching_head
;
467 static u_int32_t ifnet_detaching_cnt
;
468 static boolean_t ifnet_detaching_embryonic
;
469 static void *ifnet_delayed_run
; /* wait channel for detaching thread */
471 decl_lck_mtx_data(static, ifnet_fc_lock
);
473 static uint32_t ifnet_flowhash_seed
;
475 struct ifnet_flowhash_key
{
476 char ifk_name
[IFNAMSIZ
];
480 uint32_t ifk_capabilities
;
481 uint32_t ifk_capenable
;
482 uint32_t ifk_output_sched_model
;
487 /* Flow control entry per interface */
488 struct ifnet_fc_entry
{
489 RB_ENTRY(ifnet_fc_entry
) ifce_entry
;
490 u_int32_t ifce_flowhash
;
491 struct ifnet
*ifce_ifp
;
494 static uint32_t ifnet_calc_flowhash(struct ifnet
*);
495 static int ifce_cmp(const struct ifnet_fc_entry
*,
496 const struct ifnet_fc_entry
*);
497 static int ifnet_fc_add(struct ifnet
*);
498 static struct ifnet_fc_entry
*ifnet_fc_get(u_int32_t
);
499 static void ifnet_fc_entry_free(struct ifnet_fc_entry
*);
501 /* protected by ifnet_fc_lock */
502 RB_HEAD(ifnet_fc_tree
, ifnet_fc_entry
) ifnet_fc_tree
;
503 RB_PROTOTYPE(ifnet_fc_tree
, ifnet_fc_entry
, ifce_entry
, ifce_cmp
);
504 RB_GENERATE(ifnet_fc_tree
, ifnet_fc_entry
, ifce_entry
, ifce_cmp
);
506 static ZONE_DECLARE(ifnet_fc_zone
, "ifnet_fc_zone",
507 sizeof(struct ifnet_fc_entry
), ZC_ZFREE_CLEARMEM
);
509 extern void bpfdetach(struct ifnet
*);
510 extern void proto_input_run(void);
512 extern uint32_t udp_count_opportunistic(unsigned int ifindex
,
514 extern uint32_t tcp_count_opportunistic(unsigned int ifindex
,
517 __private_extern__
void link_rtrequest(int, struct rtentry
*, struct sockaddr
*);
520 #if !XNU_TARGET_OS_OSX
521 int dlil_lladdr_ckreq
= 1;
522 #else /* XNU_TARGET_OS_OSX */
523 int dlil_lladdr_ckreq
= 0;
524 #endif /* XNU_TARGET_OS_OSX */
525 #endif /* CONFIG_MACF */
528 int dlil_verbose
= 1;
530 int dlil_verbose
= 0;
532 #if IFNET_INPUT_SANITY_CHK
533 /* sanity checking of input packet lists received */
534 static u_int32_t dlil_input_sanity_check
= 0;
535 #endif /* IFNET_INPUT_SANITY_CHK */
536 /* rate limit debug messages */
537 struct timespec dlil_dbgrate
= { .tv_sec
= 1, .tv_nsec
= 0 };
539 SYSCTL_DECL(_net_link_generic_system
);
541 SYSCTL_INT(_net_link_generic_system
, OID_AUTO
, dlil_verbose
,
542 CTLFLAG_RW
| CTLFLAG_LOCKED
, &dlil_verbose
, 0, "Log DLIL error messages");
544 #define IF_SNDQ_MINLEN 32
545 u_int32_t if_sndq_maxlen
= IFQ_MAXLEN
;
546 SYSCTL_PROC(_net_link_generic_system
, OID_AUTO
, sndq_maxlen
,
547 CTLTYPE_INT
| CTLFLAG_RW
| CTLFLAG_LOCKED
, &if_sndq_maxlen
, IFQ_MAXLEN
,
548 sysctl_sndq_maxlen
, "I", "Default transmit queue max length");
550 #define IF_RCVQ_MINLEN 32
551 #define IF_RCVQ_MAXLEN 256
552 u_int32_t if_rcvq_maxlen
= IF_RCVQ_MAXLEN
;
553 SYSCTL_PROC(_net_link_generic_system
, OID_AUTO
, rcvq_maxlen
,
554 CTLTYPE_INT
| CTLFLAG_RW
| CTLFLAG_LOCKED
, &if_rcvq_maxlen
, IFQ_MAXLEN
,
555 sysctl_rcvq_maxlen
, "I", "Default receive queue max length");
557 #define IF_RXPOLL_DECAY 2 /* ilog2 of EWMA decay rate (4) */
558 u_int32_t if_rxpoll_decay
= IF_RXPOLL_DECAY
;
559 SYSCTL_UINT(_net_link_generic_system
, OID_AUTO
, rxpoll_decay
,
560 CTLFLAG_RW
| CTLFLAG_LOCKED
, &if_rxpoll_decay
, IF_RXPOLL_DECAY
,
561 "ilog2 of EWMA decay rate of avg inbound packets");
563 #define IF_RXPOLL_MODE_HOLDTIME_MIN (10ULL * 1000 * 1000) /* 10 ms */
564 #define IF_RXPOLL_MODE_HOLDTIME (1000ULL * 1000 * 1000) /* 1 sec */
565 static u_int64_t if_rxpoll_mode_holdtime
= IF_RXPOLL_MODE_HOLDTIME
;
566 SYSCTL_PROC(_net_link_generic_system
, OID_AUTO
, rxpoll_freeze_time
,
567 CTLTYPE_QUAD
| CTLFLAG_RW
| CTLFLAG_LOCKED
, &if_rxpoll_mode_holdtime
,
568 IF_RXPOLL_MODE_HOLDTIME
, sysctl_rxpoll_mode_holdtime
,
569 "Q", "input poll mode freeze time");
571 #define IF_RXPOLL_SAMPLETIME_MIN (1ULL * 1000 * 1000) /* 1 ms */
572 #define IF_RXPOLL_SAMPLETIME (10ULL * 1000 * 1000) /* 10 ms */
573 static u_int64_t if_rxpoll_sample_holdtime
= IF_RXPOLL_SAMPLETIME
;
574 SYSCTL_PROC(_net_link_generic_system
, OID_AUTO
, rxpoll_sample_time
,
575 CTLTYPE_QUAD
| CTLFLAG_RW
| CTLFLAG_LOCKED
, &if_rxpoll_sample_holdtime
,
576 IF_RXPOLL_SAMPLETIME
, sysctl_rxpoll_sample_holdtime
,
577 "Q", "input poll sampling time");
579 static u_int64_t if_rxpoll_interval_time
= IF_RXPOLL_INTERVALTIME
;
580 SYSCTL_PROC(_net_link_generic_system
, OID_AUTO
, rxpoll_interval_time
,
581 CTLTYPE_QUAD
| CTLFLAG_RW
| CTLFLAG_LOCKED
, &if_rxpoll_interval_time
,
582 IF_RXPOLL_INTERVALTIME
, sysctl_rxpoll_interval_time
,
583 "Q", "input poll interval (time)");
585 #define IF_RXPOLL_INTERVAL_PKTS 0 /* 0 (disabled) */
586 u_int32_t if_rxpoll_interval_pkts
= IF_RXPOLL_INTERVAL_PKTS
;
587 SYSCTL_UINT(_net_link_generic_system
, OID_AUTO
, rxpoll_interval_pkts
,
588 CTLFLAG_RW
| CTLFLAG_LOCKED
, &if_rxpoll_interval_pkts
,
589 IF_RXPOLL_INTERVAL_PKTS
, "input poll interval (packets)");
591 #define IF_RXPOLL_WLOWAT 10
592 static u_int32_t if_sysctl_rxpoll_wlowat
= IF_RXPOLL_WLOWAT
;
593 SYSCTL_PROC(_net_link_generic_system
, OID_AUTO
, rxpoll_wakeups_lowat
,
594 CTLTYPE_INT
| CTLFLAG_RW
| CTLFLAG_LOCKED
, &if_sysctl_rxpoll_wlowat
,
595 IF_RXPOLL_WLOWAT
, sysctl_rxpoll_wlowat
,
596 "I", "input poll wakeup low watermark");
598 #define IF_RXPOLL_WHIWAT 100
599 static u_int32_t if_sysctl_rxpoll_whiwat
= IF_RXPOLL_WHIWAT
;
600 SYSCTL_PROC(_net_link_generic_system
, OID_AUTO
, rxpoll_wakeups_hiwat
,
601 CTLTYPE_INT
| CTLFLAG_RW
| CTLFLAG_LOCKED
, &if_sysctl_rxpoll_whiwat
,
602 IF_RXPOLL_WHIWAT
, sysctl_rxpoll_whiwat
,
603 "I", "input poll wakeup high watermark");
605 static u_int32_t if_rxpoll_max
= 0; /* 0 (automatic) */
606 SYSCTL_UINT(_net_link_generic_system
, OID_AUTO
, rxpoll_max
,
607 CTLFLAG_RW
| CTLFLAG_LOCKED
, &if_rxpoll_max
, 0,
608 "max packets per poll call");
610 u_int32_t if_rxpoll
= 1;
611 SYSCTL_PROC(_net_link_generic_system
, OID_AUTO
, rxpoll
,
612 CTLTYPE_INT
| CTLFLAG_RW
| CTLFLAG_LOCKED
, &if_rxpoll
, 0,
613 sysctl_rxpoll
, "I", "enable opportunistic input polling");
615 #if TEST_INPUT_THREAD_TERMINATION
616 static u_int32_t if_input_thread_termination_spin
= 0;
617 SYSCTL_PROC(_net_link_generic_system
, OID_AUTO
, input_thread_termination_spin
,
618 CTLTYPE_INT
| CTLFLAG_RW
| CTLFLAG_LOCKED
,
619 &if_input_thread_termination_spin
, 0,
620 sysctl_input_thread_termination_spin
,
621 "I", "input thread termination spin limit");
622 #endif /* TEST_INPUT_THREAD_TERMINATION */
624 static u_int32_t cur_dlil_input_threads
= 0;
625 SYSCTL_UINT(_net_link_generic_system
, OID_AUTO
, dlil_input_threads
,
626 CTLFLAG_RD
| CTLFLAG_LOCKED
, &cur_dlil_input_threads
, 0,
627 "Current number of DLIL input threads");
629 #if IFNET_INPUT_SANITY_CHK
630 SYSCTL_UINT(_net_link_generic_system
, OID_AUTO
, dlil_input_sanity_check
,
631 CTLFLAG_RW
| CTLFLAG_LOCKED
, &dlil_input_sanity_check
, 0,
632 "Turn on sanity checking in DLIL input");
633 #endif /* IFNET_INPUT_SANITY_CHK */
635 static u_int32_t if_flowadv
= 1;
636 SYSCTL_UINT(_net_link_generic_system
, OID_AUTO
, flow_advisory
,
637 CTLFLAG_RW
| CTLFLAG_LOCKED
, &if_flowadv
, 1,
638 "enable flow-advisory mechanism");
640 static u_int32_t if_delaybased_queue
= 1;
641 SYSCTL_UINT(_net_link_generic_system
, OID_AUTO
, delaybased_queue
,
642 CTLFLAG_RW
| CTLFLAG_LOCKED
, &if_delaybased_queue
, 1,
643 "enable delay based dynamic queue sizing");
645 static uint64_t hwcksum_in_invalidated
= 0;
646 SYSCTL_QUAD(_net_link_generic_system
, OID_AUTO
,
647 hwcksum_in_invalidated
, CTLFLAG_RD
| CTLFLAG_LOCKED
,
648 &hwcksum_in_invalidated
, "inbound packets with invalidated hardware cksum");
650 uint32_t hwcksum_dbg
= 0;
651 SYSCTL_UINT(_net_link_generic_system
, OID_AUTO
, hwcksum_dbg
,
652 CTLFLAG_RW
| CTLFLAG_LOCKED
, &hwcksum_dbg
, 0,
653 "enable hardware cksum debugging");
655 u_int32_t ifnet_start_delayed
= 0;
656 SYSCTL_UINT(_net_link_generic_system
, OID_AUTO
, start_delayed
,
657 CTLFLAG_RW
| CTLFLAG_LOCKED
, &ifnet_start_delayed
, 0,
658 "number of times start was delayed");
660 u_int32_t ifnet_delay_start_disabled
= 0;
661 SYSCTL_UINT(_net_link_generic_system
, OID_AUTO
, start_delay_disabled
,
662 CTLFLAG_RW
| CTLFLAG_LOCKED
, &ifnet_delay_start_disabled
, 0,
663 "number of times start was delayed");
666 ifnet_delay_start_disabled_increment(void)
668 OSIncrementAtomic(&ifnet_delay_start_disabled
);
671 #define HWCKSUM_DBG_PARTIAL_FORCED 0x1 /* forced partial checksum */
672 #define HWCKSUM_DBG_PARTIAL_RXOFF_ADJ 0x2 /* adjust start offset */
673 #define HWCKSUM_DBG_FINALIZE_FORCED 0x10 /* forced finalize */
674 #define HWCKSUM_DBG_MASK \
675 (HWCKSUM_DBG_PARTIAL_FORCED | HWCKSUM_DBG_PARTIAL_RXOFF_ADJ | \
676 HWCKSUM_DBG_FINALIZE_FORCED)
678 static uint32_t hwcksum_dbg_mode
= 0;
679 SYSCTL_PROC(_net_link_generic_system
, OID_AUTO
, hwcksum_dbg_mode
,
680 CTLTYPE_INT
| CTLFLAG_RW
| CTLFLAG_LOCKED
, &hwcksum_dbg_mode
,
681 0, sysctl_hwcksum_dbg_mode
, "I", "hardware cksum debugging mode");
683 static uint64_t hwcksum_dbg_partial_forced
= 0;
684 SYSCTL_QUAD(_net_link_generic_system
, OID_AUTO
,
685 hwcksum_dbg_partial_forced
, CTLFLAG_RD
| CTLFLAG_LOCKED
,
686 &hwcksum_dbg_partial_forced
, "packets forced using partial cksum");
688 static uint64_t hwcksum_dbg_partial_forced_bytes
= 0;
689 SYSCTL_QUAD(_net_link_generic_system
, OID_AUTO
,
690 hwcksum_dbg_partial_forced_bytes
, CTLFLAG_RD
| CTLFLAG_LOCKED
,
691 &hwcksum_dbg_partial_forced_bytes
, "bytes forced using partial cksum");
693 static uint32_t hwcksum_dbg_partial_rxoff_forced
= 0;
694 SYSCTL_PROC(_net_link_generic_system
, OID_AUTO
,
695 hwcksum_dbg_partial_rxoff_forced
, CTLTYPE_INT
| CTLFLAG_RW
| CTLFLAG_LOCKED
,
696 &hwcksum_dbg_partial_rxoff_forced
, 0,
697 sysctl_hwcksum_dbg_partial_rxoff_forced
, "I",
698 "forced partial cksum rx offset");
700 static uint32_t hwcksum_dbg_partial_rxoff_adj
= 0;
701 SYSCTL_PROC(_net_link_generic_system
, OID_AUTO
, hwcksum_dbg_partial_rxoff_adj
,
702 CTLTYPE_INT
| CTLFLAG_RW
| CTLFLAG_LOCKED
, &hwcksum_dbg_partial_rxoff_adj
,
703 0, sysctl_hwcksum_dbg_partial_rxoff_adj
, "I",
704 "adjusted partial cksum rx offset");
706 static uint64_t hwcksum_dbg_verified
= 0;
707 SYSCTL_QUAD(_net_link_generic_system
, OID_AUTO
,
708 hwcksum_dbg_verified
, CTLFLAG_RD
| CTLFLAG_LOCKED
,
709 &hwcksum_dbg_verified
, "packets verified for having good checksum");
711 static uint64_t hwcksum_dbg_bad_cksum
= 0;
712 SYSCTL_QUAD(_net_link_generic_system
, OID_AUTO
,
713 hwcksum_dbg_bad_cksum
, CTLFLAG_RD
| CTLFLAG_LOCKED
,
714 &hwcksum_dbg_bad_cksum
, "packets with bad hardware calculated checksum");
716 static uint64_t hwcksum_dbg_bad_rxoff
= 0;
717 SYSCTL_QUAD(_net_link_generic_system
, OID_AUTO
,
718 hwcksum_dbg_bad_rxoff
, CTLFLAG_RD
| CTLFLAG_LOCKED
,
719 &hwcksum_dbg_bad_rxoff
, "packets with invalid rxoff");
721 static uint64_t hwcksum_dbg_adjusted
= 0;
722 SYSCTL_QUAD(_net_link_generic_system
, OID_AUTO
,
723 hwcksum_dbg_adjusted
, CTLFLAG_RD
| CTLFLAG_LOCKED
,
724 &hwcksum_dbg_adjusted
, "packets with rxoff adjusted");
726 static uint64_t hwcksum_dbg_finalized_hdr
= 0;
727 SYSCTL_QUAD(_net_link_generic_system
, OID_AUTO
,
728 hwcksum_dbg_finalized_hdr
, CTLFLAG_RD
| CTLFLAG_LOCKED
,
729 &hwcksum_dbg_finalized_hdr
, "finalized headers");
731 static uint64_t hwcksum_dbg_finalized_data
= 0;
732 SYSCTL_QUAD(_net_link_generic_system
, OID_AUTO
,
733 hwcksum_dbg_finalized_data
, CTLFLAG_RD
| CTLFLAG_LOCKED
,
734 &hwcksum_dbg_finalized_data
, "finalized payloads");
736 uint32_t hwcksum_tx
= 1;
737 SYSCTL_UINT(_net_link_generic_system
, OID_AUTO
, hwcksum_tx
,
738 CTLFLAG_RW
| CTLFLAG_LOCKED
, &hwcksum_tx
, 0,
739 "enable transmit hardware checksum offload");
741 uint32_t hwcksum_rx
= 1;
742 SYSCTL_UINT(_net_link_generic_system
, OID_AUTO
, hwcksum_rx
,
743 CTLFLAG_RW
| CTLFLAG_LOCKED
, &hwcksum_rx
, 0,
744 "enable receive hardware checksum offload");
746 SYSCTL_PROC(_net_link_generic_system
, OID_AUTO
, tx_chain_len_stats
,
747 CTLFLAG_RD
| CTLFLAG_LOCKED
, 0, 9,
748 sysctl_tx_chain_len_stats
, "S", "");
750 uint32_t tx_chain_len_count
= 0;
751 SYSCTL_UINT(_net_link_generic_system
, OID_AUTO
, tx_chain_len_count
,
752 CTLFLAG_RW
| CTLFLAG_LOCKED
, &tx_chain_len_count
, 0, "");
754 static uint32_t threshold_notify
= 1; /* enable/disable */
755 SYSCTL_UINT(_net_link_generic_system
, OID_AUTO
, threshold_notify
,
756 CTLFLAG_RW
| CTLFLAG_LOCKED
, &threshold_notify
, 0, "");
758 static uint32_t threshold_interval
= 2; /* in seconds */
759 SYSCTL_UINT(_net_link_generic_system
, OID_AUTO
, threshold_interval
,
760 CTLFLAG_RW
| CTLFLAG_LOCKED
, &threshold_interval
, 0, "");
762 #if (DEVELOPMENT || DEBUG)
763 static int sysctl_get_kao_frames SYSCTL_HANDLER_ARGS
;
764 SYSCTL_NODE(_net_link_generic_system
, OID_AUTO
, get_kao_frames
,
765 CTLFLAG_RD
| CTLFLAG_LOCKED
, sysctl_get_kao_frames
, "");
766 #endif /* DEVELOPMENT || DEBUG */
768 struct net_api_stats net_api_stats
;
769 SYSCTL_STRUCT(_net
, OID_AUTO
, api_stats
, CTLFLAG_RD
| CTLFLAG_LOCKED
,
770 &net_api_stats
, net_api_stats
, "");
772 unsigned int net_rxpoll
= 1;
773 unsigned int net_affinity
= 1;
774 unsigned int net_async
= 1; /* 0: synchronous, 1: asynchronous */
776 static kern_return_t
dlil_affinity_set(struct thread
*, u_int32_t
);
778 extern u_int32_t inject_buckets
;
780 static lck_grp_attr_t
*dlil_grp_attributes
= NULL
;
781 static lck_attr_t
*dlil_lck_attributes
= NULL
;
783 /* DLIL data threshold thread call */
784 static void dlil_dt_tcall_fn(thread_call_param_t
, thread_call_param_t
);
787 ifnet_filter_update_tso(boolean_t filter_enable
)
790 * update filter count and route_generation ID to let TCP
791 * know it should reevalute doing TSO or not
793 OSAddAtomic(filter_enable
? 1 : -1, &dlil_filter_disable_tso_count
);
798 #define DLIL_INPUT_CHECK(m, ifp) { \
799 struct ifnet *_rcvif = mbuf_pkthdr_rcvif(m); \
800 if (_rcvif == NULL || (ifp != lo_ifp && _rcvif != ifp) || \
801 !(mbuf_flags(m) & MBUF_PKTHDR)) { \
802 panic_plain("%s: invalid mbuf %p\n", __func__, m); \
807 #define DLIL_EWMA(old, new, decay) do { \
809 if ((_avg = (old)) > 0) \
810 _avg = (((_avg << (decay)) - _avg) + (new)) >> (decay); \
816 #define MBPS (1ULL * 1000 * 1000)
817 #define GBPS (MBPS * 1000)
819 struct rxpoll_time_tbl
{
820 u_int64_t speed
; /* downlink speed */
821 u_int32_t plowat
; /* packets low watermark */
822 u_int32_t phiwat
; /* packets high watermark */
823 u_int32_t blowat
; /* bytes low watermark */
824 u_int32_t bhiwat
; /* bytes high watermark */
827 static struct rxpoll_time_tbl rxpoll_tbl
[] = {
828 { .speed
= 10 * MBPS
, .plowat
= 2, .phiwat
= 8, .blowat
= (1 * 1024), .bhiwat
= (6 * 1024) },
829 { .speed
= 100 * MBPS
, .plowat
= 10, .phiwat
= 40, .blowat
= (4 * 1024), .bhiwat
= (64 * 1024) },
830 { .speed
= 1 * GBPS
, .plowat
= 10, .phiwat
= 40, .blowat
= (4 * 1024), .bhiwat
= (64 * 1024) },
831 { .speed
= 10 * GBPS
, .plowat
= 10, .phiwat
= 40, .blowat
= (4 * 1024), .bhiwat
= (64 * 1024) },
832 { .speed
= 100 * GBPS
, .plowat
= 10, .phiwat
= 40, .blowat
= (4 * 1024), .bhiwat
= (64 * 1024) },
833 { .speed
= 0, .plowat
= 0, .phiwat
= 0, .blowat
= 0, .bhiwat
= 0 }
836 decl_lck_mtx_data(static, dlil_thread_sync_lock
);
837 static uint32_t dlil_pending_thread_cnt
= 0;
840 dlil_incr_pending_thread_count(void)
842 LCK_MTX_ASSERT(&dlil_thread_sync_lock
, LCK_MTX_ASSERT_NOTOWNED
);
843 lck_mtx_lock(&dlil_thread_sync_lock
);
844 dlil_pending_thread_cnt
++;
845 lck_mtx_unlock(&dlil_thread_sync_lock
);
849 dlil_decr_pending_thread_count(void)
851 LCK_MTX_ASSERT(&dlil_thread_sync_lock
, LCK_MTX_ASSERT_NOTOWNED
);
852 lck_mtx_lock(&dlil_thread_sync_lock
);
853 VERIFY(dlil_pending_thread_cnt
> 0);
854 dlil_pending_thread_cnt
--;
855 if (dlil_pending_thread_cnt
== 0) {
856 wakeup(&dlil_pending_thread_cnt
);
858 lck_mtx_unlock(&dlil_thread_sync_lock
);
862 proto_hash_value(u_int32_t protocol_family
)
865 * dlil_proto_unplumb_all() depends on the mapping between
866 * the hash bucket index and the protocol family defined
867 * here; future changes must be applied there as well.
869 switch (protocol_family
) {
885 * Caller must already be holding ifnet lock.
887 static struct if_proto
*
888 find_attached_proto(struct ifnet
*ifp
, u_int32_t protocol_family
)
890 struct if_proto
*proto
= NULL
;
891 u_int32_t i
= proto_hash_value(protocol_family
);
893 ifnet_lock_assert(ifp
, IFNET_LCK_ASSERT_OWNED
);
895 if (ifp
->if_proto_hash
!= NULL
) {
896 proto
= SLIST_FIRST(&ifp
->if_proto_hash
[i
]);
899 while (proto
!= NULL
&& proto
->protocol_family
!= protocol_family
) {
900 proto
= SLIST_NEXT(proto
, next_hash
);
911 if_proto_ref(struct if_proto
*proto
)
913 atomic_add_32(&proto
->refcount
, 1);
916 extern void if_rtproto_del(struct ifnet
*ifp
, int protocol
);
919 if_proto_free(struct if_proto
*proto
)
922 struct ifnet
*ifp
= proto
->ifp
;
923 u_int32_t proto_family
= proto
->protocol_family
;
924 struct kev_dl_proto_data ev_pr_data
;
926 oldval
= atomic_add_32_ov(&proto
->refcount
, -1);
931 if (proto
->proto_kpi
== kProtoKPI_v1
) {
932 if (proto
->kpi
.v1
.detached
) {
933 proto
->kpi
.v1
.detached(ifp
, proto
->protocol_family
);
936 if (proto
->proto_kpi
== kProtoKPI_v2
) {
937 if (proto
->kpi
.v2
.detached
) {
938 proto
->kpi
.v2
.detached(ifp
, proto
->protocol_family
);
943 * Cleanup routes that may still be in the routing table for that
944 * interface/protocol pair.
946 if_rtproto_del(ifp
, proto_family
);
948 ifnet_lock_shared(ifp
);
950 /* No more reference on this, protocol must have been detached */
951 VERIFY(proto
->detached
);
954 * The reserved field carries the number of protocol still attached
955 * (subject to change)
957 ev_pr_data
.proto_family
= proto_family
;
958 ev_pr_data
.proto_remaining_count
= dlil_ifp_protolist(ifp
, NULL
, 0);
960 ifnet_lock_done(ifp
);
962 dlil_post_msg(ifp
, KEV_DL_SUBCLASS
, KEV_DL_PROTO_DETACHED
,
963 (struct net_event_data
*)&ev_pr_data
,
964 sizeof(struct kev_dl_proto_data
));
966 if (ev_pr_data
.proto_remaining_count
== 0) {
968 * The protocol count has gone to zero, mark the interface down.
969 * This used to be done by configd.KernelEventMonitor, but that
970 * is inherently prone to races (rdar://problem/30810208).
972 (void) ifnet_set_flags(ifp
, 0, IFF_UP
);
973 (void) ifnet_ioctl(ifp
, 0, SIOCSIFFLAGS
, NULL
);
974 dlil_post_sifflags_msg(ifp
);
977 zfree(dlif_proto_zone
, proto
);
980 __private_extern__
void
981 ifnet_lock_assert(struct ifnet
*ifp
, ifnet_lock_assert_t what
)
986 unsigned int type
= 0;
990 case IFNET_LCK_ASSERT_EXCLUSIVE
:
991 type
= LCK_RW_ASSERT_EXCLUSIVE
;
994 case IFNET_LCK_ASSERT_SHARED
:
995 type
= LCK_RW_ASSERT_SHARED
;
998 case IFNET_LCK_ASSERT_OWNED
:
999 type
= LCK_RW_ASSERT_HELD
;
1002 case IFNET_LCK_ASSERT_NOTOWNED
:
1003 /* nothing to do here for RW lock; bypass assert */
1008 panic("bad ifnet assert type: %d", what
);
1012 LCK_RW_ASSERT(&ifp
->if_lock
, type
);
1016 __private_extern__
void
1017 ifnet_lock_shared(struct ifnet
*ifp
)
1019 lck_rw_lock_shared(&ifp
->if_lock
);
1022 __private_extern__
void
1023 ifnet_lock_exclusive(struct ifnet
*ifp
)
1025 lck_rw_lock_exclusive(&ifp
->if_lock
);
1028 __private_extern__
void
1029 ifnet_lock_done(struct ifnet
*ifp
)
1031 lck_rw_done(&ifp
->if_lock
);
1035 __private_extern__
void
1036 if_inetdata_lock_shared(struct ifnet
*ifp
)
1038 lck_rw_lock_shared(&ifp
->if_inetdata_lock
);
1041 __private_extern__
void
1042 if_inetdata_lock_exclusive(struct ifnet
*ifp
)
1044 lck_rw_lock_exclusive(&ifp
->if_inetdata_lock
);
1047 __private_extern__
void
1048 if_inetdata_lock_done(struct ifnet
*ifp
)
1050 lck_rw_done(&ifp
->if_inetdata_lock
);
1054 __private_extern__
void
1055 if_inet6data_lock_shared(struct ifnet
*ifp
)
1057 lck_rw_lock_shared(&ifp
->if_inet6data_lock
);
1060 __private_extern__
void
1061 if_inet6data_lock_exclusive(struct ifnet
*ifp
)
1063 lck_rw_lock_exclusive(&ifp
->if_inet6data_lock
);
1066 __private_extern__
void
1067 if_inet6data_lock_done(struct ifnet
*ifp
)
1069 lck_rw_done(&ifp
->if_inet6data_lock
);
1072 __private_extern__
void
1073 ifnet_head_lock_shared(void)
1075 lck_rw_lock_shared(&ifnet_head_lock
);
1078 __private_extern__
void
1079 ifnet_head_lock_exclusive(void)
1081 lck_rw_lock_exclusive(&ifnet_head_lock
);
1084 __private_extern__
void
1085 ifnet_head_done(void)
1087 lck_rw_done(&ifnet_head_lock
);
1090 __private_extern__
void
1091 ifnet_head_assert_exclusive(void)
1093 LCK_RW_ASSERT(&ifnet_head_lock
, LCK_RW_ASSERT_EXCLUSIVE
);
1097 * dlil_ifp_protolist
1098 * - get the list of protocols attached to the interface, or just the number
1099 * of attached protocols
1100 * - if the number returned is greater than 'list_count', truncation occurred
1103 * - caller must already be holding ifnet lock.
1106 dlil_ifp_protolist(struct ifnet
*ifp
, protocol_family_t
*list
,
1107 u_int32_t list_count
)
1109 u_int32_t count
= 0;
1112 ifnet_lock_assert(ifp
, IFNET_LCK_ASSERT_OWNED
);
1114 if (ifp
->if_proto_hash
== NULL
) {
1118 for (i
= 0; i
< PROTO_HASH_SLOTS
; i
++) {
1119 struct if_proto
*proto
;
1120 SLIST_FOREACH(proto
, &ifp
->if_proto_hash
[i
], next_hash
) {
1121 if (list
!= NULL
&& count
< list_count
) {
1122 list
[count
] = proto
->protocol_family
;
1131 __private_extern__ u_int32_t
1132 if_get_protolist(struct ifnet
* ifp
, u_int32_t
*protolist
, u_int32_t count
)
1134 ifnet_lock_shared(ifp
);
1135 count
= dlil_ifp_protolist(ifp
, protolist
, count
);
1136 ifnet_lock_done(ifp
);
1140 __private_extern__
void
1141 if_free_protolist(u_int32_t
*list
)
1143 _FREE(list
, M_TEMP
);
1146 __private_extern__
int
1147 dlil_post_msg(struct ifnet
*ifp
, u_int32_t event_subclass
,
1148 u_int32_t event_code
, struct net_event_data
*event_data
,
1149 u_int32_t event_data_len
)
1151 struct net_event_data ev_data
;
1152 struct kev_msg ev_msg
;
1154 bzero(&ev_msg
, sizeof(ev_msg
));
1155 bzero(&ev_data
, sizeof(ev_data
));
1157 * a net event always starts with a net_event_data structure
1158 * but the caller can generate a simple net event or
1159 * provide a longer event structure to post
1161 ev_msg
.vendor_code
= KEV_VENDOR_APPLE
;
1162 ev_msg
.kev_class
= KEV_NETWORK_CLASS
;
1163 ev_msg
.kev_subclass
= event_subclass
;
1164 ev_msg
.event_code
= event_code
;
1166 if (event_data
== NULL
) {
1167 event_data
= &ev_data
;
1168 event_data_len
= sizeof(struct net_event_data
);
1171 strlcpy(&event_data
->if_name
[0], ifp
->if_name
, IFNAMSIZ
);
1172 event_data
->if_family
= ifp
->if_family
;
1173 event_data
->if_unit
= (u_int32_t
)ifp
->if_unit
;
1175 ev_msg
.dv
[0].data_length
= event_data_len
;
1176 ev_msg
.dv
[0].data_ptr
= event_data
;
1177 ev_msg
.dv
[1].data_length
= 0;
1179 bool update_generation
= true;
1180 if (event_subclass
== KEV_DL_SUBCLASS
) {
1181 /* Don't update interface generation for frequent link quality and state changes */
1182 switch (event_code
) {
1183 case KEV_DL_LINK_QUALITY_METRIC_CHANGED
:
1184 case KEV_DL_RRC_STATE_CHANGED
:
1185 case KEV_DL_NODE_PRESENCE
:
1186 case KEV_DL_NODE_ABSENCE
:
1187 case KEV_DL_MASTER_ELECTED
:
1188 update_generation
= false;
1195 return dlil_event_internal(ifp
, &ev_msg
, update_generation
);
1198 __private_extern__
int
1199 dlil_alloc_local_stats(struct ifnet
*ifp
)
1202 void *buf
, *base
, **pbuf
;
1208 if (ifp
->if_tcp_stat
== NULL
&& ifp
->if_udp_stat
== NULL
) {
1209 /* allocate tcpstat_local structure */
1210 buf
= zalloc_flags(dlif_tcpstat_zone
, Z_WAITOK
| Z_ZERO
);
1216 /* Get the 64-bit aligned base address for this object */
1217 base
= (void *)P2ROUNDUP((intptr_t)buf
+ sizeof(u_int64_t
),
1219 VERIFY(((intptr_t)base
+ dlif_tcpstat_size
) <=
1220 ((intptr_t)buf
+ dlif_tcpstat_bufsize
));
1223 * Wind back a pointer size from the aligned base and
1224 * save the original address so we can free it later.
1226 pbuf
= (void **)((intptr_t)base
- sizeof(void *));
1228 ifp
->if_tcp_stat
= base
;
1230 /* allocate udpstat_local structure */
1231 buf
= zalloc_flags(dlif_udpstat_zone
, Z_WAITOK
| Z_ZERO
);
1237 /* Get the 64-bit aligned base address for this object */
1238 base
= (void *)P2ROUNDUP((intptr_t)buf
+ sizeof(u_int64_t
),
1240 VERIFY(((intptr_t)base
+ dlif_udpstat_size
) <=
1241 ((intptr_t)buf
+ dlif_udpstat_bufsize
));
1244 * Wind back a pointer size from the aligned base and
1245 * save the original address so we can free it later.
1247 pbuf
= (void **)((intptr_t)base
- sizeof(void *));
1249 ifp
->if_udp_stat
= base
;
1251 VERIFY(IS_P2ALIGNED(ifp
->if_tcp_stat
, sizeof(u_int64_t
)) &&
1252 IS_P2ALIGNED(ifp
->if_udp_stat
, sizeof(u_int64_t
)));
1257 if (ifp
->if_ipv4_stat
== NULL
) {
1258 MALLOC(ifp
->if_ipv4_stat
, struct if_tcp_ecn_stat
*,
1259 sizeof(struct if_tcp_ecn_stat
), M_TEMP
, M_WAITOK
| M_ZERO
);
1260 if (ifp
->if_ipv4_stat
== NULL
) {
1266 if (ifp
->if_ipv6_stat
== NULL
) {
1267 MALLOC(ifp
->if_ipv6_stat
, struct if_tcp_ecn_stat
*,
1268 sizeof(struct if_tcp_ecn_stat
), M_TEMP
, M_WAITOK
| M_ZERO
);
1269 if (ifp
->if_ipv6_stat
== NULL
) {
1275 if (ifp
!= NULL
&& ret
!= 0) {
1276 if (ifp
->if_tcp_stat
!= NULL
) {
1278 ((intptr_t)ifp
->if_tcp_stat
- sizeof(void *));
1279 zfree(dlif_tcpstat_zone
, *pbuf
);
1280 ifp
->if_tcp_stat
= NULL
;
1282 if (ifp
->if_udp_stat
!= NULL
) {
1284 ((intptr_t)ifp
->if_udp_stat
- sizeof(void *));
1285 zfree(dlif_udpstat_zone
, *pbuf
);
1286 ifp
->if_udp_stat
= NULL
;
1288 if (ifp
->if_ipv4_stat
!= NULL
) {
1289 FREE(ifp
->if_ipv4_stat
, M_TEMP
);
1290 ifp
->if_ipv4_stat
= NULL
;
1292 if (ifp
->if_ipv6_stat
!= NULL
) {
1293 FREE(ifp
->if_ipv6_stat
, M_TEMP
);
1294 ifp
->if_ipv6_stat
= NULL
;
1302 dlil_reset_rxpoll_params(ifnet_t ifp
)
1304 ASSERT(ifp
!= NULL
);
1305 ifnet_set_poll_cycle(ifp
, NULL
);
1306 ifp
->if_poll_update
= 0;
1307 ifp
->if_poll_flags
= 0;
1308 ifp
->if_poll_req
= 0;
1309 ifp
->if_poll_mode
= IFNET_MODEL_INPUT_POLL_OFF
;
1310 bzero(&ifp
->if_poll_tstats
, sizeof(ifp
->if_poll_tstats
));
1311 bzero(&ifp
->if_poll_pstats
, sizeof(ifp
->if_poll_pstats
));
1312 bzero(&ifp
->if_poll_sstats
, sizeof(ifp
->if_poll_sstats
));
1313 net_timerclear(&ifp
->if_poll_mode_holdtime
);
1314 net_timerclear(&ifp
->if_poll_mode_lasttime
);
1315 net_timerclear(&ifp
->if_poll_sample_holdtime
);
1316 net_timerclear(&ifp
->if_poll_sample_lasttime
);
1317 net_timerclear(&ifp
->if_poll_dbg_lasttime
);
1321 dlil_create_input_thread(ifnet_t ifp
, struct dlil_threading_info
*inp
,
1322 thread_continue_t
*thfunc
)
1324 boolean_t dlil_rxpoll_input
;
1325 thread_continue_t func
= NULL
;
1329 dlil_rxpoll_input
= (ifp
!= NULL
&& net_rxpoll
&&
1330 (ifp
->if_eflags
& IFEF_RXPOLL
) && (ifp
->if_xflags
& IFXF_LEGACY
));
1332 /* default strategy utilizes the DLIL worker thread */
1333 inp
->dlth_strategy
= dlil_input_async
;
1335 /* NULL ifp indicates the main input thread, called at dlil_init time */
1338 * Main input thread only.
1340 func
= dlil_main_input_thread_func
;
1341 VERIFY(inp
== dlil_main_input_thread
);
1342 (void) strlcat(inp
->dlth_name
,
1343 "main_input", DLIL_THREADNAME_LEN
);
1344 } else if (dlil_rxpoll_input
) {
1346 * Legacy (non-netif) hybrid polling.
1348 func
= dlil_rxpoll_input_thread_func
;
1349 VERIFY(inp
!= dlil_main_input_thread
);
1350 (void) snprintf(inp
->dlth_name
, DLIL_THREADNAME_LEN
,
1351 "%s_input_poll", if_name(ifp
));
1352 } else if (net_async
|| (ifp
->if_xflags
& IFXF_LEGACY
)) {
1354 * Asynchronous strategy.
1356 func
= dlil_input_thread_func
;
1357 VERIFY(inp
!= dlil_main_input_thread
);
1358 (void) snprintf(inp
->dlth_name
, DLIL_THREADNAME_LEN
,
1359 "%s_input", if_name(ifp
));
1362 * Synchronous strategy if there's a netif below and
1363 * the device isn't capable of hybrid polling.
1365 ASSERT(func
== NULL
);
1366 ASSERT(!(ifp
->if_xflags
& IFXF_LEGACY
));
1367 VERIFY(inp
!= dlil_main_input_thread
);
1368 ASSERT(!inp
->dlth_affinity
);
1369 inp
->dlth_strategy
= dlil_input_sync
;
1371 VERIFY(inp
->dlth_thread
== THREAD_NULL
);
1373 /* let caller know */
1374 if (thfunc
!= NULL
) {
1378 inp
->dlth_lock_grp
= lck_grp_alloc_init(inp
->dlth_name
,
1379 dlil_grp_attributes
);
1380 lck_mtx_init(&inp
->dlth_lock
, inp
->dlth_lock_grp
, dlil_lck_attributes
);
1382 inp
->dlth_ifp
= ifp
; /* NULL for main input thread */
1384 * For interfaces that support opportunistic polling, set the
1385 * low and high watermarks for outstanding inbound packets/bytes.
1386 * Also define freeze times for transitioning between modes
1387 * and updating the average.
1389 if (ifp
!= NULL
&& net_rxpoll
&& (ifp
->if_eflags
& IFEF_RXPOLL
)) {
1390 limit
= MAX(if_rcvq_maxlen
, IF_RCVQ_MINLEN
);
1391 if (ifp
->if_xflags
& IFXF_LEGACY
) {
1392 (void) dlil_rxpoll_set_params(ifp
, NULL
, FALSE
);
1395 limit
= (u_int32_t
)-1;
1398 _qinit(&inp
->dlth_pkts
, Q_DROPTAIL
, limit
, QP_MBUF
);
1399 if (inp
== dlil_main_input_thread
) {
1400 struct dlil_main_threading_info
*inpm
=
1401 (struct dlil_main_threading_info
*)inp
;
1402 _qinit(&inpm
->lo_rcvq_pkts
, Q_DROPTAIL
, limit
, QP_MBUF
);
1406 ASSERT(!(ifp
->if_xflags
& IFXF_LEGACY
));
1412 error
= kernel_thread_start(func
, inp
, &inp
->dlth_thread
);
1413 if (error
== KERN_SUCCESS
) {
1414 thread_precedence_policy_data_t info
;
1415 __unused kern_return_t kret
;
1417 bzero(&info
, sizeof(info
));
1418 info
.importance
= 0;
1419 kret
= thread_policy_set(inp
->dlth_thread
,
1420 THREAD_PRECEDENCE_POLICY
, (thread_policy_t
)&info
,
1421 THREAD_PRECEDENCE_POLICY_COUNT
);
1422 ASSERT(kret
== KERN_SUCCESS
);
1424 * We create an affinity set so that the matching workloop
1425 * thread or the starter thread (for loopback) can be
1426 * scheduled on the same processor set as the input thread.
1429 struct thread
*tp
= inp
->dlth_thread
;
1432 * Randomize to reduce the probability
1433 * of affinity tag namespace collision.
1435 read_frandom(&tag
, sizeof(tag
));
1436 if (dlil_affinity_set(tp
, tag
) == KERN_SUCCESS
) {
1437 thread_reference(tp
);
1438 inp
->dlth_affinity_tag
= tag
;
1439 inp
->dlth_affinity
= TRUE
;
1442 } else if (inp
== dlil_main_input_thread
) {
1443 panic_plain("%s: couldn't create main input thread", __func__
);
1446 panic_plain("%s: couldn't create %s input thread", __func__
,
1450 OSAddAtomic(1, &cur_dlil_input_threads
);
1456 #if TEST_INPUT_THREAD_TERMINATION
1458 sysctl_input_thread_termination_spin SYSCTL_HANDLER_ARGS
1460 #pragma unused(arg1, arg2)
1464 i
= if_input_thread_termination_spin
;
1466 err
= sysctl_handle_int(oidp
, &i
, 0, req
);
1467 if (err
!= 0 || req
->newptr
== USER_ADDR_NULL
) {
1471 if (net_rxpoll
== 0) {
1475 if_input_thread_termination_spin
= i
;
1478 #endif /* TEST_INPUT_THREAD_TERMINATION */
1481 dlil_clean_threading_info(struct dlil_threading_info
*inp
)
1483 lck_mtx_destroy(&inp
->dlth_lock
, inp
->dlth_lock_grp
);
1484 lck_grp_free(inp
->dlth_lock_grp
);
1485 inp
->dlth_lock_grp
= NULL
;
1487 inp
->dlth_flags
= 0;
1489 bzero(inp
->dlth_name
, sizeof(inp
->dlth_name
));
1490 inp
->dlth_ifp
= NULL
;
1491 VERIFY(qhead(&inp
->dlth_pkts
) == NULL
&& qempty(&inp
->dlth_pkts
));
1492 qlimit(&inp
->dlth_pkts
) = 0;
1493 bzero(&inp
->dlth_stats
, sizeof(inp
->dlth_stats
));
1495 VERIFY(!inp
->dlth_affinity
);
1496 inp
->dlth_thread
= THREAD_NULL
;
1497 inp
->dlth_strategy
= NULL
;
1498 VERIFY(inp
->dlth_driver_thread
== THREAD_NULL
);
1499 VERIFY(inp
->dlth_poller_thread
== THREAD_NULL
);
1500 VERIFY(inp
->dlth_affinity_tag
== 0);
1501 #if IFNET_INPUT_SANITY_CHK
1502 inp
->dlth_pkts_cnt
= 0;
1503 #endif /* IFNET_INPUT_SANITY_CHK */
1507 dlil_terminate_input_thread(struct dlil_threading_info
*inp
)
1509 struct ifnet
*ifp
= inp
->dlth_ifp
;
1510 classq_pkt_t pkt
= CLASSQ_PKT_INITIALIZER(pkt
);
1512 VERIFY(current_thread() == inp
->dlth_thread
);
1513 VERIFY(inp
!= dlil_main_input_thread
);
1515 OSAddAtomic(-1, &cur_dlil_input_threads
);
1517 #if TEST_INPUT_THREAD_TERMINATION
1518 { /* do something useless that won't get optimized away */
1520 for (uint32_t i
= 0;
1521 i
< if_input_thread_termination_spin
;
1525 DLIL_PRINTF("the value is %d\n", v
);
1527 #endif /* TEST_INPUT_THREAD_TERMINATION */
1529 lck_mtx_lock_spin(&inp
->dlth_lock
);
1530 _getq_all(&inp
->dlth_pkts
, &pkt
, NULL
, NULL
, NULL
);
1531 VERIFY((inp
->dlth_flags
& DLIL_INPUT_TERMINATE
) != 0);
1532 inp
->dlth_flags
|= DLIL_INPUT_TERMINATE_COMPLETE
;
1533 wakeup_one((caddr_t
)&inp
->dlth_flags
);
1534 lck_mtx_unlock(&inp
->dlth_lock
);
1536 /* free up pending packets */
1537 if (pkt
.cp_mbuf
!= NULL
) {
1538 mbuf_freem_list(pkt
.cp_mbuf
);
1541 /* for the extra refcnt from kernel_thread_start() */
1542 thread_deallocate(current_thread());
1545 DLIL_PRINTF("%s: input thread terminated\n",
1549 /* this is the end */
1550 thread_terminate(current_thread());
1554 static kern_return_t
1555 dlil_affinity_set(struct thread
*tp
, u_int32_t tag
)
1557 thread_affinity_policy_data_t policy
;
1559 bzero(&policy
, sizeof(policy
));
1560 policy
.affinity_tag
= tag
;
1561 return thread_policy_set(tp
, THREAD_AFFINITY_POLICY
,
1562 (thread_policy_t
)&policy
, THREAD_AFFINITY_POLICY_COUNT
);
1568 thread_t thread
= THREAD_NULL
;
1571 * The following fields must be 64-bit aligned for atomic operations.
1573 IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets
);
1574 IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors
);
1575 IF_DATA_REQUIRE_ALIGNED_64(ifi_opackets
);
1576 IF_DATA_REQUIRE_ALIGNED_64(ifi_oerrors
);
1577 IF_DATA_REQUIRE_ALIGNED_64(ifi_collisions
);
1578 IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes
);
1579 IF_DATA_REQUIRE_ALIGNED_64(ifi_obytes
);
1580 IF_DATA_REQUIRE_ALIGNED_64(ifi_imcasts
);
1581 IF_DATA_REQUIRE_ALIGNED_64(ifi_omcasts
);
1582 IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops
);
1583 IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto
);
1584 IF_DATA_REQUIRE_ALIGNED_64(ifi_alignerrs
);
1585 IF_DATA_REQUIRE_ALIGNED_64(ifi_dt_bytes
);
1586 IF_DATA_REQUIRE_ALIGNED_64(ifi_fpackets
);
1587 IF_DATA_REQUIRE_ALIGNED_64(ifi_fbytes
);
1589 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets
);
1590 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors
);
1591 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_opackets
);
1592 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_oerrors
);
1593 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_collisions
);
1594 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes
);
1595 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_obytes
);
1596 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_imcasts
);
1597 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_omcasts
);
1598 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops
);
1599 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto
);
1600 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_alignerrs
);
1601 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_dt_bytes
);
1602 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_fpackets
);
1603 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_fbytes
);
1606 * These IF_HWASSIST_ flags must be equal to their IFNET_* counterparts.
1608 _CASSERT(IF_HWASSIST_CSUM_IP
== IFNET_CSUM_IP
);
1609 _CASSERT(IF_HWASSIST_CSUM_TCP
== IFNET_CSUM_TCP
);
1610 _CASSERT(IF_HWASSIST_CSUM_UDP
== IFNET_CSUM_UDP
);
1611 _CASSERT(IF_HWASSIST_CSUM_IP_FRAGS
== IFNET_CSUM_FRAGMENT
);
1612 _CASSERT(IF_HWASSIST_CSUM_FRAGMENT
== IFNET_IP_FRAGMENT
);
1613 _CASSERT(IF_HWASSIST_CSUM_TCPIPV6
== IFNET_CSUM_TCPIPV6
);
1614 _CASSERT(IF_HWASSIST_CSUM_UDPIPV6
== IFNET_CSUM_UDPIPV6
);
1615 _CASSERT(IF_HWASSIST_CSUM_FRAGMENT_IPV6
== IFNET_IPV6_FRAGMENT
);
1616 _CASSERT(IF_HWASSIST_CSUM_PARTIAL
== IFNET_CSUM_PARTIAL
);
1617 _CASSERT(IF_HWASSIST_CSUM_ZERO_INVERT
== IFNET_CSUM_ZERO_INVERT
);
1618 _CASSERT(IF_HWASSIST_VLAN_TAGGING
== IFNET_VLAN_TAGGING
);
1619 _CASSERT(IF_HWASSIST_VLAN_MTU
== IFNET_VLAN_MTU
);
1620 _CASSERT(IF_HWASSIST_TSO_V4
== IFNET_TSO_IPV4
);
1621 _CASSERT(IF_HWASSIST_TSO_V6
== IFNET_TSO_IPV6
);
1624 * ... as well as the mbuf checksum flags counterparts.
1626 _CASSERT(CSUM_IP
== IF_HWASSIST_CSUM_IP
);
1627 _CASSERT(CSUM_TCP
== IF_HWASSIST_CSUM_TCP
);
1628 _CASSERT(CSUM_UDP
== IF_HWASSIST_CSUM_UDP
);
1629 _CASSERT(CSUM_IP_FRAGS
== IF_HWASSIST_CSUM_IP_FRAGS
);
1630 _CASSERT(CSUM_FRAGMENT
== IF_HWASSIST_CSUM_FRAGMENT
);
1631 _CASSERT(CSUM_TCPIPV6
== IF_HWASSIST_CSUM_TCPIPV6
);
1632 _CASSERT(CSUM_UDPIPV6
== IF_HWASSIST_CSUM_UDPIPV6
);
1633 _CASSERT(CSUM_FRAGMENT_IPV6
== IF_HWASSIST_CSUM_FRAGMENT_IPV6
);
1634 _CASSERT(CSUM_PARTIAL
== IF_HWASSIST_CSUM_PARTIAL
);
1635 _CASSERT(CSUM_ZERO_INVERT
== IF_HWASSIST_CSUM_ZERO_INVERT
);
1636 _CASSERT(CSUM_VLAN_TAG_VALID
== IF_HWASSIST_VLAN_TAGGING
);
1639 * Make sure we have at least IF_LLREACH_MAXLEN in the llreach info.
1641 _CASSERT(IF_LLREACH_MAXLEN
<= IF_LLREACHINFO_ADDRLEN
);
1642 _CASSERT(IFNET_LLREACHINFO_ADDRLEN
== IF_LLREACHINFO_ADDRLEN
);
1644 _CASSERT(IFRLOGF_DLIL
== IFNET_LOGF_DLIL
);
1645 _CASSERT(IFRLOGF_FAMILY
== IFNET_LOGF_FAMILY
);
1646 _CASSERT(IFRLOGF_DRIVER
== IFNET_LOGF_DRIVER
);
1647 _CASSERT(IFRLOGF_FIRMWARE
== IFNET_LOGF_FIRMWARE
);
1649 _CASSERT(IFRLOGCAT_CONNECTIVITY
== IFNET_LOGCAT_CONNECTIVITY
);
1650 _CASSERT(IFRLOGCAT_QUALITY
== IFNET_LOGCAT_QUALITY
);
1651 _CASSERT(IFRLOGCAT_PERFORMANCE
== IFNET_LOGCAT_PERFORMANCE
);
1653 _CASSERT(IFRTYPE_FAMILY_ANY
== IFNET_FAMILY_ANY
);
1654 _CASSERT(IFRTYPE_FAMILY_LOOPBACK
== IFNET_FAMILY_LOOPBACK
);
1655 _CASSERT(IFRTYPE_FAMILY_ETHERNET
== IFNET_FAMILY_ETHERNET
);
1656 _CASSERT(IFRTYPE_FAMILY_SLIP
== IFNET_FAMILY_SLIP
);
1657 _CASSERT(IFRTYPE_FAMILY_TUN
== IFNET_FAMILY_TUN
);
1658 _CASSERT(IFRTYPE_FAMILY_VLAN
== IFNET_FAMILY_VLAN
);
1659 _CASSERT(IFRTYPE_FAMILY_PPP
== IFNET_FAMILY_PPP
);
1660 _CASSERT(IFRTYPE_FAMILY_PVC
== IFNET_FAMILY_PVC
);
1661 _CASSERT(IFRTYPE_FAMILY_DISC
== IFNET_FAMILY_DISC
);
1662 _CASSERT(IFRTYPE_FAMILY_MDECAP
== IFNET_FAMILY_MDECAP
);
1663 _CASSERT(IFRTYPE_FAMILY_GIF
== IFNET_FAMILY_GIF
);
1664 _CASSERT(IFRTYPE_FAMILY_FAITH
== IFNET_FAMILY_FAITH
);
1665 _CASSERT(IFRTYPE_FAMILY_STF
== IFNET_FAMILY_STF
);
1666 _CASSERT(IFRTYPE_FAMILY_FIREWIRE
== IFNET_FAMILY_FIREWIRE
);
1667 _CASSERT(IFRTYPE_FAMILY_BOND
== IFNET_FAMILY_BOND
);
1668 _CASSERT(IFRTYPE_FAMILY_CELLULAR
== IFNET_FAMILY_CELLULAR
);
1669 _CASSERT(IFRTYPE_FAMILY_6LOWPAN
== IFNET_FAMILY_6LOWPAN
);
1670 _CASSERT(IFRTYPE_FAMILY_UTUN
== IFNET_FAMILY_UTUN
);
1671 _CASSERT(IFRTYPE_FAMILY_IPSEC
== IFNET_FAMILY_IPSEC
);
1673 _CASSERT(IFRTYPE_SUBFAMILY_ANY
== IFNET_SUBFAMILY_ANY
);
1674 _CASSERT(IFRTYPE_SUBFAMILY_USB
== IFNET_SUBFAMILY_USB
);
1675 _CASSERT(IFRTYPE_SUBFAMILY_BLUETOOTH
== IFNET_SUBFAMILY_BLUETOOTH
);
1676 _CASSERT(IFRTYPE_SUBFAMILY_WIFI
== IFNET_SUBFAMILY_WIFI
);
1677 _CASSERT(IFRTYPE_SUBFAMILY_THUNDERBOLT
== IFNET_SUBFAMILY_THUNDERBOLT
);
1678 _CASSERT(IFRTYPE_SUBFAMILY_RESERVED
== IFNET_SUBFAMILY_RESERVED
);
1679 _CASSERT(IFRTYPE_SUBFAMILY_INTCOPROC
== IFNET_SUBFAMILY_INTCOPROC
);
1680 _CASSERT(IFRTYPE_SUBFAMILY_QUICKRELAY
== IFNET_SUBFAMILY_QUICKRELAY
);
1681 _CASSERT(IFRTYPE_SUBFAMILY_DEFAULT
== IFNET_SUBFAMILY_DEFAULT
);
1683 _CASSERT(DLIL_MODIDLEN
== IFNET_MODIDLEN
);
1684 _CASSERT(DLIL_MODARGLEN
== IFNET_MODARGLEN
);
1686 PE_parse_boot_argn("net_affinity", &net_affinity
,
1687 sizeof(net_affinity
));
1689 PE_parse_boot_argn("net_rxpoll", &net_rxpoll
, sizeof(net_rxpoll
));
1691 PE_parse_boot_argn("net_rtref", &net_rtref
, sizeof(net_rtref
));
1693 PE_parse_boot_argn("net_async", &net_async
, sizeof(net_async
));
1695 PE_parse_boot_argn("ifnet_debug", &ifnet_debug
, sizeof(ifnet_debug
));
	VERIFY(dlil_pending_thread_cnt == 0);
	dlif_size = (ifnet_debug == 0) ? sizeof(struct dlil_ifnet) :
	    sizeof(struct dlil_ifnet_dbg);
	/* Enforce 64-bit alignment for dlil_ifnet structure */
	dlif_bufsize = dlif_size + sizeof(void *) + sizeof(u_int64_t);
	dlif_bufsize = (uint32_t)P2ROUNDUP(dlif_bufsize, sizeof(u_int64_t));
	dlif_zone = zone_create(DLIF_ZONE_NAME, dlif_bufsize, ZC_ZFREE_CLEARMEM);

	dlif_tcpstat_size = sizeof(struct tcpstat_local);
	/* Enforce 64-bit alignment for tcpstat_local structure */
	dlif_tcpstat_bufsize =
	    dlif_tcpstat_size + sizeof(void *) + sizeof(u_int64_t);
	dlif_tcpstat_bufsize = (uint32_t)
	    P2ROUNDUP(dlif_tcpstat_bufsize, sizeof(u_int64_t));
	dlif_tcpstat_zone = zone_create(DLIF_TCPSTAT_ZONE_NAME,
	    dlif_tcpstat_bufsize, ZC_ZFREE_CLEARMEM);

	dlif_udpstat_size = sizeof(struct udpstat_local);
	/* Enforce 64-bit alignment for udpstat_local structure */
	dlif_udpstat_bufsize =
	    dlif_udpstat_size + sizeof(void *) + sizeof(u_int64_t);
	dlif_udpstat_bufsize = (uint32_t)
	    P2ROUNDUP(dlif_udpstat_bufsize, sizeof(u_int64_t));
	dlif_udpstat_zone = zone_create(DLIF_UDPSTAT_ZONE_NAME,
	    dlif_udpstat_bufsize, ZC_ZFREE_CLEARMEM);
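	/*
	 * Worked example of the rounding above (illustrative numbers only,
	 * not from the original source): with a hypothetical dlif_size of
	 * 100 bytes, the buffer grows to 100 + sizeof(void *) +
	 * sizeof(u_int64_t) = 116 bytes, and P2ROUNDUP(116, 8) rounds it up
	 * to 120.  The extra space presumably leaves room to realign the
	 * payload to an 8-byte boundary and stash a saved pointer, so every
	 * allocation from these zones can satisfy the 64-bit alignment
	 * requirement regardless of where the raw buffer starts.
	 */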
	eventhandler_lists_ctxt_init(&ifnet_evhdlr_ctxt);

	TAILQ_INIT(&dlil_ifnet_head);
	TAILQ_INIT(&ifnet_head);
	TAILQ_INIT(&ifnet_detaching_head);
	TAILQ_INIT(&ifnet_ordered_head);
	/* Setup the lock groups we will use */
	dlil_grp_attributes = lck_grp_attr_alloc_init();

	dlil_lock_group = lck_grp_alloc_init("DLIL internal locks",
	    dlil_grp_attributes);
	ifnet_lock_group = lck_grp_alloc_init("ifnet locks",
	    dlil_grp_attributes);
	ifnet_head_lock_group = lck_grp_alloc_init("ifnet head lock",
	    dlil_grp_attributes);
	ifnet_rcv_lock_group = lck_grp_alloc_init("ifnet rcv locks",
	    dlil_grp_attributes);
	ifnet_snd_lock_group = lck_grp_alloc_init("ifnet snd locks",
	    dlil_grp_attributes);

	/* Setup the lock attributes we will use */
	dlil_lck_attributes = lck_attr_alloc_init();

	ifnet_lock_attr = lck_attr_alloc_init();

	lck_rw_init(&ifnet_head_lock, ifnet_head_lock_group,
	    dlil_lck_attributes);
	lck_mtx_init(&dlil_ifnet_lock, dlil_lock_group, dlil_lck_attributes);
	lck_mtx_init(&dlil_thread_sync_lock, dlil_lock_group, dlil_lck_attributes);

	/* Setup interface flow control related items */
	lck_mtx_init(&ifnet_fc_lock, dlil_lock_group, dlil_lck_attributes);
	/* Initialize interface address subsystem */

	/* Initialize the packet filter */

	/* Initialize queue algorithms */

	/* Initialize packet schedulers */

	/* Initialize flow advisory subsystem */

	/* Initialize the pktap virtual interface */

	/* Initialize the service class to dscp map */

	/* Initialize the interface port list */
	if_ports_used_init();

	/* Initialize the interface low power mode event handler */
	if_low_power_evhdlr_init();

#if DEBUG || DEVELOPMENT
	/* Run self-tests */
	dlil_verify_sum16();
#endif /* DEBUG || DEVELOPMENT */

	/* Initialize link layer table */
	lltable_glbl_init();
	/*
	 * Create and start up the main DLIL input thread and the interface
	 * detacher threads once everything is initialized.
	 */
	dlil_incr_pending_thread_count();
	(void) dlil_create_input_thread(NULL, dlil_main_input_thread, NULL);

	/*
	 * Create ifnet detacher thread.
	 * When an interface gets detached, part of the detach processing
	 * is delayed. The interface is added to delayed detach list
	 * and this thread is woken up to call ifnet_detach_final
	 * on these interfaces.
	 */
	dlil_incr_pending_thread_count();
	if (kernel_thread_start(ifnet_detacher_thread_func,
	    NULL, &thread) != KERN_SUCCESS) {
		panic_plain("%s: couldn't create detacher thread", __func__);
		/* NOTREACHED */
	}
	thread_deallocate(thread);

	/*
	 * Wait for the created kernel threads for dlil to get
	 * scheduled and run at least once before we proceed
	 */
	lck_mtx_lock(&dlil_thread_sync_lock);
	while (dlil_pending_thread_cnt != 0) {
		DLIL_PRINTF("%s: Waiting for all the create dlil kernel "
		    "threads to get scheduled at least once.\n", __func__);
		(void) msleep(&dlil_pending_thread_cnt, &dlil_thread_sync_lock,
		    (PZERO - 1), __func__, NULL);
		LCK_MTX_ASSERT(&dlil_thread_sync_lock, LCK_ASSERT_OWNED);
	}
	lck_mtx_unlock(&dlil_thread_sync_lock);
	DLIL_PRINTF("%s: All the created dlil kernel threads have been "
	    "scheduled at least once. Proceeding.\n", __func__);
}
static void
if_flt_monitor_busy(struct ifnet *ifp)
{
	LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);

	++ifp->if_flt_busy;
	VERIFY(ifp->if_flt_busy != 0);
}

static void
if_flt_monitor_unbusy(struct ifnet *ifp)
{
	if_flt_monitor_leave(ifp);
}

static void
if_flt_monitor_enter(struct ifnet *ifp)
{
	LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);

	while (ifp->if_flt_busy) {
		++ifp->if_flt_waiters;
		(void) msleep(&ifp->if_flt_head, &ifp->if_flt_lock,
		    (PZERO - 1), "if_flt_monitor", NULL);
	}
	if_flt_monitor_busy(ifp);
}

static void
if_flt_monitor_leave(struct ifnet *ifp)
{
	LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);

	VERIFY(ifp->if_flt_busy != 0);
	--ifp->if_flt_busy;

	if (ifp->if_flt_busy == 0 && ifp->if_flt_waiters > 0) {
		ifp->if_flt_waiters = 0;
		wakeup(&ifp->if_flt_head);
	}
}
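/*
 * The enter/leave pair above is a classic sleep-based monitor around the
 * filter list.  A hedged, generic sketch of the same idiom (hypothetical
 * names, not part of this file) for reference:
 */
#if 0
static void
monitor_enter(lck_mtx_t *lock, uint32_t *busy, uint32_t *waiters, void *event)
{
	/* Sleep (dropping the mutex) until the resource is free, then claim it. */
	while (*busy != 0) {
		(*waiters)++;
		(void) msleep(event, lock, (PZERO - 1), "monitor", NULL);
	}
	(*busy)++;
}

static void
monitor_leave(uint32_t *busy, uint32_t *waiters, void *event)
{
	/* Release the resource and wake anyone blocked in monitor_enter(). */
	(*busy)--;
	if (*busy == 0 && *waiters > 0) {
		*waiters = 0;
		wakeup(event);
	}
}
#endif /* 0 */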
__private_extern__ int
dlil_attach_filter(struct ifnet *ifp, const struct iff_filter *if_filter,
    interface_filter_t *filter_ref, u_int32_t flags)
{
	int retval = 0;
	struct ifnet_filter *filter = NULL;

	ifnet_head_lock_shared();
	/* Check that the interface is in the global list */
	if (!ifnet_lookup(ifp)) {
		retval = ENXIO;
		goto done;
	}

	filter = zalloc_flags(dlif_filt_zone, Z_WAITOK | Z_ZERO);
	if (filter == NULL) {
		retval = ENOMEM;
		goto done;
	}

	/* refcnt held above during lookup */
	filter->filt_flags = flags;
	filter->filt_ifp = ifp;
	filter->filt_cookie = if_filter->iff_cookie;
	filter->filt_name = if_filter->iff_name;
	filter->filt_protocol = if_filter->iff_protocol;
	/*
	 * Do not install filter callbacks for internal coproc interface
	 */
	if (!IFNET_IS_INTCOPROC(ifp)) {
		filter->filt_input = if_filter->iff_input;
		filter->filt_output = if_filter->iff_output;
		filter->filt_event = if_filter->iff_event;
		filter->filt_ioctl = if_filter->iff_ioctl;
	}
	filter->filt_detached = if_filter->iff_detached;

	lck_mtx_lock(&ifp->if_flt_lock);
	if_flt_monitor_enter(ifp);

	LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
	TAILQ_INSERT_TAIL(&ifp->if_flt_head, filter, filt_next);

	if_flt_monitor_leave(ifp);
	lck_mtx_unlock(&ifp->if_flt_lock);

	*filter_ref = filter;

	/*
	 * Bump filter count and route_generation ID to let TCP
	 * know it shouldn't do TSO on this connection
	 */
	if ((filter->filt_flags & DLIL_IFF_TSO) == 0) {
		ifnet_filter_update_tso(TRUE);
	}
	OSIncrementAtomic64(&net_api_stats.nas_iflt_attach_count);
	INC_ATOMIC_INT64_LIM(net_api_stats.nas_iflt_attach_total);
	if ((filter->filt_flags & DLIL_IFF_INTERNAL)) {
		INC_ATOMIC_INT64_LIM(net_api_stats.nas_iflt_attach_os_total);
	}
	if (dlil_verbose) {
		DLIL_PRINTF("%s: %s filter attached\n", if_name(ifp),
		    if_filter->iff_name);
	}
done:
	ifnet_head_done();
	if (retval != 0 && ifp != NULL) {
		DLIL_PRINTF("%s: failed to attach %s (err=%d)\n",
		    if_name(ifp), if_filter->iff_name, retval);
	}
	if (retval != 0 && filter != NULL) {
		zfree(dlif_filt_zone, filter);
	}

	return retval;
}
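/*
 * For reference, a minimal sketch of how a caller would reach the attach
 * path above through the interface-filter KPI.  Names such as my_iff_input,
 * my_filter_ref and "com.example.myfilter" are hypothetical, and error
 * handling is abbreviated; this is an illustration, not the canonical usage.
 */
#if 0
static errno_t
my_iff_input(void *cookie, ifnet_t ifp, protocol_family_t protocol,
    mbuf_t *data, char **frame_ptr)
{
#pragma unused(cookie, ifp, protocol, data, frame_ptr)
	/* Return 0 to let the packet continue up the stack. */
	return 0;
}

static interface_filter_t my_filter_ref;

static errno_t
my_filter_attach(void)
{
	struct iff_filter filt;
	ifnet_t ifp;
	errno_t err;

	bzero(&filt, sizeof(filt));
	filt.iff_cookie = NULL;
	filt.iff_name = "com.example.myfilter";	/* hypothetical */
	filt.iff_protocol = 0;			/* all protocols */
	filt.iff_input = my_iff_input;

	err = ifnet_find_by_name("en0", &ifp);
	if (err == 0) {
		err = iflt_attach(ifp, &filt, &my_filter_ref);
		ifnet_release(ifp);
	}
	return err;
}
#endif /* 0 */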
static int
dlil_detach_filter_internal(interface_filter_t filter, int detached)
{
	int retval = 0;

	if (detached == 0) {
		ifnet_t ifp = NULL;

		ifnet_head_lock_shared();
		TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
			interface_filter_t entry = NULL;

			lck_mtx_lock(&ifp->if_flt_lock);
			TAILQ_FOREACH(entry, &ifp->if_flt_head, filt_next) {
				if (entry != filter || entry->filt_skip) {
					continue;
				}
				/*
				 * We've found a match; since it's possible
				 * that the thread gets blocked in the monitor,
				 * we do the lock dance.  Interface should
				 * not be detached since we still have a use
				 * count held during filter attach.
				 */
				entry->filt_skip = 1;	/* skip input/output */
				lck_mtx_unlock(&ifp->if_flt_lock);
				ifnet_head_done();

				lck_mtx_lock(&ifp->if_flt_lock);
				if_flt_monitor_enter(ifp);
				LCK_MTX_ASSERT(&ifp->if_flt_lock,
				    LCK_MTX_ASSERT_OWNED);

				/* Remove the filter from the list */
				TAILQ_REMOVE(&ifp->if_flt_head, filter,
				    filt_next);

				if_flt_monitor_leave(ifp);
				lck_mtx_unlock(&ifp->if_flt_lock);
				if (dlil_verbose) {
					DLIL_PRINTF("%s: %s filter detached\n",
					    if_name(ifp), filter->filt_name);
				}
				goto destroy;
			}
			lck_mtx_unlock(&ifp->if_flt_lock);
		}
		ifnet_head_done();

		/* filter parameter is not a valid filter ref */
		retval = EINVAL;
		goto done;
	}

	if (dlil_verbose) {
		DLIL_PRINTF("%s filter detached\n", filter->filt_name);
	}

destroy:
	/* Call the detached function if there is one */
	if (filter->filt_detached) {
		filter->filt_detached(filter->filt_cookie, filter->filt_ifp);
	}

	/*
	 * Decrease filter count and route_generation ID to let TCP
	 * know it should reevaluate doing TSO or not
	 */
	if ((filter->filt_flags & DLIL_IFF_TSO) == 0) {
		ifnet_filter_update_tso(FALSE);
	}

	VERIFY(OSDecrementAtomic64(&net_api_stats.nas_iflt_attach_count) > 0);

	/* Free the filter */
	zfree(dlif_filt_zone, filter);
	filter = NULL;

done:
	if (retval != 0 && filter != NULL) {
		DLIL_PRINTF("failed to detach %s filter (err=%d)\n",
		    filter->filt_name, retval);
	}

	return retval;
}

__private_extern__ void
dlil_detach_filter(interface_filter_t filter)
{
	if (filter == NULL) {
		return;
	}
	dlil_detach_filter_internal(filter, 0);
}
__private_extern__ boolean_t
dlil_has_ip_filter(void)
{
	boolean_t has_filter = (net_api_stats.nas_ipf_add_count > 0);

	DTRACE_IP1(dlil_has_ip_filter, boolean_t, has_filter);
	return has_filter;
}

__private_extern__ boolean_t
dlil_has_if_filter(struct ifnet *ifp)
{
	boolean_t has_filter = !TAILQ_EMPTY(&ifp->if_flt_head);

	DTRACE_IP1(dlil_has_if_filter, boolean_t, has_filter);
	return has_filter;
}
static inline void
dlil_input_wakeup(struct dlil_threading_info *inp)
{
	LCK_MTX_ASSERT(&inp->dlth_lock, LCK_MTX_ASSERT_OWNED);

	inp->dlth_flags |= DLIL_INPUT_WAITING;
	if (!(inp->dlth_flags & DLIL_INPUT_RUNNING)) {
		wakeup_one((caddr_t)&inp->dlth_flags);
	}
}
__attribute__((noreturn))
static void
dlil_main_input_thread_func(void *v, wait_result_t w)
{
	struct dlil_threading_info *inp = v;

	VERIFY(inp == dlil_main_input_thread);
	VERIFY(inp->dlth_ifp == NULL);
	VERIFY(current_thread() == inp->dlth_thread);

	lck_mtx_lock(&inp->dlth_lock);
	VERIFY(!(inp->dlth_flags & (DLIL_INPUT_EMBRYONIC | DLIL_INPUT_RUNNING)));
	(void) assert_wait(&inp->dlth_flags, THREAD_UNINT);
	inp->dlth_flags |= DLIL_INPUT_EMBRYONIC;
	/* wake up once to get out of embryonic state */
	dlil_input_wakeup(inp);
	lck_mtx_unlock(&inp->dlth_lock);
	(void) thread_block_parameter(dlil_main_input_thread_cont, inp);
	/* NOTREACHED */
	__builtin_unreachable();
}
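/*
 * The embryonic/continuation dance above is the standard XNU pattern for a
 * long-lived kernel thread that never returns: park on an event with
 * assert_wait(), then hand the stack back to the scheduler via
 * thread_block_parameter() with a continuation that re-enters the service
 * loop.  A hedged, generic sketch of that idiom with hypothetical names:
 */
#if 0
static void my_thread_cont(void *param, wait_result_t wres);

static void
my_thread_func(void *param, wait_result_t wres)
{
#pragma unused(wres)
	/* One-time setup runs here, then the thread parks itself. */
	(void) assert_wait(param, THREAD_UNINT);
	(void) thread_block_parameter(my_thread_cont, param);
	/* NOTREACHED */
}

static void
my_thread_cont(void *param, wait_result_t wres)
{
#pragma unused(wres)
	/* Do one round of work, then park again with the same continuation. */
	(void) assert_wait(param, THREAD_UNINT);
	(void) thread_block_parameter(my_thread_cont, param);
	/* NOTREACHED */
}
#endif /* 0 */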
/*
 * Main input thread:
 *
 *   a) handles all inbound packets for lo0
 *   b) handles all inbound packets for interfaces with no dedicated
 *      input thread (e.g. anything but Ethernet/PDP or those that support
 *      opportunistic polling.)
 *   c) protocol registrations
 *   d) packet injections
 */
__attribute__((noreturn))
static void
dlil_main_input_thread_cont(void *v, wait_result_t wres)
{
	struct dlil_main_threading_info *inpm = v;
	struct dlil_threading_info *inp = v;

	/* main input thread is uninterruptible */
	VERIFY(wres != THREAD_INTERRUPTED);
	lck_mtx_lock_spin(&inp->dlth_lock);
	VERIFY(!(inp->dlth_flags & (DLIL_INPUT_TERMINATE |
	    DLIL_INPUT_RUNNING)));
	inp->dlth_flags |= DLIL_INPUT_RUNNING;

	while (1) {
		struct mbuf *m = NULL, *m_loop = NULL;
		u_int32_t m_cnt, m_cnt_loop;
		classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt);
		boolean_t proto_req;
		boolean_t embryonic;

		inp->dlth_flags &= ~DLIL_INPUT_WAITING;

		if (__improbable(embryonic =
		    (inp->dlth_flags & DLIL_INPUT_EMBRYONIC))) {
			inp->dlth_flags &= ~DLIL_INPUT_EMBRYONIC;
		}

		proto_req = (inp->dlth_flags &
		    (DLIL_PROTO_WAITING | DLIL_PROTO_REGISTER));

		/* Packets for non-dedicated interfaces other than lo0 */
		m_cnt = qlen(&inp->dlth_pkts);
		_getq_all(&inp->dlth_pkts, &pkt, NULL, NULL, NULL);
		m = pkt.cp_mbuf;

		/* Packets exclusive to lo0 */
		m_cnt_loop = qlen(&inpm->lo_rcvq_pkts);
		_getq_all(&inpm->lo_rcvq_pkts, &pkt, NULL, NULL, NULL);
		m_loop = pkt.cp_mbuf;

		lck_mtx_unlock(&inp->dlth_lock);

		if (__improbable(embryonic)) {
			dlil_decr_pending_thread_count();
		}

		/*
		 * NOTE warning %%% attention !!!!
		 * We should think about putting some thread starvation
		 * safeguards if we deal with long chains of packets.
		 */
		if (__probable(m_loop != NULL)) {
			dlil_input_packet_list_extended(lo_ifp, m_loop,
			    m_cnt_loop, IFNET_MODEL_INPUT_POLL_OFF);
		}

		if (__probable(m != NULL)) {
			dlil_input_packet_list_extended(NULL, m,
			    m_cnt, IFNET_MODEL_INPUT_POLL_OFF);
		}

		if (__improbable(proto_req)) {
			proto_input_run();
		}

		lck_mtx_lock_spin(&inp->dlth_lock);
		VERIFY(inp->dlth_flags & DLIL_INPUT_RUNNING);
		/* main input thread cannot be terminated */
		VERIFY(!(inp->dlth_flags & DLIL_INPUT_TERMINATE));
		if (!(inp->dlth_flags & ~DLIL_INPUT_RUNNING)) {
			break;
		}
	}

	inp->dlth_flags &= ~DLIL_INPUT_RUNNING;
	(void) assert_wait(&inp->dlth_flags, THREAD_UNINT);
	lck_mtx_unlock(&inp->dlth_lock);
	(void) thread_block_parameter(dlil_main_input_thread_cont, inp);

	VERIFY(0);	/* we should never get here */
	/* NOTREACHED */
	__builtin_unreachable();
}
/*
 * Input thread for interfaces with legacy input model.
 */
__attribute__((noreturn))
static void
dlil_input_thread_func(void *v, wait_result_t w)
{
	char thread_name[MAXTHREADNAMESIZE];
	struct dlil_threading_info *inp = v;
	struct ifnet *ifp = inp->dlth_ifp;

	VERIFY(inp != dlil_main_input_thread);
	VERIFY(ifp != NULL);
	VERIFY(!(ifp->if_eflags & IFEF_RXPOLL) || !net_rxpoll ||
	    !(ifp->if_xflags & IFXF_LEGACY));
	VERIFY(ifp->if_poll_mode == IFNET_MODEL_INPUT_POLL_OFF ||
	    !(ifp->if_xflags & IFXF_LEGACY));
	VERIFY(current_thread() == inp->dlth_thread);

	/* construct the name for this thread, and then apply it */
	bzero(thread_name, sizeof(thread_name));
	(void) snprintf(thread_name, sizeof(thread_name),
	    "dlil_input_%s", ifp->if_xname);
	thread_set_thread_name(inp->dlth_thread, thread_name);

	lck_mtx_lock(&inp->dlth_lock);
	VERIFY(!(inp->dlth_flags & (DLIL_INPUT_EMBRYONIC | DLIL_INPUT_RUNNING)));
	(void) assert_wait(&inp->dlth_flags, THREAD_UNINT);
	inp->dlth_flags |= DLIL_INPUT_EMBRYONIC;
	/* wake up once to get out of embryonic state */
	dlil_input_wakeup(inp);
	lck_mtx_unlock(&inp->dlth_lock);
	(void) thread_block_parameter(dlil_input_thread_cont, inp);
	/* NOTREACHED */
	__builtin_unreachable();
}
__attribute__((noreturn))
static void
dlil_input_thread_cont(void *v, wait_result_t wres)
{
	struct dlil_threading_info *inp = v;
	struct ifnet *ifp = inp->dlth_ifp;

	lck_mtx_lock_spin(&inp->dlth_lock);
	if (__improbable(wres == THREAD_INTERRUPTED ||
	    (inp->dlth_flags & DLIL_INPUT_TERMINATE))) {
		goto terminate;
	}

	VERIFY(!(inp->dlth_flags & DLIL_INPUT_RUNNING));
	inp->dlth_flags |= DLIL_INPUT_RUNNING;

	while (1) {
		struct mbuf *m = NULL;
		u_int32_t m_cnt;
		classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt);
		boolean_t notify = FALSE;
		boolean_t embryonic;

		inp->dlth_flags &= ~DLIL_INPUT_WAITING;

		if (__improbable(embryonic =
		    (inp->dlth_flags & DLIL_INPUT_EMBRYONIC))) {
			inp->dlth_flags &= ~DLIL_INPUT_EMBRYONIC;
		}

		/*
		 * Protocol registration and injection must always use
		 * the main input thread; in theory the latter can utilize
		 * the corresponding input thread where the packet arrived
		 * on, but that requires our knowing the interface in advance
		 * (and the benefits might not be worth the trouble.)
		 */
		VERIFY(!(inp->dlth_flags &
		    (DLIL_PROTO_WAITING | DLIL_PROTO_REGISTER)));

		/* Packets for this interface */
		m_cnt = qlen(&inp->dlth_pkts);
		_getq_all(&inp->dlth_pkts, &pkt, NULL, NULL, NULL);
		m = pkt.cp_mbuf;

		notify = dlil_input_stats_sync(ifp, inp);

		lck_mtx_unlock(&inp->dlth_lock);

		if (__improbable(embryonic)) {
			ifnet_decr_pending_thread_count(ifp);
		}

		if (__improbable(notify)) {
			ifnet_notify_data_threshold(ifp);
		}

		/*
		 * NOTE warning %%% attention !!!!
		 * We should think about putting some thread starvation
		 * safeguards if we deal with long chains of packets.
		 */
		if (__probable(m != NULL)) {
			dlil_input_packet_list_extended(NULL, m,
			    m_cnt, ifp->if_poll_mode);
		}

		lck_mtx_lock_spin(&inp->dlth_lock);
		VERIFY(inp->dlth_flags & DLIL_INPUT_RUNNING);
		if (!(inp->dlth_flags & ~(DLIL_INPUT_RUNNING |
		    DLIL_INPUT_TERMINATE))) {
			break;
		}
	}

	inp->dlth_flags &= ~DLIL_INPUT_RUNNING;

terminate:
	if (__improbable(inp->dlth_flags & DLIL_INPUT_TERMINATE)) {
		lck_mtx_unlock(&inp->dlth_lock);
		dlil_terminate_input_thread(inp);
		/* NOTREACHED */
	} else {
		(void) assert_wait(&inp->dlth_flags, THREAD_UNINT);
		lck_mtx_unlock(&inp->dlth_lock);
		(void) thread_block_parameter(dlil_input_thread_cont, inp);
		/* NOTREACHED */
	}

	VERIFY(0);	/* we should never get here */
	/* NOTREACHED */
	__builtin_unreachable();
}
/*
 * Input thread for interfaces with opportunistic polling input model.
 */
__attribute__((noreturn))
static void
dlil_rxpoll_input_thread_func(void *v, wait_result_t w)
{
	char thread_name[MAXTHREADNAMESIZE];
	struct dlil_threading_info *inp = v;
	struct ifnet *ifp = inp->dlth_ifp;

	VERIFY(inp != dlil_main_input_thread);
	VERIFY(ifp != NULL && (ifp->if_eflags & IFEF_RXPOLL) &&
	    (ifp->if_xflags & IFXF_LEGACY));
	VERIFY(current_thread() == inp->dlth_thread);

	/* construct the name for this thread, and then apply it */
	bzero(thread_name, sizeof(thread_name));
	(void) snprintf(thread_name, sizeof(thread_name),
	    "dlil_input_poll_%s", ifp->if_xname);
	thread_set_thread_name(inp->dlth_thread, thread_name);

	lck_mtx_lock(&inp->dlth_lock);
	VERIFY(!(inp->dlth_flags & (DLIL_INPUT_EMBRYONIC | DLIL_INPUT_RUNNING)));
	(void) assert_wait(&inp->dlth_flags, THREAD_UNINT);
	inp->dlth_flags |= DLIL_INPUT_EMBRYONIC;
	/* wake up once to get out of embryonic state */
	dlil_input_wakeup(inp);
	lck_mtx_unlock(&inp->dlth_lock);
	(void) thread_block_parameter(dlil_rxpoll_input_thread_cont, inp);
	/* NOTREACHED */
	__builtin_unreachable();
}
__attribute__((noreturn))
static void
dlil_rxpoll_input_thread_cont(void *v, wait_result_t wres)
{
	struct dlil_threading_info *inp = v;
	struct ifnet *ifp = inp->dlth_ifp;

	lck_mtx_lock_spin(&inp->dlth_lock);
	if (__improbable(wres == THREAD_INTERRUPTED ||
	    (inp->dlth_flags & DLIL_INPUT_TERMINATE))) {
		goto terminate;
	}

	VERIFY(!(inp->dlth_flags & DLIL_INPUT_RUNNING));
	inp->dlth_flags |= DLIL_INPUT_RUNNING;

	while (1) {
		struct mbuf *m = NULL;
		uint32_t m_cnt, poll_req = 0;
		uint64_t m_size = 0;
		ifnet_model_t mode;
		struct timespec now, delta;
		classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt);
		boolean_t notify;
		boolean_t embryonic;
		uint64_t ival;

		inp->dlth_flags &= ~DLIL_INPUT_WAITING;

		if (__improbable(embryonic =
		    (inp->dlth_flags & DLIL_INPUT_EMBRYONIC))) {
			inp->dlth_flags &= ~DLIL_INPUT_EMBRYONIC;
		}

		if ((ival = ifp->if_rxpoll_ival) < IF_RXPOLL_INTERVALTIME_MIN) {
			ival = IF_RXPOLL_INTERVALTIME_MIN;
		}

		/* Link parameters changed? */
		if (ifp->if_poll_update != 0) {
			ifp->if_poll_update = 0;
			(void) dlil_rxpoll_set_params(ifp, NULL, TRUE);
		}

		/* Current operating mode */
		mode = ifp->if_poll_mode;

		/*
		 * Protocol registration and injection must always use
		 * the main input thread; in theory the latter can utilize
		 * the corresponding input thread where the packet arrived
		 * on, but that requires our knowing the interface in advance
		 * (and the benefits might not be worth the trouble.)
		 */
		VERIFY(!(inp->dlth_flags &
		    (DLIL_PROTO_WAITING | DLIL_PROTO_REGISTER)));

		/* Total count of all packets */
		m_cnt = qlen(&inp->dlth_pkts);

		/* Total bytes of all packets */
		m_size = qsize(&inp->dlth_pkts);

		/* Packets for this interface */
		_getq_all(&inp->dlth_pkts, &pkt, NULL, NULL, NULL);
		m = pkt.cp_mbuf;
		VERIFY(m != NULL || m_cnt == 0);

		nanouptime(&now);
		if (!net_timerisset(&ifp->if_poll_sample_lasttime)) {
			*(&ifp->if_poll_sample_lasttime) = *(&now);
		}

		net_timersub(&now, &ifp->if_poll_sample_lasttime, &delta);
		if (if_rxpoll && net_timerisset(&ifp->if_poll_sample_holdtime)) {
			u_int32_t ptot, btot;

			/* Accumulate statistics for current sampling */
			PKTCNTR_ADD(&ifp->if_poll_sstats, m_cnt, m_size);

			if (net_timercmp(&delta, &ifp->if_poll_sample_holdtime, <)) {
				goto skip;
			}

			*(&ifp->if_poll_sample_lasttime) = *(&now);

			/* Calculate min/max of inbound bytes */
			btot = (u_int32_t)ifp->if_poll_sstats.bytes;
			if (ifp->if_rxpoll_bmin == 0 || ifp->if_rxpoll_bmin > btot) {
				ifp->if_rxpoll_bmin = btot;
			}
			if (btot > ifp->if_rxpoll_bmax) {
				ifp->if_rxpoll_bmax = btot;
			}

			/* Calculate EWMA of inbound bytes */
			DLIL_EWMA(ifp->if_rxpoll_bavg, btot, if_rxpoll_decay);

			/* Calculate min/max of inbound packets */
			ptot = (u_int32_t)ifp->if_poll_sstats.packets;
			if (ifp->if_rxpoll_pmin == 0 || ifp->if_rxpoll_pmin > ptot) {
				ifp->if_rxpoll_pmin = ptot;
			}
			if (ptot > ifp->if_rxpoll_pmax) {
				ifp->if_rxpoll_pmax = ptot;
			}

			/* Calculate EWMA of inbound packets */
			DLIL_EWMA(ifp->if_rxpoll_pavg, ptot, if_rxpoll_decay);

			/* Reset sampling statistics */
			PKTCNTR_CLEAR(&ifp->if_poll_sstats);

			/* Calculate EWMA of wakeup requests */
			DLIL_EWMA(ifp->if_rxpoll_wavg, inp->dlth_wtot,
			    if_rxpoll_decay);
			inp->dlth_wtot = 0;

			if (dlil_verbose) {
				if (!net_timerisset(&ifp->if_poll_dbg_lasttime)) {
					*(&ifp->if_poll_dbg_lasttime) = *(&now);
				}
				net_timersub(&now, &ifp->if_poll_dbg_lasttime, &delta);
				if (net_timercmp(&delta, &dlil_dbgrate, >=)) {
					*(&ifp->if_poll_dbg_lasttime) = *(&now);
					DLIL_PRINTF("%s: [%s] pkts avg %d max %d "
					    "limits [%d/%d], wreq avg %d "
					    "limits [%d/%d], bytes avg %d "
					    "limits [%d/%d]\n", if_name(ifp),
					    (ifp->if_poll_mode ==
					    IFNET_MODEL_INPUT_POLL_ON) ?
					    "ON" : "OFF", ifp->if_rxpoll_pavg,
					    ifp->if_rxpoll_pmax,
					    ifp->if_rxpoll_plowat,
					    ifp->if_rxpoll_phiwat,
					    ifp->if_rxpoll_wavg,
					    ifp->if_rxpoll_wlowat,
					    ifp->if_rxpoll_whiwat,
					    ifp->if_rxpoll_bavg,
					    ifp->if_rxpoll_blowat,
					    ifp->if_rxpoll_bhiwat);
				}
			}

			/* Perform mode transition, if necessary */
			if (!net_timerisset(&ifp->if_poll_mode_lasttime)) {
				*(&ifp->if_poll_mode_lasttime) = *(&now);
			}

			net_timersub(&now, &ifp->if_poll_mode_lasttime, &delta);
			if (net_timercmp(&delta, &ifp->if_poll_mode_holdtime, <)) {
				goto skip;
			}

			if (ifp->if_rxpoll_pavg <= ifp->if_rxpoll_plowat &&
			    ifp->if_rxpoll_bavg <= ifp->if_rxpoll_blowat &&
			    ifp->if_poll_mode != IFNET_MODEL_INPUT_POLL_OFF) {
				mode = IFNET_MODEL_INPUT_POLL_OFF;
			} else if (ifp->if_rxpoll_pavg >= ifp->if_rxpoll_phiwat &&
			    (ifp->if_rxpoll_bavg >= ifp->if_rxpoll_bhiwat ||
			    ifp->if_rxpoll_wavg >= ifp->if_rxpoll_whiwat) &&
			    ifp->if_poll_mode != IFNET_MODEL_INPUT_POLL_ON) {
				mode = IFNET_MODEL_INPUT_POLL_ON;
			}

			if (mode != ifp->if_poll_mode) {
				ifp->if_poll_mode = mode;
				*(&ifp->if_poll_mode_lasttime) = *(&now);
				poll_req++;
			}
		}
skip:
		notify = dlil_input_stats_sync(ifp, inp);

		lck_mtx_unlock(&inp->dlth_lock);

		if (__improbable(embryonic)) {
			ifnet_decr_pending_thread_count(ifp);
		}

		if (__improbable(notify)) {
			ifnet_notify_data_threshold(ifp);
		}

		/*
		 * If there's a mode change and interface is still attached,
		 * perform a downcall to the driver for the new mode.  Also
		 * hold an IO refcnt on the interface to prevent it from
		 * being detached (will be released below.)
		 */
		if (poll_req != 0 && ifnet_is_attached(ifp, 1)) {
			struct ifnet_model_params p = {
				.model = mode, .reserved = { 0 }
			};
			struct timespec ts;
			errno_t err;

			if (dlil_verbose) {
				DLIL_PRINTF("%s: polling is now %s, "
				    "pkts avg %d max %d limits [%d/%d], "
				    "wreq avg %d limits [%d/%d], "
				    "bytes avg %d limits [%d/%d]\n",
				    if_name(ifp),
				    (mode == IFNET_MODEL_INPUT_POLL_ON) ?
				    "ON" : "OFF", ifp->if_rxpoll_pavg,
				    ifp->if_rxpoll_pmax, ifp->if_rxpoll_plowat,
				    ifp->if_rxpoll_phiwat, ifp->if_rxpoll_wavg,
				    ifp->if_rxpoll_wlowat, ifp->if_rxpoll_whiwat,
				    ifp->if_rxpoll_bavg, ifp->if_rxpoll_blowat,
				    ifp->if_rxpoll_bhiwat);
			}

			if ((err = ((*ifp->if_input_ctl)(ifp,
			    IFNET_CTL_SET_INPUT_MODEL, sizeof(p), &p))) != 0) {
				DLIL_PRINTF("%s: error setting polling mode "
				    "to %s (%d)\n", if_name(ifp),
				    (mode == IFNET_MODEL_INPUT_POLL_ON) ?
				    "ON" : "OFF", err);
			}

			switch (mode) {
			case IFNET_MODEL_INPUT_POLL_OFF:
				ifnet_set_poll_cycle(ifp, NULL);
				ifp->if_rxpoll_offreq++;
				if (err != 0) {
					ifp->if_rxpoll_offerr++;
				}
				break;

			case IFNET_MODEL_INPUT_POLL_ON:
				net_nsectimer(&ival, &ts);
				ifnet_set_poll_cycle(ifp, &ts);
				ifp->if_rxpoll_onreq++;
				if (err != 0) {
					ifp->if_rxpoll_onerr++;
				}
				break;

			default:
				VERIFY(0);
				/* NOTREACHED */
			}

			/* Release the IO refcnt */
			ifnet_decr_iorefcnt(ifp);
		}

		/*
		 * NOTE warning %%% attention !!!!
		 * We should think about putting some thread starvation
		 * safeguards if we deal with long chains of packets.
		 */
		if (__probable(m != NULL)) {
			dlil_input_packet_list_extended(NULL, m, m_cnt, mode);
		}

		lck_mtx_lock_spin(&inp->dlth_lock);
		VERIFY(inp->dlth_flags & DLIL_INPUT_RUNNING);
		if (!(inp->dlth_flags & ~(DLIL_INPUT_RUNNING |
		    DLIL_INPUT_TERMINATE))) {
			break;
		}
	}

	inp->dlth_flags &= ~DLIL_INPUT_RUNNING;

terminate:
	if (__improbable(inp->dlth_flags & DLIL_INPUT_TERMINATE)) {
		lck_mtx_unlock(&inp->dlth_lock);
		dlil_terminate_input_thread(inp);
		/* NOTREACHED */
	} else {
		(void) assert_wait(&inp->dlth_flags, THREAD_UNINT);
		lck_mtx_unlock(&inp->dlth_lock);
		(void) thread_block_parameter(dlil_rxpoll_input_thread_cont,
		    inp);
		/* NOTREACHED */
	}

	VERIFY(0);	/* we should never get here */
	/* NOTREACHED */
	__builtin_unreachable();
}
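/*
 * DLIL_EWMA() above keeps exponentially weighted moving averages of the
 * per-sample packet, byte and wakeup counts.  Assuming the decay argument
 * acts as a binary exponent (an assumption; the macro is defined earlier in
 * this file), one update step works out to roughly:
 *
 *	avg' = ((avg << decay) - avg + sample) >> decay
 *
 * e.g. with decay = 2, avg = 100 and sample = 20:
 *
 *	avg' = (400 - 100 + 20) / 4 = 80
 *
 * so each new sample contributes about 1/2^decay of its weight and the
 * history decays geometrically, which is what the poll-mode thresholds rely
 * on to smooth out bursty arrival rates.
 */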
static int
dlil_rxpoll_validate_params(struct ifnet_poll_params *p)
{
	if (p != NULL) {
		if ((p->packets_lowat == 0 && p->packets_hiwat != 0) ||
		    (p->packets_lowat != 0 && p->packets_hiwat == 0)) {
			return EINVAL;
		}
		if (p->packets_lowat != 0 &&	/* hiwat must be non-zero */
		    p->packets_lowat >= p->packets_hiwat) {
			return EINVAL;
		}
		if ((p->bytes_lowat == 0 && p->bytes_hiwat != 0) ||
		    (p->bytes_lowat != 0 && p->bytes_hiwat == 0)) {
			return EINVAL;
		}
		if (p->bytes_lowat != 0 &&	/* hiwat must be non-zero */
		    p->bytes_lowat >= p->bytes_hiwat) {
			return EINVAL;
		}
		if (p->interval_time != 0 &&
		    p->interval_time < IF_RXPOLL_INTERVALTIME_MIN) {
			p->interval_time = IF_RXPOLL_INTERVALTIME_MIN;
		}
	}
	return 0;
}
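/*
 * Illustrative parameter set that passes the validation above (values are
 * made up): each low/high watermark pair is either both zero (auto-tune) or
 * both non-zero with lowat < hiwat, and a non-zero interval is clamped to
 * at least IF_RXPOLL_INTERVALTIME_MIN.
 */
#if 0
	struct ifnet_poll_params p = {
		.packets_lowat = 8,
		.packets_hiwat = 64,
		.bytes_lowat = 0,		/* 0/0 pair: auto-tuned */
		.bytes_hiwat = 0,
		.packets_limit = 0,		/* auto-tuned */
		.interval_time = 1000 * 1000,	/* 1 ms, in nanoseconds */
	};
#endif /* 0 */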
static void
dlil_rxpoll_update_params(struct ifnet *ifp, struct ifnet_poll_params *p)
{
	u_int64_t sample_holdtime, inbw;

	if ((inbw = ifnet_input_linkrate(ifp)) == 0 && p == NULL) {
		sample_holdtime = 0;	/* polling is disabled */
		ifp->if_rxpoll_wlowat = ifp->if_rxpoll_plowat =
		    ifp->if_rxpoll_blowat = 0;
		ifp->if_rxpoll_whiwat = ifp->if_rxpoll_phiwat =
		    ifp->if_rxpoll_bhiwat = (u_int32_t)-1;
		ifp->if_rxpoll_plim = 0;
		ifp->if_rxpoll_ival = IF_RXPOLL_INTERVALTIME_MIN;
	} else {
		u_int32_t plowat, phiwat, blowat, bhiwat, plim;
		u_int64_t ival;
		unsigned int n, i;

		for (n = 0, i = 0; rxpoll_tbl[i].speed != 0; i++) {
			if (inbw < rxpoll_tbl[i].speed) {
				break;
			}
			n = i;
		}

		/* auto-tune if caller didn't specify a value */
		plowat = ((p == NULL || p->packets_lowat == 0) ?
		    rxpoll_tbl[n].plowat : p->packets_lowat);
		phiwat = ((p == NULL || p->packets_hiwat == 0) ?
		    rxpoll_tbl[n].phiwat : p->packets_hiwat);
		blowat = ((p == NULL || p->bytes_lowat == 0) ?
		    rxpoll_tbl[n].blowat : p->bytes_lowat);
		bhiwat = ((p == NULL || p->bytes_hiwat == 0) ?
		    rxpoll_tbl[n].bhiwat : p->bytes_hiwat);
		plim = ((p == NULL || p->packets_limit == 0) ?
		    if_rxpoll_max : p->packets_limit);
		ival = ((p == NULL || p->interval_time == 0) ?
		    if_rxpoll_interval_time : p->interval_time);

		VERIFY(plowat != 0 && phiwat != 0);
		VERIFY(blowat != 0 && bhiwat != 0);
		VERIFY(ival >= IF_RXPOLL_INTERVALTIME_MIN);

		sample_holdtime = if_rxpoll_sample_holdtime;
		ifp->if_rxpoll_wlowat = if_sysctl_rxpoll_wlowat;
		ifp->if_rxpoll_whiwat = if_sysctl_rxpoll_whiwat;
		ifp->if_rxpoll_plowat = plowat;
		ifp->if_rxpoll_phiwat = phiwat;
		ifp->if_rxpoll_blowat = blowat;
		ifp->if_rxpoll_bhiwat = bhiwat;
		ifp->if_rxpoll_plim = plim;
		ifp->if_rxpoll_ival = ival;
	}

	net_nsectimer(&if_rxpoll_mode_holdtime, &ifp->if_poll_mode_holdtime);
	net_nsectimer(&sample_holdtime, &ifp->if_poll_sample_holdtime);

	if (dlil_verbose) {
		DLIL_PRINTF("%s: speed %llu bps, sample per %llu nsec, "
		    "poll interval %llu nsec, pkts per poll %u, "
		    "pkt limits [%u/%u], wreq limits [%u/%u], "
		    "bytes limits [%u/%u]\n", if_name(ifp),
		    inbw, sample_holdtime, ifp->if_rxpoll_ival,
		    ifp->if_rxpoll_plim, ifp->if_rxpoll_plowat,
		    ifp->if_rxpoll_phiwat, ifp->if_rxpoll_wlowat,
		    ifp->if_rxpoll_whiwat, ifp->if_rxpoll_blowat,
		    ifp->if_rxpoll_bhiwat);
	}
}
/*
 * Must be called on an attached ifnet (caller is expected to check.)
 * Caller may pass NULL for poll parameters to indicate "auto-tuning."
 */
errno_t
dlil_rxpoll_set_params(struct ifnet *ifp, struct ifnet_poll_params *p,
    boolean_t locked)
{
	errno_t err;
	struct dlil_threading_info *inp;

	VERIFY(ifp != NULL);
	if (!(ifp->if_eflags & IFEF_RXPOLL) || (inp = ifp->if_inp) == NULL) {
		return ENXIO;
	}
	err = dlil_rxpoll_validate_params(p);
	if (err != 0) {
		return err;
	}

	if (!locked) {
		lck_mtx_lock(&inp->dlth_lock);
	}
	LCK_MTX_ASSERT(&inp->dlth_lock, LCK_MTX_ASSERT_OWNED);
	/*
	 * Normally, we'd reset the parameters to the auto-tuned values
	 * if the input thread detects a change in link rate.  If the
	 * driver provides its own parameters right after a link rate
	 * changes, but before the input thread gets to run, we want to
	 * make sure to keep the driver's values.  Clearing if_poll_update
	 * will achieve that.
	 */
	if (p != NULL && !locked && ifp->if_poll_update != 0) {
		ifp->if_poll_update = 0;
	}
	dlil_rxpoll_update_params(ifp, p);
	if (!locked) {
		lck_mtx_unlock(&inp->dlth_lock);
	}
	return 0;
}
2794 dlil_rxpoll_get_params(struct ifnet
*ifp
, struct ifnet_poll_params
*p
)
2796 struct dlil_threading_info
*inp
;
2798 VERIFY(ifp
!= NULL
&& p
!= NULL
);
2799 if (!(ifp
->if_eflags
& IFEF_RXPOLL
) || (inp
= ifp
->if_inp
) == NULL
) {
2803 bzero(p
, sizeof(*p
));
2805 lck_mtx_lock(&inp
->dlth_lock
);
2806 p
->packets_limit
= ifp
->if_rxpoll_plim
;
2807 p
->packets_lowat
= ifp
->if_rxpoll_plowat
;
2808 p
->packets_hiwat
= ifp
->if_rxpoll_phiwat
;
2809 p
->bytes_lowat
= ifp
->if_rxpoll_blowat
;
2810 p
->bytes_hiwat
= ifp
->if_rxpoll_bhiwat
;
2811 p
->interval_time
= ifp
->if_rxpoll_ival
;
2812 lck_mtx_unlock(&inp
->dlth_lock
);
errno_t
ifnet_input(struct ifnet *ifp, struct mbuf *m_head,
    const struct ifnet_stat_increment_param *s)
{
	return ifnet_input_common(ifp, m_head, NULL, s, FALSE, FALSE);
}

errno_t
ifnet_input_extended(struct ifnet *ifp, struct mbuf *m_head,
    struct mbuf *m_tail, const struct ifnet_stat_increment_param *s)
{
	return ifnet_input_common(ifp, m_head, m_tail, s, TRUE, FALSE);
}

errno_t
ifnet_input_poll(struct ifnet *ifp, struct mbuf *m_head,
    struct mbuf *m_tail, const struct ifnet_stat_increment_param *s)
{
	return ifnet_input_common(ifp, m_head, m_tail, s,
	    (m_head != NULL), TRUE);
}
static errno_t
ifnet_input_common(struct ifnet *ifp, struct mbuf *m_head, struct mbuf *m_tail,
    const struct ifnet_stat_increment_param *s, boolean_t ext, boolean_t poll)
{
	dlil_input_func input_func;
	struct ifnet_stat_increment_param _s;
	u_int32_t m_cnt = 0, m_size = 0;
	struct mbuf *last;
	errno_t err = 0;

	if ((m_head == NULL && !poll) || (s == NULL && ext)) {
		if (m_head != NULL) {
			mbuf_freem_list(m_head);
		}
		return EINVAL;
	}

	VERIFY(m_head != NULL || (s == NULL && m_tail == NULL && !ext && poll));
	VERIFY(m_tail == NULL || ext);
	VERIFY(s != NULL || !ext);

	/*
	 * Drop the packet(s) if the parameters are invalid, or if the
	 * interface is no longer attached; else hold an IO refcnt to
	 * prevent it from being detached (will be released below.)
	 */
	if (ifp == NULL || (ifp != lo_ifp && !ifnet_datamov_begin(ifp))) {
		if (m_head != NULL) {
			mbuf_freem_list(m_head);
		}
		return EINVAL;
	}

	input_func = ifp->if_input_dlil;
	VERIFY(input_func != NULL);

	if (m_tail == NULL) {
		last = m_head;
		while (m_head != NULL) {
#if IFNET_INPUT_SANITY_CHK
			if (__improbable(dlil_input_sanity_check != 0)) {
				DLIL_INPUT_CHECK(last, ifp);
			}
#endif /* IFNET_INPUT_SANITY_CHK */
			m_cnt++;
			m_size += m_length(last);
			if (mbuf_nextpkt(last) == NULL) {
				break;
			}
			last = mbuf_nextpkt(last);
		}
		m_tail = last;
	} else {
#if IFNET_INPUT_SANITY_CHK
		if (__improbable(dlil_input_sanity_check != 0)) {
			last = m_head;
			while (1) {
				DLIL_INPUT_CHECK(last, ifp);
				m_cnt++;
				m_size += m_length(last);
				if (mbuf_nextpkt(last) == NULL) {
					break;
				}
				last = mbuf_nextpkt(last);
			}
		} else {
			m_cnt = s->packets_in;
			m_size = s->bytes_in;
			last = m_tail;
		}
#else
		m_cnt = s->packets_in;
		m_size = s->bytes_in;
		last = m_tail;
#endif /* IFNET_INPUT_SANITY_CHK */
	}

	if (last != m_tail) {
		panic_plain("%s: invalid input packet chain for %s, "
		    "tail mbuf %p instead of %p\n", __func__, if_name(ifp),
		    m_tail, last);
	}

	/*
	 * Assert packet count only for the extended variant, for backwards
	 * compatibility, since this came directly from the device driver.
	 * Relax this assertion for input bytes, as the driver may have
	 * included the link-layer headers in the computation; hence
	 * m_size is just an approximation.
	 */
	if (ext && s->packets_in != m_cnt) {
		panic_plain("%s: input packet count mismatch for %s, "
		    "%d instead of %d\n", __func__, if_name(ifp),
		    s->packets_in, m_cnt);
	}

	if (s == NULL) {
		bzero(&_s, sizeof(_s));
		s = &_s;
	}
	_s.packets_in = m_cnt;
	_s.bytes_in = m_size;

	err = (*input_func)(ifp, m_head, m_tail, s, poll, current_thread());

	if (ifp != lo_ifp) {
		/* Release the IO refcnt */
		ifnet_datamov_end(ifp);
	}

	return err;
}
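/*
 * From a driver's point of view the usual way into the function above is
 * ifnet_input_extended(): hand over a whole chain of received packets along
 * with the counts the chain-walk would otherwise have to recompute.  A
 * hedged sketch of such a call site (rx_head, rx_tail, rx_count and rx_bytes
 * are hypothetical per-batch variables owned by the driver):
 */
#if 0
	struct ifnet_stat_increment_param stats;

	bzero(&stats, sizeof(stats));
	stats.packets_in = rx_count;	/* hypothetical counters */
	stats.bytes_in = rx_bytes;
	(void) ifnet_input_extended(ifp, rx_head, rx_tail, &stats);
#endif /* 0 */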
static errno_t
dlil_output_handler(struct ifnet *ifp, struct mbuf *m)
{
	return ifp->if_output(ifp, m);
}
static errno_t
dlil_input_handler(struct ifnet *ifp, struct mbuf *m_head,
    struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
    boolean_t poll, struct thread *tp)
{
	struct dlil_threading_info *inp = ifp->if_inp;

	if (__improbable(inp == NULL)) {
		inp = dlil_main_input_thread;
	}

	return inp->dlth_strategy(inp, ifp, m_head, m_tail, s, poll, tp);
}
static errno_t
dlil_input_async(struct dlil_threading_info *inp,
    struct ifnet *ifp, struct mbuf *m_head, struct mbuf *m_tail,
    const struct ifnet_stat_increment_param *s, boolean_t poll,
    struct thread *tp)
{
	u_int32_t m_cnt = s->packets_in;
	u_int32_t m_size = s->bytes_in;
	boolean_t notify = FALSE;

	/*
	 * If there is a matching DLIL input thread associated with an
	 * affinity set, associate this thread with the same set.  We
	 * will only do this once.
	 */
	lck_mtx_lock_spin(&inp->dlth_lock);
	if (inp != dlil_main_input_thread && inp->dlth_affinity && tp != NULL &&
	    ((!poll && inp->dlth_driver_thread == THREAD_NULL) ||
	    (poll && inp->dlth_poller_thread == THREAD_NULL))) {
		u_int32_t tag = inp->dlth_affinity_tag;

		if (poll) {
			VERIFY(inp->dlth_poller_thread == THREAD_NULL);
			inp->dlth_poller_thread = tp;
		} else {
			VERIFY(inp->dlth_driver_thread == THREAD_NULL);
			inp->dlth_driver_thread = tp;
		}
		lck_mtx_unlock(&inp->dlth_lock);

		/* Associate the current thread with the new affinity tag */
		(void) dlil_affinity_set(tp, tag);

		/*
		 * Take a reference on the current thread; during detach,
		 * we will need to refer to it in order to tear down its
		 * affinity.
		 */
		thread_reference(tp);
		lck_mtx_lock_spin(&inp->dlth_lock);
	}

	VERIFY(m_head != NULL || (m_tail == NULL && m_cnt == 0));

	/*
	 * Because of loopbacked multicast we cannot stuff the ifp in
	 * the rcvif of the packet header: loopback (lo0) packets use a
	 * dedicated list so that we can later associate them with lo_ifp
	 * on their way up the stack.  Packets for other interfaces without
	 * dedicated input threads go to the regular list.
	 */
	if (m_head != NULL) {
		classq_pkt_t head, tail;
		CLASSQ_PKT_INIT_MBUF(&head, m_head);
		CLASSQ_PKT_INIT_MBUF(&tail, m_tail);
		if (inp == dlil_main_input_thread && ifp == lo_ifp) {
			struct dlil_main_threading_info *inpm =
			    (struct dlil_main_threading_info *)inp;
			_addq_multi(&inpm->lo_rcvq_pkts, &head, &tail,
			    m_cnt, m_size);
		} else {
			_addq_multi(&inp->dlth_pkts, &head, &tail,
			    m_cnt, m_size);
		}
	}

#if IFNET_INPUT_SANITY_CHK
	if (__improbable(dlil_input_sanity_check != 0)) {
		u_int32_t count = 0, size = 0;
		struct mbuf *m0;

		for (m0 = m_head; m0; m0 = mbuf_nextpkt(m0)) {
			size += m_length(m0);
			count++;
		}

		if (count != m_cnt) {
			panic_plain("%s: invalid total packet count %u "
			    "(expected %u)\n", if_name(ifp), count, m_cnt);
			/* NOTREACHED */
			__builtin_unreachable();
		} else if (size != m_size) {
			panic_plain("%s: invalid total packet size %u "
			    "(expected %u)\n", if_name(ifp), size, m_size);
			/* NOTREACHED */
			__builtin_unreachable();
		}

		inp->dlth_pkts_cnt += m_cnt;
	}
#endif /* IFNET_INPUT_SANITY_CHK */

	dlil_input_stats_add(s, inp, ifp, poll);
	/*
	 * If we're using the main input thread, synchronize the
	 * stats now since we have the interface context.  All
	 * other cases involving dedicated input threads will
	 * have their stats synchronized there.
	 */
	if (inp == dlil_main_input_thread) {
		notify = dlil_input_stats_sync(ifp, inp);
	}

	dlil_input_wakeup(inp);
	lck_mtx_unlock(&inp->dlth_lock);

	if (notify) {
		ifnet_notify_data_threshold(ifp);
	}

	return 0;
}
static errno_t
dlil_input_sync(struct dlil_threading_info *inp,
    struct ifnet *ifp, struct mbuf *m_head, struct mbuf *m_tail,
    const struct ifnet_stat_increment_param *s, boolean_t poll,
    struct thread *tp)
{
	u_int32_t m_cnt = s->packets_in;
	u_int32_t m_size = s->bytes_in;
	boolean_t notify = FALSE;
	classq_pkt_t head, tail;

	ASSERT(inp != dlil_main_input_thread);

	/* XXX: should we just assert instead? */
	if (__improbable(m_head == NULL)) {
		return 0;
	}

	CLASSQ_PKT_INIT_MBUF(&head, m_head);
	CLASSQ_PKT_INIT_MBUF(&tail, m_tail);

	lck_mtx_lock_spin(&inp->dlth_lock);
	_addq_multi(&inp->dlth_pkts, &head, &tail, m_cnt, m_size);

#if IFNET_INPUT_SANITY_CHK
	if (__improbable(dlil_input_sanity_check != 0)) {
		u_int32_t count = 0, size = 0;
		struct mbuf *m0;

		for (m0 = m_head; m0; m0 = mbuf_nextpkt(m0)) {
			size += m_length(m0);
			count++;
		}

		if (count != m_cnt) {
			panic_plain("%s: invalid total packet count %u "
			    "(expected %u)\n", if_name(ifp), count, m_cnt);
			/* NOTREACHED */
			__builtin_unreachable();
		} else if (size != m_size) {
			panic_plain("%s: invalid total packet size %u "
			    "(expected %u)\n", if_name(ifp), size, m_size);
			/* NOTREACHED */
			__builtin_unreachable();
		}

		inp->dlth_pkts_cnt += m_cnt;
	}
#endif /* IFNET_INPUT_SANITY_CHK */

	dlil_input_stats_add(s, inp, ifp, poll);

	m_cnt = qlen(&inp->dlth_pkts);
	_getq_all(&inp->dlth_pkts, &head, NULL, NULL, NULL);

	notify = dlil_input_stats_sync(ifp, inp);

	lck_mtx_unlock(&inp->dlth_lock);

	if (notify) {
		ifnet_notify_data_threshold(ifp);
	}

	/*
	 * NOTE warning %%% attention !!!!
	 * We should think about putting some thread starvation
	 * safeguards if we deal with long chains of packets.
	 */
	if (head.cp_mbuf != NULL) {
		dlil_input_packet_list_extended(NULL, head.cp_mbuf,
		    m_cnt, ifp->if_poll_mode);
	}

	return 0;
}
static void
ifnet_start_common(struct ifnet *ifp, boolean_t resetfc)
{
	if (!(ifp->if_eflags & IFEF_TXSTART)) {
		return;
	}
	/*
	 * If the starter thread is inactive, signal it to do work,
	 * unless the interface is being flow controlled from below,
	 * e.g. a virtual interface being flow controlled by a real
	 * network interface beneath it, or it's been disabled via
	 * a call to ifnet_disable_output().
	 */
	lck_mtx_lock_spin(&ifp->if_start_lock);
	if (resetfc) {
		ifp->if_start_flags &= ~IFSF_FLOW_CONTROLLED;
	} else if (ifp->if_start_flags & IFSF_FLOW_CONTROLLED) {
		lck_mtx_unlock(&ifp->if_start_lock);
		return;
	}
	ifp->if_start_req++;
	if (!ifp->if_start_active && ifp->if_start_thread != THREAD_NULL &&
	    (resetfc || !(ifp->if_eflags & IFEF_ENQUEUE_MULTI) ||
	    IFCQ_LEN(&ifp->if_snd) >= ifp->if_start_delay_qlen ||
	    ifp->if_start_delayed == 0)) {
		(void) wakeup_one((caddr_t)&ifp->if_start_thread);
	}
	lck_mtx_unlock(&ifp->if_start_lock);
}
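/*
 * Drivers normally reach this path through the ifnet_start() wrapper that
 * follows, typically from a transmit-completion context once descriptors
 * have been reclaimed, so the starter thread can dequeue more packets.
 * A minimal, hypothetical call site:
 */
#if 0
	/* TX ring drained below its restart threshold: kick the starter. */
	ifnet_start(ifp);
#endif /* 0 */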
void
ifnet_start(struct ifnet *ifp)
{
	ifnet_start_common(ifp, FALSE);
}
__attribute__((noreturn))
static void
ifnet_start_thread_func(void *v, wait_result_t w)
{
	struct ifnet *ifp = v;
	char thread_name[MAXTHREADNAMESIZE];

	/* Construct the name for this thread, and then apply it. */
	bzero(thread_name, sizeof(thread_name));
	(void) snprintf(thread_name, sizeof(thread_name),
	    "ifnet_start_%s", ifp->if_xname);
	ASSERT(ifp->if_start_thread == current_thread());
	thread_set_thread_name(current_thread(), thread_name);

	/*
	 * Treat the dedicated starter thread for lo0 as equivalent to
	 * the driver workloop thread; if net_affinity is enabled for
	 * the main input thread, associate this starter thread to it
	 * by binding them with the same affinity tag.  This is done
	 * only once (as we only have one lo_ifp which never goes away.)
	 */
	if (ifp == lo_ifp) {
		struct dlil_threading_info *inp = dlil_main_input_thread;
		struct thread *tp = current_thread();

		lck_mtx_lock(&inp->dlth_lock);
		if (inp->dlth_affinity) {
			u_int32_t tag = inp->dlth_affinity_tag;

			VERIFY(inp->dlth_driver_thread == THREAD_NULL);
			VERIFY(inp->dlth_poller_thread == THREAD_NULL);
			inp->dlth_driver_thread = tp;
			lck_mtx_unlock(&inp->dlth_lock);

			/* Associate this thread with the affinity tag */
			(void) dlil_affinity_set(tp, tag);
		} else {
			lck_mtx_unlock(&inp->dlth_lock);
		}
	}

	lck_mtx_lock(&ifp->if_start_lock);
	VERIFY(!ifp->if_start_embryonic && !ifp->if_start_active);
	(void) assert_wait(&ifp->if_start_thread, THREAD_UNINT);
	ifp->if_start_embryonic = 1;
	/* wake up once to get out of embryonic state */
	ifp->if_start_req++;
	(void) wakeup_one((caddr_t)&ifp->if_start_thread);
	lck_mtx_unlock(&ifp->if_start_lock);
	(void) thread_block_parameter(ifnet_start_thread_cont, ifp);
	/* NOTREACHED */
	__builtin_unreachable();
}
__attribute__((noreturn))
static void
ifnet_start_thread_cont(void *v, wait_result_t wres)
{
	struct ifnet *ifp = v;
	struct ifclassq *ifq = &ifp->if_snd;

	lck_mtx_lock_spin(&ifp->if_start_lock);
	if (__improbable(wres == THREAD_INTERRUPTED ||
	    ifp->if_start_thread == THREAD_NULL)) {
		goto terminate;
	}

	if (__improbable(ifp->if_start_embryonic)) {
		ifp->if_start_embryonic = 0;
		lck_mtx_unlock(&ifp->if_start_lock);
		ifnet_decr_pending_thread_count(ifp);
		lck_mtx_lock_spin(&ifp->if_start_lock);
		goto skip;
	}

	ifp->if_start_active = 1;

	/*
	 * Keep on servicing until no more request.
	 */
	for (;;) {
		u_int32_t req = ifp->if_start_req;
		if (!IFCQ_IS_EMPTY(ifq) &&
		    (ifp->if_eflags & IFEF_ENQUEUE_MULTI) &&
		    ifp->if_start_delayed == 0 &&
		    IFCQ_LEN(ifq) < ifp->if_start_delay_qlen &&
		    (ifp->if_eflags & IFEF_DELAY_START)) {
			ifp->if_start_delayed = 1;
			ifnet_start_delayed++;
			break;
		} else {
			ifp->if_start_delayed = 0;
		}
		lck_mtx_unlock(&ifp->if_start_lock);

		/*
		 * If no longer attached, don't call start because ifp
		 * is being destroyed; else hold an IO refcnt to
		 * prevent the interface from being detached (will be
		 * released below.)
		 */
		if (!ifnet_datamov_begin(ifp)) {
			lck_mtx_lock_spin(&ifp->if_start_lock);
			break;
		}

		/* invoke the driver's start routine */
		((*ifp->if_start)(ifp));

		/*
		 * Release the io ref count taken above.
		 */
		ifnet_datamov_end(ifp);

		lck_mtx_lock_spin(&ifp->if_start_lock);

		/*
		 * If there's no pending request or if the
		 * interface has been disabled, we're done.
		 */
		if (req == ifp->if_start_req ||
		    (ifp->if_start_flags & IFSF_FLOW_CONTROLLED)) {
			break;
		}
	}

skip:
	ifp->if_start_req = 0;
	ifp->if_start_active = 0;

	if (__probable(ifp->if_start_thread != THREAD_NULL)) {
		uint64_t deadline = TIMEOUT_WAIT_FOREVER;
		struct timespec delay_start_ts;
		struct timespec *ts;

		/*
		 * Wakeup N ns from now if rate-controlled by TBR, and if
		 * there are still packets in the send queue which haven't
		 * been dequeued so far; else sleep indefinitely (ts = NULL)
		 * until ifnet_start() is called again.
		 */
		ts = ((IFCQ_TBR_IS_ENABLED(ifq) && !IFCQ_IS_EMPTY(ifq)) ?
		    &ifp->if_start_cycle : NULL);

		if (ts == NULL && ifp->if_start_delayed == 1) {
			delay_start_ts.tv_sec = 0;
			delay_start_ts.tv_nsec = ifp->if_start_delay_timeout;
			ts = &delay_start_ts;
		}

		if (ts != NULL && ts->tv_sec == 0 && ts->tv_nsec == 0) {
			ts = NULL;
		}

		if (__improbable(ts != NULL)) {
			clock_interval_to_deadline((uint32_t)(ts->tv_nsec +
			    (ts->tv_sec * NSEC_PER_SEC)), 1, &deadline);
		}

		(void) assert_wait_deadline(&ifp->if_start_thread,
		    THREAD_UNINT, deadline);
		lck_mtx_unlock(&ifp->if_start_lock);
		(void) thread_block_parameter(ifnet_start_thread_cont, ifp);
		/* NOTREACHED */
	} else {
terminate:
		/* interface is detached? */
		ifnet_set_start_cycle(ifp, NULL);
		lck_mtx_unlock(&ifp->if_start_lock);

		if (dlil_verbose) {
			DLIL_PRINTF("%s: starter thread terminated\n",
			    if_name(ifp));
		}

		/* for the extra refcnt from kernel_thread_start() */
		thread_deallocate(current_thread());
		/* this is the end */
		thread_terminate(current_thread());
		/* NOTREACHED */
	}

	/* must never get here */
	VERIFY(0);
	/* NOTREACHED */
	__builtin_unreachable();
}
void
ifnet_set_start_cycle(struct ifnet *ifp, struct timespec *ts)
{
	if (ts == NULL) {
		bzero(&ifp->if_start_cycle, sizeof(ifp->if_start_cycle));
	} else {
		*(&ifp->if_start_cycle) = *ts;
	}

	if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose) {
		DLIL_PRINTF("%s: restart interval set to %lu nsec\n",
		    if_name(ifp), ts->tv_nsec);
	}
}
static inline void
ifnet_poll_wakeup(struct ifnet *ifp)
{
	LCK_MTX_ASSERT(&ifp->if_poll_lock, LCK_MTX_ASSERT_OWNED);

	ifp->if_poll_req++;
	if (!(ifp->if_poll_flags & IF_POLLF_RUNNING) &&
	    ifp->if_poll_thread != THREAD_NULL) {
		wakeup_one((caddr_t)&ifp->if_poll_thread);
	}
}

static void
ifnet_poll(struct ifnet *ifp)
{
	/*
	 * If the poller thread is inactive, signal it to do work.
	 */
	lck_mtx_lock_spin(&ifp->if_poll_lock);
	ifnet_poll_wakeup(ifp);
	lck_mtx_unlock(&ifp->if_poll_lock);
}
__attribute__((noreturn))
static void
ifnet_poll_thread_func(void *v, wait_result_t w)
{
	char thread_name[MAXTHREADNAMESIZE];
	struct ifnet *ifp = v;

	VERIFY(ifp->if_eflags & IFEF_RXPOLL);
	VERIFY(current_thread() == ifp->if_poll_thread);

	/* construct the name for this thread, and then apply it */
	bzero(thread_name, sizeof(thread_name));
	(void) snprintf(thread_name, sizeof(thread_name),
	    "ifnet_poller_%s", ifp->if_xname);
	thread_set_thread_name(ifp->if_poll_thread, thread_name);

	lck_mtx_lock(&ifp->if_poll_lock);
	VERIFY(!(ifp->if_poll_flags & (IF_POLLF_EMBRYONIC | IF_POLLF_RUNNING)));
	(void) assert_wait(&ifp->if_poll_thread, THREAD_UNINT);
	ifp->if_poll_flags |= IF_POLLF_EMBRYONIC;
	/* wake up once to get out of embryonic state */
	ifnet_poll_wakeup(ifp);
	lck_mtx_unlock(&ifp->if_poll_lock);
	(void) thread_block_parameter(ifnet_poll_thread_cont, ifp);
	/* NOTREACHED */
	__builtin_unreachable();
}
__attribute__((noreturn))
static void
ifnet_poll_thread_cont(void *v, wait_result_t wres)
{
	struct dlil_threading_info *inp;
	struct ifnet *ifp = v;
	struct ifnet_stat_increment_param s;
	struct timespec start_time;

	VERIFY(ifp->if_eflags & IFEF_RXPOLL);

	bzero(&s, sizeof(s));
	net_timerclear(&start_time);

	lck_mtx_lock_spin(&ifp->if_poll_lock);
	if (__improbable(wres == THREAD_INTERRUPTED ||
	    ifp->if_poll_thread == THREAD_NULL)) {
		goto terminate;
	}

	inp = ifp->if_inp;
	VERIFY(inp != NULL);

	if (__improbable(ifp->if_poll_flags & IF_POLLF_EMBRYONIC)) {
		ifp->if_poll_flags &= ~IF_POLLF_EMBRYONIC;
		lck_mtx_unlock(&ifp->if_poll_lock);
		ifnet_decr_pending_thread_count(ifp);
		lck_mtx_lock_spin(&ifp->if_poll_lock);
		goto skip;
	}

	ifp->if_poll_flags |= IF_POLLF_RUNNING;

	/*
	 * Keep on servicing until no more request.
	 */
	for (;;) {
		struct mbuf *m_head, *m_tail;
		u_int32_t m_lim, m_cnt, m_totlen;
		u_int16_t req = ifp->if_poll_req;

		m_lim = (ifp->if_rxpoll_plim != 0) ? ifp->if_rxpoll_plim :
		    MAX((qlimit(&inp->dlth_pkts)), (ifp->if_rxpoll_phiwat << 2));
		lck_mtx_unlock(&ifp->if_poll_lock);

		/*
		 * If no longer attached, there's nothing to do;
		 * else hold an IO refcnt to prevent the interface
		 * from being detached (will be released below.)
		 */
		if (!ifnet_is_attached(ifp, 1)) {
			lck_mtx_lock_spin(&ifp->if_poll_lock);
			break;
		}

		if (dlil_verbose > 1) {
			DLIL_PRINTF("%s: polling up to %d pkts, "
			    "pkts avg %d max %d, wreq avg %d, "
			    "bytes avg %d\n",
			    if_name(ifp), m_lim,
			    ifp->if_rxpoll_pavg, ifp->if_rxpoll_pmax,
			    ifp->if_rxpoll_wavg, ifp->if_rxpoll_bavg);
		}

		/* invoke the driver's input poll routine */
		((*ifp->if_input_poll)(ifp, 0, m_lim, &m_head, &m_tail,
		    &m_cnt, &m_totlen));

		if (m_head != NULL) {
			VERIFY(m_tail != NULL && m_cnt > 0);

			if (dlil_verbose > 1) {
				DLIL_PRINTF("%s: polled %d pkts, "
				    "pkts avg %d max %d, wreq avg %d, "
				    "bytes avg %d\n",
				    if_name(ifp), m_cnt,
				    ifp->if_rxpoll_pavg, ifp->if_rxpoll_pmax,
				    ifp->if_rxpoll_wavg, ifp->if_rxpoll_bavg);
			}

			/* stats are required for extended variant */
			s.packets_in = m_cnt;
			s.bytes_in = m_totlen;

			(void) ifnet_input_common(ifp, m_head, m_tail,
			    &s, TRUE, TRUE);
		} else {
			if (dlil_verbose > 1) {
				DLIL_PRINTF("%s: no packets, "
				    "pkts avg %d max %d, wreq avg %d, "
				    "bytes avg %d\n",
				    if_name(ifp), ifp->if_rxpoll_pavg,
				    ifp->if_rxpoll_pmax, ifp->if_rxpoll_wavg,
				    ifp->if_rxpoll_bavg);
			}

			(void) ifnet_input_common(ifp, NULL, NULL,
			    NULL, FALSE, TRUE);
		}

		/* Release the io ref count */
		ifnet_decr_iorefcnt(ifp);

		lck_mtx_lock_spin(&ifp->if_poll_lock);

		/* if there's no pending request, we're done */
		if (req == ifp->if_poll_req ||
		    ifp->if_poll_thread == THREAD_NULL) {
			break;
		}
	}

skip:
	ifp->if_poll_req = 0;
	ifp->if_poll_flags &= ~IF_POLLF_RUNNING;

	if (ifp->if_poll_thread != THREAD_NULL) {
		uint64_t deadline = TIMEOUT_WAIT_FOREVER;
		struct timespec *ts;

		/*
		 * Wakeup N ns from now, else sleep indefinitely (ts = NULL)
		 * until ifnet_poll() is called again.
		 */
		ts = &ifp->if_poll_cycle;
		if (ts->tv_sec == 0 && ts->tv_nsec == 0) {
			ts = NULL;
		}

		if (ts != NULL) {
			clock_interval_to_deadline((uint32_t)(ts->tv_nsec +
			    (ts->tv_sec * NSEC_PER_SEC)), 1, &deadline);
		}

		(void) assert_wait_deadline(&ifp->if_poll_thread,
		    THREAD_UNINT, deadline);
		lck_mtx_unlock(&ifp->if_poll_lock);
		(void) thread_block_parameter(ifnet_poll_thread_cont, ifp);
		/* NOTREACHED */
	} else {
terminate:
		/* interface is detached (maybe while asleep)? */
		ifnet_set_poll_cycle(ifp, NULL);
		lck_mtx_unlock(&ifp->if_poll_lock);

		if (dlil_verbose) {
			DLIL_PRINTF("%s: poller thread terminated\n",
			    if_name(ifp));
		}

		/* for the extra refcnt from kernel_thread_start() */
		thread_deallocate(current_thread());
		/* this is the end */
		thread_terminate(current_thread());
		/* NOTREACHED */
	}

	/* must never get here */
	VERIFY(0);
	/* NOTREACHED */
	__builtin_unreachable();
}
void
ifnet_set_poll_cycle(struct ifnet *ifp, struct timespec *ts)
{
	if (ts == NULL) {
		bzero(&ifp->if_poll_cycle, sizeof(ifp->if_poll_cycle));
	} else {
		*(&ifp->if_poll_cycle) = *ts;
	}

	if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose) {
		DLIL_PRINTF("%s: poll interval set to %lu nsec\n",
		    if_name(ifp), ts->tv_nsec);
	}
}
void
ifnet_purge(struct ifnet *ifp)
{
	if (ifp != NULL && (ifp->if_eflags & IFEF_TXSTART)) {
		if_qflush(ifp, 0);
	}
}
void
ifnet_update_sndq(struct ifclassq *ifq, cqev_t ev)
{
	IFCQ_LOCK_ASSERT_HELD(ifq);

	if (!(IFCQ_IS_READY(ifq))) {
		return;
	}

	if (IFCQ_TBR_IS_ENABLED(ifq)) {
		struct tb_profile tb = {
			.rate = ifq->ifcq_tbr.tbr_rate_raw,
			.percent = ifq->ifcq_tbr.tbr_percent, .depth = 0
		};
		(void) ifclassq_tbr_set(ifq, &tb, FALSE);
	}

	ifclassq_update(ifq, ev);
}
void
ifnet_update_rcv(struct ifnet *ifp, cqev_t ev)
{
	switch (ev) {
	case CLASSQ_EV_LINK_BANDWIDTH:
		if (net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) {
			ifp->if_poll_update++;
		}
		break;

	default:
		break;
	}
}
errno_t
ifnet_set_output_sched_model(struct ifnet *ifp, u_int32_t model)
{
	struct ifclassq *ifq;
	u_int32_t omodel;
	errno_t err;

	if (ifp == NULL || model >= IFNET_SCHED_MODEL_MAX) {
		return EINVAL;
	} else if (!(ifp->if_eflags & IFEF_TXSTART)) {
		return ENXIO;
	}

	ifq = &ifp->if_snd;
	IFCQ_LOCK(ifq);
	omodel = ifp->if_output_sched_model;
	ifp->if_output_sched_model = model;
	if ((err = ifclassq_pktsched_setup(ifq)) != 0) {
		ifp->if_output_sched_model = omodel;
	}
	IFCQ_UNLOCK(ifq);

	return err;
}
errno_t
ifnet_set_sndq_maxlen(struct ifnet *ifp, u_int32_t maxqlen)
{
	if (ifp == NULL) {
		return EINVAL;
	} else if (!(ifp->if_eflags & IFEF_TXSTART)) {
		return ENXIO;
	}

	ifclassq_set_maxlen(&ifp->if_snd, maxqlen);

	return 0;
}

errno_t
ifnet_get_sndq_maxlen(struct ifnet *ifp, u_int32_t *maxqlen)
{
	if (ifp == NULL || maxqlen == NULL) {
		return EINVAL;
	} else if (!(ifp->if_eflags & IFEF_TXSTART)) {
		return ENXIO;
	}

	*maxqlen = ifclassq_get_maxlen(&ifp->if_snd);

	return 0;
}
errno_t
ifnet_get_sndq_len(struct ifnet *ifp, u_int32_t *pkts)
{
	errno_t err;

	if (ifp == NULL || pkts == NULL) {
		err = EINVAL;
	} else if (!(ifp->if_eflags & IFEF_TXSTART)) {
		err = ENXIO;
	} else {
		err = ifclassq_get_len(&ifp->if_snd, MBUF_SC_UNSPEC,
		    pkts, NULL);
	}

	return err;
}

errno_t
ifnet_get_service_class_sndq_len(struct ifnet *ifp, mbuf_svc_class_t sc,
    u_int32_t *pkts, u_int32_t *bytes)
{
	errno_t err;

	if (ifp == NULL || !MBUF_VALID_SC(sc) ||
	    (pkts == NULL && bytes == NULL)) {
		err = EINVAL;
	} else if (!(ifp->if_eflags & IFEF_TXSTART)) {
		err = ENXIO;
	} else {
		err = ifclassq_get_len(&ifp->if_snd, sc, pkts, bytes);
	}

	return err;
}
errno_t
ifnet_set_rcvq_maxlen(struct ifnet *ifp, u_int32_t maxqlen)
{
	struct dlil_threading_info *inp;

	if (ifp == NULL) {
		return EINVAL;
	} else if (!(ifp->if_eflags & IFEF_RXPOLL) || ifp->if_inp == NULL) {
		return ENXIO;
	}

	if (maxqlen == 0) {
		maxqlen = if_rcvq_maxlen;
	} else if (maxqlen < IF_RCVQ_MINLEN) {
		maxqlen = IF_RCVQ_MINLEN;
	}

	inp = ifp->if_inp;
	lck_mtx_lock(&inp->dlth_lock);
	qlimit(&inp->dlth_pkts) = maxqlen;
	lck_mtx_unlock(&inp->dlth_lock);

	return 0;
}

errno_t
ifnet_get_rcvq_maxlen(struct ifnet *ifp, u_int32_t *maxqlen)
{
	struct dlil_threading_info *inp;

	if (ifp == NULL || maxqlen == NULL) {
		return EINVAL;
	} else if (!(ifp->if_eflags & IFEF_RXPOLL) || ifp->if_inp == NULL) {
		return ENXIO;
	}

	inp = ifp->if_inp;
	lck_mtx_lock(&inp->dlth_lock);
	*maxqlen = qlimit(&inp->dlth_pkts);
	lck_mtx_unlock(&inp->dlth_lock);

	return 0;
}
void
ifnet_enqueue_multi_setup(struct ifnet *ifp, uint16_t delay_qlen,
    uint16_t delay_timeout)
{
	if (delay_qlen > 0 && delay_timeout > 0) {
		if_set_eflags(ifp, IFEF_ENQUEUE_MULTI);
		ifp->if_start_delay_qlen = MIN(100, delay_qlen);
		ifp->if_start_delay_timeout = min(20000, delay_timeout);
		/* convert timeout to nanoseconds */
		ifp->if_start_delay_timeout *= 1000;
		kprintf("%s: forced IFEF_ENQUEUE_MULTI qlen %u timeout %u\n",
		    ifp->if_xname, (uint32_t)delay_qlen,
		    (uint32_t)delay_timeout);
	} else {
		if_clear_eflags(ifp, IFEF_ENQUEUE_MULTI);
	}
}
/*
 * This function clears the DSCP bits in the IPV4/V6 header pointed to by buf.
 * While it's ok for buf to be not 32 bit aligned, the caller must ensure that
 * buf holds the full header.
 */
static __attribute__((noinline)) void
ifnet_mcast_clear_dscp(uint8_t *buf, uint8_t ip_ver)
{
	struct ip *ip;
	struct ip6_hdr *ip6;
	uint8_t lbuf[64] __attribute__((aligned(8)));
	uint8_t *p = buf;

	if (ip_ver == IPVERSION) {
		uint8_t old_tos;
		uint32_t sum;

		if (__improbable(!IP_HDR_ALIGNED_P(p))) {
			DTRACE_IP1(not__aligned__v4, uint8_t *, buf);
			bcopy(buf, lbuf, sizeof(struct ip));
			p = lbuf;
		}
		ip = (struct ip *)(void *)p;
		if (__probable((ip->ip_tos & ~IPTOS_ECN_MASK) == 0)) {
			return;
		}

		DTRACE_IP1(clear__v4, struct ip *, ip);
		old_tos = ip->ip_tos;
		ip->ip_tos &= IPTOS_ECN_MASK;
		sum = ip->ip_sum + htons(old_tos) - htons(ip->ip_tos);
		sum = (sum >> 16) + (sum & 0xffff);
		ip->ip_sum = (uint16_t)(sum & 0xffff);

		if (__improbable(p == lbuf)) {
			bcopy(lbuf, buf, sizeof(struct ip));
		}
	} else {
		uint32_t flow;

		ASSERT(ip_ver == IPV6_VERSION);

		if (__improbable(!IP_HDR_ALIGNED_P(p))) {
			DTRACE_IP1(not__aligned__v6, uint8_t *, buf);
			bcopy(buf, lbuf, sizeof(struct ip6_hdr));
			p = lbuf;
		}
		ip6 = (struct ip6_hdr *)(void *)p;
		flow = ntohl(ip6->ip6_flow);
		if (__probable((flow & IP6FLOW_DSCP_MASK) == 0)) {
			return;
		}

		DTRACE_IP1(clear__v6, struct ip6_hdr *, ip6);
		ip6->ip6_flow = htonl(flow & ~IP6FLOW_DSCP_MASK);

		if (__improbable(p == lbuf)) {
			bcopy(lbuf, buf, sizeof(struct ip6_hdr));
		}
	}
}
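/*
 * The IPv4 branch above patches ip_sum incrementally rather than
 * recomputing it over the whole header: when a 16-bit word of the header
 * changes from old to new, the stored checksum can be updated as
 *
 *	sum    = ip_sum + old - new;
 *	sum    = (sum >> 16) + (sum & 0xffff);	(fold the carry)
 *	ip_sum = sum & 0xffff;
 *
 * which is the incremental checksum update rule described in RFC 1624.
 * Here "old" and "new" are the TOS byte before and after the DSCP bits are
 * cleared, placed in network byte order by htons().
 */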
static inline errno_t
ifnet_enqueue_ifclassq(struct ifnet *ifp, classq_pkt_t *p, boolean_t flush,
    boolean_t *pdrop)
{
	volatile uint64_t *fg_ts = NULL;
	volatile uint64_t *rt_ts = NULL;
	struct timespec now;
	u_int64_t now_nsec = 0;
	int error = 0;
	uint8_t *mcast_buf = NULL;
	uint8_t ip_ver = 0;
	uint32_t pktlen = 0;

	ASSERT(ifp->if_eflags & IFEF_TXSTART);

	/*
	 * If packet already carries a timestamp, either from dlil_output()
	 * or from flowswitch, use it here.  Otherwise, record timestamp.
	 * PKTF_TS_VALID is always cleared prior to entering classq, i.e.
	 * the timestamp value is used internally there.
	 */
	switch (p->cp_ptype) {
	case QP_MBUF:
		ASSERT(p->cp_mbuf->m_flags & M_PKTHDR);
		ASSERT(p->cp_mbuf->m_nextpkt == NULL);

		if (!(p->cp_mbuf->m_pkthdr.pkt_flags & PKTF_TS_VALID) ||
		    p->cp_mbuf->m_pkthdr.pkt_timestamp == 0) {
			nanouptime(&now);
			net_timernsec(&now, &now_nsec);
			p->cp_mbuf->m_pkthdr.pkt_timestamp = now_nsec;
		}
		p->cp_mbuf->m_pkthdr.pkt_flags &= ~PKTF_TS_VALID;
		/*
		 * If the packet service class is not background,
		 * update the timestamp to indicate recent activity
		 * on a foreground socket.
		 */
		if ((p->cp_mbuf->m_pkthdr.pkt_flags & PKTF_FLOW_ID) &&
		    p->cp_mbuf->m_pkthdr.pkt_flowsrc == FLOWSRC_INPCB) {
			if (!(p->cp_mbuf->m_pkthdr.pkt_flags &
			    PKTF_SO_BACKGROUND)) {
				ifp->if_fg_sendts = (uint32_t)_net_uptime;
				if (fg_ts != NULL) {
					*fg_ts = (uint32_t)_net_uptime;
				}
			}
			if (p->cp_mbuf->m_pkthdr.pkt_flags & PKTF_SO_REALTIME) {
				ifp->if_rt_sendts = (uint32_t)_net_uptime;
				if (rt_ts != NULL) {
					*rt_ts = (uint32_t)_net_uptime;
				}
			}
		}
		pktlen = m_pktlen(p->cp_mbuf);

		/*
		 * Some Wi-Fi AP implementations do not correctly handle
		 * multicast IP packets with DSCP bits set (radr://9331522).
		 * As a workaround we clear the DSCP bits but keep service
		 * class (rdar://51507725).
		 */
		if ((p->cp_mbuf->m_flags & M_MCAST) != 0 &&
		    IFNET_IS_WIFI_INFRA(ifp)) {
			size_t len = mbuf_len(p->cp_mbuf), hlen;
			struct ether_header *eh;
			boolean_t pullup = FALSE;
			uint16_t etype;

			if (__improbable(len < sizeof(struct ether_header))) {
				DTRACE_IP1(small__ether, size_t, len);
				if ((p->cp_mbuf = m_pullup(p->cp_mbuf,
				    sizeof(struct ether_header))) == NULL) {
					return ENOMEM;
				}
			}
			eh = (struct ether_header *)mbuf_data(p->cp_mbuf);
			etype = ntohs(eh->ether_type);
			if (etype == ETHERTYPE_IP) {
				hlen = sizeof(struct ether_header) +
				    sizeof(struct ip);
				if (len < hlen) {
					DTRACE_IP1(small__v4, size_t, len);
					pullup = TRUE;
				}
				ip_ver = IPVERSION;
			} else if (etype == ETHERTYPE_IPV6) {
				hlen = sizeof(struct ether_header) +
				    sizeof(struct ip6_hdr);
				if (len < hlen) {
					DTRACE_IP1(small__v6, size_t, len);
					pullup = TRUE;
				}
				ip_ver = IPV6_VERSION;
			} else {
				DTRACE_IP1(invalid__etype, uint16_t, etype);
				break;
			}
			if (pullup) {
				if ((p->cp_mbuf = m_pullup(p->cp_mbuf, (int)hlen)) ==
				    NULL) {
					return ENOMEM;
				}
				eh = (struct ether_header *)mbuf_data(
					p->cp_mbuf);
			}
			mcast_buf = (uint8_t *)(eh + 1);
			/*
			 * ifnet_mcast_clear_dscp() will finish the work below.
			 * Note that the pullups above ensure that mcast_buf
			 * points to a full IP header.
			 */
		}
		break;

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}

	if (mcast_buf != NULL) {
		ifnet_mcast_clear_dscp(mcast_buf, ip_ver);
	}

	if (ifp->if_eflags & IFEF_ENQUEUE_MULTI) {
		if (now_nsec == 0) {
			nanouptime(&now);
			net_timernsec(&now, &now_nsec);
		}
		/*
		 * If the driver chose to delay start callback for
		 * coalescing multiple packets, Then use the following
		 * heuristics to make sure that start callback will
		 * be delayed only when bulk data transfer is detected.
		 * 1. number of packets enqueued in (delay_win * 2) is
		 * greater than or equal to the delay qlen.
		 * 2. If delay_start is enabled it will stay enabled for
		 * another 10 idle windows. This is to take into account
		 * variable RTT and burst traffic.
		 * 3. If the time elapsed since last enqueue is more
		 * than 200ms we disable delaying start callback. This is
		 * is to take idle time into account.
		 */
		u_int64_t dwin = (ifp->if_start_delay_timeout << 1);
		if (ifp->if_start_delay_swin > 0) {
			if ((ifp->if_start_delay_swin + dwin) > now_nsec) {
				ifp->if_start_delay_cnt++;
			} else if ((now_nsec - ifp->if_start_delay_swin)
			    >= (200 * 1000 * 1000)) {
				ifp->if_start_delay_swin = now_nsec;
				ifp->if_start_delay_cnt = 1;
				ifp->if_start_delay_idle = 0;
				if (ifp->if_eflags & IFEF_DELAY_START) {
					if_clear_eflags(ifp, IFEF_DELAY_START);
					ifnet_delay_start_disabled_increment();
				}
			} else {
				if (ifp->if_start_delay_cnt >=
				    ifp->if_start_delay_qlen) {
					if_set_eflags(ifp, IFEF_DELAY_START);
					ifp->if_start_delay_idle = 0;
				} else {
					if (ifp->if_start_delay_idle >= 10) {
						if_clear_eflags(ifp,
						    IFEF_DELAY_START);
						ifnet_delay_start_disabled_increment();
					} else {
						ifp->if_start_delay_idle++;
					}
				}
				ifp->if_start_delay_swin = now_nsec;
				ifp->if_start_delay_cnt = 1;
			}
		} else {
			ifp->if_start_delay_swin = now_nsec;
			ifp->if_start_delay_cnt = 1;
			ifp->if_start_delay_idle = 0;
			if_clear_eflags(ifp, IFEF_DELAY_START);
		}
	} else {
		if_clear_eflags(ifp, IFEF_DELAY_START);
	}

	/* enqueue the packet (caller consumes object) */
	error = ifclassq_enqueue(&ifp->if_snd, p, p, 1, pktlen, pdrop);

	/*
	 * Tell the driver to start dequeueing; do this even when the queue
	 * for the packet is suspended (EQSUSPENDED), as the driver could still
	 * be dequeueing from other unsuspended queues.
	 */
	if (!(ifp->if_eflags & IFEF_ENQUEUE_MULTI) &&
	    ((error == 0 && flush) || error == EQFULL || error == EQSUSPENDED)) {
		ifnet_start(ifp);
	}

	return error;
}
static inline errno_t
ifnet_enqueue_ifclassq_chain(struct ifnet *ifp, classq_pkt_t *head,
    classq_pkt_t *tail, uint32_t cnt, uint32_t bytes, boolean_t flush,
    boolean_t *pdrop)
{
	errno_t error;

	/* enqueue the packet (caller consumes object) */
	error = ifclassq_enqueue(&ifp->if_snd, head, tail, cnt, bytes, pdrop);

	/*
	 * Tell the driver to start dequeueing; do this even when the queue
	 * for the packet is suspended (EQSUSPENDED), as the driver could still
	 * be dequeueing from other unsuspended queues.
	 */
	if ((error == 0 && flush) || error == EQFULL || error == EQSUSPENDED) {
		ifnet_start(ifp);
	}

	return error;
}
int
ifnet_enqueue_netem(void *handle, pktsched_pkt_t *pkts, uint32_t n_pkts)
{
	struct ifnet *ifp = handle;
	boolean_t pdrop;        /* dummy */
	uint32_t i;

	ASSERT(n_pkts >= 1);
	for (i = 0; i < n_pkts - 1; i++) {
		(void) ifnet_enqueue_ifclassq(ifp, &pkts[i].pktsched_pkt,
		    FALSE, &pdrop);
	}
	/* flush with the last packet */
	(void) ifnet_enqueue_ifclassq(ifp, &pkts[i].pktsched_pkt, TRUE, &pdrop);

	return 0;
}
static inline errno_t
ifnet_enqueue_common(struct ifnet *ifp, classq_pkt_t *pkt, boolean_t flush,
    boolean_t *pdrop)
{
	if (ifp->if_output_netem != NULL) {
		return netem_enqueue(ifp->if_output_netem, pkt, pdrop);
	}

	return ifnet_enqueue_ifclassq(ifp, pkt, flush, pdrop);
}
errno_t
ifnet_enqueue(struct ifnet *ifp, struct mbuf *m)
{
	boolean_t pdrop;

	return ifnet_enqueue_mbuf(ifp, m, TRUE, &pdrop);
}
errno_t
ifnet_enqueue_mbuf(struct ifnet *ifp, struct mbuf *m, boolean_t flush,
    boolean_t *pdrop)
{
	classq_pkt_t pkt;

	if (ifp == NULL || m == NULL || !(m->m_flags & M_PKTHDR) ||
	    m->m_nextpkt != NULL) {
		if (m != NULL) {
			m_freem_list(m);
			*pdrop = TRUE;
		}
		return EINVAL;
	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    !IF_FULLY_ATTACHED(ifp)) {
		/* flag tested without lock for performance */
		m_freem(m);
		*pdrop = TRUE;
		return ENXIO;
	} else if (!(ifp->if_flags & IFF_UP)) {
		m_freem(m);
		*pdrop = TRUE;
		return ENETDOWN;
	}

	CLASSQ_PKT_INIT_MBUF(&pkt, m);
	return ifnet_enqueue_common(ifp, &pkt, flush, pdrop);
}
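/*
 * Illustrative sketch (not part of the original source): a hypothetical
 * driver output path built on ifnet_enqueue().  As the checks above show,
 * the KPI rejects packets without M_PKTHDR, chained packets, and interfaces
 * that are not fully attached, up, and marked IFEF_TXSTART; in every case
 * the mbuf is consumed.  mydrv_output() is an assumed name.
 */
#if 0
static errno_t
mydrv_output(struct ifnet *ifp, struct mbuf *m)
{
	errno_t err;

	err = ifnet_enqueue(ifp, m);    /* consumes m on success and on error */
	if (err != 0 && err != EQFULL && err != EQSUSPENDED) {
		printf("%s: enqueue failed (%d)\n", if_name(ifp), err);
	}
	return err;
}
#endif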
errno_t
ifnet_enqueue_mbuf_chain(struct ifnet *ifp, struct mbuf *m_head,
    struct mbuf *m_tail, uint32_t cnt, uint32_t bytes, boolean_t flush,
    boolean_t *pdrop)
{
	classq_pkt_t head, tail;

	ASSERT(m_head != NULL);
	ASSERT((m_head->m_flags & M_PKTHDR) != 0);
	ASSERT(m_tail != NULL);
	ASSERT((m_tail->m_flags & M_PKTHDR) != 0);
	ASSERT(ifp != NULL);
	ASSERT((ifp->if_eflags & IFEF_TXSTART) != 0);

	if (!IF_FULLY_ATTACHED(ifp)) {
		/* flag tested without lock for performance */
		m_freem_list(m_head);
		*pdrop = TRUE;
		return ENXIO;
	} else if (!(ifp->if_flags & IFF_UP)) {
		m_freem_list(m_head);
		*pdrop = TRUE;
		return ENETDOWN;
	}

	CLASSQ_PKT_INIT_MBUF(&head, m_head);
	CLASSQ_PKT_INIT_MBUF(&tail, m_tail);
	return ifnet_enqueue_ifclassq_chain(ifp, &head, &tail, cnt, bytes,
	    flush, pdrop);
}
errno_t
ifnet_dequeue(struct ifnet *ifp, struct mbuf **mp)
{
	errno_t rc;
	classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt);

	if (ifp == NULL || mp == NULL) {
		return EINVAL;
	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) {
		return ENXIO;
	}
	if (!ifnet_is_attached(ifp, 1)) {
		return ENXIO;
	}

	rc = ifclassq_dequeue(&ifp->if_snd, 1, CLASSQ_DEQUEUE_MAX_BYTE_LIMIT,
	    &pkt, NULL, NULL, NULL);
	VERIFY((pkt.cp_ptype == QP_MBUF) || (pkt.cp_mbuf == NULL));
	ifnet_decr_iorefcnt(ifp);
	*mp = pkt.cp_mbuf;

	return rc;
}
errno_t
ifnet_dequeue_service_class(struct ifnet *ifp, mbuf_svc_class_t sc,
    struct mbuf **mp)
{
	errno_t rc;
	classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt);

	if (ifp == NULL || mp == NULL || !MBUF_VALID_SC(sc)) {
		return EINVAL;
	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) {
		return ENXIO;
	}
	if (!ifnet_is_attached(ifp, 1)) {
		return ENXIO;
	}

	rc = ifclassq_dequeue_sc(&ifp->if_snd, sc, 1,
	    CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, &pkt, NULL, NULL, NULL);
	VERIFY((pkt.cp_ptype == QP_MBUF) || (pkt.cp_mbuf == NULL));
	ifnet_decr_iorefcnt(ifp);
	*mp = pkt.cp_mbuf;

	return rc;
}
errno_t
ifnet_dequeue_multi(struct ifnet *ifp, u_int32_t pkt_limit,
    struct mbuf **head, struct mbuf **tail, u_int32_t *cnt, u_int32_t *len)
{
	errno_t rc;
	classq_pkt_t pkt_head = CLASSQ_PKT_INITIALIZER(pkt_head);
	classq_pkt_t pkt_tail = CLASSQ_PKT_INITIALIZER(pkt_tail);

	if (ifp == NULL || head == NULL || pkt_limit < 1) {
		return EINVAL;
	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) {
		return ENXIO;
	}
	if (!ifnet_is_attached(ifp, 1)) {
		return ENXIO;
	}

	rc = ifclassq_dequeue(&ifp->if_snd, pkt_limit,
	    CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, &pkt_head, &pkt_tail, cnt, len);
	VERIFY((pkt_head.cp_ptype == QP_MBUF) || (pkt_head.cp_mbuf == NULL));
	ifnet_decr_iorefcnt(ifp);
	*head = pkt_head.cp_mbuf;
	if (tail != NULL) {
		*tail = pkt_tail.cp_mbuf;
	}

	return rc;
}
errno_t
ifnet_dequeue_multi_bytes(struct ifnet *ifp, u_int32_t byte_limit,
    struct mbuf **head, struct mbuf **tail, u_int32_t *cnt, u_int32_t *len)
{
	errno_t rc;
	classq_pkt_t pkt_head = CLASSQ_PKT_INITIALIZER(pkt_head);
	classq_pkt_t pkt_tail = CLASSQ_PKT_INITIALIZER(pkt_tail);

	if (ifp == NULL || head == NULL || byte_limit < 1) {
		return EINVAL;
	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) {
		return ENXIO;
	}
	if (!ifnet_is_attached(ifp, 1)) {
		return ENXIO;
	}

	rc = ifclassq_dequeue(&ifp->if_snd, CLASSQ_DEQUEUE_MAX_PKT_LIMIT,
	    byte_limit, &pkt_head, &pkt_tail, cnt, len);
	VERIFY((pkt_head.cp_ptype == QP_MBUF) || (pkt_head.cp_mbuf == NULL));
	ifnet_decr_iorefcnt(ifp);
	*head = pkt_head.cp_mbuf;
	if (tail != NULL) {
		*tail = pkt_tail.cp_mbuf;
	}

	return rc;
}
errno_t
ifnet_dequeue_service_class_multi(struct ifnet *ifp, mbuf_svc_class_t sc,
    u_int32_t pkt_limit, struct mbuf **head, struct mbuf **tail, u_int32_t *cnt,
    u_int32_t *len)
{
	errno_t rc;
	classq_pkt_t pkt_head = CLASSQ_PKT_INITIALIZER(pkt_head);
	classq_pkt_t pkt_tail = CLASSQ_PKT_INITIALIZER(pkt_tail);

	if (ifp == NULL || head == NULL || pkt_limit < 1 ||
	    !MBUF_VALID_SC(sc)) {
		return EINVAL;
	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) {
		return ENXIO;
	}
	if (!ifnet_is_attached(ifp, 1)) {
		return ENXIO;
	}

	rc = ifclassq_dequeue_sc(&ifp->if_snd, sc, pkt_limit,
	    CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, &pkt_head, &pkt_tail,
	    cnt, len);
	VERIFY((pkt_head.cp_ptype == QP_MBUF) || (pkt_head.cp_mbuf == NULL));
	ifnet_decr_iorefcnt(ifp);
	*head = pkt_head.cp_mbuf;
	if (tail != NULL) {
		*tail = pkt_tail.cp_mbuf;
	}

	return rc;
}
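/*
 * Illustrative sketch (not part of the original source): a hypothetical
 * "new" output-model start routine draining the classq with
 * ifnet_dequeue_multi() in batches of up to 32 packets.  mydrv_hw_submit()
 * is an assumed hardware-submission hook.
 */
#if 0
static void
mydrv_start(struct ifnet *ifp)
{
	struct mbuf *head = NULL, *tail = NULL;
	u_int32_t cnt = 0, len = 0;

	for (;;) {
		if (ifnet_dequeue_multi(ifp, 32, &head, &tail,
		    &cnt, &len) != 0 || head == NULL) {
			break;          /* queue drained or interface gone */
		}
		mydrv_hw_submit(ifp, head, cnt, len);   /* hypothetical */
	}
}
#endif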
#if XNU_TARGET_OS_OSX
static errno_t
ifnet_framer_stub(struct ifnet *ifp, struct mbuf **m,
    const struct sockaddr *dest, const char *dest_linkaddr,
    const char *frame_type, u_int32_t *pre, u_int32_t *post)
{
	if (pre != NULL) {
		*pre = 0;
	}
	if (post != NULL) {
		*post = 0;
	}

	return ifp->if_framer_legacy(ifp, m, dest, dest_linkaddr, frame_type);
}
#endif /* XNU_TARGET_OS_OSX */
static boolean_t
packet_has_vlan_tag(struct mbuf * m)
{
	u_int16_t tag = 0;

	if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) != 0) {
		tag = EVL_VLANOFTAG(m->m_pkthdr.vlan_tag);
		if (tag == 0) {
			/* the packet is just priority-tagged, clear the bit */
			m->m_pkthdr.csum_flags &= ~CSUM_VLAN_TAG_VALID;
		}
	}
	return tag != 0;
}
static int
dlil_interface_filters_input(struct ifnet *ifp, struct mbuf **m_p,
    char **frame_header_p, protocol_family_t protocol_family)
{
	boolean_t is_vlan_packet = FALSE;
	struct ifnet_filter *filter;
	struct mbuf *m = *m_p;

	is_vlan_packet = packet_has_vlan_tag(m);

	if (TAILQ_EMPTY(&ifp->if_flt_head)) {
		return 0;
	}

	/*
	 * Pass the inbound packet to the interface filters
	 */
	lck_mtx_lock_spin(&ifp->if_flt_lock);
	/* prevent filter list from changing in case we drop the lock */
	if_flt_monitor_busy(ifp);
	TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
		int result;

		/* exclude VLAN packets from external filters PR-3586856 */
		if (is_vlan_packet &&
		    (filter->filt_flags & DLIL_IFF_INTERNAL) == 0) {
			continue;
		}

		if (!filter->filt_skip && filter->filt_input != NULL &&
		    (filter->filt_protocol == 0 ||
		    filter->filt_protocol == protocol_family)) {
			lck_mtx_unlock(&ifp->if_flt_lock);

			result = (*filter->filt_input)(filter->filt_cookie,
			    ifp, protocol_family, m_p, frame_header_p);

			lck_mtx_lock_spin(&ifp->if_flt_lock);
			if (result != 0) {
				/* we're done with the filter list */
				if_flt_monitor_unbusy(ifp);
				lck_mtx_unlock(&ifp->if_flt_lock);
				return result;
			}
		}
	}
	/* we're done with the filter list */
	if_flt_monitor_unbusy(ifp);
	lck_mtx_unlock(&ifp->if_flt_lock);

	/*
	 * Strip away M_PROTO1 bit prior to sending packet up the stack as
	 * it is meant to be local to a subsystem -- if_bridge for M_PROTO1
	 */
	if (*m_p != NULL) {
		(*m_p)->m_flags &= ~M_PROTO1;
	}

	return 0;
}
static int
dlil_interface_filters_output(struct ifnet *ifp, struct mbuf **m_p,
    protocol_family_t protocol_family)
{
	boolean_t is_vlan_packet;
	struct ifnet_filter *filter;
	struct mbuf *m = *m_p;

	is_vlan_packet = packet_has_vlan_tag(m);

	/*
	 * Pass the outbound packet to the interface filters
	 */
	lck_mtx_lock_spin(&ifp->if_flt_lock);
	/* prevent filter list from changing in case we drop the lock */
	if_flt_monitor_busy(ifp);
	TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
		int result;

		/* exclude VLAN packets from external filters PR-3586856 */
		if (is_vlan_packet &&
		    (filter->filt_flags & DLIL_IFF_INTERNAL) == 0) {
			continue;
		}

		if (!filter->filt_skip && filter->filt_output != NULL &&
		    (filter->filt_protocol == 0 ||
		    filter->filt_protocol == protocol_family)) {
			lck_mtx_unlock(&ifp->if_flt_lock);

			result = filter->filt_output(filter->filt_cookie, ifp,
			    protocol_family, m_p);

			lck_mtx_lock_spin(&ifp->if_flt_lock);
			if (result != 0) {
				/* we're done with the filter list */
				if_flt_monitor_unbusy(ifp);
				lck_mtx_unlock(&ifp->if_flt_lock);
				return result;
			}
		}
	}
	/* we're done with the filter list */
	if_flt_monitor_unbusy(ifp);
	lck_mtx_unlock(&ifp->if_flt_lock);

	return 0;
}
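/*
 * Illustrative sketch (not part of the original source): the filters walked
 * by the two routines above are registered through the kpi_interfacefilter
 * KPI.  A hypothetical filter that only watches inbound IPv4 traffic could
 * be attached like this; all names prefixed with example_ are assumptions.
 */
#if 0
static errno_t
example_iff_input(void *cookie, ifnet_t ifp, protocol_family_t protocol,
    mbuf_t *data, char **frame_ptr)
{
#pragma unused(cookie, ifp, protocol, data, frame_ptr)
	return 0;       /* returning 0 lets the packet continue up the stack */
}

static interface_filter_t example_filt_ref;

static errno_t
example_attach_filter(ifnet_t ifp)
{
	struct iff_filter flt = {
		.iff_cookie   = NULL,
		.iff_name     = "com.example.filter",
		.iff_protocol = PF_INET,        /* 0 would match all protocols */
		.iff_input    = example_iff_input,
	};

	return iflt_attach(ifp, &flt, &example_filt_ref);
}
#endif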
static void
dlil_ifproto_input(struct if_proto * ifproto, mbuf_t m)
{
	int error;

	if (ifproto->proto_kpi == kProtoKPI_v1) {
		/* Version 1 protocols get one packet at a time */
		while (m != NULL) {
			char * frame_header;
			mbuf_t next_packet;

			next_packet = m->m_nextpkt;
			m->m_nextpkt = NULL;
			frame_header = m->m_pkthdr.pkt_hdr;
			m->m_pkthdr.pkt_hdr = NULL;
			error = (*ifproto->kpi.v1.input)(ifproto->ifp,
			    ifproto->protocol_family, m, frame_header);
			if (error != 0 && error != EJUSTRETURN) {
				m_freem(m);
			}
			m = next_packet;
		}
	} else if (ifproto->proto_kpi == kProtoKPI_v2) {
		/* Version 2 protocols support packet lists */
		error = (*ifproto->kpi.v2.input)(ifproto->ifp,
		    ifproto->protocol_family, m);
		if (error != 0 && error != EJUSTRETURN) {
			m_freem_list(m);
		}
	}
}
static void
dlil_input_stats_add(const struct ifnet_stat_increment_param *s,
    struct dlil_threading_info *inp, struct ifnet *ifp, boolean_t poll)
{
	struct ifnet_stat_increment_param *d = &inp->dlth_stats;

	if (s->packets_in != 0) {
		d->packets_in += s->packets_in;
	}
	if (s->bytes_in != 0) {
		d->bytes_in += s->bytes_in;
	}
	if (s->errors_in != 0) {
		d->errors_in += s->errors_in;
	}

	if (s->packets_out != 0) {
		d->packets_out += s->packets_out;
	}
	if (s->bytes_out != 0) {
		d->bytes_out += s->bytes_out;
	}
	if (s->errors_out != 0) {
		d->errors_out += s->errors_out;
	}

	if (s->collisions != 0) {
		d->collisions += s->collisions;
	}
	if (s->dropped != 0) {
		d->dropped += s->dropped;
	}

	if (poll) {
		PKTCNTR_ADD(&ifp->if_poll_tstats, s->packets_in, s->bytes_in);
	}
}
static boolean_t
dlil_input_stats_sync(struct ifnet *ifp, struct dlil_threading_info *inp)
{
	struct ifnet_stat_increment_param *s = &inp->dlth_stats;

	/*
	 * Use of atomic operations is unavoidable here because
	 * these stats may also be incremented elsewhere via KPIs.
	 */
	if (s->packets_in != 0) {
		atomic_add_64(&ifp->if_data.ifi_ipackets, s->packets_in);
		s->packets_in = 0;
	}
	if (s->bytes_in != 0) {
		atomic_add_64(&ifp->if_data.ifi_ibytes, s->bytes_in);
		s->bytes_in = 0;
	}
	if (s->errors_in != 0) {
		atomic_add_64(&ifp->if_data.ifi_ierrors, s->errors_in);
		s->errors_in = 0;
	}

	if (s->packets_out != 0) {
		atomic_add_64(&ifp->if_data.ifi_opackets, s->packets_out);
		s->packets_out = 0;
	}
	if (s->bytes_out != 0) {
		atomic_add_64(&ifp->if_data.ifi_obytes, s->bytes_out);
		s->bytes_out = 0;
	}
	if (s->errors_out != 0) {
		atomic_add_64(&ifp->if_data.ifi_oerrors, s->errors_out);
		s->errors_out = 0;
	}

	if (s->collisions != 0) {
		atomic_add_64(&ifp->if_data.ifi_collisions, s->collisions);
		s->collisions = 0;
	}
	if (s->dropped != 0) {
		atomic_add_64(&ifp->if_data.ifi_iqdrops, s->dropped);
		s->dropped = 0;
	}

	/*
	 * No need for atomic operations as they are modified here
	 * only from within the DLIL input thread context.
	 */
	if (ifp->if_poll_tstats.packets != 0) {
		ifp->if_poll_pstats.ifi_poll_packets += ifp->if_poll_tstats.packets;
		ifp->if_poll_tstats.packets = 0;
	}
	if (ifp->if_poll_tstats.bytes != 0) {
		ifp->if_poll_pstats.ifi_poll_bytes += ifp->if_poll_tstats.bytes;
		ifp->if_poll_tstats.bytes = 0;
	}

	return ifp->if_data_threshold != 0;
}
__private_extern__ void
dlil_input_packet_list(struct ifnet *ifp, struct mbuf *m)
{
	return dlil_input_packet_list_common(ifp, m, 0,
	           IFNET_MODEL_INPUT_POLL_OFF, FALSE);
}

__private_extern__ void
dlil_input_packet_list_extended(struct ifnet *ifp, struct mbuf *m,
    u_int32_t cnt, ifnet_model_t mode)
{
	return dlil_input_packet_list_common(ifp, m, cnt, mode, TRUE);
}
static void
dlil_input_packet_list_common(struct ifnet *ifp_param, struct mbuf *m,
    u_int32_t cnt, ifnet_model_t mode, boolean_t ext)
{
	int error = 0;
	protocol_family_t protocol_family;
	mbuf_t next_packet;
	ifnet_t ifp = ifp_param;
	char *frame_header = NULL;
	struct if_proto *last_ifproto = NULL;
	mbuf_t pkt_first = NULL;
	mbuf_t *pkt_next = NULL;
	u_int32_t poll_thresh = 0, poll_ival = 0;
	int iorefcnt = 0;

	KERNEL_DEBUG(DBG_FNC_DLIL_INPUT | DBG_FUNC_START, 0, 0, 0, 0, 0);

	if (ext && mode == IFNET_MODEL_INPUT_POLL_ON && cnt > 1 &&
	    (poll_ival = if_rxpoll_interval_pkts) > 0) {
		poll_thresh = cnt;
	}

	while (m != NULL) {
		struct if_proto *ifproto = NULL;
		uint32_t pktf_mask;     /* pkt flags to preserve */

		if (ifp_param == NULL) {
			ifp = m->m_pkthdr.rcvif;
		}

		if ((ifp->if_eflags & IFEF_RXPOLL) &&
		    (ifp->if_xflags & IFXF_LEGACY) && poll_thresh != 0 &&
		    poll_ival > 0 && (--poll_thresh % poll_ival) == 0) {
			ifnet_poll(ifp);
		}

		/* Check if this mbuf looks valid */
		MBUF_INPUT_CHECK(m, ifp);

		next_packet = m->m_nextpkt;
		m->m_nextpkt = NULL;
		frame_header = m->m_pkthdr.pkt_hdr;
		m->m_pkthdr.pkt_hdr = NULL;

		/*
		 * Get an IO reference count if the interface is not
		 * loopback (lo0) and it is attached; lo0 never goes
		 * away, so optimize for that.
		 */
		if (ifp != lo_ifp) {
			/* iorefcnt is 0 if it hasn't been taken yet */
			if (iorefcnt == 0) {
				if (!ifnet_datamov_begin(ifp)) {
					m_freem(m);
					goto next;
				}
			}
			iorefcnt = 1;
			/*
			 * Preserve the time stamp and skip pktap flags.
			 */
			pktf_mask = PKTF_TS_VALID | PKTF_SKIP_PKTAP;
		} else {
			/*
			 * If this arrived on lo0, preserve interface addr
			 * info to allow for connectivity between loopback
			 * and local interface addresses.
			 */
			pktf_mask = (PKTF_LOOP | PKTF_IFAINFO);
		}

		/* make sure packet comes in clean */
		m_classifier_init(m, pktf_mask);

		ifp_inc_traffic_class_in(ifp, m);

		/* find which protocol family this packet is for */
		ifnet_lock_shared(ifp);
		error = (*ifp->if_demux)(ifp, m, frame_header,
		    &protocol_family);
		ifnet_lock_done(ifp);
		if (error != 0) {
			if (error == EJUSTRETURN) {
				goto next;
			}
			protocol_family = 0;
		}

		pktap_input(ifp, protocol_family, m, frame_header);

		/* Drop v4 packets received on CLAT46 enabled interface */
		if (protocol_family == PF_INET && IS_INTF_CLAT46(ifp)) {
			m_freem(m);
			ip6stat.ip6s_clat464_in_v4_drop++;
			goto next;
		}

		/* Translate the packet if it is received on CLAT interface */
		if (protocol_family == PF_INET6 && IS_INTF_CLAT46(ifp)
		    && dlil_is_clat_needed(protocol_family, m)) {
			char *data = NULL;
			struct ether_header eh;
			struct ether_header *ehp = NULL;

			if (ifp->if_type == IFT_ETHER) {
				ehp = (struct ether_header *)(void *)frame_header;
				/* Skip RX Ethernet packets if they are not IPV6 */
				if (ntohs(ehp->ether_type) != ETHERTYPE_IPV6) {
					goto skip_clat;
				}

				/* Keep a copy of frame_header for Ethernet packets */
				bcopy(frame_header, (caddr_t)&eh, ETHER_HDR_LEN);
			}
			error = dlil_clat64(ifp, &protocol_family, &m);
			data = (char *) mbuf_data(m);
			if (error != 0) {
				m_freem(m);
				ip6stat.ip6s_clat464_in_drop++;
				goto next;
			}
			/* Native v6 should be No-op */
			if (protocol_family != PF_INET) {
				goto skip_clat;
			}

			/* Do this only for translated v4 packets. */
			switch (ifp->if_type) {
			case IFT_CELLULAR:
				frame_header = data;
				break;
			case IFT_ETHER:
				/*
				 * Drop if the mbuf doesn't have enough
				 * space for Ethernet header
				 */
				if (M_LEADINGSPACE(m) < ETHER_HDR_LEN) {
					m_free(m);
					ip6stat.ip6s_clat464_in_drop++;
					goto next;
				}
				/*
				 * Set the frame_header ETHER_HDR_LEN bytes
				 * preceeding the data pointer. Change
				 * the ether_type too.
				 */
				frame_header = data - ETHER_HDR_LEN;
				eh.ether_type = htons(ETHERTYPE_IP);
				bcopy((caddr_t)&eh, frame_header, ETHER_HDR_LEN);
				break;
			}
		}
skip_clat:
		if (hwcksum_dbg != 0 && !(ifp->if_flags & IFF_LOOPBACK) &&
		    !(m->m_pkthdr.pkt_flags & PKTF_LOOP)) {
			dlil_input_cksum_dbg(ifp, m, frame_header,
			    protocol_family);
		}
		/*
		 * For partial checksum offload, we expect the driver to
		 * set the start offset indicating the start of the span
		 * that is covered by the hardware-computed checksum;
		 * adjust this start offset accordingly because the data
		 * pointer has been advanced beyond the link-layer header.
		 *
		 * Virtual lan types (bridge, vlan, bond) can call
		 * dlil_input_packet_list() with the same packet with the
		 * checksum flags set. Set a flag indicating that the
		 * adjustment has already been done.
		 */
		if ((m->m_pkthdr.csum_flags & CSUM_ADJUST_DONE) != 0) {
			/* adjustment has already been done */
		} else if ((m->m_pkthdr.csum_flags &
		    (CSUM_DATA_VALID | CSUM_PARTIAL)) ==
		    (CSUM_DATA_VALID | CSUM_PARTIAL)) {
			int adj;
			if (frame_header == NULL ||
			    frame_header < (char *)mbuf_datastart(m) ||
			    frame_header > (char *)m->m_data ||
			    (adj = (int)(m->m_data - frame_header)) >
			    m->m_pkthdr.csum_rx_start) {
				m->m_pkthdr.csum_data = 0;
				m->m_pkthdr.csum_flags &= ~CSUM_DATA_VALID;
				hwcksum_in_invalidated++;
			} else {
				m->m_pkthdr.csum_rx_start -= adj;
			}
			/* make sure we don't adjust more than once */
			m->m_pkthdr.csum_flags |= CSUM_ADJUST_DONE;
		}
		pktap_input(ifp, protocol_family, m, frame_header);

		if (m->m_flags & (M_BCAST | M_MCAST)) {
			atomic_add_64(&ifp->if_imcasts, 1);
		}

		/* run interface filters */
		error = dlil_interface_filters_input(ifp, &m,
		    &frame_header, protocol_family);
		if (error != 0) {
			if (error != EJUSTRETURN) {
				m_freem(m);
			}
			goto next;
		}
		if ((m->m_flags & M_PROMISC) != 0) {
			m_freem(m);
			goto next;
		}

		/* Lookup the protocol attachment to this interface */
		if (protocol_family == 0) {
			ifproto = NULL;
		} else if (last_ifproto != NULL && last_ifproto->ifp == ifp &&
		    (last_ifproto->protocol_family == protocol_family)) {
			VERIFY(ifproto == NULL);
			ifproto = last_ifproto;
			if_proto_ref(last_ifproto);
		} else {
			VERIFY(ifproto == NULL);
			ifnet_lock_shared(ifp);
			/* callee holds a proto refcnt upon success */
			ifproto = find_attached_proto(ifp, protocol_family);
			ifnet_lock_done(ifp);
		}
		if (ifproto == NULL) {
			/* no protocol for this packet, discard */
			m_freem(m);
			goto next;
		}
		if (ifproto != last_ifproto) {
			if (last_ifproto != NULL) {
				/* pass up the list for the previous protocol */
				dlil_ifproto_input(last_ifproto, pkt_first);
				pkt_first = NULL;
				if_proto_free(last_ifproto);
			}
			last_ifproto = ifproto;
			if_proto_ref(ifproto);
		}
		/* extend the list */
		m->m_pkthdr.pkt_hdr = frame_header;
		if (pkt_first == NULL) {
			pkt_first = m;
		} else {
			*pkt_next = m;
		}
		pkt_next = &m->m_nextpkt;

next:
		if (next_packet == NULL && last_ifproto != NULL) {
			/* pass up the last list of packets */
			dlil_ifproto_input(last_ifproto, pkt_first);
			if_proto_free(last_ifproto);
			last_ifproto = NULL;
		}
		if (ifproto != NULL) {
			if_proto_free(ifproto);
		}

		m = next_packet;

		/* update the driver's multicast filter, if needed */
		if (ifp->if_updatemcasts > 0 && if_mcasts_update(ifp) == 0) {
			ifp->if_updatemcasts = 0;
		}
		if (iorefcnt == 1) {
			/* If the next mbuf is on a different interface, unlock data-mov */
			if (!m || (ifp != ifp_param && ifp != m->m_pkthdr.rcvif)) {
				ifnet_datamov_end(ifp);
				iorefcnt = 0;
			}
		}
	}

	KERNEL_DEBUG(DBG_FNC_DLIL_INPUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
}
static int
if_mcasts_update(struct ifnet *ifp)
{
	int err;

	err = ifnet_ioctl(ifp, 0, SIOCADDMULTI, NULL);
	if (err == EAFNOSUPPORT) {
		err = 0;
	}
	DLIL_PRINTF("%s: %s %d suspended link-layer multicast membership(s) "
	    "(err=%d)\n", if_name(ifp),
	    (err == 0 ? "successfully restored" : "failed to restore"),
	    ifp->if_updatemcasts, err);

	/* just return success */
	return 0;
}
/* If ifp is set, we will increment the generation for the interface */
int
dlil_post_complete_msg(struct ifnet *ifp, struct kev_msg *event)
{
	if (ifp != NULL) {
		ifnet_increment_generation(ifp);
	}

#if NECP
	necp_update_all_clients();
#endif /* NECP */

	return kev_post_msg(event);
}
__private_extern__ void
dlil_post_sifflags_msg(struct ifnet * ifp)
{
	struct kev_msg ev_msg;
	struct net_event_data ev_data;

	bzero(&ev_data, sizeof(ev_data));
	bzero(&ev_msg, sizeof(ev_msg));

	ev_msg.vendor_code = KEV_VENDOR_APPLE;
	ev_msg.kev_class = KEV_NETWORK_CLASS;
	ev_msg.kev_subclass = KEV_DL_SUBCLASS;
	ev_msg.event_code = KEV_DL_SIFFLAGS;
	strlcpy(&ev_data.if_name[0], ifp->if_name, IFNAMSIZ);
	ev_data.if_family = ifp->if_family;
	ev_data.if_unit = (u_int32_t) ifp->if_unit;
	ev_msg.dv[0].data_length = sizeof(struct net_event_data);
	ev_msg.dv[0].data_ptr = &ev_data;
	ev_msg.dv[1].data_length = 0;
	dlil_post_complete_msg(ifp, &ev_msg);
}
#define TMP_IF_PROTO_ARR_SIZE   10
static int
dlil_event_internal(struct ifnet *ifp, struct kev_msg *event, bool update_generation)
{
	struct ifnet_filter *filter = NULL;
	struct if_proto *proto = NULL;
	int if_proto_count = 0;
	struct if_proto **tmp_ifproto_arr = NULL;
	struct if_proto *tmp_ifproto_stack_arr[TMP_IF_PROTO_ARR_SIZE] = {NULL};
	int tmp_ifproto_arr_idx = 0;
	bool tmp_malloc = false;

	/*
	 * Pass the event to the interface filters
	 */
	lck_mtx_lock_spin(&ifp->if_flt_lock);
	/* prevent filter list from changing in case we drop the lock */
	if_flt_monitor_busy(ifp);
	TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
		if (filter->filt_event != NULL) {
			lck_mtx_unlock(&ifp->if_flt_lock);

			filter->filt_event(filter->filt_cookie, ifp,
			    filter->filt_protocol, event);

			lck_mtx_lock_spin(&ifp->if_flt_lock);
		}
	}
	/* we're done with the filter list */
	if_flt_monitor_unbusy(ifp);
	lck_mtx_unlock(&ifp->if_flt_lock);

	/* Get an io ref count if the interface is attached */
	if (!ifnet_is_attached(ifp, 1)) {
		goto done;
	}

	/*
	 * An embedded tmp_list_entry in if_proto may still get
	 * over-written by another thread after giving up ifnet lock,
	 * therefore we are avoiding embedded pointers here.
	 */
	ifnet_lock_shared(ifp);
	if_proto_count = dlil_ifp_protolist(ifp, NULL, 0);
	if (if_proto_count) {
		int i;
		VERIFY(ifp->if_proto_hash != NULL);
		if (if_proto_count <= TMP_IF_PROTO_ARR_SIZE) {
			tmp_ifproto_arr = tmp_ifproto_stack_arr;
		} else {
			MALLOC(tmp_ifproto_arr, struct if_proto **,
			    sizeof(*tmp_ifproto_arr) * if_proto_count,
			    M_TEMP, M_ZERO);
			if (tmp_ifproto_arr == NULL) {
				ifnet_lock_done(ifp);
				goto cleanup;
			}
			tmp_malloc = true;
		}

		for (i = 0; i < PROTO_HASH_SLOTS; i++) {
			SLIST_FOREACH(proto, &ifp->if_proto_hash[i],
			    next_hash) {
				if_proto_ref(proto);
				tmp_ifproto_arr[tmp_ifproto_arr_idx] = proto;
				tmp_ifproto_arr_idx++;
			}
		}
		VERIFY(if_proto_count == tmp_ifproto_arr_idx);
	}
	ifnet_lock_done(ifp);

	for (tmp_ifproto_arr_idx = 0; tmp_ifproto_arr_idx < if_proto_count;
	    tmp_ifproto_arr_idx++) {
		proto = tmp_ifproto_arr[tmp_ifproto_arr_idx];
		VERIFY(proto != NULL);
		proto_media_event eventp =
		    (proto->proto_kpi == kProtoKPI_v1 ?
		    proto->kpi.v1.event :
		    proto->kpi.v2.event);

		if (eventp != NULL) {
			eventp(ifp, proto->protocol_family,
			    event);
		}
		if_proto_free(proto);
	}

cleanup:
	if (tmp_malloc) {
		FREE(tmp_ifproto_arr, M_TEMP);
	}

	/* Pass the event to the interface */
	if (ifp->if_event != NULL) {
		ifp->if_event(ifp, event);
	}

	/* Release the io ref count */
	ifnet_decr_iorefcnt(ifp);
done:
	return dlil_post_complete_msg(update_generation ? ifp : NULL, event);
}
errno_t
ifnet_event(ifnet_t ifp, struct kern_event_msg *event)
{
	struct kev_msg kev_msg;
	int result = 0;

	if (ifp == NULL || event == NULL) {
		return EINVAL;
	}

	bzero(&kev_msg, sizeof(kev_msg));
	kev_msg.vendor_code = event->vendor_code;
	kev_msg.kev_class = event->kev_class;
	kev_msg.kev_subclass = event->kev_subclass;
	kev_msg.event_code = event->event_code;
	kev_msg.dv[0].data_ptr = &event->event_data[0];
	kev_msg.dv[0].data_length = event->total_size - KEV_MSG_HEADER_SIZE;
	kev_msg.dv[1].data_length = 0;

	result = dlil_event_internal(ifp, &kev_msg, TRUE);

	return result;
}
void
dlil_count_chain_len(mbuf_t m, struct chain_len_stats *cls)
{
	mbuf_t n = m;
	int chainlen = 0;

	while (n != NULL) {
		chainlen++;
		n = n->m_next;
	}
	switch (chainlen) {
	case 0:
		break;
	case 1:
		atomic_add_64(&cls->cls_one, 1);
		break;
	case 2:
		atomic_add_64(&cls->cls_two, 1);
		break;
	case 3:
		atomic_add_64(&cls->cls_three, 1);
		break;
	case 4:
		atomic_add_64(&cls->cls_four, 1);
		break;
	case 5:
	default:
		atomic_add_64(&cls->cls_five_or_more, 1);
		break;
	}
}
/*
 * dlil_output
 *
 * Caller should have a lock on the protocol domain if the protocol
 * doesn't support finer grained locking. In most cases, the lock
 * will be held from the socket layer and won't be released until
 * we return back to the socket layer.
 *
 * This does mean that we must take a protocol lock before we take
 * an interface lock if we're going to take both. This makes sense
 * because a protocol is likely to interact with an ifp while it
 * is under the protocol lock.
 *
 * An advisory code will be returned if adv is not null. This
 * can be used to provide feedback about interface queues to the
 * application.
 */
errno_t
dlil_output(ifnet_t ifp, protocol_family_t proto_family, mbuf_t packetlist,
    void *route, const struct sockaddr *dest, int raw, struct flowadv *adv)
{
	char *frame_type = NULL;
	char *dst_linkaddr = NULL;
	int retval = 0;
	char frame_type_buffer[MAX_FRAME_TYPE_SIZE * 4];
	char dst_linkaddr_buffer[MAX_LINKADDR * 4];
	struct if_proto *proto = NULL;
	mbuf_t m = NULL;
	mbuf_t send_head = NULL;
	mbuf_t *send_tail = &send_head;
	int iorefcnt = 0;
	u_int32_t pre = 0, post = 0;
	u_int32_t fpkts = 0, fbytes = 0;
	uint32_t flen = 0;
	struct timespec now;
	u_int64_t now_nsec;
	boolean_t did_clat46 = FALSE;
	protocol_family_t old_proto_family = proto_family;
	struct sockaddr_in6 dest6;
	struct rtentry *rt = NULL;
	u_int32_t m_loop_set = 0;

	KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_START, 0, 0, 0, 0, 0);

	/*
	 * Get an io refcnt if the interface is attached to prevent ifnet_detach
	 * from happening while this operation is in progress
	 */
	if (!ifnet_datamov_begin(ifp)) {
		retval = ENXIO;
		goto cleanup;
	}
	iorefcnt = 1;

	VERIFY(ifp->if_output_dlil != NULL);

	/* update the driver's multicast filter, if needed */
	if (ifp->if_updatemcasts > 0 && if_mcasts_update(ifp) == 0) {
		ifp->if_updatemcasts = 0;
	}

	frame_type = frame_type_buffer;
	dst_linkaddr = dst_linkaddr_buffer;

	if (raw == 0) {
		ifnet_lock_shared(ifp);
		/* callee holds a proto refcnt upon success */
		proto = find_attached_proto(ifp, proto_family);
		if (proto == NULL) {
			ifnet_lock_done(ifp);
			retval = ENXIO;
			goto cleanup;
		}
		ifnet_lock_done(ifp);
	}

preout_again:
	if (packetlist == NULL) {
		goto cleanup;
	}

	m = packetlist;
	packetlist = packetlist->m_nextpkt;
	m->m_nextpkt = NULL;

	/*
	 * Perform address family translation for the first
	 * packet outside the loop in order to perform address
	 * lookup for the translated proto family.
	 */
	if (proto_family == PF_INET && IS_INTF_CLAT46(ifp) &&
	    (ifp->if_type == IFT_CELLULAR ||
	    dlil_is_clat_needed(proto_family, m))) {
		retval = dlil_clat46(ifp, &proto_family, &m);
		/*
		 * Go to the next packet if translation fails
		 */
		if (retval != 0) {
			m_freem(m);
			m = NULL;
			ip6stat.ip6s_clat464_out_drop++;
			/* Make sure that the proto family is PF_INET */
			ASSERT(proto_family == PF_INET);
			goto preout_again;
		}
		/*
		 * Free the old one and make it point to the IPv6 proto structure.
		 *
		 * Change proto for the first time we have successfully
		 * performed address family translation.
		 */
		if (!did_clat46 && proto_family == PF_INET6) {
			did_clat46 = TRUE;

			if (proto != NULL) {
				if_proto_free(proto);
			}
			ifnet_lock_shared(ifp);
			/* callee holds a proto refcnt upon success */
			proto = find_attached_proto(ifp, proto_family);
			if (proto == NULL) {
				ifnet_lock_done(ifp);
				retval = ENXIO;
				m_freem(m);
				m = NULL;
				goto cleanup;
			}
			ifnet_lock_done(ifp);
			if (ifp->if_type == IFT_ETHER) {
				/* Update the dest to translated v6 address */
				dest6.sin6_len = sizeof(struct sockaddr_in6);
				dest6.sin6_family = AF_INET6;
				dest6.sin6_addr = (mtod(m, struct ip6_hdr *))->ip6_dst;
				dest = (const struct sockaddr *)&dest6;

				/*
				 * Lookup route to the translated destination
				 * Free this route ref during cleanup
				 */
				rt = rtalloc1_scoped((struct sockaddr *)&dest6,
				    0, 0, ifp->if_index);

				route = rt;
			}
		}
	}

	/*
	 * This path gets packet chain going to the same destination.
	 * The pre output routine is used to either trigger resolution of
	 * the next hop or retreive the next hop's link layer addressing.
	 * For ex: ether_inet(6)_pre_output routine.
	 *
	 * If the routine returns EJUSTRETURN, it implies that packet has
	 * been queued, and therefore we have to call preout_again for the
	 * following packet in the chain.
	 *
	 * For errors other than EJUSTRETURN, the current packet is freed
	 * and the rest of the chain (pointed by packetlist is freed as
	 * part of clean up.
	 *
	 * Else if there is no error the retrieved information is used for
	 * all the packets in the chain.
	 */
	if (raw == 0) {
		proto_media_preout preoutp = (proto->proto_kpi == kProtoKPI_v1 ?
		    proto->kpi.v1.pre_output : proto->kpi.v2.pre_output);
		retval = 0;
		if (preoutp != NULL) {
			retval = preoutp(ifp, proto_family, &m, dest, route,
			    frame_type, dst_linkaddr);

			if (retval != 0) {
				if (retval == EJUSTRETURN) {
					goto preout_again;
				}
				m_freem(m);
				m = NULL;
				goto cleanup;
			}
		}
	}

	do {
		/*
		 * pkt_hdr is set here to point to m_data prior to
		 * calling into the framer. This value of pkt_hdr is
		 * used by the netif gso logic to retrieve the ip header
		 * for the TCP packets, offloaded for TSO processing.
		 */
		if ((raw != 0) && (ifp->if_family == IFNET_FAMILY_ETHERNET)) {
			uint8_t vlan_encap_len = 0;

			if ((old_proto_family == PF_VLAN) &&
			    ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) == 0)) {
				vlan_encap_len = ETHER_VLAN_ENCAP_LEN;
			}
			m->m_pkthdr.pkt_hdr = mtod(m, char *) + ETHER_HDR_LEN + vlan_encap_len;
		} else {
			m->m_pkthdr.pkt_hdr = mtod(m, void *);
		}

		/*
		 * Perform address family translation if needed.
		 * For now we only support stateless 4 to 6 translation
		 * on the out path.
		 *
		 * The routine below translates IP header, updates protocol
		 * checksum and also translates ICMP.
		 *
		 * We skip the first packet as it is already translated and
		 * the proto family is set to PF_INET6.
		 */
		if (proto_family == PF_INET && IS_INTF_CLAT46(ifp) &&
		    (ifp->if_type == IFT_CELLULAR ||
		    dlil_is_clat_needed(proto_family, m))) {
			retval = dlil_clat46(ifp, &proto_family, &m);
			/* Goto the next packet if the translation fails */
			if (retval != 0) {
				m_freem(m);
				m = NULL;
				ip6stat.ip6s_clat464_out_drop++;
				goto next;
			}
		}

#if CONFIG_DTRACE
		if (!raw && proto_family == PF_INET) {
			struct ip *ip = mtod(m, struct ip *);
			DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL,
			    struct ip *, ip, struct ifnet *, ifp,
			    struct ip *, ip, struct ip6_hdr *, NULL);
		} else if (!raw && proto_family == PF_INET6) {
			struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
			DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL,
			    struct ip6_hdr *, ip6, struct ifnet *, ifp,
			    struct ip *, NULL, struct ip6_hdr *, ip6);
		}
#endif /* CONFIG_DTRACE */

		if (raw == 0 && ifp->if_framer != NULL) {
			int rcvif_set = 0;

			/*
			 * If this is a broadcast packet that needs to be
			 * looped back into the system, set the inbound ifp
			 * to that of the outbound ifp. This will allow
			 * us to determine that it is a legitimate packet
			 * for the system. Only set the ifp if it's not
			 * already set, just to be safe.
			 */
			if ((m->m_flags & (M_BCAST | M_LOOP)) &&
			    m->m_pkthdr.rcvif == NULL) {
				m->m_pkthdr.rcvif = ifp;
				rcvif_set = 1;
			}
			m_loop_set = m->m_flags & M_LOOP;
			retval = ifp->if_framer(ifp, &m, dest, dst_linkaddr,
			    frame_type, &pre, &post);
			if (retval != 0) {
				if (retval != EJUSTRETURN) {
					m_freem(m);
				}
				goto next;
			}

			/*
			 * For partial checksum offload, adjust the start
			 * and stuff offsets based on the prepended header.
			 */
			if ((m->m_pkthdr.csum_flags &
			    (CSUM_DATA_VALID | CSUM_PARTIAL)) ==
			    (CSUM_DATA_VALID | CSUM_PARTIAL)) {
				m->m_pkthdr.csum_tx_stuff += pre;
				m->m_pkthdr.csum_tx_start += pre;
			}

			if (hwcksum_dbg != 0 && !(ifp->if_flags & IFF_LOOPBACK)) {
				dlil_output_cksum_dbg(ifp, m, pre,
				    proto_family);
			}

			/*
			 * Clear the ifp if it was set above, and to be
			 * safe, only if it is still the same as the
			 * outbound ifp we have in context. If it was
			 * looped back, then a copy of it was sent to the
			 * loopback interface with the rcvif set, and we
			 * are clearing the one that will go down to the
			 * layer below.
			 */
			if (rcvif_set && m->m_pkthdr.rcvif == ifp) {
				m->m_pkthdr.rcvif = NULL;
			}
		}

		/*
		 * Let interface filters (if any) do their thing ...
		 */
		retval = dlil_interface_filters_output(ifp, &m, proto_family);
		if (retval != 0) {
			if (retval != EJUSTRETURN) {
				m_freem(m);
			}
			goto next;
		}
		/*
		 * Strip away M_PROTO1 bit prior to sending packet
		 * to the driver as this field may be used by the driver
		 */
		m->m_flags &= ~M_PROTO1;

		/*
		 * If the underlying interface is not capable of handling a
		 * packet whose data portion spans across physically disjoint
		 * pages, we need to "normalize" the packet so that we pass
		 * down a chain of mbufs where each mbuf points to a span that
		 * resides in the system page boundary. If the packet does
		 * not cross page(s), the following is a no-op.
		 */
		if (!(ifp->if_hwassist & IFNET_MULTIPAGES)) {
			if ((m = m_normalize(m)) == NULL) {
				goto next;
			}
		}

		/*
		 * If this is a TSO packet, make sure the interface still
		 * advertise TSO capability.
		 */
		if (TSO_IPV4_NOTOK(ifp, m) || TSO_IPV6_NOTOK(ifp, m)) {
			retval = EMSGSIZE;
			m_freem(m);
			goto cleanup;
		}

		ifp_inc_traffic_class_out(ifp, m);

		pktap_output(ifp, proto_family, m, pre, post);

		/*
		 * Count the number of elements in the mbuf chain
		 */
		if (tx_chain_len_count) {
			dlil_count_chain_len(m, &tx_chain_len_stats);
		}

		/*
		 * Record timestamp; ifnet_enqueue() will use this info
		 * rather than redoing the work.  An optimization could
		 * involve doing this just once at the top, if there are
		 * no interface filters attached, but that's probably
		 * not a big deal.
		 */
		nanouptime(&now);
		net_timernsec(&now, &now_nsec);
		(void) mbuf_set_timestamp(m, now_nsec, TRUE);

		/*
		 * Discard partial sum information if this packet originated
		 * from another interface; the packet would already have the
		 * final checksum and we shouldn't recompute it.
		 */
		if ((m->m_pkthdr.pkt_flags & PKTF_FORWARDED) &&
		    (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PARTIAL)) ==
		    (CSUM_DATA_VALID | CSUM_PARTIAL)) {
			m->m_pkthdr.csum_flags &= ~CSUM_TX_FLAGS;
			m->m_pkthdr.csum_data = 0;
		}

		/*
		 * Finally, call the driver.
		 */
		if (ifp->if_eflags & (IFEF_SENDLIST | IFEF_ENQUEUE_MULTI)) {
			if (m->m_pkthdr.pkt_flags & PKTF_FORWARDED) {
				flen += (m_pktlen(m) - (pre + post));
				m->m_pkthdr.pkt_flags &= ~PKTF_FORWARDED;
			}
			*send_tail = m;
			send_tail = &m->m_nextpkt;
		} else {
			if (m->m_pkthdr.pkt_flags & PKTF_FORWARDED) {
				flen = (m_pktlen(m) - (pre + post));
				m->m_pkthdr.pkt_flags &= ~PKTF_FORWARDED;
			} else {
				flen = 0;
			}
			KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START,
			    0, 0, 0, 0, 0);
			retval = (*ifp->if_output_dlil)(ifp, m);
			if (retval == EQFULL || retval == EQSUSPENDED) {
				if (adv != NULL && adv->code == FADV_SUCCESS) {
					adv->code = (retval == EQFULL ?
					    FADV_FLOW_CONTROLLED :
					    FADV_SUSPENDED);
				}
				retval = 0;
			}
			if (retval == 0 && flen > 0) {
				fbytes += flen;
				fpkts++;
			}
			if (retval != 0 && dlil_verbose) {
				DLIL_PRINTF("%s: output error on %s retval = %d\n",
				    __func__, if_name(ifp),
				    retval);
			}
			KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END,
			    0, 0, 0, 0, 0);
		}
		KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0, 0, 0, 0, 0);

next:
		m = packetlist;
		if (m != NULL) {
			m->m_flags |= m_loop_set;
			packetlist = packetlist->m_nextpkt;
			m->m_nextpkt = NULL;
		}
		/* Reset the proto family to old proto family for CLAT */
		if (did_clat46) {
			proto_family = old_proto_family;
		}
	} while (m != NULL);

	if (send_head != NULL) {
		KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START,
		    0, 0, 0, 0, 0);
		if (ifp->if_eflags & IFEF_SENDLIST) {
			retval = (*ifp->if_output_dlil)(ifp, send_head);
			if (retval == EQFULL || retval == EQSUSPENDED) {
				if (adv != NULL) {
					adv->code = (retval == EQFULL ?
					    FADV_FLOW_CONTROLLED :
					    FADV_SUSPENDED);
				}
				retval = 0;
			}
			if (retval == 0 && flen > 0) {
				fbytes += flen;
				fpkts++;
			}
			if (retval != 0 && dlil_verbose) {
				DLIL_PRINTF("%s: output error on %s retval = %d\n",
				    __func__, if_name(ifp), retval);
			}
		} else {
			struct mbuf *send_m;
			int enq_cnt = 0;

			VERIFY(ifp->if_eflags & IFEF_ENQUEUE_MULTI);
			while (send_head != NULL) {
				send_m = send_head;
				send_head = send_m->m_nextpkt;
				send_m->m_nextpkt = NULL;
				retval = (*ifp->if_output_dlil)(ifp, send_m);
				if (retval == EQFULL || retval == EQSUSPENDED) {
					if (adv != NULL) {
						adv->code = (retval == EQFULL ?
						    FADV_FLOW_CONTROLLED :
						    FADV_SUSPENDED);
					}
					retval = 0;
				}
				if (retval == 0) {
					enq_cnt++;
					if (flen > 0) {
						fpkts++;
					}
				}
				if (retval != 0 && dlil_verbose) {
					DLIL_PRINTF("%s: output error on %s "
					    "retval = %d\n",
					    __func__, if_name(ifp), retval);
				}
			}
			if (enq_cnt > 0) {
				fbytes += flen;
				ifnet_start(ifp);
			}
		}
		KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
	}

	KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_END, 0, 0, 0, 0, 0);

cleanup:
	if (fbytes > 0) {
		ifp->if_fbytes += fbytes;
	}
	if (fpkts > 0) {
		ifp->if_fpackets += fpkts;
	}
	if (proto != NULL) {
		if_proto_free(proto);
	}
	if (packetlist) { /* if any packets are left, clean up */
		mbuf_freem_list(packetlist);
	}
	if (retval == EJUSTRETURN) {
		retval = 0;
	}
	if (iorefcnt == 1) {
		ifnet_datamov_end(ifp);
	}
	if (rt != NULL) {
		rtfree(rt);
		rt = NULL;
	}

	return retval;
}
/*
 * This routine checks if the destination address is not a loopback, link-local,
 * multicast or broadcast address.
 */
static boolean_t
dlil_is_clat_needed(protocol_family_t proto_family, mbuf_t m)
{
	boolean_t ret = FALSE;

	switch (proto_family) {
	case PF_INET: {
		struct ip *iph = mtod(m, struct ip *);
		if (CLAT46_NEEDED(ntohl(iph->ip_dst.s_addr))) {
			ret = TRUE;
		}
		break;
	}
	case PF_INET6: {
		struct ip6_hdr *ip6h = mtod(m, struct ip6_hdr *);
		if ((size_t)m_pktlen(m) >= sizeof(struct ip6_hdr) &&
		    CLAT64_NEEDED(&ip6h->ip6_dst)) {
			ret = TRUE;
		}
		break;
	}
	}

	return ret;
}
/*
 * @brief This routine translates IPv4 packet to IPv6 packet,
 *     updates protocol checksum and also translates ICMP for code
 *     along with inner header translation.
 *
 * @param ifp Pointer to the interface
 * @param proto_family pointer to protocol family. It is updated if function
 *     performs the translation successfully.
 * @param m Pointer to the pointer pointing to the packet. Needed because this
 *     routine can end up changing the mbuf to a different one.
 *
 * @return 0 on success or else a negative value.
 */
static errno_t
dlil_clat46(ifnet_t ifp, protocol_family_t *proto_family, mbuf_t *m)
{
	VERIFY(*proto_family == PF_INET);
	VERIFY(IS_INTF_CLAT46(ifp));

	pbuf_t pbuf_store, *pbuf = NULL;
	struct ip *iph = NULL;
	struct in_addr osrc, odst;
	uint8_t proto = 0;
	struct in6_ifaddr *ia6_clat_src = NULL;
	struct in6_addr *src = NULL;
	struct in6_addr dst;
	int error = 0;
	uint16_t off = 0;
	uint16_t tot_len = 0;
	uint16_t ip_id_val = 0;
	uint16_t ip_frag_off = 0;

	boolean_t is_frag = FALSE;
	boolean_t is_first_frag = TRUE;
	boolean_t is_last_frag = TRUE;

	pbuf_init_mbuf(&pbuf_store, *m, ifp);
	pbuf = &pbuf_store;
	iph = pbuf->pb_data;

	osrc = iph->ip_src;
	odst = iph->ip_dst;
	proto = iph->ip_p;
	off = (uint16_t)(iph->ip_hl << 2);
	ip_id_val = iph->ip_id;
	ip_frag_off = ntohs(iph->ip_off) & IP_OFFMASK;

	tot_len = ntohs(iph->ip_len);

	/*
	 * For packets that are not first frags
	 * we only need to adjust CSUM.
	 * For 4 to 6, Fragmentation header gets appended
	 * after proto translation.
	 */
	if (ntohs(iph->ip_off) & ~(IP_DF | IP_RF)) {
		is_frag = TRUE;

		/* If the offset is not zero, it is not first frag */
		if (ip_frag_off != 0) {
			is_first_frag = FALSE;
		}

		/* If IP_MF is set, then it is not last frag */
		if (ntohs(iph->ip_off) & IP_MF) {
			is_last_frag = FALSE;
		}
	}

	/*
	 * Retrive the local IPv6 CLAT46 address reserved for stateless
	 * translation.
	 */
	ia6_clat_src = in6ifa_ifpwithflag(ifp, IN6_IFF_CLAT46);
	if (ia6_clat_src == NULL) {
		ip6stat.ip6s_clat464_out_nov6addr_drop++;
		error = -1;
		goto cleanup;
	}

	src = &ia6_clat_src->ia_addr.sin6_addr;

	/*
	 * Translate IPv4 destination to IPv6 destination by using the
	 * prefixes learned through prior PLAT discovery.
	 */
	if ((error = nat464_synthesize_ipv6(ifp, &odst, &dst)) != 0) {
		ip6stat.ip6s_clat464_out_v6synthfail_drop++;
		goto cleanup;
	}

	/* Translate the IP header part first */
	error = (nat464_translate_46(pbuf, off, iph->ip_tos, iph->ip_p,
	    iph->ip_ttl, *src, dst, tot_len) == NT_NAT64) ? 0 : -1;

	iph = NULL; /* Invalidate iph as pbuf has been modified */

	if (error != 0) {
		ip6stat.ip6s_clat464_out_46transfail_drop++;
		goto cleanup;
	}

	/*
	 * Translate protocol header, update checksum, checksum flags
	 * and related fields.
	 */
	error = (nat464_translate_proto(pbuf, (struct nat464_addr *)&osrc, (struct nat464_addr *)&odst,
	    proto, PF_INET, PF_INET6, NT_OUT, !is_first_frag) == NT_NAT64) ? 0 : -1;

	if (error != 0) {
		ip6stat.ip6s_clat464_out_46proto_transfail_drop++;
		goto cleanup;
	}

	/* Now insert the IPv6 fragment header */
	if (is_frag) {
		error = nat464_insert_frag46(pbuf, ip_id_val, ip_frag_off, is_last_frag);

		if (error != 0) {
			ip6stat.ip6s_clat464_out_46frag_transfail_drop++;
			goto cleanup;
		}
	}

cleanup:
	if (ia6_clat_src != NULL) {
		IFA_REMREF(&ia6_clat_src->ia_ifa);
	}

	if (pbuf_is_valid(pbuf)) {
		*m = pbuf->pb_mbuf;
		pbuf->pb_mbuf = NULL;
		pbuf_destroy(pbuf);
	} else {
		error = -1;
		ip6stat.ip6s_clat464_out_invalpbuf_drop++;
	}

	if (error == 0) {
		*proto_family = PF_INET6;
		ip6stat.ip6s_clat464_out_success++;
	}

	return error;
}
/*
 * @brief This routine translates incoming IPv6 to IPv4 packet,
 *     updates protocol checksum and also translates ICMPv6 outer
 *     and inner headers.
 *
 * @return 0 on success or else a negative value.
 */
static errno_t
dlil_clat64(ifnet_t ifp, protocol_family_t *proto_family, mbuf_t *m)
{
	VERIFY(*proto_family == PF_INET6);
	VERIFY(IS_INTF_CLAT46(ifp));

	struct ip6_hdr *ip6h = NULL;
	struct in6_addr osrc, odst;
	uint8_t proto = 0;
	struct in6_ifaddr *ia6_clat_dst = NULL;
	struct in_ifaddr *ia4_clat_dst = NULL;
	struct in_addr *dst = NULL;
	struct in_addr src;
	int error = 0;
	uint32_t off = 0;
	u_int64_t tot_len = 0;
	uint8_t tos = 0;
	boolean_t is_first_frag = TRUE;

	/* Incoming mbuf does not contain valid IP6 header */
	if ((size_t)(*m)->m_pkthdr.len < sizeof(struct ip6_hdr) ||
	    ((size_t)(*m)->m_len < sizeof(struct ip6_hdr) &&
	    (*m = m_pullup(*m, sizeof(struct ip6_hdr))) == NULL)) {
		ip6stat.ip6s_clat464_in_tooshort_drop++;
		return -1;
	}

	ip6h = mtod(*m, struct ip6_hdr *);
	/* Validate that mbuf contains IP payload equal to ip6_plen  */
	if ((size_t)(*m)->m_pkthdr.len < ntohs(ip6h->ip6_plen) + sizeof(struct ip6_hdr)) {
		ip6stat.ip6s_clat464_in_tooshort_drop++;
		return -1;
	}

	osrc = ip6h->ip6_src;
	odst = ip6h->ip6_dst;

	/*
	 * Retrieve the local CLAT46 reserved IPv6 address.
	 * Let the packet pass if we don't find one, as the flag
	 * may get set before IPv6 configuration has taken place.
	 */
	ia6_clat_dst = in6ifa_ifpwithflag(ifp, IN6_IFF_CLAT46);
	if (ia6_clat_dst == NULL) {
		goto done;
	}

	/*
	 * Check if the original dest in the packet is same as the reserved
	 * CLAT46 IPv6 address
	 */
	if (IN6_ARE_ADDR_EQUAL(&odst, &ia6_clat_dst->ia_addr.sin6_addr)) {
		pbuf_t pbuf_store, *pbuf = NULL;
		pbuf_init_mbuf(&pbuf_store, *m, ifp);
		pbuf = &pbuf_store;

		/*
		 * Retrive the local CLAT46 IPv4 address reserved for stateless
		 * translation.
		 */
		ia4_clat_dst = inifa_ifpclatv4(ifp);
		if (ia4_clat_dst == NULL) {
			IFA_REMREF(&ia6_clat_dst->ia_ifa);
			ip6stat.ip6s_clat464_in_nov4addr_drop++;
			error = -1;
			goto cleanup;
		}
		IFA_REMREF(&ia6_clat_dst->ia_ifa);

		/* Translate IPv6 src to IPv4 src by removing the NAT64 prefix */
		dst = &ia4_clat_dst->ia_addr.sin_addr;
		if ((error = nat464_synthesize_ipv4(ifp, &osrc, &src)) != 0) {
			ip6stat.ip6s_clat464_in_v4synthfail_drop++;
			error = -1;
			goto cleanup;
		}

		ip6h = pbuf->pb_data;
		off = sizeof(struct ip6_hdr);
		proto = ip6h->ip6_nxt;
		tos = (ntohl(ip6h->ip6_flow) >> 20) & 0xff;
		tot_len = ntohs(ip6h->ip6_plen) + sizeof(struct ip6_hdr);

		/*
		 * Translate the IP header and update the fragmentation
		 * header if needed
		 */
		error = (nat464_translate_64(pbuf, off, tos, &proto,
		    ip6h->ip6_hlim, src, *dst, tot_len, &is_first_frag) == NT_NAT64) ?
		    0 : -1;

		ip6h = NULL; /* Invalidate ip6h as pbuf has been changed */

		if (error != 0) {
			ip6stat.ip6s_clat464_in_64transfail_drop++;
			goto cleanup;
		}

		/*
		 * Translate protocol header, update checksum, checksum flags
		 * and related fields.
		 */
		error = (nat464_translate_proto(pbuf, (struct nat464_addr *)&osrc,
		    (struct nat464_addr *)&odst, proto, PF_INET6, PF_INET,
		    NT_IN, !is_first_frag) == NT_NAT64) ? 0 : -1;

		if (error != 0) {
			ip6stat.ip6s_clat464_in_64proto_transfail_drop++;
			goto cleanup;
		}

cleanup:
		if (ia4_clat_dst != NULL) {
			IFA_REMREF(&ia4_clat_dst->ia_ifa);
		}

		if (pbuf_is_valid(pbuf)) {
			*m = pbuf->pb_mbuf;
			pbuf->pb_mbuf = NULL;
			pbuf_destroy(pbuf);
		} else {
			error = -1;
			ip6stat.ip6s_clat464_in_invalpbuf_drop++;
		}

		if (error == 0) {
			*proto_family = PF_INET;
			ip6stat.ip6s_clat464_in_success++;
		}
	} /* CLAT traffic */

done:
	return error;
}
errno_t
ifnet_ioctl(ifnet_t ifp, protocol_family_t proto_fam, u_long ioctl_code,
    void *ioctl_arg)
{
	struct ifnet_filter *filter;
	int retval = EOPNOTSUPP;
	int result = 0;

	if (ifp == NULL || ioctl_code == 0) {
		return EINVAL;
	}

	/* Get an io ref count if the interface is attached */
	if (!ifnet_is_attached(ifp, 1)) {
		return EOPNOTSUPP;
	}

	/*
	 * Run the interface filters first.
	 * We want to run all filters before calling the protocol,
	 * interface family, or interface.
	 */
	lck_mtx_lock_spin(&ifp->if_flt_lock);
	/* prevent filter list from changing in case we drop the lock */
	if_flt_monitor_busy(ifp);
	TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
		if (filter->filt_ioctl != NULL && (filter->filt_protocol == 0 ||
		    filter->filt_protocol == proto_fam)) {
			lck_mtx_unlock(&ifp->if_flt_lock);

			result = filter->filt_ioctl(filter->filt_cookie, ifp,
			    proto_fam, ioctl_code, ioctl_arg);

			lck_mtx_lock_spin(&ifp->if_flt_lock);

			/* Only update retval if no one has handled the ioctl */
			if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
				if (result == ENOTSUP) {
					result = EOPNOTSUPP;
				}
				retval = result;
				if (retval != 0 && retval != EOPNOTSUPP) {
					/* we're done with the filter list */
					if_flt_monitor_unbusy(ifp);
					lck_mtx_unlock(&ifp->if_flt_lock);
					goto cleanup;
				}
			}
		}
	}
	/* we're done with the filter list */
	if_flt_monitor_unbusy(ifp);
	lck_mtx_unlock(&ifp->if_flt_lock);

	/* Allow the protocol to handle the ioctl */
	if (proto_fam != 0) {
		struct if_proto *proto;

		/* callee holds a proto refcnt upon success */
		ifnet_lock_shared(ifp);
		proto = find_attached_proto(ifp, proto_fam);
		ifnet_lock_done(ifp);
		if (proto != NULL) {
			proto_media_ioctl ioctlp =
			    (proto->proto_kpi == kProtoKPI_v1 ?
			    proto->kpi.v1.ioctl : proto->kpi.v2.ioctl);
			result = EOPNOTSUPP;
			if (ioctlp != NULL) {
				result = ioctlp(ifp, proto_fam, ioctl_code,
				    ioctl_arg);
			}
			if_proto_free(proto);

			/* Only update retval if no one has handled the ioctl */
			if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
				if (result == ENOTSUP) {
					result = EOPNOTSUPP;
				}
				retval = result;
				if (retval && retval != EOPNOTSUPP) {
					goto cleanup;
				}
			}
		}
	}

	/* retval is either 0 or EOPNOTSUPP */

	/*
	 * Let the interface handle this ioctl.
	 * If it returns EOPNOTSUPP, ignore that, we may have
	 * already handled this in the protocol or family.
	 */
	if (ifp->if_ioctl) {
		result = (*ifp->if_ioctl)(ifp, ioctl_code, ioctl_arg);
	}

	/* Only update retval if no one has handled the ioctl */
	if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
		if (result == ENOTSUP) {
			result = EOPNOTSUPP;
		}
		retval = result;
		if (retval && retval != EOPNOTSUPP) {
			goto cleanup;
		}
	}

cleanup:
	if (retval == EJUSTRETURN) {
		retval = 0;
	}

	ifnet_decr_iorefcnt(ifp);

	return retval;
}
__private_extern__ errno_t
dlil_set_bpf_tap(ifnet_t ifp, bpf_tap_mode mode, bpf_packet_func callback)
{
	errno_t error = 0;

	if (ifp->if_set_bpf_tap) {
		/* Get an io reference on the interface if it is attached */
		if (!ifnet_is_attached(ifp, 1)) {
			return ENXIO;
		}
		error = ifp->if_set_bpf_tap(ifp, mode, callback);
		ifnet_decr_iorefcnt(ifp);
	}
	return error;
}
errno_t
dlil_resolve_multi(struct ifnet *ifp, const struct sockaddr *proto_addr,
    struct sockaddr *ll_addr, size_t ll_len)
{
	errno_t result = EOPNOTSUPP;
	struct if_proto *proto;
	const struct sockaddr *verify;
	proto_media_resolve_multi resolvep;

	if (!ifnet_is_attached(ifp, 1)) {
		return result;
	}

	bzero(ll_addr, ll_len);

	/* Call the protocol first; callee holds a proto refcnt upon success */
	ifnet_lock_shared(ifp);
	proto = find_attached_proto(ifp, proto_addr->sa_family);
	ifnet_lock_done(ifp);
	if (proto != NULL) {
		resolvep = (proto->proto_kpi == kProtoKPI_v1 ?
		    proto->kpi.v1.resolve_multi : proto->kpi.v2.resolve_multi);
		if (resolvep != NULL) {
			result = resolvep(ifp, proto_addr,
			    (struct sockaddr_dl *)(void *)ll_addr, ll_len);
		}
		if_proto_free(proto);
	}

	/* Let the interface verify the multicast address */
	if ((result == EOPNOTSUPP || result == 0) && ifp->if_check_multi) {
		if (result == 0) {
			verify = ll_addr;
		} else {
			verify = proto_addr;
		}
		result = ifp->if_check_multi(ifp, verify);
	}

	ifnet_decr_iorefcnt(ifp);

	return result;
}
__private_extern__ errno_t
dlil_send_arp_internal(ifnet_t ifp, u_short arpop,
    const struct sockaddr_dl *sender_hw, const struct sockaddr *sender_proto,
    const struct sockaddr_dl *target_hw, const struct sockaddr *target_proto)
{
	struct if_proto *proto;
	errno_t result = 0;

	/* callee holds a proto refcnt upon success */
	ifnet_lock_shared(ifp);
	proto = find_attached_proto(ifp, target_proto->sa_family);
	ifnet_lock_done(ifp);
	if (proto == NULL) {
		result = ENOTSUP;
	} else {
		proto_media_send_arp arpp;
		arpp = (proto->proto_kpi == kProtoKPI_v1 ?
		    proto->kpi.v1.send_arp : proto->kpi.v2.send_arp);
		if (arpp == NULL) {
			result = ENOTSUP;
		} else {
			switch (arpop) {
			case ARPOP_REQUEST:
				arpstat.txrequests++;
				if (target_hw != NULL) {
					arpstat.txurequests++;
				}
				break;
			case ARPOP_REPLY:
				arpstat.txreplies++;
				break;
			}
			result = arpp(ifp, arpop, sender_hw, sender_proto,
			    target_hw, target_proto);
		}
		if_proto_free(proto);
	}

	return result;
}
struct net_thread_marks { };
static const struct net_thread_marks net_thread_marks_base = { };

__private_extern__ const net_thread_marks_t net_thread_marks_none =
    &net_thread_marks_base;

__private_extern__ net_thread_marks_t
net_thread_marks_push(u_int32_t push)
{
	static const char *const base = (const void*)&net_thread_marks_base;
	u_int32_t pop = 0;

	if (push != 0) {
		struct uthread *uth = get_bsdthread_info(current_thread());

		pop = push & ~uth->uu_network_marks;
		if (pop != 0) {
			uth->uu_network_marks |= pop;
		}
	}

	return (net_thread_marks_t)&base[pop];
}

__private_extern__ net_thread_marks_t
net_thread_unmarks_push(u_int32_t unpush)
{
	static const char *const base = (const void*)&net_thread_marks_base;
	u_int32_t unpop = 0;

	if (unpush != 0) {
		struct uthread *uth = get_bsdthread_info(current_thread());

		unpop = unpush & uth->uu_network_marks;
		if (unpop != 0) {
			uth->uu_network_marks &= ~unpop;
		}
	}

	return (net_thread_marks_t)&base[unpop];
}

__private_extern__ void
net_thread_marks_pop(net_thread_marks_t popx)
{
	static const char *const base = (const void*)&net_thread_marks_base;
	const ptrdiff_t pop = (const char *)popx - (const char *)base;

	if (pop != 0) {
		static const ptrdiff_t ones = (ptrdiff_t)(u_int32_t)~0U;
		struct uthread *uth = get_bsdthread_info(current_thread());

		VERIFY((pop & ones) == pop);
		VERIFY((ptrdiff_t)(uth->uu_network_marks & pop) == pop);
		uth->uu_network_marks &= ~pop;
	}
}

__private_extern__ void
net_thread_unmarks_pop(net_thread_marks_t unpopx)
{
	static const char *const base = (const void*)&net_thread_marks_base;
	ptrdiff_t unpop = (const char *)unpopx - (const char *)base;

	if (unpop != 0) {
		static const ptrdiff_t ones = (ptrdiff_t)(u_int32_t)~0U;
		struct uthread *uth = get_bsdthread_info(current_thread());

		VERIFY((unpop & ones) == unpop);
		VERIFY((ptrdiff_t)(uth->uu_network_marks & unpop) == 0);
		uth->uu_network_marks |= unpop;
	}
}

__private_extern__ u_int32_t
net_thread_is_marked(u_int32_t check)
{
	if (check != 0) {
		struct uthread *uth = get_bsdthread_info(current_thread());
		return uth->uu_network_marks & check;
	} else {
		return 0;
	}
}

__private_extern__ u_int32_t
net_thread_is_unmarked(u_int32_t check)
{
	if (check != 0) {
		struct uthread *uth = get_bsdthread_info(current_thread());
		return ~uth->uu_network_marks & check;
	} else {
		return 0;
	}
}
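
/*
 * Illustrative sketch (not from the original sources): a typical
 * push/pop pairing of the thread-mark helpers above.  The mark bit used
 * here is a hypothetical example value; real callers use the mark flags
 * defined elsewhere in the networking stack.
 */
static __unused void
example_net_thread_marks_usage(void)
{
	const u_int32_t example_mark = 0x1;     /* hypothetical mark bit */
	net_thread_marks_t marks;

	/* Set the bit only if this thread does not already carry it */
	marks = net_thread_marks_push(example_mark);

	/* ... code that observes the mark via net_thread_is_marked() ... */
	VERIFY(net_thread_is_marked(example_mark));

	/* Clear only the bits this particular push actually set */
	net_thread_marks_pop(marks);
}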
static __inline__ int
_is_announcement(const struct sockaddr_in * sender_sin,
    const struct sockaddr_in * target_sin)
{
	if (target_sin == NULL || sender_sin == NULL) {
		return FALSE;
	}

	return sender_sin->sin_addr.s_addr == target_sin->sin_addr.s_addr;
}
__private_extern__ errno_t
dlil_send_arp(ifnet_t ifp, u_short arpop, const struct sockaddr_dl *sender_hw,
    const struct sockaddr *sender_proto, const struct sockaddr_dl *target_hw,
    const struct sockaddr *target_proto0, u_int32_t rtflags)
{
	errno_t result = 0;
	const struct sockaddr_in * sender_sin;
	const struct sockaddr_in * target_sin;
	struct sockaddr_inarp target_proto_sinarp;
	struct sockaddr *target_proto = (void *)(uintptr_t)target_proto0;

	if (target_proto == NULL || sender_proto == NULL) {
		return EINVAL;
	}

	if (sender_proto->sa_family != target_proto->sa_family) {
		return EINVAL;
	}

	/*
	 * If the target is a (default) router, provide that
	 * information to the send_arp callback routine.
	 */
	if (rtflags & RTF_ROUTER) {
		bcopy(target_proto, &target_proto_sinarp,
		    sizeof(struct sockaddr_in));
		target_proto_sinarp.sin_other |= SIN_ROUTER;
		target_proto = (struct sockaddr *)&target_proto_sinarp;
	}

	/*
	 * If this is an ARP request and the target IP is IPv4LL,
	 * send the request on all interfaces.  The exception is
	 * an announcement, which must only appear on the specific
	 * interface.
	 */
	sender_sin = (struct sockaddr_in *)(void *)(uintptr_t)sender_proto;
	target_sin = (struct sockaddr_in *)(void *)(uintptr_t)target_proto;
	if (target_proto->sa_family == AF_INET &&
	    IN_LINKLOCAL(ntohl(target_sin->sin_addr.s_addr)) &&
	    ipv4_ll_arp_aware != 0 && arpop == ARPOP_REQUEST &&
	    !_is_announcement(sender_sin, target_sin)) {
		ifnet_t *ifp_list;
		u_int32_t count;
		u_int32_t ifp_on;

		result = ENOTSUP;

		if (ifnet_list_get(IFNET_FAMILY_ANY, &ifp_list, &count) == 0) {
			for (ifp_on = 0; ifp_on < count; ifp_on++) {
				errno_t new_result;
				ifaddr_t source_hw = NULL;
				ifaddr_t source_ip = NULL;
				struct sockaddr_in source_ip_copy;
				struct ifnet *cur_ifp = ifp_list[ifp_on];

				/*
				 * Only arp on interfaces marked for IPv4LL
				 * ARPing.  This may mean that we don't ARP on
				 * the interface the subnet route points to.
				 */
				if (!(cur_ifp->if_eflags & IFEF_ARPLL)) {
					continue;
				}

				/* Find the source IP address */
				ifnet_lock_shared(cur_ifp);
				source_hw = cur_ifp->if_lladdr;
				TAILQ_FOREACH(source_ip, &cur_ifp->if_addrhead,
				    ifa_link) {
					IFA_LOCK(source_ip);
					if (source_ip->ifa_addr != NULL &&
					    source_ip->ifa_addr->sa_family ==
					    AF_INET) {
						/* Copy the source IP address */
						source_ip_copy =
						    *(struct sockaddr_in *)
						    (void *)source_ip->ifa_addr;
						IFA_UNLOCK(source_ip);
						break;
					}
					IFA_UNLOCK(source_ip);
				}

				/* No IP Source, don't arp */
				if (source_ip == NULL) {
					ifnet_lock_done(cur_ifp);
					continue;
				}

				IFA_ADDREF(source_hw);
				ifnet_lock_done(cur_ifp);

				/* Send the ARP */
				new_result = dlil_send_arp_internal(cur_ifp,
				    arpop, (struct sockaddr_dl *)(void *)
				    source_hw->ifa_addr,
				    (struct sockaddr *)&source_ip_copy, NULL,
				    target_proto);

				IFA_REMREF(source_hw);
				if (result == ENOTSUP) {
					result = new_result;
				}
			}
			ifnet_list_free(ifp_list);
		}
	} else {
		result = dlil_send_arp_internal(ifp, arpop, sender_hw,
		    sender_proto, target_hw, target_proto);
	}

	return result;
}
/*
 * Caller must hold ifnet head lock.
 */
static int
ifnet_lookup(struct ifnet *ifp)
{
	struct ifnet *_ifp;

	LCK_RW_ASSERT(&ifnet_head_lock, LCK_RW_ASSERT_HELD);
	TAILQ_FOREACH(_ifp, &ifnet_head, if_link) {
		if (_ifp == ifp) {
			break;
		}
	}
	return _ifp != NULL;
}

/*
 * Caller has to pass a non-zero refio argument to get an
 * IO reference count.  This will prevent ifnet_detach from
 * being called when there are outstanding io reference counts.
 */
int
ifnet_is_attached(struct ifnet *ifp, int refio)
{
	int ret;

	lck_mtx_lock_spin(&ifp->if_ref_lock);
	if ((ret = IF_FULLY_ATTACHED(ifp))) {
		if (refio > 0) {
			ifp->if_refio++;
		}
	}
	lck_mtx_unlock(&ifp->if_ref_lock);

	return ret;
}
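
/*
 * Illustrative sketch (not from the original sources): the usual caller
 * pattern for ifnet_is_attached() with a non-zero refio argument.  The
 * reference taken here is what the ifnet_decr_iorefcnt() calls elsewhere
 * in this file pair up with; ifnet_detach_final() waits for all of them
 * to drain before tearing the interface down.
 */
static __unused errno_t
example_with_io_reference(struct ifnet *ifp)
{
	errno_t error = 0;

	/* Take an I/O reference; fails once the interface starts detaching */
	if (!ifnet_is_attached(ifp, 1)) {
		return ENXIO;
	}

	/* ... safely use ifp here while the reference is held ... */

	/* Drop the I/O reference taken above */
	ifnet_decr_iorefcnt(ifp);
	return error;
}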
void
ifnet_incr_pending_thread_count(struct ifnet *ifp)
{
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	ifp->if_threads_pending++;
	lck_mtx_unlock(&ifp->if_ref_lock);
}

void
ifnet_decr_pending_thread_count(struct ifnet *ifp)
{
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	VERIFY(ifp->if_threads_pending > 0);
	ifp->if_threads_pending--;
	if (ifp->if_threads_pending == 0) {
		wakeup(&ifp->if_threads_pending);
	}
	lck_mtx_unlock(&ifp->if_ref_lock);
}
/*
 * Caller must ensure the interface is attached; the assumption is that
 * there is at least an outstanding IO reference count held already.
 * Most callers would call ifnet_is_{attached,data_ready}() instead.
 */
void
ifnet_incr_iorefcnt(struct ifnet *ifp)
{
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	VERIFY(IF_FULLY_ATTACHED(ifp));
	VERIFY(ifp->if_refio > 0);
	ifp->if_refio++;
	lck_mtx_unlock(&ifp->if_ref_lock);
}

__attribute__((always_inline))
static void
ifnet_decr_iorefcnt_locked(struct ifnet *ifp)
{
	LCK_MTX_ASSERT(&ifp->if_ref_lock, LCK_MTX_ASSERT_OWNED);

	VERIFY(ifp->if_refio > 0);
	VERIFY(ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING));

	ifp->if_refio--;
	VERIFY(ifp->if_refio != 0 || ifp->if_datamov == 0);

	/*
	 * if there are no more outstanding io references, wakeup the
	 * ifnet_detach thread if detaching flag is set.
	 */
	if (ifp->if_refio == 0 && (ifp->if_refflags & IFRF_DETACHING)) {
		wakeup(&(ifp->if_refio));
	}
}

void
ifnet_decr_iorefcnt(struct ifnet *ifp)
{
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	ifnet_decr_iorefcnt_locked(ifp);
	lck_mtx_unlock(&ifp->if_ref_lock);
}
boolean_t
ifnet_datamov_begin(struct ifnet *ifp)
{
	boolean_t ret;

	lck_mtx_lock_spin(&ifp->if_ref_lock);
	if ((ret = IF_FULLY_ATTACHED_AND_READY(ifp))) {
		ifp->if_refio++;
		ifp->if_datamov++;
	}
	lck_mtx_unlock(&ifp->if_ref_lock);

	return ret;
}

void
ifnet_datamov_end(struct ifnet *ifp)
{
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	VERIFY(ifp->if_datamov > 0);
	/*
	 * if there's no more thread moving data, wakeup any
	 * drainers that are blocked waiting for this.
	 */
	if (--ifp->if_datamov == 0 && ifp->if_drainers > 0) {
		wakeup(&(ifp->if_datamov));
	}
	ifnet_decr_iorefcnt_locked(ifp);
	lck_mtx_unlock(&ifp->if_ref_lock);
}

void
ifnet_datamov_suspend(struct ifnet *ifp)
{
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	VERIFY(ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING));
	ifp->if_refio++;
	if (ifp->if_suspend++ == 0) {
		VERIFY(ifp->if_refflags & IFRF_READY);
		ifp->if_refflags &= ~IFRF_READY;
	}
	lck_mtx_unlock(&ifp->if_ref_lock);
}

void
ifnet_datamov_drain(struct ifnet *ifp)
{
	lck_mtx_lock(&ifp->if_ref_lock);
	VERIFY(ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING));
	/* data movement must already be suspended */
	VERIFY(ifp->if_suspend > 0);
	VERIFY(!(ifp->if_refflags & IFRF_READY));
	ifp->if_drainers++;
	while (ifp->if_datamov != 0) {
		(void) msleep(&(ifp->if_datamov), &ifp->if_ref_lock,
		    (PZERO - 1), __func__, NULL);
	}
	VERIFY(!(ifp->if_refflags & IFRF_READY));
	VERIFY(ifp->if_drainers > 0);
	ifp->if_drainers--;
	lck_mtx_unlock(&ifp->if_ref_lock);

	/* purge the interface queues */
	if ((ifp->if_eflags & IFEF_TXSTART) != 0) {
		if_qflush(ifp, 0);
	}
}

void
ifnet_datamov_resume(struct ifnet *ifp)
{
	lck_mtx_lock(&ifp->if_ref_lock);
	/* data movement must already be suspended */
	VERIFY(ifp->if_suspend > 0);
	if (--ifp->if_suspend == 0) {
		VERIFY(!(ifp->if_refflags & IFRF_READY));
		ifp->if_refflags |= IFRF_READY;
	}
	ifnet_decr_iorefcnt_locked(ifp);
	lck_mtx_unlock(&ifp->if_ref_lock);
}
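
/*
 * Illustrative sketch (not from the original sources): how a quiesce
 * operation would use the datamov helpers above.  Suspension marks the
 * interface not-ready so no new data movers start, draining waits for
 * the movers already inside ifnet_datamov_begin()/end(), and resume
 * restores IFRF_READY once the last suspender is gone.
 */
static __unused void
example_quiesce_interface(struct ifnet *ifp)
{
	/* Stop new data movement from starting */
	ifnet_datamov_suspend(ifp);

	/* Wait for threads already moving data to finish */
	ifnet_datamov_drain(ifp);

	/* ... reconfigure the interface while it is quiesced ... */

	/* Allow data movement again */
	ifnet_datamov_resume(ifp);
}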
static void
dlil_if_trace(struct dlil_ifnet *dl_if, int refhold)
{
	struct dlil_ifnet_dbg *dl_if_dbg = (struct dlil_ifnet_dbg *)dl_if;
	ctrace_t *tr;
	u_int32_t idx;
	u_int16_t *cnt;

	if (!(dl_if->dl_if_flags & DLIF_DEBUG)) {
		panic("%s: dl_if %p has no debug structure", __func__, dl_if);
		/* NOTREACHED */
	}

	if (refhold) {
		cnt = &dl_if_dbg->dldbg_if_refhold_cnt;
		tr = dl_if_dbg->dldbg_if_refhold;
	} else {
		cnt = &dl_if_dbg->dldbg_if_refrele_cnt;
		tr = dl_if_dbg->dldbg_if_refrele;
	}

	idx = atomic_add_16_ov(cnt, 1) % IF_REF_TRACE_HIST_SIZE;
	ctrace_record(&tr[idx]);
}

errno_t
dlil_if_ref(struct ifnet *ifp)
{
	struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;

	if (dl_if == NULL) {
		return EINVAL;
	}

	lck_mtx_lock_spin(&dl_if->dl_if_lock);
	++dl_if->dl_if_refcnt;
	if (dl_if->dl_if_refcnt == 0) {
		panic("%s: wraparound refcnt for ifp=%p", __func__, ifp);
		/* NOTREACHED */
	}
	if (dl_if->dl_if_trace != NULL) {
		(*dl_if->dl_if_trace)(dl_if, TRUE);
	}
	lck_mtx_unlock(&dl_if->dl_if_lock);

	return 0;
}

errno_t
dlil_if_free(struct ifnet *ifp)
{
	struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
	bool need_release = FALSE;

	if (dl_if == NULL) {
		return EINVAL;
	}

	lck_mtx_lock_spin(&dl_if->dl_if_lock);
	switch (dl_if->dl_if_refcnt) {
	case 0:
		panic("%s: negative refcnt for ifp=%p", __func__, ifp);
		/* NOTREACHED */
		break;
	case 1:
		if ((ifp->if_refflags & IFRF_EMBRYONIC) != 0) {
			need_release = TRUE;
		}
		break;
	default:
		break;
	}
	--dl_if->dl_if_refcnt;
	if (dl_if->dl_if_trace != NULL) {
		(*dl_if->dl_if_trace)(dl_if, FALSE);
	}
	lck_mtx_unlock(&dl_if->dl_if_lock);
	if (need_release) {
		dlil_if_release(ifp);
	}
	return 0;
}
static errno_t
dlil_attach_protocol_internal(struct if_proto *proto,
    const struct ifnet_demux_desc *demux_list, u_int32_t demux_count,
    uint32_t * proto_count)
{
	struct kev_dl_proto_data ev_pr_data;
	struct ifnet *ifp = proto->ifp;
	errno_t retval = 0;
	u_int32_t hash_value = proto_hash_value(proto->protocol_family);
	struct if_proto *prev_proto;
	struct if_proto *_proto;

	/* callee holds a proto refcnt upon success */
	ifnet_lock_exclusive(ifp);
	_proto = find_attached_proto(ifp, proto->protocol_family);
	if (_proto != NULL) {
		ifnet_lock_done(ifp);
		if_proto_free(_proto);
		return EEXIST;
	}

	/*
	 * Call family module add_proto routine so it can refine the
	 * demux descriptors as it wishes.
	 */
	retval = ifp->if_add_proto(ifp, proto->protocol_family, demux_list,
	    demux_count);
	if (retval) {
		ifnet_lock_done(ifp);
		return retval;
	}

	/*
	 * Insert the protocol in the hash
	 */
	prev_proto = SLIST_FIRST(&ifp->if_proto_hash[hash_value]);
	while (prev_proto != NULL && SLIST_NEXT(prev_proto, next_hash) != NULL) {
		prev_proto = SLIST_NEXT(prev_proto, next_hash);
	}
	if (prev_proto) {
		SLIST_INSERT_AFTER(prev_proto, proto, next_hash);
	} else {
		SLIST_INSERT_HEAD(&ifp->if_proto_hash[hash_value],
		    proto, next_hash);
	}

	/* hold a proto refcnt for attach */
	if_proto_ref(proto);

	/*
	 * The reserved field carries the number of protocols still attached
	 * (subject to change)
	 */
	ev_pr_data.proto_family = proto->protocol_family;
	ev_pr_data.proto_remaining_count = dlil_ifp_protolist(ifp, NULL, 0);

	ifnet_lock_done(ifp);

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_ATTACHED,
	    (struct net_event_data *)&ev_pr_data,
	    sizeof(struct kev_dl_proto_data));
	if (proto_count != NULL) {
		*proto_count = ev_pr_data.proto_remaining_count;
	}
	return retval;
}
errno_t
ifnet_attach_protocol(ifnet_t ifp, protocol_family_t protocol,
    const struct ifnet_attach_proto_param *proto_details)
{
	int retval = 0;
	struct if_proto *ifproto = NULL;
	uint32_t proto_count = 0;

	ifnet_head_lock_shared();
	if (ifp == NULL || protocol == 0 || proto_details == NULL) {
		retval = EINVAL;
		goto end;
	}
	/* Check that the interface is in the global list */
	if (!ifnet_lookup(ifp)) {
		retval = ENXIO;
		goto end;
	}

	ifproto = zalloc_flags(dlif_proto_zone, Z_WAITOK | Z_ZERO);
	if (ifproto == NULL) {
		retval = ENOMEM;
		goto end;
	}

	/* refcnt held above during lookup */
	ifproto->ifp = ifp;
	ifproto->protocol_family = protocol;
	ifproto->proto_kpi = kProtoKPI_v1;
	ifproto->kpi.v1.input = proto_details->input;
	ifproto->kpi.v1.pre_output = proto_details->pre_output;
	ifproto->kpi.v1.event = proto_details->event;
	ifproto->kpi.v1.ioctl = proto_details->ioctl;
	ifproto->kpi.v1.detached = proto_details->detached;
	ifproto->kpi.v1.resolve_multi = proto_details->resolve;
	ifproto->kpi.v1.send_arp = proto_details->send_arp;

	retval = dlil_attach_protocol_internal(ifproto,
	    proto_details->demux_list, proto_details->demux_count,
	    &proto_count);

end:
	if (retval != 0 && retval != EEXIST) {
		DLIL_PRINTF("%s: failed to attach v1 protocol %d (err=%d)\n",
		    ifp != NULL ? if_name(ifp) : "N/A", protocol, retval);
	} else {
		if (dlil_verbose) {
			DLIL_PRINTF("%s: attached v1 protocol %d (count = %d)\n",
			    ifp != NULL ? if_name(ifp) : "N/A",
			    protocol, proto_count);
		}
	}
	ifnet_head_done();
	if (retval == 0) {
		/*
		 * A protocol has been attached, mark the interface up.
		 * This used to be done by configd.KernelEventMonitor, but that
		 * is inherently prone to races (rdar://problem/30810208).
		 */
		(void) ifnet_set_flags(ifp, IFF_UP, IFF_UP);
		(void) ifnet_ioctl(ifp, 0, SIOCSIFFLAGS, NULL);
		dlil_post_sifflags_msg(ifp);
	} else if (ifproto != NULL) {
		zfree(dlif_proto_zone, ifproto);
	}
	return retval;
}
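
/*
 * Illustrative sketch (not from the original sources): how a protocol
 * module would fill in ifnet_attach_proto_param before calling
 * ifnet_attach_protocol() above.  The callback parameters and the
 * ethertype used for the demux descriptor are hypothetical; a real
 * client supplies its own handlers.
 */
static __unused errno_t
example_attach_proto(ifnet_t ifp, protocol_family_t protocol_family,
    proto_media_input input_fn, proto_media_preout pre_output_fn)
{
	struct ifnet_attach_proto_param proto;
	struct ifnet_demux_desc demux;
	u_int16_t etype = htons(0x88B5);        /* hypothetical ethertype */

	bzero(&demux, sizeof(demux));
	demux.type = DLIL_DESC_ETYPE2;          /* match on a 2-byte ethertype */
	demux.data = &etype;
	demux.datalen = sizeof(etype);

	bzero(&proto, sizeof(proto));
	proto.demux_list = &demux;
	proto.demux_count = 1;
	proto.input = input_fn;                 /* required */
	proto.pre_output = pre_output_fn;       /* required */

	return ifnet_attach_protocol(ifp, protocol_family, &proto);
}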
errno_t
ifnet_attach_protocol_v2(ifnet_t ifp, protocol_family_t protocol,
    const struct ifnet_attach_proto_param_v2 *proto_details)
{
	int retval = 0;
	struct if_proto *ifproto = NULL;
	uint32_t proto_count = 0;

	ifnet_head_lock_shared();
	if (ifp == NULL || protocol == 0 || proto_details == NULL) {
		retval = EINVAL;
		goto end;
	}
	/* Check that the interface is in the global list */
	if (!ifnet_lookup(ifp)) {
		retval = ENXIO;
		goto end;
	}

	ifproto = zalloc(dlif_proto_zone);
	if (ifproto == NULL) {
		retval = ENOMEM;
		goto end;
	}
	bzero(ifproto, sizeof(*ifproto));

	/* refcnt held above during lookup */
	ifproto->ifp = ifp;
	ifproto->protocol_family = protocol;
	ifproto->proto_kpi = kProtoKPI_v2;
	ifproto->kpi.v2.input = proto_details->input;
	ifproto->kpi.v2.pre_output = proto_details->pre_output;
	ifproto->kpi.v2.event = proto_details->event;
	ifproto->kpi.v2.ioctl = proto_details->ioctl;
	ifproto->kpi.v2.detached = proto_details->detached;
	ifproto->kpi.v2.resolve_multi = proto_details->resolve;
	ifproto->kpi.v2.send_arp = proto_details->send_arp;

	retval = dlil_attach_protocol_internal(ifproto,
	    proto_details->demux_list, proto_details->demux_count,
	    &proto_count);

end:
	if (retval != 0 && retval != EEXIST) {
		DLIL_PRINTF("%s: failed to attach v2 protocol %d (err=%d)\n",
		    ifp != NULL ? if_name(ifp) : "N/A", protocol, retval);
	} else {
		if (dlil_verbose) {
			DLIL_PRINTF("%s: attached v2 protocol %d (count = %d)\n",
			    ifp != NULL ? if_name(ifp) : "N/A",
			    protocol, proto_count);
		}
	}
	ifnet_head_done();
	if (retval == 0) {
		/*
		 * A protocol has been attached, mark the interface up.
		 * This used to be done by configd.KernelEventMonitor, but that
		 * is inherently prone to races (rdar://problem/30810208).
		 */
		(void) ifnet_set_flags(ifp, IFF_UP, IFF_UP);
		(void) ifnet_ioctl(ifp, 0, SIOCSIFFLAGS, NULL);
		dlil_post_sifflags_msg(ifp);
	} else if (ifproto != NULL) {
		zfree(dlif_proto_zone, ifproto);
	}
	return retval;
}
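
/*
 * Illustrative sketch (not from the original sources): a minimal
 * v2-style input handler.  Unlike the v1 variant (one packet plus a
 * separate frame-header pointer), the v2 callback may be handed a chain
 * of packets and is responsible for the whole list.  The function name
 * is hypothetical; this sketch simply walks the chain and drops it.
 */
static __unused errno_t
example_proto_input_v2(ifnet_t ifp, protocol_family_t protocol,
    mbuf_t packet_list)
{
#pragma unused(ifp, protocol)
	struct mbuf *m;

	while ((m = packet_list) != NULL) {
		/* Detach the head packet from the chain before using it */
		packet_list = m->m_nextpkt;
		m->m_nextpkt = NULL;
		/* ... hand the packet to the protocol; here we just drop it ... */
		m_freem(m);
	}
	return 0;
}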
errno_t
ifnet_detach_protocol(ifnet_t ifp, protocol_family_t proto_family)
{
	struct if_proto *proto = NULL;
	int retval = 0;

	if (ifp == NULL || proto_family == 0) {
		retval = EINVAL;
		goto end;
	}

	ifnet_lock_exclusive(ifp);
	/* callee holds a proto refcnt upon success */
	proto = find_attached_proto(ifp, proto_family);
	if (proto == NULL) {
		retval = ENXIO;
		ifnet_lock_done(ifp);
		goto end;
	}

	/* call family module del_proto */
	if (ifp->if_del_proto) {
		ifp->if_del_proto(ifp, proto->protocol_family);
	}

	SLIST_REMOVE(&ifp->if_proto_hash[proto_hash_value(proto_family)],
	    proto, if_proto, next_hash);

	if (proto->proto_kpi == kProtoKPI_v1) {
		proto->kpi.v1.input = ifproto_media_input_v1;
		proto->kpi.v1.pre_output = ifproto_media_preout;
		proto->kpi.v1.event = ifproto_media_event;
		proto->kpi.v1.ioctl = ifproto_media_ioctl;
		proto->kpi.v1.resolve_multi = ifproto_media_resolve_multi;
		proto->kpi.v1.send_arp = ifproto_media_send_arp;
	} else {
		proto->kpi.v2.input = ifproto_media_input_v2;
		proto->kpi.v2.pre_output = ifproto_media_preout;
		proto->kpi.v2.event = ifproto_media_event;
		proto->kpi.v2.ioctl = ifproto_media_ioctl;
		proto->kpi.v2.resolve_multi = ifproto_media_resolve_multi;
		proto->kpi.v2.send_arp = ifproto_media_send_arp;
	}
	proto->detached = 1;
	ifnet_lock_done(ifp);

	if (dlil_verbose) {
		DLIL_PRINTF("%s: detached %s protocol %d\n", if_name(ifp),
		    (proto->proto_kpi == kProtoKPI_v1) ?
		    "v1" : "v2", proto_family);
	}

	/* release proto refcnt held during protocol attach */
	if_proto_free(proto);

	/*
	 * Release proto refcnt held during lookup; the rest of
	 * protocol detach steps will happen when the last proto
	 * reference is released.
	 */
	if_proto_free(proto);

end:
	return retval;
}
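
/*
 * Illustrative sketch (not from the original sources): detaching is the
 * mirror image of the attach calls above.  A client that attached a
 * protocol family simply undoes it with ifnet_detach_protocol() when it
 * no longer wants traffic demuxed to it.
 */
static __unused void
example_detach_proto(ifnet_t ifp, protocol_family_t protocol_family)
{
	errno_t error;

	error = ifnet_detach_protocol(ifp, protocol_family);
	if (error != 0 && error != ENXIO) {
		DLIL_PRINTF("%s: ifnet_detach_protocol failed: %d\n",
		    __func__, error);
	}
}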
static errno_t
ifproto_media_input_v1(struct ifnet *ifp, protocol_family_t protocol,
    struct mbuf *packet, char *header)
{
#pragma unused(ifp, protocol, packet, header)
	return ENXIO;
}

static errno_t
ifproto_media_input_v2(struct ifnet *ifp, protocol_family_t protocol,
    struct mbuf *packet)
{
#pragma unused(ifp, protocol, packet)
	return ENXIO;
}

static errno_t
ifproto_media_preout(struct ifnet *ifp, protocol_family_t protocol,
    mbuf_t *packet, const struct sockaddr *dest, void *route, char *frame_type,
    char *link_layer_dest)
{
#pragma unused(ifp, protocol, packet, dest, route, frame_type, link_layer_dest)
	return ENXIO;
}

static void
ifproto_media_event(struct ifnet *ifp, protocol_family_t protocol,
    const struct kev_msg *event)
{
#pragma unused(ifp, protocol, event)
}

static errno_t
ifproto_media_ioctl(struct ifnet *ifp, protocol_family_t protocol,
    unsigned long command, void *argument)
{
#pragma unused(ifp, protocol, command, argument)
	return EOPNOTSUPP;
}

static errno_t
ifproto_media_resolve_multi(ifnet_t ifp, const struct sockaddr *proto_addr,
    struct sockaddr_dl *out_ll, size_t ll_len)
{
#pragma unused(ifp, proto_addr, out_ll, ll_len)
	return EOPNOTSUPP;
}

static errno_t
ifproto_media_send_arp(struct ifnet *ifp, u_short arpop,
    const struct sockaddr_dl *sender_hw, const struct sockaddr *sender_proto,
    const struct sockaddr_dl *target_hw, const struct sockaddr *target_proto)
{
#pragma unused(ifp, arpop, sender_hw, sender_proto, target_hw, target_proto)
	return EOPNOTSUPP;
}
extern int if_next_index(void);
extern int tcp_ecn_outbound;

errno_t
ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr)
{
	struct ifnet *tmp_if;
	struct ifaddr *ifa;
	struct if_data_internal if_data_saved;
	struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
	struct dlil_threading_info *dl_inp;
	thread_continue_t thfunc = NULL;
	u_int32_t sflags = 0;
	int err;

	if (ifp == NULL) {
		return EINVAL;
	}

	/*
	 * Serialize ifnet attach using dlil_ifnet_lock, in order to
	 * prevent the interface from being configured while it is
	 * embryonic, as ifnet_head_lock is dropped and reacquired
	 * below prior to marking the ifnet with IFRF_ATTACHED.
	 */
	dlil_if_lock();
	ifnet_head_lock_exclusive();
	/* Verify we aren't already on the list */
	TAILQ_FOREACH(tmp_if, &ifnet_head, if_link) {
		if (tmp_if == ifp) {
			ifnet_head_done();
			dlil_if_unlock();
			return EEXIST;
		}
	}

	lck_mtx_lock_spin(&ifp->if_ref_lock);
	if (!(ifp->if_refflags & IFRF_EMBRYONIC)) {
		panic_plain("%s: flags mismatch (embryonic not set) ifp=%p",
		    __func__, ifp);
		/* NOTREACHED */
	}
	lck_mtx_unlock(&ifp->if_ref_lock);

	ifnet_lock_exclusive(ifp);

	/* Sanity check */
	VERIFY(ifp->if_detaching_link.tqe_next == NULL);
	VERIFY(ifp->if_detaching_link.tqe_prev == NULL);
	VERIFY(ifp->if_threads_pending == 0);
7044 if (ll_addr
!= NULL
) {
7045 if (ifp
->if_addrlen
== 0) {
7046 ifp
->if_addrlen
= ll_addr
->sdl_alen
;
7047 } else if (ll_addr
->sdl_alen
!= ifp
->if_addrlen
) {
7048 ifnet_lock_done(ifp
);
7056 * Allow interfaces without protocol families to attach
7057 * only if they have the necessary fields filled out.
7059 if (ifp
->if_add_proto
== NULL
|| ifp
->if_del_proto
== NULL
) {
7060 DLIL_PRINTF("%s: Attempt to attach interface without "
7061 "family module - %d\n", __func__
, ifp
->if_family
);
7062 ifnet_lock_done(ifp
);
7068 /* Allocate protocol hash table */
7069 VERIFY(ifp
->if_proto_hash
== NULL
);
7070 ifp
->if_proto_hash
= zalloc_flags(dlif_phash_zone
, Z_WAITOK
| Z_ZERO
);
7071 if (ifp
->if_proto_hash
== NULL
) {
7072 ifnet_lock_done(ifp
);
7078 lck_mtx_lock_spin(&ifp
->if_flt_lock
);
7079 VERIFY(TAILQ_EMPTY(&ifp
->if_flt_head
));
7080 TAILQ_INIT(&ifp
->if_flt_head
);
7081 VERIFY(ifp
->if_flt_busy
== 0);
7082 VERIFY(ifp
->if_flt_waiters
== 0);
7083 lck_mtx_unlock(&ifp
->if_flt_lock
);
7085 if (!(dl_if
->dl_if_flags
& DLIF_REUSE
)) {
7086 VERIFY(LIST_EMPTY(&ifp
->if_multiaddrs
));
7087 LIST_INIT(&ifp
->if_multiaddrs
);
7090 VERIFY(ifp
->if_allhostsinm
== NULL
);
7091 VERIFY(TAILQ_EMPTY(&ifp
->if_addrhead
));
7092 TAILQ_INIT(&ifp
->if_addrhead
);
7094 if (ifp
->if_index
== 0) {
7095 int idx
= if_next_index();
7099 ifnet_lock_done(ifp
);
7104 ifp
->if_index
= (uint16_t)idx
;
7106 /* the lladdr passed at attach time is the permanent address */
7107 if (ll_addr
!= NULL
&& ifp
->if_type
== IFT_ETHER
&&
7108 ll_addr
->sdl_alen
== ETHER_ADDR_LEN
) {
7109 bcopy(CONST_LLADDR(ll_addr
),
7110 dl_if
->dl_if_permanent_ether
,
7112 dl_if
->dl_if_permanent_ether_is_set
= 1;
7115 /* There should not be anything occupying this slot */
7116 VERIFY(ifindex2ifnet
[ifp
->if_index
] == NULL
);
7118 /* allocate (if needed) and initialize a link address */
7119 ifa
= dlil_alloc_lladdr(ifp
, ll_addr
);
7121 ifnet_lock_done(ifp
);
7127 VERIFY(ifnet_addrs
[ifp
->if_index
- 1] == NULL
);
7128 ifnet_addrs
[ifp
->if_index
- 1] = ifa
;
7130 /* make this address the first on the list */
7132 /* hold a reference for ifnet_addrs[] */
7133 IFA_ADDREF_LOCKED(ifa
);
7134 /* if_attach_link_ifa() holds a reference for ifa_link */
7135 if_attach_link_ifa(ifp
, ifa
);
7138 TAILQ_INSERT_TAIL(&ifnet_head
, ifp
, if_link
);
7139 ifindex2ifnet
[ifp
->if_index
] = ifp
;
7141 /* Hold a reference to the underlying dlil_ifnet */
7142 ifnet_reference(ifp
);
7144 /* Clear stats (save and restore other fields that we care) */
7145 if_data_saved
= ifp
->if_data
;
7146 bzero(&ifp
->if_data
, sizeof(ifp
->if_data
));
7147 ifp
->if_data
.ifi_type
= if_data_saved
.ifi_type
;
7148 ifp
->if_data
.ifi_typelen
= if_data_saved
.ifi_typelen
;
7149 ifp
->if_data
.ifi_physical
= if_data_saved
.ifi_physical
;
7150 ifp
->if_data
.ifi_addrlen
= if_data_saved
.ifi_addrlen
;
7151 ifp
->if_data
.ifi_hdrlen
= if_data_saved
.ifi_hdrlen
;
7152 ifp
->if_data
.ifi_mtu
= if_data_saved
.ifi_mtu
;
7153 ifp
->if_data
.ifi_baudrate
= if_data_saved
.ifi_baudrate
;
7154 ifp
->if_data
.ifi_hwassist
= if_data_saved
.ifi_hwassist
;
7155 ifp
->if_data
.ifi_tso_v4_mtu
= if_data_saved
.ifi_tso_v4_mtu
;
7156 ifp
->if_data
.ifi_tso_v6_mtu
= if_data_saved
.ifi_tso_v6_mtu
;
7157 ifnet_touch_lastchange(ifp
);
7159 VERIFY(ifp
->if_output_sched_model
== IFNET_SCHED_MODEL_NORMAL
||
7160 ifp
->if_output_sched_model
== IFNET_SCHED_MODEL_DRIVER_MANAGED
||
7161 ifp
->if_output_sched_model
== IFNET_SCHED_MODEL_FQ_CODEL
);
7163 /* By default, use SFB and enable flow advisory */
7164 sflags
= PKTSCHEDF_QALG_SFB
;
7166 sflags
|= PKTSCHEDF_QALG_FLOWCTL
;
7169 if (if_delaybased_queue
) {
7170 sflags
|= PKTSCHEDF_QALG_DELAYBASED
;
7173 if (ifp
->if_output_sched_model
==
7174 IFNET_SCHED_MODEL_DRIVER_MANAGED
) {
7175 sflags
|= PKTSCHEDF_QALG_DRIVER_MANAGED
;
7178 /* Initialize transmit queue(s) */
7179 err
= ifclassq_setup(ifp
, sflags
, (dl_if
->dl_if_flags
& DLIF_REUSE
));
7181 panic_plain("%s: ifp=%p couldn't initialize transmit queue; "
7182 "err=%d", __func__
, ifp
, err
);
7186 /* Sanity checks on the input thread storage */
7187 dl_inp
= &dl_if
->dl_if_inpstorage
;
7188 bzero(&dl_inp
->dlth_stats
, sizeof(dl_inp
->dlth_stats
));
7189 VERIFY(dl_inp
->dlth_flags
== 0);
7190 VERIFY(dl_inp
->dlth_wtot
== 0);
7191 VERIFY(dl_inp
->dlth_ifp
== NULL
);
7192 VERIFY(qhead(&dl_inp
->dlth_pkts
) == NULL
&& qempty(&dl_inp
->dlth_pkts
));
7193 VERIFY(qlimit(&dl_inp
->dlth_pkts
) == 0);
7194 VERIFY(!dl_inp
->dlth_affinity
);
7195 VERIFY(ifp
->if_inp
== NULL
);
7196 VERIFY(dl_inp
->dlth_thread
== THREAD_NULL
);
7197 VERIFY(dl_inp
->dlth_strategy
== NULL
);
7198 VERIFY(dl_inp
->dlth_driver_thread
== THREAD_NULL
);
7199 VERIFY(dl_inp
->dlth_poller_thread
== THREAD_NULL
);
7200 VERIFY(dl_inp
->dlth_affinity_tag
== 0);
7202 #if IFNET_INPUT_SANITY_CHK
7203 VERIFY(dl_inp
->dlth_pkts_cnt
== 0);
7204 #endif /* IFNET_INPUT_SANITY_CHK */
7206 VERIFY(ifp
->if_poll_thread
== THREAD_NULL
);
7207 dlil_reset_rxpoll_params(ifp
);
7209 * A specific DLIL input thread is created per non-loopback interface.
7211 if (ifp
->if_family
!= IFNET_FAMILY_LOOPBACK
) {
7212 ifp
->if_inp
= dl_inp
;
7213 ifnet_incr_pending_thread_count(ifp
);
7214 err
= dlil_create_input_thread(ifp
, ifp
->if_inp
, &thfunc
);
7215 if (err
== ENODEV
) {
7216 VERIFY(thfunc
== NULL
);
7217 ifnet_decr_pending_thread_count(ifp
);
7218 } else if (err
!= 0) {
7219 panic_plain("%s: ifp=%p couldn't get an input thread; "
7220 "err=%d", __func__
, ifp
, err
);
7225 * If the driver supports the new transmit model, calculate flow hash
7226 * and create a workloop starter thread to invoke the if_start callback
7227 * where the packets may be dequeued and transmitted.
7229 if (ifp
->if_eflags
& IFEF_TXSTART
) {
7230 thread_precedence_policy_data_t info
;
7231 __unused kern_return_t kret
;
7233 ifp
->if_flowhash
= ifnet_calc_flowhash(ifp
);
7234 VERIFY(ifp
->if_flowhash
!= 0);
7235 VERIFY(ifp
->if_start_thread
== THREAD_NULL
);
7237 ifnet_set_start_cycle(ifp
, NULL
);
7238 ifp
->if_start_active
= 0;
7239 ifp
->if_start_req
= 0;
7240 ifp
->if_start_flags
= 0;
7241 VERIFY(ifp
->if_start
!= NULL
);
7242 ifnet_incr_pending_thread_count(ifp
);
7243 if ((err
= kernel_thread_start(ifnet_start_thread_func
,
7244 ifp
, &ifp
->if_start_thread
)) != KERN_SUCCESS
) {
7246 "ifp=%p couldn't get a start thread; "
7247 "err=%d", __func__
, ifp
, err
);
7250 bzero(&info
, sizeof(info
));
7251 info
.importance
= 1;
7252 kret
= thread_policy_set(ifp
->if_start_thread
,
7253 THREAD_PRECEDENCE_POLICY
, (thread_policy_t
)&info
,
7254 THREAD_PRECEDENCE_POLICY_COUNT
);
7255 ASSERT(kret
== KERN_SUCCESS
);
7257 ifp
->if_flowhash
= 0;
7260 /* Reset polling parameters */
7261 ifnet_set_poll_cycle(ifp
, NULL
);
7262 ifp
->if_poll_update
= 0;
7263 ifp
->if_poll_flags
= 0;
7264 ifp
->if_poll_req
= 0;
7265 VERIFY(ifp
->if_poll_thread
== THREAD_NULL
);
7268 * If the driver supports the new receive model, create a poller
7269 * thread to invoke if_input_poll callback where the packets may
7270 * be dequeued from the driver and processed for reception.
7271 * if the interface is netif compat then the poller thread is
7274 if (thfunc
== dlil_rxpoll_input_thread_func
) {
7275 thread_precedence_policy_data_t info
;
7276 __unused kern_return_t kret
;
7277 VERIFY(ifp
->if_input_poll
!= NULL
);
7278 VERIFY(ifp
->if_input_ctl
!= NULL
);
7279 ifnet_incr_pending_thread_count(ifp
);
7280 if ((err
= kernel_thread_start(ifnet_poll_thread_func
, ifp
,
7281 &ifp
->if_poll_thread
)) != KERN_SUCCESS
) {
7282 panic_plain("%s: ifp=%p couldn't get a poll thread; "
7283 "err=%d", __func__
, ifp
, err
);
7286 bzero(&info
, sizeof(info
));
7287 info
.importance
= 1;
7288 kret
= thread_policy_set(ifp
->if_poll_thread
,
7289 THREAD_PRECEDENCE_POLICY
, (thread_policy_t
)&info
,
7290 THREAD_PRECEDENCE_POLICY_COUNT
);
7291 ASSERT(kret
== KERN_SUCCESS
);
7294 VERIFY(ifp
->if_desc
.ifd_maxlen
== IF_DESCSIZE
);
7295 VERIFY(ifp
->if_desc
.ifd_len
== 0);
7296 VERIFY(ifp
->if_desc
.ifd_desc
!= NULL
);
7298 /* Record attach PC stacktrace */
7299 ctrace_record(&((struct dlil_ifnet
*)ifp
)->dl_if_attach
);
7301 ifp
->if_updatemcasts
= 0;
7302 if (!LIST_EMPTY(&ifp
->if_multiaddrs
)) {
7303 struct ifmultiaddr
*ifma
;
7304 LIST_FOREACH(ifma
, &ifp
->if_multiaddrs
, ifma_link
) {
7306 if (ifma
->ifma_addr
->sa_family
== AF_LINK
||
7307 ifma
->ifma_addr
->sa_family
== AF_UNSPEC
) {
7308 ifp
->if_updatemcasts
++;
7313 DLIL_PRINTF("%s: attached with %d suspended link-layer multicast "
7314 "membership(s)\n", if_name(ifp
),
7315 ifp
->if_updatemcasts
);
7318 /* Clear logging parameters */
7319 bzero(&ifp
->if_log
, sizeof(ifp
->if_log
));
7321 /* Clear foreground/realtime activity timestamps */
7322 ifp
->if_fg_sendts
= 0;
7323 ifp
->if_rt_sendts
= 0;
7325 VERIFY(ifp
->if_delegated
.ifp
== NULL
);
7326 VERIFY(ifp
->if_delegated
.type
== 0);
7327 VERIFY(ifp
->if_delegated
.family
== 0);
7328 VERIFY(ifp
->if_delegated
.subfamily
== 0);
7329 VERIFY(ifp
->if_delegated
.expensive
== 0);
7330 VERIFY(ifp
->if_delegated
.constrained
== 0);
7332 VERIFY(ifp
->if_agentids
== NULL
);
7333 VERIFY(ifp
->if_agentcount
== 0);
7335 /* Reset interface state */
7336 bzero(&ifp
->if_interface_state
, sizeof(ifp
->if_interface_state
));
7337 ifp
->if_interface_state
.valid_bitmask
|=
7338 IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID
;
7339 ifp
->if_interface_state
.interface_availability
=
7340 IF_INTERFACE_STATE_INTERFACE_AVAILABLE
;
7342 /* Initialize Link Quality Metric (loopback [lo0] is always good) */
7343 if (ifp
== lo_ifp
) {
7344 ifp
->if_interface_state
.lqm_state
= IFNET_LQM_THRESH_GOOD
;
7345 ifp
->if_interface_state
.valid_bitmask
|=
7346 IF_INTERFACE_STATE_LQM_STATE_VALID
;
7348 ifp
->if_interface_state
.lqm_state
= IFNET_LQM_THRESH_UNKNOWN
;
7352 * Enable ECN capability on this interface depending on the
7353 * value of ECN global setting
7355 if (tcp_ecn_outbound
== 2 && !IFNET_IS_CELLULAR(ifp
)) {
7356 if_set_eflags(ifp
, IFEF_ECN_ENABLE
);
7357 if_clear_eflags(ifp
, IFEF_ECN_DISABLE
);
7361 * Built-in Cyclops always on policy for WiFi infra
7363 if (IFNET_IS_WIFI_INFRA(ifp
) && net_qos_policy_wifi_enabled
!= 0) {
7366 error
= if_set_qosmarking_mode(ifp
,
7367 IFRTYPE_QOSMARKING_FASTLANE
);
7369 DLIL_PRINTF("%s if_set_qosmarking_mode(%s) error %d\n",
7370 __func__
, ifp
->if_xname
, error
);
7372 if_set_eflags(ifp
, IFEF_QOSMARKING_ENABLED
);
7373 #if (DEVELOPMENT || DEBUG)
7374 DLIL_PRINTF("%s fastlane enabled on %s\n",
7375 __func__
, ifp
->if_xname
);
7376 #endif /* (DEVELOPMENT || DEBUG) */
7380 ifnet_lock_done(ifp
);
7384 lck_mtx_lock(&ifp
->if_cached_route_lock
);
7385 /* Enable forwarding cached route */
7386 ifp
->if_fwd_cacheok
= 1;
7387 /* Clean up any existing cached routes */
7388 ROUTE_RELEASE(&ifp
->if_fwd_route
);
7389 bzero(&ifp
->if_fwd_route
, sizeof(ifp
->if_fwd_route
));
7390 ROUTE_RELEASE(&ifp
->if_src_route
);
7391 bzero(&ifp
->if_src_route
, sizeof(ifp
->if_src_route
));
7392 ROUTE_RELEASE(&ifp
->if_src_route6
);
7393 bzero(&ifp
->if_src_route6
, sizeof(ifp
->if_src_route6
));
7394 lck_mtx_unlock(&ifp
->if_cached_route_lock
);
7396 ifnet_llreach_ifattach(ifp
, (dl_if
->dl_if_flags
& DLIF_REUSE
));
7399 * Allocate and attach IGMPv3/MLDv2 interface specific variables
7400 * and trees; do this before the ifnet is marked as attached.
7401 * The ifnet keeps the reference to the info structures even after
7402 * the ifnet is detached, since the network-layer records still
7403 * refer to the info structures even after that. This also
7404 * makes it possible for them to still function after the ifnet
7405 * is recycled or reattached.
7408 if (IGMP_IFINFO(ifp
) == NULL
) {
7409 IGMP_IFINFO(ifp
) = igmp_domifattach(ifp
, Z_WAITOK
);
7410 VERIFY(IGMP_IFINFO(ifp
) != NULL
);
7412 VERIFY(IGMP_IFINFO(ifp
)->igi_ifp
== ifp
);
7413 igmp_domifreattach(IGMP_IFINFO(ifp
));
7416 if (MLD_IFINFO(ifp
) == NULL
) {
7417 MLD_IFINFO(ifp
) = mld_domifattach(ifp
, Z_WAITOK
);
7418 VERIFY(MLD_IFINFO(ifp
) != NULL
);
7420 VERIFY(MLD_IFINFO(ifp
)->mli_ifp
== ifp
);
7421 mld_domifreattach(MLD_IFINFO(ifp
));
7424 VERIFY(ifp
->if_data_threshold
== 0);
7425 VERIFY(ifp
->if_dt_tcall
!= NULL
);
7428 * Wait for the created kernel threads for I/O to get
7429 * scheduled and run at least once before we proceed
7430 * to mark interface as attached.
7432 lck_mtx_lock(&ifp
->if_ref_lock
);
7433 while (ifp
->if_threads_pending
!= 0) {
7434 DLIL_PRINTF("%s: Waiting for all kernel threads created for "
7435 "interface %s to get scheduled at least once.\n",
7436 __func__
, ifp
->if_xname
);
7437 (void) msleep(&ifp
->if_threads_pending
, &ifp
->if_ref_lock
, (PZERO
- 1),
7439 LCK_MTX_ASSERT(&ifp
->if_ref_lock
, LCK_ASSERT_OWNED
);
7441 lck_mtx_unlock(&ifp
->if_ref_lock
);
7442 DLIL_PRINTF("%s: All kernel threads created for interface %s have been scheduled "
7443 "at least once. Proceeding.\n", __func__
, ifp
->if_xname
);
7445 /* Final mark this ifnet as attached. */
7446 lck_mtx_lock(rnh_lock
);
7447 ifnet_lock_exclusive(ifp
);
7448 lck_mtx_lock_spin(&ifp
->if_ref_lock
);
7449 ifp
->if_refflags
= (IFRF_ATTACHED
| IFRF_READY
); /* clears embryonic */
7450 lck_mtx_unlock(&ifp
->if_ref_lock
);
7452 /* boot-args override; enable idle notification */
7453 (void) ifnet_set_idle_flags_locked(ifp
, IFRF_IDLE_NOTIFY
,
7456 /* apply previous request(s) to set the idle flags, if any */
7457 (void) ifnet_set_idle_flags_locked(ifp
, ifp
->if_idle_new_flags
,
7458 ifp
->if_idle_new_flags_mask
);
7460 ifnet_lock_done(ifp
);
7461 lck_mtx_unlock(rnh_lock
);
7466 * Attach packet filter to this interface, if enabled.
7468 pf_ifnet_hook(ifp
, 1);
7471 dlil_post_msg(ifp
, KEV_DL_SUBCLASS
, KEV_DL_IF_ATTACHED
, NULL
, 0);
7474 DLIL_PRINTF("%s: attached%s\n", if_name(ifp
),
7475 (dl_if
->dl_if_flags
& DLIF_REUSE
) ? " (recycled)" : "");
/*
 * Prepare the storage for the first/permanent link address, which must
 * have the same lifetime as the ifnet itself.  Although the link address
 * gets removed from if_addrhead and ifnet_addrs[] at detach time, its
 * location in memory must never change as it may still be referred to by
 * some parts of the system afterwards (unfortunate implementation
 * artifacts inherited from BSD.)
 *
 * Caller must hold ifnet lock as writer.
 */
static struct ifaddr *
dlil_alloc_lladdr(struct ifnet *ifp, const struct sockaddr_dl *ll_addr)
{
	struct ifaddr *ifa, *oifa;
	struct sockaddr_dl *asdl, *msdl;
	char workbuf[IFNAMSIZ * 2];
	int namelen, masklen, socksize;
	struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;

	ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_EXCLUSIVE);
	VERIFY(ll_addr == NULL || ll_addr->sdl_alen == ifp->if_addrlen);

	namelen = scnprintf(workbuf, sizeof(workbuf), "%s",
	    if_name(ifp));
	masklen = offsetof(struct sockaddr_dl, sdl_data[0])
	    + ((namelen > 0) ? namelen : 0);
	socksize = masklen + ifp->if_addrlen;
#define ROUNDUP(a) (1 + (((a) - 1) | (sizeof (u_int32_t) - 1)))
	if ((u_int32_t)socksize < sizeof(struct sockaddr_dl)) {
		socksize = sizeof(struct sockaddr_dl);
	}
	socksize = ROUNDUP(socksize);
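	/*
	 * Illustrative note (not from the original sources): ROUNDUP()
	 * rounds its argument up to the next multiple of sizeof (u_int32_t),
	 * so the combined name + link-layer address storage always ends on
	 * a 4-byte boundary.  The compile-time checks below only document
	 * that behavior, e.g. 5 -> 8, 8 -> 8, 13 -> 16.
	 */
	_Static_assert(ROUNDUP(5) == 8, "ROUNDUP rounds 5 up to 8");
	_Static_assert(ROUNDUP(8) == 8, "ROUNDUP keeps multiples of 4");
	_Static_assert(ROUNDUP(13) == 16, "ROUNDUP rounds 13 up to 16");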
7515 ifa
= ifp
->if_lladdr
;
7516 if (socksize
> DLIL_SDLMAXLEN
||
7517 (ifa
!= NULL
&& ifa
!= &dl_if
->dl_if_lladdr
.ifa
)) {
7519 * Rare, but in the event that the link address requires
7520 * more storage space than DLIL_SDLMAXLEN, allocate the
7521 * largest possible storages for address and mask, such
7522 * that we can reuse the same space when if_addrlen grows.
7523 * This same space will be used when if_addrlen shrinks.
7525 if (ifa
== NULL
|| ifa
== &dl_if
->dl_if_lladdr
.ifa
) {
7526 int ifasize
= sizeof(*ifa
) + 2 * SOCK_MAXADDRLEN
;
7527 ifa
= _MALLOC(ifasize
, M_IFADDR
, M_WAITOK
| M_ZERO
);
7532 /* Don't set IFD_ALLOC, as this is permanent */
7533 ifa
->ifa_debug
= IFD_LINK
;
7536 /* address and mask sockaddr_dl locations */
7537 asdl
= (struct sockaddr_dl
*)(ifa
+ 1);
7538 bzero(asdl
, SOCK_MAXADDRLEN
);
7539 msdl
= (struct sockaddr_dl
*)(void *)
7540 ((char *)asdl
+ SOCK_MAXADDRLEN
);
7541 bzero(msdl
, SOCK_MAXADDRLEN
);
7543 VERIFY(ifa
== NULL
|| ifa
== &dl_if
->dl_if_lladdr
.ifa
);
7545 * Use the storage areas for address and mask within the
7546 * dlil_ifnet structure. This is the most common case.
7549 ifa
= &dl_if
->dl_if_lladdr
.ifa
;
7551 /* Don't set IFD_ALLOC, as this is permanent */
7552 ifa
->ifa_debug
= IFD_LINK
;
7555 /* address and mask sockaddr_dl locations */
7556 asdl
= (struct sockaddr_dl
*)(void *)&dl_if
->dl_if_lladdr
.asdl
;
7557 bzero(asdl
, sizeof(dl_if
->dl_if_lladdr
.asdl
));
7558 msdl
= (struct sockaddr_dl
*)(void *)&dl_if
->dl_if_lladdr
.msdl
;
7559 bzero(msdl
, sizeof(dl_if
->dl_if_lladdr
.msdl
));
7562 /* hold a permanent reference for the ifnet itself */
7563 IFA_ADDREF_LOCKED(ifa
);
7564 oifa
= ifp
->if_lladdr
;
7565 ifp
->if_lladdr
= ifa
;
7567 VERIFY(ifa
->ifa_debug
== IFD_LINK
);
7569 ifa
->ifa_rtrequest
= link_rtrequest
;
7570 ifa
->ifa_addr
= (struct sockaddr
*)asdl
;
7571 asdl
->sdl_len
= (u_char
)socksize
;
7572 asdl
->sdl_family
= AF_LINK
;
7574 bcopy(workbuf
, asdl
->sdl_data
, min(namelen
,
7575 sizeof(asdl
->sdl_data
)));
7576 asdl
->sdl_nlen
= (u_char
)namelen
;
7580 asdl
->sdl_index
= ifp
->if_index
;
7581 asdl
->sdl_type
= ifp
->if_type
;
7582 if (ll_addr
!= NULL
) {
7583 asdl
->sdl_alen
= ll_addr
->sdl_alen
;
7584 bcopy(CONST_LLADDR(ll_addr
), LLADDR(asdl
), asdl
->sdl_alen
);
7588 ifa
->ifa_netmask
= (struct sockaddr
*)msdl
;
7589 msdl
->sdl_len
= (u_char
)masklen
;
7590 while (namelen
> 0) {
7591 msdl
->sdl_data
[--namelen
] = 0xff;
static void
if_purgeaddrs(struct ifnet *ifp)
{
#if INET
	in_purgeaddrs(ifp);
#endif /* INET */
	in6_purgeaddrs(ifp);
}

errno_t
ifnet_detach(ifnet_t ifp)
{
	struct ifnet *delegated_ifp;
	struct nd_ifinfo *ndi = NULL;

	if (ifp == NULL) {
		return EINVAL;
	}

	ndi = ND_IFINFO(ifp);
	if (ndi != NULL) {
		ndi->cga_initialized = FALSE;
	}

	lck_mtx_lock(rnh_lock);
	ifnet_head_lock_exclusive();
	ifnet_lock_exclusive(ifp);
7630 if (ifp
->if_output_netem
!= NULL
) {
7631 netem_destroy(ifp
->if_output_netem
);
7632 ifp
->if_output_netem
= NULL
;
7636 * Check to see if this interface has previously triggered
7637 * aggressive protocol draining; if so, decrement the global
7638 * refcnt and clear PR_AGGDRAIN on the route domain if
7639 * there are no more of such an interface around.
7641 (void) ifnet_set_idle_flags_locked(ifp
, 0, ~0);
7643 lck_mtx_lock_spin(&ifp
->if_ref_lock
);
7644 if (!(ifp
->if_refflags
& IFRF_ATTACHED
)) {
7645 lck_mtx_unlock(&ifp
->if_ref_lock
);
7646 ifnet_lock_done(ifp
);
7648 lck_mtx_unlock(rnh_lock
);
7650 } else if (ifp
->if_refflags
& IFRF_DETACHING
) {
7651 /* Interface has already been detached */
7652 lck_mtx_unlock(&ifp
->if_ref_lock
);
7653 ifnet_lock_done(ifp
);
7655 lck_mtx_unlock(rnh_lock
);
7658 VERIFY(!(ifp
->if_refflags
& IFRF_EMBRYONIC
));
7659 /* Indicate this interface is being detached */
7660 ifp
->if_refflags
&= ~IFRF_ATTACHED
;
7661 ifp
->if_refflags
|= IFRF_DETACHING
;
7662 lck_mtx_unlock(&ifp
->if_ref_lock
);
7665 DLIL_PRINTF("%s: detaching\n", if_name(ifp
));
7668 /* clean up flow control entry object if there's any */
7669 if (ifp
->if_eflags
& IFEF_TXSTART
) {
7670 ifnet_flowadv(ifp
->if_flowhash
);
7673 /* Reset ECN enable/disable flags */
7674 /* Reset CLAT46 flag */
7675 if_clear_eflags(ifp
, IFEF_ECN_ENABLE
| IFEF_ECN_DISABLE
| IFEF_CLAT46
);
	/*
	 * We do not reset the TCP keep alive counters in case
	 * a TCP connection stays connected after the interface
	 * is detached.
	 */
	if (ifp->if_tcp_kao_cnt > 0) {
		os_log(OS_LOG_DEFAULT, "%s %s tcp_kao_cnt %u not zero",
		    __func__, if_name(ifp), ifp->if_tcp_kao_cnt);
	}
7686 ifp
->if_tcp_kao_max
= 0;
7689 * Remove ifnet from the ifnet_head, ifindex2ifnet[]; it will
7690 * no longer be visible during lookups from this point.
7692 VERIFY(ifindex2ifnet
[ifp
->if_index
] == ifp
);
7693 TAILQ_REMOVE(&ifnet_head
, ifp
, if_link
);
7694 ifp
->if_link
.tqe_next
= NULL
;
7695 ifp
->if_link
.tqe_prev
= NULL
;
7696 if (ifp
->if_ordered_link
.tqe_next
!= NULL
||
7697 ifp
->if_ordered_link
.tqe_prev
!= NULL
) {
7698 ifnet_remove_from_ordered_list(ifp
);
7700 ifindex2ifnet
[ifp
->if_index
] = NULL
;
7702 /* 18717626 - reset router mode */
7703 if_clear_eflags(ifp
, IFEF_IPV4_ROUTER
);
7704 ifp
->if_ipv6_router_mode
= IPV6_ROUTER_MODE_DISABLED
;
7706 /* Record detach PC stacktrace */
7707 ctrace_record(&((struct dlil_ifnet
*)ifp
)->dl_if_detach
);
7709 /* Clear logging parameters */
7710 bzero(&ifp
->if_log
, sizeof(ifp
->if_log
));
7712 /* Clear delegated interface info (reference released below) */
7713 delegated_ifp
= ifp
->if_delegated
.ifp
;
7714 bzero(&ifp
->if_delegated
, sizeof(ifp
->if_delegated
));
7716 /* Reset interface state */
7717 bzero(&ifp
->if_interface_state
, sizeof(ifp
->if_interface_state
));
7719 ifnet_lock_done(ifp
);
7721 lck_mtx_unlock(rnh_lock
);
7724 /* Release reference held on the delegated interface */
7725 if (delegated_ifp
!= NULL
) {
7726 ifnet_release(delegated_ifp
);
7729 /* Reset Link Quality Metric (unless loopback [lo0]) */
7730 if (ifp
!= lo_ifp
) {
7731 if_lqm_update(ifp
, IFNET_LQM_THRESH_OFF
, 0);
7734 /* Reset TCP local statistics */
7735 if (ifp
->if_tcp_stat
!= NULL
) {
7736 bzero(ifp
->if_tcp_stat
, sizeof(*ifp
->if_tcp_stat
));
7739 /* Reset UDP local statistics */
7740 if (ifp
->if_udp_stat
!= NULL
) {
7741 bzero(ifp
->if_udp_stat
, sizeof(*ifp
->if_udp_stat
));
7744 /* Reset ifnet IPv4 stats */
7745 if (ifp
->if_ipv4_stat
!= NULL
) {
7746 bzero(ifp
->if_ipv4_stat
, sizeof(*ifp
->if_ipv4_stat
));
7749 /* Reset ifnet IPv6 stats */
7750 if (ifp
->if_ipv6_stat
!= NULL
) {
7751 bzero(ifp
->if_ipv6_stat
, sizeof(*ifp
->if_ipv6_stat
));
7754 /* Release memory held for interface link status report */
7755 if (ifp
->if_link_status
!= NULL
) {
7756 FREE(ifp
->if_link_status
, M_TEMP
);
7757 ifp
->if_link_status
= NULL
;
7760 /* Clear agent IDs */
7761 if (ifp
->if_agentids
!= NULL
) {
7762 FREE(ifp
->if_agentids
, M_NETAGENT
);
7763 ifp
->if_agentids
= NULL
;
7765 ifp
->if_agentcount
= 0;
7768 /* Let BPF know we're detaching */
7771 /* Mark the interface as DOWN */
7774 /* Disable forwarding cached route */
7775 lck_mtx_lock(&ifp
->if_cached_route_lock
);
7776 ifp
->if_fwd_cacheok
= 0;
7777 lck_mtx_unlock(&ifp
->if_cached_route_lock
);
7779 /* Disable data threshold and wait for any pending event posting */
7780 ifp
->if_data_threshold
= 0;
7781 VERIFY(ifp
->if_dt_tcall
!= NULL
);
7782 (void) thread_call_cancel_wait(ifp
->if_dt_tcall
);
7785 * Drain any deferred IGMPv3/MLDv2 query responses, but keep the
7786 * references to the info structures and leave them attached to
7790 igmp_domifdetach(ifp
);
7792 mld_domifdetach(ifp
);
7794 dlil_post_msg(ifp
, KEV_DL_SUBCLASS
, KEV_DL_IF_DETACHING
, NULL
, 0);
7796 /* Let worker thread take care of the rest, to avoid reentrancy */
7798 ifnet_detaching_enqueue(ifp
);
static void
ifnet_detaching_enqueue(struct ifnet *ifp)
{
	dlil_if_lock_assert();

	++ifnet_detaching_cnt;
	VERIFY(ifnet_detaching_cnt != 0);
	TAILQ_INSERT_TAIL(&ifnet_detaching_head, ifp, if_detaching_link);
	wakeup((caddr_t)&ifnet_delayed_run);
}

static struct ifnet *
ifnet_detaching_dequeue(void)
{
	struct ifnet *ifp;

	dlil_if_lock_assert();

	ifp = TAILQ_FIRST(&ifnet_detaching_head);
	VERIFY(ifnet_detaching_cnt != 0 || ifp == NULL);
	if (ifp != NULL) {
		VERIFY(ifnet_detaching_cnt != 0);
		--ifnet_detaching_cnt;
		TAILQ_REMOVE(&ifnet_detaching_head, ifp, if_detaching_link);
		ifp->if_detaching_link.tqe_next = NULL;
		ifp->if_detaching_link.tqe_prev = NULL;
	}
	return ifp;
}

__attribute__((noreturn))
static void
ifnet_detacher_thread_cont(void *v, wait_result_t wres)
{
#pragma unused(v, wres)
	struct ifnet *ifp;

	if (__improbable(ifnet_detaching_embryonic)) {
		ifnet_detaching_embryonic = FALSE;
		/* there's no lock ordering constrain so OK to do this here */
		dlil_decr_pending_thread_count();
	}

	dlil_if_lock();
	for (;;) {
		dlil_if_lock_assert();

		if (ifnet_detaching_cnt == 0) {
			break;
		}

		net_update_uptime();

		VERIFY(TAILQ_FIRST(&ifnet_detaching_head) != NULL);

		/* Take care of detaching ifnet */
		ifp = ifnet_detaching_dequeue();
		if (ifp != NULL) {
			dlil_if_unlock();
			ifnet_detach_final(ifp);
			dlil_if_lock();
		}
	}

	(void) assert_wait(&ifnet_delayed_run, THREAD_UNINT);
	dlil_if_unlock();
	(void) thread_block(ifnet_detacher_thread_cont);

	VERIFY(0);      /* we should never get here */
	/* NOTREACHED */
	__builtin_unreachable();
}

static void
ifnet_detacher_thread_func(void *v, wait_result_t w)
{
#pragma unused(v, w)
	dlil_if_lock();
	(void) assert_wait(&ifnet_delayed_run, THREAD_UNINT);
	ifnet_detaching_embryonic = TRUE;
	/* wake up once to get out of embryonic state */
	wakeup((caddr_t)&ifnet_delayed_run);
	dlil_if_unlock();
	(void) thread_block(ifnet_detacher_thread_cont);
	VERIFY(0);
	/* NOTREACHED */
	__builtin_unreachable();
}
7895 ifnet_detach_final(struct ifnet
*ifp
)
7897 struct ifnet_filter
*filter
, *filter_next
;
7898 struct ifnet_filter_head fhead
;
7899 struct dlil_threading_info
*inp
;
7901 ifnet_detached_func if_free
;
7904 lck_mtx_lock(&ifp
->if_ref_lock
);
7905 if (!(ifp
->if_refflags
& IFRF_DETACHING
)) {
7906 panic("%s: flags mismatch (detaching not set) ifp=%p",
7912 * Wait until the existing IO references get released
7913 * before we proceed with ifnet_detach. This is not a
7914 * common case, so block without using a continuation.
7916 while (ifp
->if_refio
> 0) {
7917 DLIL_PRINTF("%s: Waiting for IO references on %s interface "
7918 "to be released\n", __func__
, if_name(ifp
));
7919 (void) msleep(&(ifp
->if_refio
), &ifp
->if_ref_lock
,
7920 (PZERO
- 1), "ifnet_ioref_wait", NULL
);
7923 VERIFY(ifp
->if_datamov
== 0);
7924 VERIFY(ifp
->if_drainers
== 0);
7925 VERIFY(ifp
->if_suspend
== 0);
7926 ifp
->if_refflags
&= ~IFRF_READY
;
7927 lck_mtx_unlock(&ifp
->if_ref_lock
);
7929 /* Drain and destroy send queue */
7930 ifclassq_teardown(ifp
);
7932 /* Detach interface filters */
7933 lck_mtx_lock(&ifp
->if_flt_lock
);
7934 if_flt_monitor_enter(ifp
);
7936 LCK_MTX_ASSERT(&ifp
->if_flt_lock
, LCK_MTX_ASSERT_OWNED
);
7937 fhead
= ifp
->if_flt_head
;
7938 TAILQ_INIT(&ifp
->if_flt_head
);
7940 for (filter
= TAILQ_FIRST(&fhead
); filter
; filter
= filter_next
) {
7941 filter_next
= TAILQ_NEXT(filter
, filt_next
);
7942 lck_mtx_unlock(&ifp
->if_flt_lock
);
7944 dlil_detach_filter_internal(filter
, 1);
7945 lck_mtx_lock(&ifp
->if_flt_lock
);
7947 if_flt_monitor_leave(ifp
);
7948 lck_mtx_unlock(&ifp
->if_flt_lock
);
7950 /* Tell upper layers to drop their network addresses */
7953 ifnet_lock_exclusive(ifp
);
7955 /* Unplumb all protocols */
7956 for (i
= 0; i
< PROTO_HASH_SLOTS
; i
++) {
7957 struct if_proto
*proto
;
7959 proto
= SLIST_FIRST(&ifp
->if_proto_hash
[i
]);
7960 while (proto
!= NULL
) {
7961 protocol_family_t family
= proto
->protocol_family
;
7962 ifnet_lock_done(ifp
);
7963 proto_unplumb(family
, ifp
);
7964 ifnet_lock_exclusive(ifp
);
7965 proto
= SLIST_FIRST(&ifp
->if_proto_hash
[i
]);
7967 /* There should not be any protocols left */
7968 VERIFY(SLIST_EMPTY(&ifp
->if_proto_hash
[i
]));
7970 zfree(dlif_phash_zone
, ifp
->if_proto_hash
);
7971 ifp
->if_proto_hash
= NULL
;
7973 /* Detach (permanent) link address from if_addrhead */
7974 ifa
= TAILQ_FIRST(&ifp
->if_addrhead
);
7975 VERIFY(ifnet_addrs
[ifp
->if_index
- 1] == ifa
);
7977 if_detach_link_ifa(ifp
, ifa
);
7980 /* Remove (permanent) link address from ifnet_addrs[] */
7982 ifnet_addrs
[ifp
->if_index
- 1] = NULL
;
7984 /* This interface should not be on {ifnet_head,detaching} */
7985 VERIFY(ifp
->if_link
.tqe_next
== NULL
);
7986 VERIFY(ifp
->if_link
.tqe_prev
== NULL
);
7987 VERIFY(ifp
->if_detaching_link
.tqe_next
== NULL
);
7988 VERIFY(ifp
->if_detaching_link
.tqe_prev
== NULL
);
7989 VERIFY(ifp
->if_ordered_link
.tqe_next
== NULL
);
7990 VERIFY(ifp
->if_ordered_link
.tqe_prev
== NULL
);
7992 /* The slot should have been emptied */
7993 VERIFY(ifindex2ifnet
[ifp
->if_index
] == NULL
);
7995 /* There should not be any addresses left */
7996 VERIFY(TAILQ_EMPTY(&ifp
->if_addrhead
));
7999 * Signal the starter thread to terminate itself.
8001 if (ifp
->if_start_thread
!= THREAD_NULL
) {
8002 lck_mtx_lock_spin(&ifp
->if_start_lock
);
8003 ifp
->if_start_flags
= 0;
8004 ifp
->if_start_thread
= THREAD_NULL
;
8005 wakeup_one((caddr_t
)&ifp
->if_start_thread
);
8006 lck_mtx_unlock(&ifp
->if_start_lock
);
8010 * Signal the poller thread to terminate itself.
8012 if (ifp
->if_poll_thread
!= THREAD_NULL
) {
8013 lck_mtx_lock_spin(&ifp
->if_poll_lock
);
8014 ifp
->if_poll_thread
= THREAD_NULL
;
8015 wakeup_one((caddr_t
)&ifp
->if_poll_thread
);
8016 lck_mtx_unlock(&ifp
->if_poll_lock
);
8020 * If thread affinity was set for the workloop thread, we will need
8021 * to tear down the affinity and release the extra reference count
8022 * taken at attach time. Does not apply to lo0 or other interfaces
8023 * without dedicated input threads.
8025 if ((inp
= ifp
->if_inp
) != NULL
) {
8026 VERIFY(inp
!= dlil_main_input_thread
);
8028 if (inp
->dlth_affinity
) {
8029 struct thread
*tp
, *wtp
, *ptp
;
8031 lck_mtx_lock_spin(&inp
->dlth_lock
);
8032 wtp
= inp
->dlth_driver_thread
;
8033 inp
->dlth_driver_thread
= THREAD_NULL
;
8034 ptp
= inp
->dlth_poller_thread
;
8035 inp
->dlth_poller_thread
= THREAD_NULL
;
8036 ASSERT(inp
->dlth_thread
!= THREAD_NULL
);
8037 tp
= inp
->dlth_thread
; /* don't nullify now */
8038 inp
->dlth_affinity_tag
= 0;
8039 inp
->dlth_affinity
= FALSE
;
8040 lck_mtx_unlock(&inp
->dlth_lock
);
8042 /* Tear down poll thread affinity */
8044 VERIFY(ifp
->if_eflags
& IFEF_RXPOLL
);
8045 VERIFY(ifp
->if_xflags
& IFXF_LEGACY
);
8046 (void) dlil_affinity_set(ptp
,
8047 THREAD_AFFINITY_TAG_NULL
);
8048 thread_deallocate(ptp
);
8051 /* Tear down workloop thread affinity */
8053 (void) dlil_affinity_set(wtp
,
8054 THREAD_AFFINITY_TAG_NULL
);
8055 thread_deallocate(wtp
);
8058 /* Tear down DLIL input thread affinity */
8059 (void) dlil_affinity_set(tp
, THREAD_AFFINITY_TAG_NULL
);
8060 thread_deallocate(tp
);
8063 /* disassociate ifp DLIL input thread */
8066 /* if the worker thread was created, tell it to terminate */
8067 if (inp
->dlth_thread
!= THREAD_NULL
) {
8068 lck_mtx_lock_spin(&inp
->dlth_lock
);
8069 inp
->dlth_flags
|= DLIL_INPUT_TERMINATE
;
8070 if (!(inp
->dlth_flags
& DLIL_INPUT_RUNNING
)) {
8071 wakeup_one((caddr_t
)&inp
->dlth_flags
);
8073 lck_mtx_unlock(&inp
->dlth_lock
);
8074 ifnet_lock_done(ifp
);
8076 /* wait for the input thread to terminate */
8077 lck_mtx_lock_spin(&inp
->dlth_lock
);
8078 while ((inp
->dlth_flags
& DLIL_INPUT_TERMINATE_COMPLETE
)
8080 (void) msleep(&inp
->dlth_flags
, &inp
->dlth_lock
,
8081 (PZERO
- 1) | PSPIN
, inp
->dlth_name
, NULL
);
8083 lck_mtx_unlock(&inp
->dlth_lock
);
8084 ifnet_lock_exclusive(ifp
);
8087 /* clean-up input thread state */
8088 dlil_clean_threading_info(inp
);
8089 /* clean-up poll parameters */
8090 VERIFY(ifp
->if_poll_thread
== THREAD_NULL
);
8091 dlil_reset_rxpoll_params(ifp
);
8094 /* The driver might unload, so point these to ourselves */
8095 if_free
= ifp
->if_free
;
8096 ifp
->if_output_dlil
= ifp_if_output
;
8097 ifp
->if_output
= ifp_if_output
;
8098 ifp
->if_pre_enqueue
= ifp_if_output
;
8099 ifp
->if_start
= ifp_if_start
;
8100 ifp
->if_output_ctl
= ifp_if_ctl
;
8101 ifp
->if_input_dlil
= ifp_if_input
;
8102 ifp
->if_input_poll
= ifp_if_input_poll
;
8103 ifp
->if_input_ctl
= ifp_if_ctl
;
8104 ifp
->if_ioctl
= ifp_if_ioctl
;
8105 ifp
->if_set_bpf_tap
= ifp_if_set_bpf_tap
;
8106 ifp
->if_free
= ifp_if_free
;
8107 ifp
->if_demux
= ifp_if_demux
;
8108 ifp
->if_event
= ifp_if_event
;
8109 ifp
->if_framer_legacy
= ifp_if_framer
;
8110 ifp
->if_framer
= ifp_if_framer_extended
;
8111 ifp
->if_add_proto
= ifp_if_add_proto
;
8112 ifp
->if_del_proto
= ifp_if_del_proto
;
8113 ifp
->if_check_multi
= ifp_if_check_multi
;
8115 /* wipe out interface description */
8116 VERIFY(ifp
->if_desc
.ifd_maxlen
== IF_DESCSIZE
);
8117 ifp
->if_desc
.ifd_len
= 0;
8118 VERIFY(ifp
->if_desc
.ifd_desc
!= NULL
);
8119 bzero(ifp
->if_desc
.ifd_desc
, IF_DESCSIZE
);
8121 /* there shouldn't be any delegation by now */
8122 VERIFY(ifp
->if_delegated
.ifp
== NULL
);
8123 VERIFY(ifp
->if_delegated
.type
== 0);
8124 VERIFY(ifp
->if_delegated
.family
== 0);
8125 VERIFY(ifp
->if_delegated
.subfamily
== 0);
8126 VERIFY(ifp
->if_delegated
.expensive
== 0);
8127 VERIFY(ifp
->if_delegated
.constrained
== 0);
8129 /* QoS marking get cleared */
8130 if_clear_eflags(ifp
, IFEF_QOSMARKING_ENABLED
);
8131 if_set_qosmarking_mode(ifp
, IFRTYPE_QOSMARKING_MODE_NONE
);
8134 ifnet_lock_done(ifp
);
8138 * Detach this interface from packet filter, if enabled.
8140 pf_ifnet_hook(ifp
, 0);
8143 /* Filter list should be empty */
8144 lck_mtx_lock_spin(&ifp
->if_flt_lock
);
8145 VERIFY(TAILQ_EMPTY(&ifp
->if_flt_head
));
8146 VERIFY(ifp
->if_flt_busy
== 0);
8147 VERIFY(ifp
->if_flt_waiters
== 0);
8148 lck_mtx_unlock(&ifp
->if_flt_lock
);
8150 /* Last chance to drain send queue */
8153 /* Last chance to cleanup any cached route */
8154 lck_mtx_lock(&ifp
->if_cached_route_lock
);
8155 VERIFY(!ifp
->if_fwd_cacheok
);
8156 ROUTE_RELEASE(&ifp
->if_fwd_route
);
8157 bzero(&ifp
->if_fwd_route
, sizeof(ifp
->if_fwd_route
));
8158 ROUTE_RELEASE(&ifp
->if_src_route
);
8159 bzero(&ifp
->if_src_route
, sizeof(ifp
->if_src_route
));
8160 ROUTE_RELEASE(&ifp
->if_src_route6
);
8161 bzero(&ifp
->if_src_route6
, sizeof(ifp
->if_src_route6
));
8162 lck_mtx_unlock(&ifp
->if_cached_route_lock
);
8164 VERIFY(ifp
->if_data_threshold
== 0);
8165 VERIFY(ifp
->if_dt_tcall
!= NULL
);
8166 VERIFY(!thread_call_isactive(ifp
->if_dt_tcall
));
8168 ifnet_llreach_ifdetach(ifp
);
8170 dlil_post_msg(ifp
, KEV_DL_SUBCLASS
, KEV_DL_IF_DETACHED
, NULL
, 0);
8173 * Finally, mark this ifnet as detached.
8175 lck_mtx_lock_spin(&ifp
->if_ref_lock
);
8176 if (!(ifp
->if_refflags
& IFRF_DETACHING
)) {
8177 panic("%s: flags mismatch (detaching not set) ifp=%p",
8181 ifp
->if_refflags
&= ~IFRF_DETACHING
;
8182 lck_mtx_unlock(&ifp
->if_ref_lock
);
8183 if (if_free
!= NULL
) {
8188 DLIL_PRINTF("%s: detached\n", if_name(ifp
));
8191 /* Release reference held during ifnet attach */
static errno_t
ifp_if_output(struct ifnet *ifp, struct mbuf *m)
{
#pragma unused(ifp)
	m_freem_list(m);
	return 0;
}

static void
ifp_if_start(struct ifnet *ifp)
{
	ifnet_purge(ifp);
}

static errno_t
ifp_if_input(struct ifnet *ifp, struct mbuf *m_head,
    struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
    boolean_t poll, struct thread *tp)
{
#pragma unused(ifp, m_tail, s, poll, tp)
	m_freem_list(m_head);
	return ENXIO;
}

static void
ifp_if_input_poll(struct ifnet *ifp, u_int32_t flags, u_int32_t max_cnt,
    struct mbuf **m_head, struct mbuf **m_tail, u_int32_t *cnt, u_int32_t *len)
{
#pragma unused(ifp, flags, max_cnt)
	if (m_head != NULL) {
		*m_head = NULL;
	}
	if (m_tail != NULL) {
		*m_tail = NULL;
	}
	if (cnt != NULL) {
		*cnt = 0;
	}
	if (len != NULL) {
		*len = 0;
	}
}

static errno_t
ifp_if_ctl(struct ifnet *ifp, ifnet_ctl_cmd_t cmd, u_int32_t arglen, void *arg)
{
#pragma unused(ifp, cmd, arglen, arg)
	return EOPNOTSUPP;
}

static errno_t
ifp_if_demux(struct ifnet *ifp, struct mbuf *m, char *fh, protocol_family_t *pf)
{
#pragma unused(ifp, fh, pf)
	m_freem(m);
	return EJUSTRETURN;
}

static errno_t
ifp_if_add_proto(struct ifnet *ifp, protocol_family_t pf,
    const struct ifnet_demux_desc *da, u_int32_t dc)
{
#pragma unused(ifp, pf, da, dc)
	return EINVAL;
}

static errno_t
ifp_if_del_proto(struct ifnet *ifp, protocol_family_t pf)
{
#pragma unused(ifp, pf)
	return EINVAL;
}

static errno_t
ifp_if_check_multi(struct ifnet *ifp, const struct sockaddr *sa)
{
#pragma unused(ifp, sa)
	return EOPNOTSUPP;
}

#if !XNU_TARGET_OS_OSX
static errno_t
ifp_if_framer(struct ifnet *ifp, struct mbuf **m,
    const struct sockaddr *sa, const char *ll, const char *t,
    u_int32_t *pre, u_int32_t *post)
#else /* XNU_TARGET_OS_OSX */
static errno_t
ifp_if_framer(struct ifnet *ifp, struct mbuf **m,
    const struct sockaddr *sa, const char *ll, const char *t)
#endif /* XNU_TARGET_OS_OSX */
{
#pragma unused(ifp, m, sa, ll, t)
#if !XNU_TARGET_OS_OSX
	return ifp_if_framer_extended(ifp, m, sa, ll, t, pre, post);
#else /* XNU_TARGET_OS_OSX */
	return ifp_if_framer_extended(ifp, m, sa, ll, t, NULL, NULL);
#endif /* XNU_TARGET_OS_OSX */
}

static errno_t
ifp_if_framer_extended(struct ifnet *ifp, struct mbuf **m,
    const struct sockaddr *sa, const char *ll, const char *t,
    u_int32_t *pre, u_int32_t *post)
{
#pragma unused(ifp, sa, ll, t)
	m_freem(*m);
	*m = NULL;

	if (pre != NULL) {
		*pre = 0;
	}
	if (post != NULL) {
		*post = 0;
	}

	return EJUSTRETURN;
}

static errno_t
ifp_if_ioctl(struct ifnet *ifp, unsigned long cmd, void *arg)
{
#pragma unused(ifp, cmd, arg)
	return EOPNOTSUPP;
}

static errno_t
ifp_if_set_bpf_tap(struct ifnet *ifp, bpf_tap_mode tm, bpf_packet_func f)
{
#pragma unused(ifp, tm, f)
	/* XXX not sure what to do here */
	return 0;
}

static void
ifp_if_free(struct ifnet *ifp)
{
#pragma unused(ifp)
}

static void
ifp_if_event(struct ifnet *ifp, const struct kev_msg *e)
{
#pragma unused(ifp, e)
}
dlil_if_acquire(u_int32_t family, const void *uniqueid,
    size_t uniqueid_len, const char *ifxname, struct ifnet **ifp)

	struct ifnet *ifp1 = NULL;
	struct dlil_ifnet *dlifp1 = NULL;
	struct dlil_ifnet *dlifp1_saved = NULL;
	void *buf, *base, **pbuf;

	VERIFY(*ifp == NULL);

	/*
	 * We absolutely can't have an interface with the same name
	 * To make sure of that list has to be traversed completely
	 */
	TAILQ_FOREACH(dlifp1, &dlil_ifnet_head, dl_if_link) {
		ifp1 = (struct ifnet *)dlifp1;

		if (ifp1->if_family != family) {

		/*
		 * If interface is in use, return EBUSY if either unique id
		 * or interface extended names are the same
		 */
		lck_mtx_lock(&dlifp1->dl_if_lock);
		if (strncmp(ifxname, ifp1->if_xname, IFXNAMSIZ) == 0) {
			if (dlifp1->dl_if_flags & DLIF_INUSE) {
				lck_mtx_unlock(&dlifp1->dl_if_lock);

		if (uniqueid_len == dlifp1->dl_if_uniqueid_len &&
		    bcmp(uniqueid, dlifp1->dl_if_uniqueid, uniqueid_len) == 0) {
			if (dlifp1->dl_if_flags & DLIF_INUSE) {
				lck_mtx_unlock(&dlifp1->dl_if_lock);

			/* Cache the first interface that can be recycled */
			dlifp1_saved = dlifp1;

			/*
			 * XXX Do not break or jump to end as we have to traverse
			 * the whole list to ensure there are no name collisions
			 */
		lck_mtx_unlock(&dlifp1->dl_if_lock);
	}

	/* If there's an interface that can be recycled, use that */
	if (dlifp1_saved != NULL) {
		lck_mtx_lock(&dlifp1_saved->dl_if_lock);
		dlifp1_saved->dl_if_flags |= (DLIF_INUSE | DLIF_REUSE);
		lck_mtx_unlock(&dlifp1_saved->dl_if_lock);
		dlifp1_saved = NULL;

	/* no interface found, allocate a new one */
	buf = zalloc_flags(dlif_zone, Z_WAITOK | Z_ZERO);

	/* Get the 64-bit aligned base address for this object */
	base = (void *)P2ROUNDUP((intptr_t)buf + sizeof(u_int64_t),
	VERIFY(((intptr_t)base + dlif_size) <= ((intptr_t)buf + dlif_bufsize));

	/*
	 * Wind back a pointer size from the aligned base and
	 * save the original address so we can free it later.
	 */
	pbuf = (void **)((intptr_t)base - sizeof(void *));

		MALLOC(dlifp1->dl_if_uniqueid, void *, uniqueid_len,
		if (dlifp1->dl_if_uniqueid == NULL) {
			zfree(dlif_zone, buf);
		bcopy(uniqueid, dlifp1->dl_if_uniqueid, uniqueid_len);
		dlifp1->dl_if_uniqueid_len = uniqueid_len;

	ifp1 = (struct ifnet *)dlifp1;
	dlifp1->dl_if_flags = DLIF_INUSE;
	dlifp1->dl_if_flags |= DLIF_DEBUG;
	dlifp1->dl_if_trace = dlil_if_trace;
	ifp1->if_name = dlifp1->dl_if_namestorage;
	ifp1->if_xname = dlifp1->dl_if_xnamestorage;

	/* initialize interface description */
	ifp1->if_desc.ifd_maxlen = IF_DESCSIZE;
	ifp1->if_desc.ifd_len = 0;
	ifp1->if_desc.ifd_desc = dlifp1->dl_if_descstorage;

	if ((ret = dlil_alloc_local_stats(ifp1)) != 0) {
		DLIL_PRINTF("%s: failed to allocate if local stats, "
		    "error: %d\n", __func__, ret);
		/* This probably shouldn't be fatal */

	lck_mtx_init(&dlifp1->dl_if_lock, ifnet_lock_group, ifnet_lock_attr);
	lck_rw_init(&ifp1->if_lock, ifnet_lock_group, ifnet_lock_attr);
	lck_mtx_init(&ifp1->if_ref_lock, ifnet_lock_group, ifnet_lock_attr);
	lck_mtx_init(&ifp1->if_flt_lock, ifnet_lock_group, ifnet_lock_attr);
	lck_mtx_init(&ifp1->if_addrconfig_lock, ifnet_lock_group,
	lck_rw_init(&ifp1->if_llreach_lock, ifnet_lock_group, ifnet_lock_attr);
	lck_rw_init(&ifp1->if_inetdata_lock, ifnet_lock_group,
	ifp1->if_inetdata = NULL;
	lck_rw_init(&ifp1->if_inet6data_lock, ifnet_lock_group,
	ifp1->if_inet6data = NULL;
	lck_rw_init(&ifp1->if_link_status_lock, ifnet_lock_group,
	ifp1->if_link_status = NULL;

	/* for send data paths */
	lck_mtx_init(&ifp1->if_start_lock, ifnet_snd_lock_group,
	lck_mtx_init(&ifp1->if_cached_route_lock, ifnet_snd_lock_group,
	lck_mtx_init(&ifp1->if_snd.ifcq_lock, ifnet_snd_lock_group,

	/* for receive data paths */
	lck_mtx_init(&ifp1->if_poll_lock, ifnet_rcv_lock_group,

	/* thread call allocation is done with sleeping zalloc */
	ifp1->if_dt_tcall = thread_call_allocate_with_options(dlil_dt_tcall_fn,
	    ifp1, THREAD_CALL_PRIORITY_KERNEL, THREAD_CALL_OPTIONS_ONCE);
	if (ifp1->if_dt_tcall == NULL) {
		panic_plain("%s: couldn't create if_dt_tcall", __func__);

	TAILQ_INSERT_TAIL(&dlil_ifnet_head, dlifp1, dl_if_link);

	VERIFY(dlifp1 == NULL || (IS_P2ALIGNED(dlifp1, sizeof(u_int64_t)) &&
	    IS_P2ALIGNED(&ifp1->if_data, sizeof(u_int64_t))));
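/*
 * Summary of dlil_if_acquire() as reconstructed above: the global
 * dlil_ifnet_head list is walked in full so that a duplicate extended name
 * or unique id on an in-use entry fails with EBUSY, a free entry with a
 * matching unique id is recycled (DLIF_INUSE | DLIF_REUSE), and only
 * otherwise is a fresh, 64-bit aligned dlil_ifnet carved out of dlif_zone
 * and linked onto the list with its locks and data-threshold thread call
 * initialized.
 */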
__private_extern__ void
dlil_if_release(ifnet_t ifp)

	struct dlil_ifnet *dlifp = (struct dlil_ifnet *)ifp;

	VERIFY(OSDecrementAtomic64(&net_api_stats.nas_ifnet_alloc_count) > 0);
	if (!(ifp->if_xflags & IFXF_ALLOC_KPI)) {
		VERIFY(OSDecrementAtomic64(&net_api_stats.nas_ifnet_alloc_os_count) > 0);

	ifnet_lock_exclusive(ifp);
	lck_mtx_lock(&dlifp->dl_if_lock);
	dlifp->dl_if_flags &= ~DLIF_INUSE;
	strlcpy(dlifp->dl_if_namestorage, ifp->if_name, IFNAMSIZ);
	ifp->if_name = dlifp->dl_if_namestorage;
	/* Reset external name (name + unit) */
	ifp->if_xname = dlifp->dl_if_xnamestorage;
	snprintf(__DECONST(char *, ifp->if_xname), IFXNAMSIZ,
	    "%s?", ifp->if_name);
	lck_mtx_unlock(&dlifp->dl_if_lock);
	ifnet_lock_done(ifp);
__private_extern__ void
dlil_if_lock(void)
	lck_mtx_lock(&dlil_ifnet_lock);

__private_extern__ void
dlil_if_unlock(void)
	lck_mtx_unlock(&dlil_ifnet_lock);

__private_extern__ void
dlil_if_lock_assert(void)
	LCK_MTX_ASSERT(&dlil_ifnet_lock, LCK_MTX_ASSERT_OWNED);
__private_extern__ void
dlil_proto_unplumb_all(struct ifnet *ifp)

	/*
	 * if_proto_hash[0-2] are for PF_INET, PF_INET6 and PF_VLAN, where
	 * each bucket contains exactly one entry; PF_VLAN does not need an
	 * if_proto_hash[3] is for other protocols; we expect anything
	 * in this bucket to respond to the DETACHING event (which would
	 * have happened by now) and do the unplumb then.
	 */
	(void) proto_unplumb(PF_INET, ifp);
	(void) proto_unplumb(PF_INET6, ifp);
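/*
 * dlil_if_lock()/dlil_if_unlock() above presumably bracket traversal and
 * mutation of dlil_ifnet_head, with dlil_if_lock_assert() letting callers
 * document that the mutex is already owned; the excerpt does not show the
 * call sites, so this is an inference from the lock's name and use here.
 */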
ifp_src_route_copyout(struct ifnet *ifp, struct route *dst)
	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
	lck_mtx_convert_spin(&ifp->if_cached_route_lock);
	route_copyout(dst, &ifp->if_src_route, sizeof(*dst));
	lck_mtx_unlock(&ifp->if_cached_route_lock);

ifp_src_route_copyin(struct ifnet *ifp, struct route *src)
	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
	lck_mtx_convert_spin(&ifp->if_cached_route_lock);
	if (ifp->if_fwd_cacheok) {
		route_copyin(src, &ifp->if_src_route, sizeof(*src));
	lck_mtx_unlock(&ifp->if_cached_route_lock);

ifp_src_route6_copyout(struct ifnet *ifp, struct route_in6 *dst)
	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
	lck_mtx_convert_spin(&ifp->if_cached_route_lock);
	route_copyout((struct route *)dst, (struct route *)&ifp->if_src_route6,
	lck_mtx_unlock(&ifp->if_cached_route_lock);

ifp_src_route6_copyin(struct ifnet *ifp, struct route_in6 *src)
	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
	lck_mtx_convert_spin(&ifp->if_cached_route_lock);
	if (ifp->if_fwd_cacheok) {
		route_copyin((struct route *)src,
		    (struct route *)&ifp->if_src_route6, sizeof(*src));
	lck_mtx_unlock(&ifp->if_cached_route_lock);
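/*
 * The four copyout/copyin helpers above share one pattern: take
 * if_cached_route_lock as a spin lock, convert it to a full mutex before
 * the potentially longer route_copyout()/route_copyin() call, and honor
 * if_fwd_cacheok so a disabled forwarding cache is never written back.
 */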
ifnet_cached_rtlookup_inet(struct ifnet *ifp, struct in_addr src_ip)
	struct route src_rt;
	struct sockaddr_in *dst;

	dst = (struct sockaddr_in *)(void *)(&src_rt.ro_dst);

	ifp_src_route_copyout(ifp, &src_rt);

	if (ROUTE_UNUSABLE(&src_rt) || src_ip.s_addr != dst->sin_addr.s_addr) {
		ROUTE_RELEASE(&src_rt);
		if (dst->sin_family != AF_INET) {
			bzero(&src_rt.ro_dst, sizeof(src_rt.ro_dst));
			dst->sin_len = sizeof(src_rt.ro_dst);
			dst->sin_family = AF_INET;
		dst->sin_addr = src_ip;

		VERIFY(src_rt.ro_rt == NULL);
		src_rt.ro_rt = rtalloc1_scoped((struct sockaddr *)dst,
		    0, 0, ifp->if_index);

		if (src_rt.ro_rt != NULL) {
			/* retain a ref, copyin consumes one */
			struct rtentry *rte = src_rt.ro_rt;
			ifp_src_route_copyin(ifp, &src_rt);

	return src_rt.ro_rt;
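/*
 * ifnet_cached_rtlookup_inet() thus returns the cached route when the
 * destination still matches src_ip, and otherwise releases it, rebuilds the
 * sockaddr_in and performs a fresh rtalloc1_scoped() bound to ifp->if_index,
 * retaining an extra reference because ifp_src_route_copyin() consumes one.
 */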
ifnet_cached_rtlookup_inet6(struct ifnet *ifp, struct in6_addr *src_ip6)
	struct route_in6 src_rt;

	ifp_src_route6_copyout(ifp, &src_rt);

	if (ROUTE_UNUSABLE(&src_rt) ||
	    !IN6_ARE_ADDR_EQUAL(src_ip6, &src_rt.ro_dst.sin6_addr)) {
		ROUTE_RELEASE(&src_rt);
		if (src_rt.ro_dst.sin6_family != AF_INET6) {
			bzero(&src_rt.ro_dst, sizeof(src_rt.ro_dst));
			src_rt.ro_dst.sin6_len = sizeof(src_rt.ro_dst);
			src_rt.ro_dst.sin6_family = AF_INET6;
		src_rt.ro_dst.sin6_scope_id = in6_addr2scopeid(ifp, src_ip6);
		bcopy(src_ip6, &src_rt.ro_dst.sin6_addr,
		    sizeof(src_rt.ro_dst.sin6_addr));

		if (src_rt.ro_rt == NULL) {
			src_rt.ro_rt = rtalloc1_scoped(
				(struct sockaddr *)&src_rt.ro_dst, 0, 0,

			if (src_rt.ro_rt != NULL) {
				/* retain a ref, copyin consumes one */
				struct rtentry *rte = src_rt.ro_rt;
				ifp_src_route6_copyin(ifp, &src_rt);

	return src_rt.ro_rt;
if_lqm_update(struct ifnet *ifp, int lqm, int locked)
	struct kev_dl_link_quality_metric_data ev_lqm_data;

	VERIFY(lqm >= IFNET_LQM_MIN && lqm <= IFNET_LQM_MAX);

	/* Normalize to edge */
	if (lqm >= 0 && lqm <= IFNET_LQM_THRESH_ABORT) {
		lqm = IFNET_LQM_THRESH_ABORT;
		atomic_bitset_32(&tcbinfo.ipi_flags,
		    INPCBINFO_HANDLE_LQM_ABORT);
		inpcb_timer_sched(&tcbinfo, INPCB_TIMER_FAST);
	} else if (lqm > IFNET_LQM_THRESH_ABORT &&
	    lqm <= IFNET_LQM_THRESH_MINIMALLY_VIABLE) {
		lqm = IFNET_LQM_THRESH_MINIMALLY_VIABLE;
	} else if (lqm > IFNET_LQM_THRESH_MINIMALLY_VIABLE &&
	    lqm <= IFNET_LQM_THRESH_POOR) {
		lqm = IFNET_LQM_THRESH_POOR;
	} else if (lqm > IFNET_LQM_THRESH_POOR &&
	    lqm <= IFNET_LQM_THRESH_GOOD) {
		lqm = IFNET_LQM_THRESH_GOOD;

	/*
	 * Take the lock if needed
	 */
		ifnet_lock_exclusive(ifp);

	if (lqm == ifp->if_interface_state.lqm_state &&
	    (ifp->if_interface_state.valid_bitmask &
	    IF_INTERFACE_STATE_LQM_STATE_VALID)) {
		/*
		 * Release the lock if was not held by the caller
		 */
			ifnet_lock_done(ifp);
		return; /* nothing to update */

	ifp->if_interface_state.valid_bitmask |=
	    IF_INTERFACE_STATE_LQM_STATE_VALID;
	ifp->if_interface_state.lqm_state = (int8_t)lqm;

	/*
	 * Don't want to hold the lock when issuing kernel events
	 */
	ifnet_lock_done(ifp);

	bzero(&ev_lqm_data, sizeof(ev_lqm_data));
	ev_lqm_data.link_quality_metric = lqm;

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_LINK_QUALITY_METRIC_CHANGED,
	    (struct net_event_data *)&ev_lqm_data, sizeof(ev_lqm_data));

	/*
	 * Reacquire the lock for the caller
	 */
		ifnet_lock_exclusive(ifp);
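/*
 * A short worked example of the edge normalization above: any raw lqm value
 * between 0 and IFNET_LQM_THRESH_ABORT collapses to the ABORT threshold and
 * additionally schedules the fast inpcb timer so affected connections are
 * torn down promptly; values in the next three ranges snap up to
 * MINIMALLY_VIABLE, POOR and GOOD respectively before the
 * KEV_DL_LINK_QUALITY_METRIC_CHANGED event is posted.
 */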
if_rrc_state_update(struct ifnet *ifp, unsigned int rrc_state)
	struct kev_dl_rrc_state kev;

	if (rrc_state == ifp->if_interface_state.rrc_state &&
	    (ifp->if_interface_state.valid_bitmask &
	    IF_INTERFACE_STATE_RRC_STATE_VALID)) {

	ifp->if_interface_state.valid_bitmask |=
	    IF_INTERFACE_STATE_RRC_STATE_VALID;
	ifp->if_interface_state.rrc_state = (uint8_t)rrc_state;

	/*
	 * Don't want to hold the lock when issuing kernel events
	 */
	ifnet_lock_done(ifp);

	bzero(&kev, sizeof(struct kev_dl_rrc_state));
	kev.rrc_state = rrc_state;

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_RRC_STATE_CHANGED,
	    (struct net_event_data *)&kev, sizeof(struct kev_dl_rrc_state));

	ifnet_lock_exclusive(ifp);
if_state_update(struct ifnet *ifp,
    struct if_interface_state *if_interface_state)
	u_short if_index_available = 0;

	ifnet_lock_exclusive(ifp);

	if ((ifp->if_type != IFT_CELLULAR) &&
	    (if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_RRC_STATE_VALID)) {
		ifnet_lock_done(ifp);
	if ((if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_LQM_STATE_VALID) &&
	    (if_interface_state->lqm_state < IFNET_LQM_MIN ||
	    if_interface_state->lqm_state > IFNET_LQM_MAX)) {
		ifnet_lock_done(ifp);
	if ((if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_RRC_STATE_VALID) &&
	    if_interface_state->rrc_state !=
	    IF_INTERFACE_STATE_RRC_STATE_IDLE &&
	    if_interface_state->rrc_state !=
	    IF_INTERFACE_STATE_RRC_STATE_CONNECTED) {
		ifnet_lock_done(ifp);

	if (if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_LQM_STATE_VALID) {
		if_lqm_update(ifp, if_interface_state->lqm_state, 1);
	if (if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_RRC_STATE_VALID) {
		if_rrc_state_update(ifp, if_interface_state->rrc_state);
	if (if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID) {
		ifp->if_interface_state.valid_bitmask |=
		    IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
		ifp->if_interface_state.interface_availability =
		    if_interface_state->interface_availability;

		if (ifp->if_interface_state.interface_availability ==
		    IF_INTERFACE_STATE_INTERFACE_AVAILABLE) {
			os_log(OS_LOG_DEFAULT, "%s: interface %s (%u) available\n",
			    __func__, if_name(ifp), ifp->if_index);
			if_index_available = ifp->if_index;
			os_log(OS_LOG_DEFAULT, "%s: interface %s (%u) unavailable)\n",
			    __func__, if_name(ifp), ifp->if_index);
	ifnet_lock_done(ifp);

	/*
	 * Check if the TCP connections going on this interface should be
	 * forced to send probe packets instead of waiting for TCP timers
	 * to fire. This is done on an explicit notification such as
	 * SIOCSIFINTERFACESTATE which marks the interface as available.
	 */
	if (if_index_available > 0) {
		tcp_interface_send_probe(if_index_available);
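/*
 * if_state_update() validates before it applies: RRC state is only accepted
 * for cellular interfaces and only for the IDLE/CONNECTED values, LQM must
 * lie within [IFNET_LQM_MIN, IFNET_LQM_MAX], and only after the ifnet lock
 * is dropped does it ask TCP to probe connections when the interface was
 * just marked available.
 */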
if_get_state(struct ifnet *ifp,
    struct if_interface_state *if_interface_state)
	ifnet_lock_shared(ifp);

	if_interface_state->valid_bitmask = 0;

	if (ifp->if_interface_state.valid_bitmask &
	    IF_INTERFACE_STATE_RRC_STATE_VALID) {
		if_interface_state->valid_bitmask |=
		    IF_INTERFACE_STATE_RRC_STATE_VALID;
		if_interface_state->rrc_state =
		    ifp->if_interface_state.rrc_state;
	if (ifp->if_interface_state.valid_bitmask &
	    IF_INTERFACE_STATE_LQM_STATE_VALID) {
		if_interface_state->valid_bitmask |=
		    IF_INTERFACE_STATE_LQM_STATE_VALID;
		if_interface_state->lqm_state =
		    ifp->if_interface_state.lqm_state;
	if (ifp->if_interface_state.valid_bitmask &
	    IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID) {
		if_interface_state->valid_bitmask |=
		    IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
		if_interface_state->interface_availability =
		    ifp->if_interface_state.interface_availability;

	ifnet_lock_done(ifp);
if_probe_connectivity(struct ifnet *ifp, u_int32_t conn_probe)
	if (conn_probe > 1) {
	if (conn_probe == 0) {
		if_clear_eflags(ifp, IFEF_PROBE_CONNECTIVITY);
		if_set_eflags(ifp, IFEF_PROBE_CONNECTIVITY);

	necp_update_all_clients();

	tcp_probe_connectivity(ifp, conn_probe);
get_ether_index(int * ret_other_index)
	int other_en_index = 0;
	int any_ether_index = 0;
	short best_unit = 0;

	*ret_other_index = 0;
	TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
		/*
		 * find en0, or if not en0, the lowest unit en*, and if not
		 * that, any ethernet
		 */
		ifnet_lock_shared(ifp);
		if (strcmp(ifp->if_name, "en") == 0) {
			if (ifp->if_unit == 0) {
				/* found en0, we're done */
				en0_index = ifp->if_index;
				ifnet_lock_done(ifp);
			if (other_en_index == 0 || ifp->if_unit < best_unit) {
				other_en_index = ifp->if_index;
				best_unit = ifp->if_unit;
		} else if (ifp->if_type == IFT_ETHER && any_ether_index == 0) {
			any_ether_index = ifp->if_index;
		ifnet_lock_done(ifp);
	if (en0_index == 0) {
		if (other_en_index != 0) {
			*ret_other_index = other_en_index;
		} else if (any_ether_index != 0) {
			*ret_other_index = any_ether_index;
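/*
 * Selection order in get_ether_index(): en0 wins outright, otherwise the
 * lowest-numbered en* unit seen, otherwise the first IFT_ETHER interface of
 * any name; the latter two are reported through *ret_other_index.
 */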
uuid_get_ethernet(u_int8_t *node)
	static int en0_index;
	int other_index = 0;

	ifnet_head_lock_shared();
	if (en0_index == 0 || ifindex2ifnet[en0_index] == NULL) {
		en0_index = get_ether_index(&other_index);
	if (en0_index != 0) {
		the_index = en0_index;
	} else if (other_index != 0) {
		the_index = other_index;
	if (the_index != 0) {
		struct dlil_ifnet *dl_if;

		ifp = ifindex2ifnet[the_index];
		VERIFY(ifp != NULL);
		dl_if = (struct dlil_ifnet *)ifp;
		if (dl_if->dl_if_permanent_ether_is_set != 0) {
			/*
			 * Use the permanent ethernet address if it is
			 * available because it will never change.
			 */
			memcpy(node, dl_if->dl_if_permanent_ether,
			memcpy(node, IF_LLADDR(ifp), ETHER_ADDR_LEN);
sysctl_rxpoll SYSCTL_HANDLER_ARGS
#pragma unused(arg1, arg2)
	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL) {
	if (net_rxpoll == 0) {

sysctl_rxpoll_mode_holdtime SYSCTL_HANDLER_ARGS
#pragma unused(arg1, arg2)
	q = if_rxpoll_mode_holdtime;
	err = sysctl_handle_quad(oidp, &q, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL) {
	if (q < IF_RXPOLL_MODE_HOLDTIME_MIN) {
		q = IF_RXPOLL_MODE_HOLDTIME_MIN;
	if_rxpoll_mode_holdtime = q;

sysctl_rxpoll_sample_holdtime SYSCTL_HANDLER_ARGS
#pragma unused(arg1, arg2)
	q = if_rxpoll_sample_holdtime;
	err = sysctl_handle_quad(oidp, &q, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL) {
	if (q < IF_RXPOLL_SAMPLETIME_MIN) {
		q = IF_RXPOLL_SAMPLETIME_MIN;
	if_rxpoll_sample_holdtime = q;

sysctl_rxpoll_interval_time SYSCTL_HANDLER_ARGS
#pragma unused(arg1, arg2)
	q = if_rxpoll_interval_time;
	err = sysctl_handle_quad(oidp, &q, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL) {
	if (q < IF_RXPOLL_INTERVALTIME_MIN) {
		q = IF_RXPOLL_INTERVALTIME_MIN;
	if_rxpoll_interval_time = q;

sysctl_rxpoll_wlowat SYSCTL_HANDLER_ARGS
#pragma unused(arg1, arg2)
	i = if_sysctl_rxpoll_wlowat;
	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL) {
	if (i == 0 || i >= if_sysctl_rxpoll_whiwat) {
	if_sysctl_rxpoll_wlowat = i;

sysctl_rxpoll_whiwat SYSCTL_HANDLER_ARGS
#pragma unused(arg1, arg2)
	i = if_sysctl_rxpoll_whiwat;
	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL) {
	if (i <= if_sysctl_rxpoll_wlowat) {
	if_sysctl_rxpoll_whiwat = i;

sysctl_sndq_maxlen SYSCTL_HANDLER_ARGS
#pragma unused(arg1, arg2)
	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL) {
	if (i < IF_SNDQ_MINLEN) {

sysctl_rcvq_maxlen SYSCTL_HANDLER_ARGS
#pragma unused(arg1, arg2)
	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL) {
	if (i < IF_RCVQ_MINLEN) {
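/*
 * All of the rx-poll and queue-length handlers above follow the same
 * read/validate/commit shape. The fragment below is an illustrative sketch
 * only (the tunable name and bounds are invented for the example, not
 * symbols defined elsewhere in this file).
 */
#if 0
static int example_tunable = 8;

static int
sysctl_example_tunable SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	int err, i;

	i = example_tunable;                            /* snapshot current value */
	err = sysctl_handle_int(oidp, &i, 0, req);      /* copy out, then copy in */
	if (err != 0 || req->newptr == USER_ADDR_NULL) {
		return err;                             /* read-only access or error */
	}
	if (i < 1 || i > 64) {
		return EINVAL;                          /* reject out-of-range input */
	}
	example_tunable = i;                            /* commit the validated value */
	return 0;
}
#endif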
9180 dlil_node_present(struct ifnet
*ifp
, struct sockaddr
*sa
,
9181 int32_t rssi
, int lqm
, int npm
, u_int8_t srvinfo
[48])
9183 struct kev_dl_node_presence kev
;
9184 struct sockaddr_dl
*sdl
;
9185 struct sockaddr_in6
*sin6
;
9190 VERIFY(sa
->sa_family
== AF_LINK
|| sa
->sa_family
== AF_INET6
);
9192 bzero(&kev
, sizeof(kev
));
9193 sin6
= &kev
.sin6_node_address
;
9194 sdl
= &kev
.sdl_node_address
;
9195 nd6_alt_node_addr_decompose(ifp
, sa
, sdl
, sin6
);
9197 kev
.link_quality_metric
= lqm
;
9198 kev
.node_proximity_metric
= npm
;
9199 bcopy(srvinfo
, kev
.node_service_info
, sizeof(kev
.node_service_info
));
9201 ret
= nd6_alt_node_present(ifp
, sin6
, sdl
, rssi
, lqm
, npm
);
9203 int err
= dlil_post_msg(ifp
, KEV_DL_SUBCLASS
, KEV_DL_NODE_PRESENCE
,
9204 &kev
.link_data
, sizeof(kev
));
9206 log(LOG_ERR
, "%s: Post DL_NODE_PRESENCE failed with"
9207 "error %d\n", __func__
, err
);
9214 dlil_node_absent(struct ifnet
*ifp
, struct sockaddr
*sa
)
9216 struct kev_dl_node_absence kev
= {};
9217 struct sockaddr_in6
*kev_sin6
= NULL
;
9218 struct sockaddr_dl
*kev_sdl
= NULL
;
9220 VERIFY(ifp
!= NULL
);
9222 VERIFY(sa
->sa_family
== AF_LINK
|| sa
->sa_family
== AF_INET6
);
9224 kev_sin6
= &kev
.sin6_node_address
;
9225 kev_sdl
= &kev
.sdl_node_address
;
9227 if (sa
->sa_family
== AF_INET6
) {
9229 * If IPv6 address is given, get the link layer
9230 * address from what was cached in the neighbor cache
9232 VERIFY(sa
->sa_len
<= sizeof(*kev_sin6
));
9233 bcopy(sa
, kev_sin6
, sa
->sa_len
);
9234 nd6_alt_node_absent(ifp
, kev_sin6
, kev_sdl
);
9237 * If passed address is AF_LINK type, derive the address
9238 * based on the link address.
9240 nd6_alt_node_addr_decompose(ifp
, sa
, kev_sdl
, kev_sin6
);
9241 nd6_alt_node_absent(ifp
, kev_sin6
, NULL
);
9244 kev_sdl
->sdl_type
= ifp
->if_type
;
9245 kev_sdl
->sdl_index
= ifp
->if_index
;
9247 dlil_post_msg(ifp
, KEV_DL_SUBCLASS
, KEV_DL_NODE_ABSENCE
,
9248 &kev
.link_data
, sizeof(kev
));
9252 dlil_node_present_v2(struct ifnet
*ifp
, struct sockaddr
*sa
, struct sockaddr_dl
*sdl
,
9253 int32_t rssi
, int lqm
, int npm
, u_int8_t srvinfo
[48])
9255 struct kev_dl_node_presence kev
= {};
9256 struct sockaddr_dl
*kev_sdl
= NULL
;
9257 struct sockaddr_in6
*kev_sin6
= NULL
;
9260 VERIFY(ifp
!= NULL
);
9261 VERIFY(sa
!= NULL
&& sdl
!= NULL
);
9262 VERIFY(sa
->sa_family
== AF_INET6
&& sdl
->sdl_family
== AF_LINK
);
9264 kev_sin6
= &kev
.sin6_node_address
;
9265 kev_sdl
= &kev
.sdl_node_address
;
9267 VERIFY(sdl
->sdl_len
<= sizeof(*kev_sdl
));
9268 bcopy(sdl
, kev_sdl
, sdl
->sdl_len
);
9269 kev_sdl
->sdl_type
= ifp
->if_type
;
9270 kev_sdl
->sdl_index
= ifp
->if_index
;
9272 VERIFY(sa
->sa_len
<= sizeof(*kev_sin6
));
9273 bcopy(sa
, kev_sin6
, sa
->sa_len
);
9276 kev
.link_quality_metric
= lqm
;
9277 kev
.node_proximity_metric
= npm
;
9278 bcopy(srvinfo
, kev
.node_service_info
, sizeof(kev
.node_service_info
));
9280 ret
= nd6_alt_node_present(ifp
, SIN6(sa
), sdl
, rssi
, lqm
, npm
);
9282 int err
= dlil_post_msg(ifp
, KEV_DL_SUBCLASS
, KEV_DL_NODE_PRESENCE
,
9283 &kev
.link_data
, sizeof(kev
));
9285 log(LOG_ERR
, "%s: Post DL_NODE_PRESENCE failed with error %d\n", __func__
, err
);
9292 dlil_ifaddr_bytes(const struct sockaddr_dl
*sdl
, size_t *sizep
,
9293 kauth_cred_t
*credp
)
9295 const u_int8_t
*bytes
;
9298 bytes
= CONST_LLADDR(sdl
);
9299 size
= sdl
->sdl_alen
;
9302 if (dlil_lladdr_ckreq
) {
9303 switch (sdl
->sdl_type
) {
9313 if (credp
&& mac_system_check_info(*credp
, "net.link.addr")) {
9314 static const u_int8_t unspec
[FIREWIRE_EUI64_LEN
] = {
9322 #pragma unused(credp)
9325 if (sizep
!= NULL
) {
9332 dlil_report_issues(struct ifnet
*ifp
, u_int8_t modid
[DLIL_MODIDLEN
],
9333 u_int8_t info
[DLIL_MODARGLEN
])
9335 struct kev_dl_issues kev
;
9338 VERIFY(ifp
!= NULL
);
9339 VERIFY(modid
!= NULL
);
9340 _CASSERT(sizeof(kev
.modid
) == DLIL_MODIDLEN
);
9341 _CASSERT(sizeof(kev
.info
) == DLIL_MODARGLEN
);
9343 bzero(&kev
, sizeof(kev
));
9346 kev
.timestamp
= tv
.tv_sec
;
9347 bcopy(modid
, &kev
.modid
, DLIL_MODIDLEN
);
9349 bcopy(info
, &kev
.info
, DLIL_MODARGLEN
);
9352 dlil_post_msg(ifp
, KEV_DL_SUBCLASS
, KEV_DL_ISSUES
,
9353 &kev
.link_data
, sizeof(kev
));
9357 ifnet_getset_opportunistic(ifnet_t ifp
, u_long cmd
, struct ifreq
*ifr
,
9360 u_int32_t level
= IFNET_THROTTLE_OFF
;
9363 VERIFY(cmd
== SIOCSIFOPPORTUNISTIC
|| cmd
== SIOCGIFOPPORTUNISTIC
);
9365 if (cmd
== SIOCSIFOPPORTUNISTIC
) {
9367 * XXX: Use priv_check_cred() instead of root check?
9369 if ((result
= proc_suser(p
)) != 0) {
9373 if (ifr
->ifr_opportunistic
.ifo_flags
==
9374 IFRIFOF_BLOCK_OPPORTUNISTIC
) {
9375 level
= IFNET_THROTTLE_OPPORTUNISTIC
;
9376 } else if (ifr
->ifr_opportunistic
.ifo_flags
== 0) {
9377 level
= IFNET_THROTTLE_OFF
;
9383 result
= ifnet_set_throttle(ifp
, level
);
9385 } else if ((result
= ifnet_get_throttle(ifp
, &level
)) == 0) {
9386 ifr
->ifr_opportunistic
.ifo_flags
= 0;
9387 if (level
== IFNET_THROTTLE_OPPORTUNISTIC
) {
9388 ifr
->ifr_opportunistic
.ifo_flags
|=
9389 IFRIFOF_BLOCK_OPPORTUNISTIC
;
9394 * Return the count of current opportunistic connections
9395 * over the interface.
9399 flags
|= (cmd
== SIOCSIFOPPORTUNISTIC
) ?
9400 INPCB_OPPORTUNISTIC_SETCMD
: 0;
9401 flags
|= (level
== IFNET_THROTTLE_OPPORTUNISTIC
) ?
9402 INPCB_OPPORTUNISTIC_THROTTLEON
: 0;
9403 ifr
->ifr_opportunistic
.ifo_inuse
=
9404 udp_count_opportunistic(ifp
->if_index
, flags
) +
9405 tcp_count_opportunistic(ifp
->if_index
, flags
);
9408 if (result
== EALREADY
) {
9416 ifnet_get_throttle(struct ifnet
*ifp
, u_int32_t
*level
)
9418 struct ifclassq
*ifq
;
9421 if (!(ifp
->if_eflags
& IFEF_TXSTART
)) {
9425 *level
= IFNET_THROTTLE_OFF
;
9429 /* Throttling works only for IFCQ, not ALTQ instances */
9430 if (IFCQ_IS_ENABLED(ifq
)) {
9431 cqrq_throttle_t req
= { 0, IFNET_THROTTLE_OFF
};
9433 err
= fq_if_request_classq(ifq
, CLASSQRQ_THROTTLE
, &req
);
9442 ifnet_set_throttle(struct ifnet
*ifp
, u_int32_t level
)
9444 struct ifclassq
*ifq
;
9447 if (!(ifp
->if_eflags
& IFEF_TXSTART
)) {
9454 case IFNET_THROTTLE_OFF
:
9455 case IFNET_THROTTLE_OPPORTUNISTIC
:
9462 if (IFCQ_IS_ENABLED(ifq
)) {
9463 cqrq_throttle_t req
= { 1, level
};
9465 err
= fq_if_request_classq(ifq
, CLASSQRQ_THROTTLE
, &req
);
9470 DLIL_PRINTF("%s: throttling level set to %d\n", if_name(ifp
),
9473 necp_update_all_clients();
9475 if (level
== IFNET_THROTTLE_OFF
) {
9484 ifnet_getset_log(ifnet_t ifp
, u_long cmd
, struct ifreq
*ifr
,
9490 int level
, category
, subcategory
;
9492 VERIFY(cmd
== SIOCSIFLOG
|| cmd
== SIOCGIFLOG
);
9494 if (cmd
== SIOCSIFLOG
) {
9495 if ((result
= priv_check_cred(kauth_cred_get(),
9496 PRIV_NET_INTERFACE_CONTROL
, 0)) != 0) {
9500 level
= ifr
->ifr_log
.ifl_level
;
9501 if (level
< IFNET_LOG_MIN
|| level
> IFNET_LOG_MAX
) {
9505 flags
= ifr
->ifr_log
.ifl_flags
;
9506 if ((flags
&= IFNET_LOGF_MASK
) == 0) {
9510 category
= ifr
->ifr_log
.ifl_category
;
9511 subcategory
= ifr
->ifr_log
.ifl_subcategory
;
9514 result
= ifnet_set_log(ifp
, level
, flags
,
9515 category
, subcategory
);
9518 result
= ifnet_get_log(ifp
, &level
, &flags
, &category
,
9521 ifr
->ifr_log
.ifl_level
= level
;
9522 ifr
->ifr_log
.ifl_flags
= flags
;
9523 ifr
->ifr_log
.ifl_category
= category
;
9524 ifr
->ifr_log
.ifl_subcategory
= subcategory
;
9532 ifnet_set_log(struct ifnet
*ifp
, int32_t level
, uint32_t flags
,
9533 int32_t category
, int32_t subcategory
)
9537 VERIFY(level
>= IFNET_LOG_MIN
&& level
<= IFNET_LOG_MAX
);
9538 VERIFY(flags
& IFNET_LOGF_MASK
);
9541 * The logging level applies to all facilities; make sure to
9542 * update them all with the most current level.
9544 flags
|= ifp
->if_log
.flags
;
9546 if (ifp
->if_output_ctl
!= NULL
) {
9547 struct ifnet_log_params l
;
9549 bzero(&l
, sizeof(l
));
9552 l
.flags
&= ~IFNET_LOGF_DLIL
;
9553 l
.category
= category
;
9554 l
.subcategory
= subcategory
;
9556 /* Send this request to lower layers */
9558 err
= ifp
->if_output_ctl(ifp
, IFNET_CTL_SET_LOG
,
9561 } else if ((flags
& ~IFNET_LOGF_DLIL
) && ifp
->if_output_ctl
== NULL
) {
9563 * If targeted to the lower layers without an output
9564 * control callback registered on the interface, just
9565 * silently ignore facilities other than ours.
9567 flags
&= IFNET_LOGF_DLIL
;
9568 if (flags
== 0 && (!(ifp
->if_log
.flags
& IFNET_LOGF_DLIL
))) {
9574 if ((ifp
->if_log
.level
= level
) == IFNET_LOG_DEFAULT
) {
9575 ifp
->if_log
.flags
= 0;
9577 ifp
->if_log
.flags
|= flags
;
9580 log(LOG_INFO
, "%s: logging level set to %d flags=%b "
9581 "arg=%b, category=%d subcategory=%d\n", if_name(ifp
),
9582 ifp
->if_log
.level
, ifp
->if_log
.flags
,
9583 IFNET_LOGF_BITS
, flags
, IFNET_LOGF_BITS
,
9584 category
, subcategory
);
9591 ifnet_get_log(struct ifnet
*ifp
, int32_t *level
, uint32_t *flags
,
9592 int32_t *category
, int32_t *subcategory
)
9594 if (level
!= NULL
) {
9595 *level
= ifp
->if_log
.level
;
9597 if (flags
!= NULL
) {
9598 *flags
= ifp
->if_log
.flags
;
9600 if (category
!= NULL
) {
9601 *category
= ifp
->if_log
.category
;
9603 if (subcategory
!= NULL
) {
9604 *subcategory
= ifp
->if_log
.subcategory
;
9611 ifnet_notify_address(struct ifnet
*ifp
, int af
)
9613 struct ifnet_notify_address_params na
;
9616 (void) pf_ifaddr_hook(ifp
);
9619 if (ifp
->if_output_ctl
== NULL
) {
9623 bzero(&na
, sizeof(na
));
9624 na
.address_family
= (sa_family_t
)af
;
9626 return ifp
->if_output_ctl(ifp
, IFNET_CTL_NOTIFY_ADDRESS
,
ifnet_flowid(struct ifnet *ifp, uint32_t *flowid)
	if (ifp == NULL || flowid == NULL) {
	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    !IF_FULLY_ATTACHED(ifp)) {

	*flowid = ifp->if_flowhash;
ifnet_disable_output(struct ifnet *ifp)
	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    !IF_FULLY_ATTACHED(ifp)) {

	if ((err = ifnet_fc_add(ifp)) == 0) {
		lck_mtx_lock_spin(&ifp->if_start_lock);
		ifp->if_start_flags |= IFSF_FLOW_CONTROLLED;
		lck_mtx_unlock(&ifp->if_start_lock);

ifnet_enable_output(struct ifnet *ifp)
	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    !IF_FULLY_ATTACHED(ifp)) {

	ifnet_start_common(ifp, TRUE);

ifnet_flowadv(uint32_t flowhash)
	struct ifnet_fc_entry *ifce;

	ifce = ifnet_fc_get(flowhash);

	VERIFY(ifce->ifce_ifp != NULL);
	ifp = ifce->ifce_ifp;

	/* flow hash gets recalculated per attach, so check */
	if (ifnet_is_attached(ifp, 1)) {
		if (ifp->if_flowhash == flowhash) {
			(void) ifnet_enable_output(ifp);
		ifnet_decr_iorefcnt(ifp);

	ifnet_fc_entry_free(ifce);
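/*
 * Flow-advisory pairing: ifnet_disable_output() registers the interface in
 * the flow-control tree and sets IFSF_FLOW_CONTROLLED, and a later
 * ifnet_flowadv() with the matching flow hash looks that entry up, verifies
 * the interface is still attached with the same hash, and re-enables output.
 */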
/*
 * Function to compare ifnet_fc_entries in ifnet flow control tree
 */
ifce_cmp(const struct ifnet_fc_entry *fc1, const struct ifnet_fc_entry *fc2)
	return fc1->ifce_flowhash - fc2->ifce_flowhash;

ifnet_fc_add(struct ifnet *ifp)
	struct ifnet_fc_entry keyfc, *ifce;

	VERIFY(ifp != NULL && (ifp->if_eflags & IFEF_TXSTART));
	VERIFY(ifp->if_flowhash != 0);
	flowhash = ifp->if_flowhash;

	bzero(&keyfc, sizeof(keyfc));
	keyfc.ifce_flowhash = flowhash;

	lck_mtx_lock_spin(&ifnet_fc_lock);
	ifce = RB_FIND(ifnet_fc_tree, &ifnet_fc_tree, &keyfc);
	if (ifce != NULL && ifce->ifce_ifp == ifp) {
		/* Entry is already in ifnet_fc_tree, return */
		lck_mtx_unlock(&ifnet_fc_lock);

		/*
		 * There is a different fc entry with the same flow hash
		 * but different ifp pointer. There can be a collision
		 * on flow hash but the probability is low. Let's just
		 * avoid adding a second one when there is a collision.
		 */
		lck_mtx_unlock(&ifnet_fc_lock);

	/* become regular mutex */
	lck_mtx_convert_spin(&ifnet_fc_lock);

	ifce = zalloc_flags(ifnet_fc_zone, Z_WAITOK | Z_ZERO);
	ifce->ifce_flowhash = flowhash;
	ifce->ifce_ifp = ifp;

	RB_INSERT(ifnet_fc_tree, &ifnet_fc_tree, ifce);
	lck_mtx_unlock(&ifnet_fc_lock);
static struct ifnet_fc_entry *
ifnet_fc_get(uint32_t flowhash)
	struct ifnet_fc_entry keyfc, *ifce;

	bzero(&keyfc, sizeof(keyfc));
	keyfc.ifce_flowhash = flowhash;

	lck_mtx_lock_spin(&ifnet_fc_lock);
	ifce = RB_FIND(ifnet_fc_tree, &ifnet_fc_tree, &keyfc);
		/* Entry is not present in ifnet_fc_tree, return */
		lck_mtx_unlock(&ifnet_fc_lock);

	RB_REMOVE(ifnet_fc_tree, &ifnet_fc_tree, ifce);

	VERIFY(ifce->ifce_ifp != NULL);
	ifp = ifce->ifce_ifp;

	/* become regular mutex */
	lck_mtx_convert_spin(&ifnet_fc_lock);

	if (!ifnet_is_attached(ifp, 0)) {
		/*
		 * This ifp is not attached or in the process of being
		 * detached; just don't process it.
		 */
		ifnet_fc_entry_free(ifce);

	lck_mtx_unlock(&ifnet_fc_lock);

ifnet_fc_entry_free(struct ifnet_fc_entry *ifce)
	zfree(ifnet_fc_zone, ifce);
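/*
 * ifnet_fc_add()/ifnet_fc_get() keep at most one ifnet_fc_entry per flow
 * hash in the red-black tree; hash collisions between different interfaces
 * are deliberately left unhandled (no second entry is inserted), and lookup
 * removes the entry so each advisory is consumed exactly once.
 */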
ifnet_calc_flowhash(struct ifnet *ifp)
	struct ifnet_flowhash_key fh __attribute__((aligned(8)));
	uint32_t flowhash = 0;

	if (ifnet_flowhash_seed == 0) {
		ifnet_flowhash_seed = RandomULong();

	bzero(&fh, sizeof(fh));

	(void) snprintf(fh.ifk_name, sizeof(fh.ifk_name), "%s", ifp->if_name);
	fh.ifk_unit = ifp->if_unit;
	fh.ifk_flags = ifp->if_flags;
	fh.ifk_eflags = ifp->if_eflags;
	fh.ifk_capabilities = ifp->if_capabilities;
	fh.ifk_capenable = ifp->if_capenable;
	fh.ifk_output_sched_model = ifp->if_output_sched_model;
	fh.ifk_rand1 = RandomULong();
	fh.ifk_rand2 = RandomULong();

	flowhash = net_flowhash(&fh, sizeof(fh), ifnet_flowhash_seed);
	if (flowhash == 0) {
		/* try to get a non-zero flowhash */
		ifnet_flowhash_seed = RandomULong();
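/*
 * The interface flow hash mixes the name, unit, flag words, capabilities,
 * scheduling model and two fresh random words under a lazily initialized
 * global seed; a zero result is rejected and the seed re-rolled, since a
 * zero if_flowhash is treated as invalid elsewhere (see the VERIFY in
 * ifnet_fc_add() above).
 */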
9834 ifnet_set_netsignature(struct ifnet
*ifp
, uint8_t family
, uint8_t len
,
9835 uint16_t flags
, uint8_t *data
)
9837 #pragma unused(flags)
9842 if_inetdata_lock_exclusive(ifp
);
9843 if (IN_IFEXTRA(ifp
) != NULL
) {
9845 /* Allow clearing the signature */
9846 IN_IFEXTRA(ifp
)->netsig_len
= 0;
9847 bzero(IN_IFEXTRA(ifp
)->netsig
,
9848 sizeof(IN_IFEXTRA(ifp
)->netsig
));
9849 if_inetdata_lock_done(ifp
);
9851 } else if (len
> sizeof(IN_IFEXTRA(ifp
)->netsig
)) {
9853 if_inetdata_lock_done(ifp
);
9856 IN_IFEXTRA(ifp
)->netsig_len
= len
;
9857 bcopy(data
, IN_IFEXTRA(ifp
)->netsig
, len
);
9861 if_inetdata_lock_done(ifp
);
9865 if_inet6data_lock_exclusive(ifp
);
9866 if (IN6_IFEXTRA(ifp
) != NULL
) {
9868 /* Allow clearing the signature */
9869 IN6_IFEXTRA(ifp
)->netsig_len
= 0;
9870 bzero(IN6_IFEXTRA(ifp
)->netsig
,
9871 sizeof(IN6_IFEXTRA(ifp
)->netsig
));
9872 if_inet6data_lock_done(ifp
);
9874 } else if (len
> sizeof(IN6_IFEXTRA(ifp
)->netsig
)) {
9876 if_inet6data_lock_done(ifp
);
9879 IN6_IFEXTRA(ifp
)->netsig_len
= len
;
9880 bcopy(data
, IN6_IFEXTRA(ifp
)->netsig
, len
);
9884 if_inet6data_lock_done(ifp
);
9896 ifnet_get_netsignature(struct ifnet
*ifp
, uint8_t family
, uint8_t *len
,
9897 uint16_t *flags
, uint8_t *data
)
9901 if (ifp
== NULL
|| len
== NULL
|| data
== NULL
) {
9907 if_inetdata_lock_shared(ifp
);
9908 if (IN_IFEXTRA(ifp
) != NULL
) {
9909 if (*len
== 0 || *len
< IN_IFEXTRA(ifp
)->netsig_len
) {
9911 if_inetdata_lock_done(ifp
);
9914 if ((*len
= (uint8_t)IN_IFEXTRA(ifp
)->netsig_len
) > 0) {
9915 bcopy(IN_IFEXTRA(ifp
)->netsig
, data
, *len
);
9922 if_inetdata_lock_done(ifp
);
9926 if_inet6data_lock_shared(ifp
);
9927 if (IN6_IFEXTRA(ifp
) != NULL
) {
9928 if (*len
== 0 || *len
< IN6_IFEXTRA(ifp
)->netsig_len
) {
9930 if_inet6data_lock_done(ifp
);
9933 if ((*len
= (uint8_t)IN6_IFEXTRA(ifp
)->netsig_len
) > 0) {
9934 bcopy(IN6_IFEXTRA(ifp
)->netsig
, data
, *len
);
9941 if_inet6data_lock_done(ifp
);
9949 if (error
== 0 && flags
!= NULL
) {
9957 ifnet_set_nat64prefix(struct ifnet
*ifp
, struct ipv6_prefix
*prefixes
)
9959 int i
, error
= 0, one_set
= 0;
9961 if_inet6data_lock_exclusive(ifp
);
9963 if (IN6_IFEXTRA(ifp
) == NULL
) {
9968 for (i
= 0; i
< NAT64_MAX_NUM_PREFIXES
; i
++) {
9969 uint32_t prefix_len
=
9970 prefixes
[i
].prefix_len
;
9971 struct in6_addr
*prefix
=
9972 &prefixes
[i
].ipv6_prefix
;
9974 if (prefix_len
== 0) {
9975 clat_log0((LOG_DEBUG
,
9976 "NAT64 prefixes purged from Interface %s\n",
9978 /* Allow clearing the signature */
9979 IN6_IFEXTRA(ifp
)->nat64_prefixes
[i
].prefix_len
= 0;
9980 bzero(&IN6_IFEXTRA(ifp
)->nat64_prefixes
[i
].ipv6_prefix
,
9981 sizeof(struct in6_addr
));
9984 } else if (prefix_len
!= NAT64_PREFIX_LEN_32
&&
9985 prefix_len
!= NAT64_PREFIX_LEN_40
&&
9986 prefix_len
!= NAT64_PREFIX_LEN_48
&&
9987 prefix_len
!= NAT64_PREFIX_LEN_56
&&
9988 prefix_len
!= NAT64_PREFIX_LEN_64
&&
9989 prefix_len
!= NAT64_PREFIX_LEN_96
) {
9990 clat_log0((LOG_DEBUG
,
9991 "NAT64 prefixlen is incorrect %d\n", prefix_len
));
9996 if (IN6_IS_SCOPE_EMBED(prefix
)) {
9997 clat_log0((LOG_DEBUG
,
9998 "NAT64 prefix has interface/link local scope.\n"));
10003 IN6_IFEXTRA(ifp
)->nat64_prefixes
[i
].prefix_len
= prefix_len
;
10004 bcopy(prefix
, &IN6_IFEXTRA(ifp
)->nat64_prefixes
[i
].ipv6_prefix
,
10005 sizeof(struct in6_addr
));
10006 clat_log0((LOG_DEBUG
,
10007 "NAT64 prefix set to %s with prefixlen: %d\n",
10008 ip6_sprintf(prefix
), prefix_len
));
10013 if_inet6data_lock_done(ifp
);
10015 if (error
== 0 && one_set
!= 0) {
10016 necp_update_all_clients();
10023 ifnet_get_nat64prefix(struct ifnet
*ifp
, struct ipv6_prefix
*prefixes
)
10025 int i
, found_one
= 0, error
= 0;
10031 if_inet6data_lock_shared(ifp
);
10033 if (IN6_IFEXTRA(ifp
) == NULL
) {
10038 for (i
= 0; i
< NAT64_MAX_NUM_PREFIXES
; i
++) {
10039 if (IN6_IFEXTRA(ifp
)->nat64_prefixes
[i
].prefix_len
!= 0) {
10044 if (found_one
== 0) {
10050 bcopy(IN6_IFEXTRA(ifp
)->nat64_prefixes
, prefixes
,
10051 sizeof(IN6_IFEXTRA(ifp
)->nat64_prefixes
));
10055 if_inet6data_lock_done(ifp
);
10061 dlil_output_cksum_dbg(struct ifnet
*ifp
, struct mbuf
*m
, uint32_t hoff
,
10062 protocol_family_t pf
)
10064 #pragma unused(ifp)
10067 if (!(hwcksum_dbg_mode
& HWCKSUM_DBG_FINALIZE_FORCED
) ||
10068 (m
->m_pkthdr
.csum_flags
& (CSUM_TSO_IPV4
| CSUM_TSO_IPV6
))) {
10074 did_sw
= in_finalize_cksum(m
, hoff
, m
->m_pkthdr
.csum_flags
);
10075 if (did_sw
& CSUM_DELAY_IP
) {
10076 hwcksum_dbg_finalized_hdr
++;
10078 if (did_sw
& CSUM_DELAY_DATA
) {
10079 hwcksum_dbg_finalized_data
++;
10084 * Checksum offload should not have been enabled when
10085 * extension headers exist; that also means that we
10086 * cannot force-finalize packets with extension headers.
10087 * Indicate to the callee should it skip such case by
10088 * setting optlen to -1.
10090 did_sw
= in6_finalize_cksum(m
, hoff
, -1, -1,
10091 m
->m_pkthdr
.csum_flags
);
10092 if (did_sw
& CSUM_DELAY_IPV6_DATA
) {
10093 hwcksum_dbg_finalized_data
++;
10102 dlil_input_cksum_dbg(struct ifnet
*ifp
, struct mbuf
*m
, char *frame_header
,
10103 protocol_family_t pf
)
10108 if (frame_header
== NULL
||
10109 frame_header
< (char *)mbuf_datastart(m
) ||
10110 frame_header
> (char *)m
->m_data
) {
10111 DLIL_PRINTF("%s: frame header pointer 0x%llx out of range "
10112 "[0x%llx,0x%llx] for mbuf 0x%llx\n", if_name(ifp
),
10113 (uint64_t)VM_KERNEL_ADDRPERM(frame_header
),
10114 (uint64_t)VM_KERNEL_ADDRPERM(mbuf_datastart(m
)),
10115 (uint64_t)VM_KERNEL_ADDRPERM(m
->m_data
),
10116 (uint64_t)VM_KERNEL_ADDRPERM(m
));
10119 hlen
= (uint32_t)(m
->m_data
- frame_header
);
10130 * Force partial checksum offload; useful to simulate cases
10131 * where the hardware does not support partial checksum offload,
10132 * in order to validate correctness throughout the layers above.
10134 if (hwcksum_dbg_mode
& HWCKSUM_DBG_PARTIAL_FORCED
) {
10135 uint32_t foff
= hwcksum_dbg_partial_rxoff_forced
;
10137 if (foff
> (uint32_t)m
->m_pkthdr
.len
) {
10141 m
->m_pkthdr
.csum_flags
&= ~CSUM_RX_FLAGS
;
10143 /* Compute 16-bit 1's complement sum from forced offset */
10144 sum
= m_sum16(m
, foff
, (m
->m_pkthdr
.len
- foff
));
10146 m
->m_pkthdr
.csum_flags
|= (CSUM_DATA_VALID
| CSUM_PARTIAL
);
10147 m
->m_pkthdr
.csum_rx_val
= sum
;
10148 m
->m_pkthdr
.csum_rx_start
= (uint16_t)(foff
+ hlen
);
10150 hwcksum_dbg_partial_forced
++;
10151 hwcksum_dbg_partial_forced_bytes
+= m
->m_pkthdr
.len
;
10155 * Partial checksum offload verification (and adjustment);
10156 * useful to validate and test cases where the hardware
10157 * supports partial checksum offload.
10159 if ((m
->m_pkthdr
.csum_flags
&
10160 (CSUM_DATA_VALID
| CSUM_PARTIAL
| CSUM_PSEUDO_HDR
)) ==
10161 (CSUM_DATA_VALID
| CSUM_PARTIAL
)) {
10164 /* Start offset must begin after frame header */
10165 rxoff
= m
->m_pkthdr
.csum_rx_start
;
10166 if (hlen
> rxoff
) {
10167 hwcksum_dbg_bad_rxoff
++;
10168 if (dlil_verbose
) {
10169 DLIL_PRINTF("%s: partial cksum start offset %d "
10170 "is less than frame header length %d for "
10171 "mbuf 0x%llx\n", if_name(ifp
), rxoff
, hlen
,
10172 (uint64_t)VM_KERNEL_ADDRPERM(m
));
10178 if (!(hwcksum_dbg_mode
& HWCKSUM_DBG_PARTIAL_FORCED
)) {
10180 * Compute the expected 16-bit 1's complement sum;
10181 * skip this if we've already computed it above
10182 * when partial checksum offload is forced.
10184 sum
= m_sum16(m
, rxoff
, (m
->m_pkthdr
.len
- rxoff
));
10186 /* Hardware or driver is buggy */
10187 if (sum
!= m
->m_pkthdr
.csum_rx_val
) {
10188 hwcksum_dbg_bad_cksum
++;
10189 if (dlil_verbose
) {
10190 DLIL_PRINTF("%s: bad partial cksum value "
10191 "0x%x (expected 0x%x) for mbuf "
10192 "0x%llx [rx_start %d]\n",
10194 m
->m_pkthdr
.csum_rx_val
, sum
,
10195 (uint64_t)VM_KERNEL_ADDRPERM(m
),
10196 m
->m_pkthdr
.csum_rx_start
);
10201 hwcksum_dbg_verified
++;
10204 * This code allows us to emulate various hardwares that
10205 * perform 16-bit 1's complement sum beginning at various
10206 * start offset values.
10208 if (hwcksum_dbg_mode
& HWCKSUM_DBG_PARTIAL_RXOFF_ADJ
) {
10209 uint32_t aoff
= hwcksum_dbg_partial_rxoff_adj
;
10211 if (aoff
== rxoff
|| aoff
> (uint32_t)m
->m_pkthdr
.len
) {
10215 sum
= m_adj_sum16(m
, rxoff
, aoff
,
10216 m_pktlen(m
) - aoff
, sum
);
10218 m
->m_pkthdr
.csum_rx_val
= sum
;
10219 m
->m_pkthdr
.csum_rx_start
= (uint16_t)(aoff
+ hlen
);
10221 hwcksum_dbg_adjusted
++;
10227 sysctl_hwcksum_dbg_mode SYSCTL_HANDLER_ARGS
10229 #pragma unused(arg1, arg2)
10233 i
= hwcksum_dbg_mode
;
10235 err
= sysctl_handle_int(oidp
, &i
, 0, req
);
10236 if (err
!= 0 || req
->newptr
== USER_ADDR_NULL
) {
10240 if (hwcksum_dbg
== 0) {
10244 if ((i
& ~HWCKSUM_DBG_MASK
) != 0) {
10248 hwcksum_dbg_mode
= (i
& HWCKSUM_DBG_MASK
);
10254 sysctl_hwcksum_dbg_partial_rxoff_forced SYSCTL_HANDLER_ARGS
10256 #pragma unused(arg1, arg2)
10260 i
= hwcksum_dbg_partial_rxoff_forced
;
10262 err
= sysctl_handle_int(oidp
, &i
, 0, req
);
10263 if (err
!= 0 || req
->newptr
== USER_ADDR_NULL
) {
10267 if (!(hwcksum_dbg_mode
& HWCKSUM_DBG_PARTIAL_FORCED
)) {
10271 hwcksum_dbg_partial_rxoff_forced
= i
;
10277 sysctl_hwcksum_dbg_partial_rxoff_adj SYSCTL_HANDLER_ARGS
10279 #pragma unused(arg1, arg2)
10283 i
= hwcksum_dbg_partial_rxoff_adj
;
10285 err
= sysctl_handle_int(oidp
, &i
, 0, req
);
10286 if (err
!= 0 || req
->newptr
== USER_ADDR_NULL
) {
10290 if (!(hwcksum_dbg_mode
& HWCKSUM_DBG_PARTIAL_RXOFF_ADJ
)) {
10294 hwcksum_dbg_partial_rxoff_adj
= i
;
10300 sysctl_tx_chain_len_stats SYSCTL_HANDLER_ARGS
10302 #pragma unused(oidp, arg1, arg2)
10305 if (req
->oldptr
== USER_ADDR_NULL
) {
10307 if (req
->newptr
!= USER_ADDR_NULL
) {
10310 err
= SYSCTL_OUT(req
, &tx_chain_len_stats
,
10311 sizeof(struct chain_len_stats
));
10317 #if DEBUG || DEVELOPMENT
10318 /* Blob for sum16 verification */
10319 static uint8_t sumdata
[] = {
10320 0x1f, 0x8b, 0x08, 0x08, 0x4c, 0xe5, 0x9a, 0x4f, 0x00, 0x03,
10321 0x5f, 0x00, 0x5d, 0x91, 0x41, 0x4e, 0xc4, 0x30, 0x0c, 0x45,
10322 0xf7, 0x9c, 0xc2, 0x07, 0x18, 0xf5, 0x0e, 0xb0, 0xe2, 0x00,
10323 0x48, 0x88, 0xa5, 0xdb, 0xba, 0x49, 0x34, 0x69, 0xdc, 0x71,
10324 0x92, 0xa9, 0xc2, 0x8a, 0x6b, 0x70, 0x3d, 0x4e, 0x82, 0x93,
10325 0xb4, 0x08, 0xd8, 0xc5, 0xb1, 0xfd, 0xff, 0xb3, 0xfd, 0x4c,
10326 0x42, 0x5f, 0x1f, 0x9f, 0x11, 0x12, 0x43, 0xb2, 0x04, 0x93,
10327 0xe0, 0x7b, 0x01, 0x0e, 0x14, 0x07, 0x78, 0xd1, 0x78, 0x75,
10328 0x71, 0x71, 0xe9, 0x08, 0x84, 0x46, 0xf2, 0xc7, 0x3b, 0x09,
10329 0xe7, 0xd1, 0xd3, 0x8a, 0x57, 0x92, 0x33, 0xcd, 0x39, 0xcc,
10330 0xb0, 0x91, 0x89, 0xe0, 0x42, 0x53, 0x8b, 0xb7, 0x8c, 0x42,
10331 0x60, 0xd9, 0x9f, 0x7a, 0x55, 0x19, 0x76, 0xcb, 0x10, 0x49,
10332 0x35, 0xac, 0x0b, 0x5a, 0x3c, 0xbb, 0x65, 0x51, 0x8c, 0x90,
10333 0x7c, 0x69, 0x45, 0x45, 0x81, 0xb4, 0x2b, 0x70, 0x82, 0x85,
10334 0x55, 0x91, 0x17, 0x90, 0xdc, 0x14, 0x1e, 0x35, 0x52, 0xdd,
10335 0x02, 0x16, 0xef, 0xb5, 0x40, 0x89, 0xe2, 0x46, 0x53, 0xad,
10336 0x93, 0x6e, 0x98, 0x30, 0xe5, 0x08, 0xb7, 0xcc, 0x03, 0xbc,
10337 0x71, 0x86, 0x09, 0x43, 0x0d, 0x52, 0xf5, 0xa2, 0xf5, 0xa2,
10338 0x56, 0x11, 0x8d, 0xa8, 0xf5, 0xee, 0x92, 0x3d, 0xfe, 0x8c,
10339 0x67, 0x71, 0x8b, 0x0e, 0x2d, 0x70, 0x77, 0xbe, 0xbe, 0xea,
10340 0xbf, 0x9a, 0x8d, 0x9c, 0x53, 0x53, 0xe5, 0xe0, 0x4b, 0x87,
10341 0x85, 0xd2, 0x45, 0x95, 0x30, 0xc1, 0xcc, 0xe0, 0x74, 0x54,
10342 0x13, 0x58, 0xe8, 0xe8, 0x79, 0xa2, 0x09, 0x73, 0xa4, 0x0e,
10343 0x39, 0x59, 0x0c, 0xe6, 0x9c, 0xb2, 0x4f, 0x06, 0x5b, 0x8e,
10344 0xcd, 0x17, 0x6c, 0x5e, 0x95, 0x4d, 0x70, 0xa2, 0x0a, 0xbf,
10345 0xa3, 0xcc, 0x03, 0xbc, 0x5a, 0xe7, 0x75, 0x06, 0x5e, 0x75,
10346 0xef, 0x58, 0x8e, 0x15, 0xd1, 0x0a, 0x18, 0xff, 0xdd, 0xe6,
10347 0x02, 0x3b, 0xb5, 0xb4, 0xa1, 0xe0, 0x72, 0xfc, 0xe3, 0xab,
10348 0x07, 0xe0, 0x4d, 0x65, 0xea, 0x92, 0xeb, 0xf2, 0x7b, 0x17,
10349 0x05, 0xce, 0xc6, 0xf6, 0x2b, 0xbb, 0x70, 0x3d, 0x00, 0x95,
10350 0xe0, 0x07, 0x52, 0x3b, 0x58, 0xfc, 0x7c, 0x69, 0x4d, 0xe9,
10351 0xf7, 0xa9, 0x66, 0x1e, 0x1e, 0xbe, 0x01, 0x69, 0x98, 0xfe,
10352 0xc8, 0x28, 0x02, 0x00, 0x00
10355 /* Precomputed 16-bit 1's complement sums for various spans of the above data */
10359 uint16_t sumr
; /* reference */
10360 uint16_t sumrp
; /* reference, precomputed */
10362 { FALSE
, 0, 0, 0x0000 },
10363 { FALSE
, 1, 0, 0x001f },
10364 { FALSE
, 2, 0, 0x8b1f },
10365 { FALSE
, 3, 0, 0x8b27 },
10366 { FALSE
, 7, 0, 0x790e },
10367 { FALSE
, 11, 0, 0xcb6d },
10368 { FALSE
, 20, 0, 0x20dd },
10369 { FALSE
, 27, 0, 0xbabd },
10370 { FALSE
, 32, 0, 0xf3e8 },
10371 { FALSE
, 37, 0, 0x197d },
10372 { FALSE
, 43, 0, 0x9eae },
10373 { FALSE
, 64, 0, 0x4678 },
10374 { FALSE
, 127, 0, 0x9399 },
10375 { FALSE
, 256, 0, 0xd147 },
10376 { FALSE
, 325, 0, 0x0358 },
10378 #define SUMTBL_MAX ((int)sizeof (sumtbl) / (int)sizeof (sumtbl[0]))
10381 dlil_verify_sum16(void)
10387 /* Make sure test data plus extra room for alignment fits in cluster */
10388 _CASSERT((sizeof(sumdata
) + (sizeof(uint64_t) * 2)) <= MCLBYTES
);
10390 kprintf("DLIL: running SUM16 self-tests ... ");
10392 m
= m_getcl(M_WAITOK
, MT_DATA
, M_PKTHDR
);
10393 m_align(m
, sizeof(sumdata
) + (sizeof(uint64_t) * 2));
10395 buf
= mtod(m
, uint8_t *); /* base address */
10397 for (n
= 0; n
< SUMTBL_MAX
; n
++) {
10398 uint16_t len
= sumtbl
[n
].len
;
10401 /* Verify for all possible alignments */
10402 for (i
= 0; i
< (int)sizeof(uint64_t); i
++) {
10403 uint16_t sum
, sumr
;
10406 /* Copy over test data to mbuf */
10407 VERIFY(len
<= sizeof(sumdata
));
10409 bcopy(sumdata
, c
, len
);
10411 /* Zero-offset test (align by data pointer) */
10412 m
->m_data
= (caddr_t
)c
;
10414 sum
= m_sum16(m
, 0, len
);
10416 if (!sumtbl
[n
].init
) {
10417 sumr
= (uint16_t)in_cksum_mbuf_ref(m
, len
, 0, 0);
10418 sumtbl
[n
].sumr
= sumr
;
10419 sumtbl
[n
].init
= TRUE
;
10421 sumr
= sumtbl
[n
].sumr
;
10424 /* Something is horribly broken; stop now */
10425 if (sumr
!= sumtbl
[n
].sumrp
) {
10426 panic_plain("\n%s: broken in_cksum_mbuf_ref() "
10427 "for len=%d align=%d sum=0x%04x "
10428 "[expected=0x%04x]\n", __func__
,
10429 len
, i
, sum
, sumr
);
10431 } else if (sum
!= sumr
) {
10432 panic_plain("\n%s: broken m_sum16() for len=%d "
10433 "align=%d sum=0x%04x [expected=0x%04x]\n",
10434 __func__
, len
, i
, sum
, sumr
);
10438 /* Alignment test by offset (fixed data pointer) */
10439 m
->m_data
= (caddr_t
)buf
;
10440 m
->m_len
= i
+ len
;
10441 sum
= m_sum16(m
, i
, len
);
10443 /* Something is horribly broken; stop now */
10445 panic_plain("\n%s: broken m_sum16() for len=%d "
10446 "offset=%d sum=0x%04x [expected=0x%04x]\n",
10447 __func__
, len
, i
, sum
, sumr
);
10451 /* Simple sum16 contiguous buffer test by aligment */
10452 sum
= b_sum16(c
, len
);
10454 /* Something is horribly broken; stop now */
10456 panic_plain("\n%s: broken b_sum16() for len=%d "
10457 "align=%d sum=0x%04x [expected=0x%04x]\n",
10458 __func__
, len
, i
, sum
, sumr
);
10466 kprintf("PASSED\n");
10468 #endif /* DEBUG || DEVELOPMENT */
10470 #define CASE_STRINGIFY(x) case x: return #x
10472 __private_extern__
const char *
10473 dlil_kev_dl_code_str(u_int32_t event_code
)
10475 switch (event_code
) {
10476 CASE_STRINGIFY(KEV_DL_SIFFLAGS
);
10477 CASE_STRINGIFY(KEV_DL_SIFMETRICS
);
10478 CASE_STRINGIFY(KEV_DL_SIFMTU
);
10479 CASE_STRINGIFY(KEV_DL_SIFPHYS
);
10480 CASE_STRINGIFY(KEV_DL_SIFMEDIA
);
10481 CASE_STRINGIFY(KEV_DL_SIFGENERIC
);
10482 CASE_STRINGIFY(KEV_DL_ADDMULTI
);
10483 CASE_STRINGIFY(KEV_DL_DELMULTI
);
10484 CASE_STRINGIFY(KEV_DL_IF_ATTACHED
);
10485 CASE_STRINGIFY(KEV_DL_IF_DETACHING
);
10486 CASE_STRINGIFY(KEV_DL_IF_DETACHED
);
10487 CASE_STRINGIFY(KEV_DL_LINK_OFF
);
10488 CASE_STRINGIFY(KEV_DL_LINK_ON
);
10489 CASE_STRINGIFY(KEV_DL_PROTO_ATTACHED
);
10490 CASE_STRINGIFY(KEV_DL_PROTO_DETACHED
);
10491 CASE_STRINGIFY(KEV_DL_LINK_ADDRESS_CHANGED
);
10492 CASE_STRINGIFY(KEV_DL_WAKEFLAGS_CHANGED
);
10493 CASE_STRINGIFY(KEV_DL_IF_IDLE_ROUTE_REFCNT
);
10494 CASE_STRINGIFY(KEV_DL_IFCAP_CHANGED
);
10495 CASE_STRINGIFY(KEV_DL_LINK_QUALITY_METRIC_CHANGED
);
10496 CASE_STRINGIFY(KEV_DL_NODE_PRESENCE
);
10497 CASE_STRINGIFY(KEV_DL_NODE_ABSENCE
);
10498 CASE_STRINGIFY(KEV_DL_MASTER_ELECTED
);
10499 CASE_STRINGIFY(KEV_DL_ISSUES
);
10500 CASE_STRINGIFY(KEV_DL_IFDELEGATE_CHANGED
);
dlil_dt_tcall_fn(thread_call_param_t arg0, thread_call_param_t arg1)
#pragma unused(arg1)
	struct ifnet *ifp = arg0;

	if (ifnet_is_attached(ifp, 1)) {
		nstat_ifnet_threshold_reached(ifp->if_index);
		ifnet_decr_iorefcnt(ifp);

ifnet_notify_data_threshold(struct ifnet *ifp)
	uint64_t bytes = (ifp->if_ibytes + ifp->if_obytes);
	uint64_t oldbytes = ifp->if_dt_bytes;

	ASSERT(ifp->if_dt_tcall != NULL);

	/*
	 * If we went over the threshold, notify NetworkStatistics.
	 * We rate-limit it based on the threshold interval value.
	 */
	if (threshold_notify && (bytes - oldbytes) > ifp->if_data_threshold &&
	    OSCompareAndSwap64(oldbytes, bytes, &ifp->if_dt_bytes) &&
	    !thread_call_isactive(ifp->if_dt_tcall)) {
		uint64_t tival = (threshold_interval * NSEC_PER_SEC);
		uint64_t now = mach_absolute_time(), deadline = now;

		nanoseconds_to_absolutetime(tival, &ival);
		clock_deadline_for_periodic_event(ival, now, &deadline);
		(void) thread_call_enter_delayed(ifp->if_dt_tcall,
		(void) thread_call_enter(ifp->if_dt_tcall);
10549 #if (DEVELOPMENT || DEBUG)
10551 * The sysctl variable name contains the input parameters of
10552 * ifnet_get_keepalive_offload_frames()
10553 * ifp (interface index): name[0]
10554 * frames_array_count: name[1]
10555 * frame_data_offset: name[2]
10556 * The return length gives used_frames_count
10559 sysctl_get_kao_frames SYSCTL_HANDLER_ARGS
10561 #pragma unused(oidp)
10562 int *name
= (int *)arg1
;
10563 u_int namelen
= arg2
;
10565 ifnet_t ifp
= NULL
;
10566 u_int32_t frames_array_count
;
10567 size_t frame_data_offset
;
10568 u_int32_t used_frames_count
;
10569 struct ifnet_keepalive_offload_frame
*frames_array
= NULL
;
10574 * Only root can get look at other people TCP frames
10576 error
= proc_suser(current_proc());
10581 * Validate the input parameters
10583 if (req
->newptr
!= USER_ADDR_NULL
) {
10587 if (namelen
!= 3) {
10591 if (req
->oldptr
== USER_ADDR_NULL
) {
10595 if (req
->oldlen
== 0) {
10600 frames_array_count
= name
[1];
10601 frame_data_offset
= name
[2];
10603 /* Make sure the passed buffer is large enough */
10604 if (frames_array_count
* sizeof(struct ifnet_keepalive_offload_frame
) >
10610 ifnet_head_lock_shared();
10611 if (!IF_INDEX_IN_RANGE(idx
)) {
10616 ifp
= ifindex2ifnet
[idx
];
10619 frames_array
= _MALLOC(frames_array_count
*
10620 sizeof(struct ifnet_keepalive_offload_frame
), M_TEMP
, M_WAITOK
);
10621 if (frames_array
== NULL
) {
10626 error
= ifnet_get_keepalive_offload_frames(ifp
, frames_array
,
10627 frames_array_count
, frame_data_offset
, &used_frames_count
);
10629 DLIL_PRINTF("%s: ifnet_get_keepalive_offload_frames error %d\n",
10634 for (i
= 0; i
< used_frames_count
; i
++) {
10635 error
= SYSCTL_OUT(req
, frames_array
+ i
,
10636 sizeof(struct ifnet_keepalive_offload_frame
));
10642 if (frames_array
!= NULL
) {
10643 _FREE(frames_array
, M_TEMP
);
10647 #endif /* DEVELOPMENT || DEBUG */
ifnet_update_stats_per_flow(struct ifnet_stats_per_flow *ifs,
	tcp_update_stats_per_flow(ifs, ifp);

static inline u_int32_t
_set_flags(u_int32_t *flags_p, u_int32_t set_flags)
	return (u_int32_t)OSBitOrAtomic(set_flags, flags_p);

_clear_flags(u_int32_t *flags_p, u_int32_t clear_flags)
	OSBitAndAtomic(~clear_flags, flags_p);

__private_extern__ u_int32_t
if_set_eflags(ifnet_t interface, u_int32_t set_flags)
	return _set_flags(&interface->if_eflags, set_flags);

__private_extern__ void
if_clear_eflags(ifnet_t interface, u_int32_t clear_flags)
	_clear_flags(&interface->if_eflags, clear_flags);

__private_extern__ u_int32_t
if_set_xflags(ifnet_t interface, u_int32_t set_flags)
	return _set_flags(&interface->if_xflags, set_flags);

__private_extern__ void
if_clear_xflags(ifnet_t interface, u_int32_t clear_flags)
	_clear_flags(&interface->if_xflags, clear_flags);