/*
 * Copyright (c) 1999-2019 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
 * support for mandatory and extensible security protections. This notice
 * is included in support of clause 2.2 (b) of the Apple Public License,
 * Version 2.0.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/socket.h>
#include <sys/domain.h>
#include <sys/random.h>
#include <sys/socketvar.h>
#include <net/if_dl.h>
#include <net/route.h>
#include <net/if_var.h>
#include <net/if_arp.h>
#include <net/iptap.h>
#include <net/pktap.h>
#include <sys/kern_event.h>
#include <sys/kdebug.h>
#include <sys/mcache.h>
#include <sys/syslog.h>
#include <sys/protosw.h>
#include <kern/assert.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/sched_prim.h>
#include <kern/locks.h>
#include <kern/zalloc.h>
#include <net/kpi_protocol.h>
#include <net/if_types.h>
#include <net/if_ipsec.h>
#include <net/if_llreach.h>
#include <net/if_utun.h>
#include <net/kpi_interfacefilter.h>
#include <net/classq/classq.h>
#include <net/classq/classq_sfb.h>
#include <net/flowhash.h>
#include <net/ntstat.h>
#include <net/if_llatbl.h>
#include <net/net_api_stats.h>
#include <net/if_ports_used.h>
#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet/igmp_var.h>
#include <netinet/ip_var.h>
#include <netinet/tcp.h>
#include <netinet/tcp_var.h>
#include <netinet/udp.h>
#include <netinet/udp_var.h>
#include <netinet/if_ether.h>
#include <netinet/in_pcb.h>
#include <netinet/in_tclass.h>
#include <netinet/ip.h>
#include <netinet/ip_icmp.h>
#include <netinet/icmp_var.h>
#include <net/nat464_utils.h>
#include <netinet6/in6_var.h>
#include <netinet6/nd6.h>
#include <netinet6/mld6_var.h>
#include <netinet6/scope6_var.h>
#include <netinet/ip6.h>
#include <netinet/icmp6.h>
#include <net/pf_pbuf.h>
#include <libkern/OSAtomic.h>
#include <libkern/tree.h>
#include <dev/random/randomdev.h>
#include <machine/machine_routines.h>
#include <mach/thread_act.h>
#include <mach/sdt.h>
#include <sys/kauth.h>
#include <security/mac_framework.h>
#include <net/ethernet.h>
#include <net/firewire.h>
#include <net/pfvar.h>
#include <net/pktsched/pktsched.h>
#include <net/pktsched/pktsched_netem.h>
#include <net/necp.h>
#define DBG_LAYER_BEG           DLILDBG_CODE(DBG_DLIL_STATIC, 0)
#define DBG_LAYER_END           DLILDBG_CODE(DBG_DLIL_STATIC, 2)
#define DBG_FNC_DLIL_INPUT      DLILDBG_CODE(DBG_DLIL_STATIC, (1 << 8))
#define DBG_FNC_DLIL_OUTPUT     DLILDBG_CODE(DBG_DLIL_STATIC, (2 << 8))
#define DBG_FNC_DLIL_IFOUT      DLILDBG_CODE(DBG_DLIL_STATIC, (3 << 8))

#define MAX_FRAME_TYPE_SIZE     4 /* LONGWORDS */
#define MAX_LINKADDR            4 /* LONGWORDS */
#define M_NKE                   M_IFADDR
#if 1
#define DLIL_PRINTF     printf
#else
#define DLIL_PRINTF     kprintf
#endif
#define IF_DATA_REQUIRE_ALIGNED_64(f)   \
	_CASSERT(!(offsetof(struct if_data_internal, f) % sizeof (u_int64_t)))

#define IFNET_IF_DATA_REQUIRE_ALIGNED_64(f)     \
	_CASSERT(!(offsetof(struct ifnet, if_data.f) % sizeof (u_int64_t)))
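/*
 * Illustrative note (not from the original source): each of these macros
 * expands to a compile-time assertion that the named counter starts on an
 * 8-byte boundary. For example, IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets)
 * fails to compile if offsetof(struct if_data_internal, ifi_ipackets) is not
 * a multiple of sizeof (u_int64_t), which would make 64-bit atomic updates
 * on that counter unsafe on some architectures.
 */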
/*
 * List of if_proto structures in if_proto_hash[] is protected by
 * the ifnet lock. The rest of the fields are initialized at protocol
 * attach time and never change, thus no lock required as long as
 * a reference to it is valid, via if_proto_ref().
 */
struct if_proto {
	SLIST_ENTRY(if_proto)   next_hash;
	u_int32_t               refcount;
	u_int32_t               detached;
	struct ifnet            *ifp;
	protocol_family_t       protocol_family;
	int                     proto_kpi;
	union {
		struct {
			proto_media_input       input;
			proto_media_preout      pre_output;
			proto_media_event       event;
			proto_media_ioctl       ioctl;
			proto_media_detached    detached;
			proto_media_resolve_multi resolve_multi;
			proto_media_send_arp    send_arp;
		} v1;
		struct {
			proto_media_input_v2    input;
			proto_media_preout      pre_output;
			proto_media_event       event;
			proto_media_ioctl       ioctl;
			proto_media_detached    detached;
			proto_media_resolve_multi resolve_multi;
			proto_media_send_arp    send_arp;
		} v2;
	} kpi;
};

SLIST_HEAD(proto_hash_entry, if_proto);
#define DLIL_SDLDATALEN \
	(DLIL_SDLMAXLEN - offsetof(struct sockaddr_dl, sdl_data[0]))
struct dlil_ifnet {
	struct ifnet    dl_if;                  /* public ifnet */
	/*
	 * DLIL private fields, protected by dl_if_lock
	 */
	decl_lck_mtx_data(, dl_if_lock);
	TAILQ_ENTRY(dlil_ifnet) dl_if_link;     /* dlil_ifnet link */
	u_int32_t dl_if_flags;                  /* flags (below) */
	u_int32_t dl_if_refcnt;                 /* refcnt */
	void (*dl_if_trace)(struct dlil_ifnet *, int); /* ref trace callback */
	void    *dl_if_uniqueid;                /* unique interface id */
	size_t  dl_if_uniqueid_len;             /* length of the unique id */
	char    dl_if_namestorage[IFNAMSIZ];    /* interface name storage */
	char    dl_if_xnamestorage[IFXNAMSIZ];  /* external name storage */
	struct {
		struct ifaddr   ifa;                    /* lladdr ifa */
		u_int8_t        asdl[DLIL_SDLMAXLEN];   /* addr storage */
		u_int8_t        msdl[DLIL_SDLMAXLEN];   /* mask storage */
	} dl_if_lladdr;
	u_int8_t dl_if_descstorage[IF_DESCSIZE]; /* desc storage */
	struct dlil_threading_info dl_if_inpstorage; /* input thread storage */
	ctrace_t        dl_if_attach;           /* attach PC stacktrace */
	ctrace_t        dl_if_detach;           /* detach PC stacktrace */
};
/* Values for dl_if_flags (private to DLIL) */
#define DLIF_INUSE      0x1     /* DLIL ifnet recycler, ifnet in use */
#define DLIF_REUSE      0x2     /* DLIL ifnet recycles, ifnet is not new */
#define DLIF_DEBUG      0x4     /* has debugging info */

#define IF_REF_TRACE_HIST_SIZE  8 /* size of ref trace history */
__private_extern__ unsigned int if_ref_trace_hist_size = IF_REF_TRACE_HIST_SIZE;

struct dlil_ifnet_dbg {
	struct dlil_ifnet       dldbg_dlif;             /* dlil_ifnet */
	u_int16_t               dldbg_if_refhold_cnt;   /* # ifnet references */
	u_int16_t               dldbg_if_refrele_cnt;   /* # ifnet releases */
	/*
	 * Circular lists of ifnet_{reference,release} callers.
	 */
	ctrace_t                dldbg_if_refhold[IF_REF_TRACE_HIST_SIZE];
	ctrace_t                dldbg_if_refrele[IF_REF_TRACE_HIST_SIZE];
};

#define DLIL_TO_IFP(s)  (&s->dl_if)
#define IFP_TO_DLIL(s)  ((struct dlil_ifnet *)s)
struct ifnet_filter {
	TAILQ_ENTRY(ifnet_filter)       filt_next;
	u_int32_t                       filt_flags;
	const char                      *filt_name;
	protocol_family_t               filt_protocol;
	iff_input_func                  filt_input;
	iff_output_func                 filt_output;
	iff_event_func                  filt_event;
	iff_ioctl_func                  filt_ioctl;
	iff_detached_func               filt_detached;
};

struct proto_input_entry;
static TAILQ_HEAD(, dlil_ifnet) dlil_ifnet_head;
static lck_grp_t *dlil_lock_group;
lck_grp_t *ifnet_lock_group;
static lck_grp_t *ifnet_head_lock_group;
static lck_grp_t *ifnet_snd_lock_group;
static lck_grp_t *ifnet_rcv_lock_group;
lck_attr_t *ifnet_lock_attr;
decl_lck_rw_data(static, ifnet_head_lock);
decl_lck_mtx_data(static, dlil_ifnet_lock);
u_int32_t dlil_filter_disable_tso_count = 0;
#if IFNET_DEBUG
static unsigned int ifnet_debug = 1;    /* debugging (enabled) */
#else
static unsigned int ifnet_debug;        /* debugging (disabled) */
#endif /* !IFNET_DEBUG */
static unsigned int dlif_size;          /* size of dlil_ifnet to allocate */
static unsigned int dlif_bufsize;       /* size of dlif_size + headroom */
static struct zone *dlif_zone;          /* zone for dlil_ifnet */

#define DLIF_ZONE_MAX           IFNETS_MAX      /* maximum elements in zone */
#define DLIF_ZONE_NAME          "ifnet"         /* zone name */

static unsigned int dlif_filt_size;     /* size of ifnet_filter */
static struct zone *dlif_filt_zone;     /* zone for ifnet_filter */

#define DLIF_FILT_ZONE_MAX      8               /* maximum elements in zone */
#define DLIF_FILT_ZONE_NAME     "ifnet_filter"  /* zone name */

static unsigned int dlif_phash_size;    /* size of ifnet proto hash table */
static struct zone *dlif_phash_zone;    /* zone for ifnet proto hash table */

#define DLIF_PHASH_ZONE_MAX     DLIF_ZONE_MAX   /* maximum elements in zone */
#define DLIF_PHASH_ZONE_NAME    "ifnet_proto_hash" /* zone name */

static unsigned int dlif_proto_size;    /* size of if_proto */
static struct zone *dlif_proto_zone;    /* zone for if_proto */

#define DLIF_PROTO_ZONE_MAX     (DLIF_ZONE_MAX*2) /* maximum elements in zone */
#define DLIF_PROTO_ZONE_NAME    "ifnet_proto"   /* zone name */

static unsigned int dlif_tcpstat_size;  /* size of tcpstat_local to allocate */
static unsigned int dlif_tcpstat_bufsize; /* size of dlif_tcpstat_size + headroom */
static struct zone *dlif_tcpstat_zone;  /* zone for tcpstat_local */

#define DLIF_TCPSTAT_ZONE_MAX   1               /* maximum elements in zone */
#define DLIF_TCPSTAT_ZONE_NAME  "ifnet_tcpstat" /* zone name */

static unsigned int dlif_udpstat_size;  /* size of udpstat_local to allocate */
static unsigned int dlif_udpstat_bufsize; /* size of dlif_udpstat_size + headroom */
static struct zone *dlif_udpstat_zone;  /* zone for udpstat_local */

#define DLIF_UDPSTAT_ZONE_MAX   1               /* maximum elements in zone */
#define DLIF_UDPSTAT_ZONE_NAME  "ifnet_udpstat" /* zone name */
static u_int32_t net_rtref;

static struct dlil_main_threading_info dlil_main_input_thread_info;
__private_extern__ struct dlil_threading_info *dlil_main_input_thread =
    (struct dlil_threading_info *)&dlil_main_input_thread_info;
static int dlil_event_internal(struct ifnet *ifp, struct kev_msg *msg,
    bool update_generation);
static int dlil_detach_filter_internal(interface_filter_t filter,
    int detached);
static void dlil_if_trace(struct dlil_ifnet *, int);
static void if_proto_ref(struct if_proto *);
static void if_proto_free(struct if_proto *);
static struct if_proto *find_attached_proto(struct ifnet *, u_int32_t);
static u_int32_t dlil_ifp_protolist(struct ifnet *ifp, protocol_family_t *list,
    u_int32_t list_count);
static void if_flt_monitor_busy(struct ifnet *);
static void if_flt_monitor_unbusy(struct ifnet *);
static void if_flt_monitor_enter(struct ifnet *);
static void if_flt_monitor_leave(struct ifnet *);
static int dlil_interface_filters_input(struct ifnet *, struct mbuf **,
    char **, protocol_family_t);
static int dlil_interface_filters_output(struct ifnet *, struct mbuf **,
    protocol_family_t);
static struct ifaddr *dlil_alloc_lladdr(struct ifnet *,
    const struct sockaddr_dl *);
static int ifnet_lookup(struct ifnet *);
static void if_purgeaddrs(struct ifnet *);
static errno_t ifproto_media_input_v1(struct ifnet *, protocol_family_t,
    struct mbuf *, char *);
static errno_t ifproto_media_input_v2(struct ifnet *, protocol_family_t,
    struct mbuf *);
static errno_t ifproto_media_preout(struct ifnet *, protocol_family_t,
    mbuf_t *, const struct sockaddr *, void *, char *, char *);
static void ifproto_media_event(struct ifnet *, protocol_family_t,
    const struct kev_msg *);
static errno_t ifproto_media_ioctl(struct ifnet *, protocol_family_t,
    unsigned long, void *);
static errno_t ifproto_media_resolve_multi(ifnet_t, const struct sockaddr *,
    struct sockaddr_dl *, size_t);
static errno_t ifproto_media_send_arp(struct ifnet *, u_short,
    const struct sockaddr_dl *, const struct sockaddr *,
    const struct sockaddr_dl *, const struct sockaddr *);
static errno_t ifp_if_input(struct ifnet *ifp, struct mbuf *m_head,
    struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
    boolean_t poll, struct thread *tp);
static void ifp_if_input_poll(struct ifnet *, u_int32_t, u_int32_t,
    struct mbuf **, struct mbuf **, u_int32_t *, u_int32_t *);
static errno_t ifp_if_ctl(struct ifnet *, ifnet_ctl_cmd_t, u_int32_t, void *);
static errno_t ifp_if_demux(struct ifnet *, struct mbuf *, char *,
    protocol_family_t *);
static errno_t ifp_if_add_proto(struct ifnet *, protocol_family_t,
    const struct ifnet_demux_desc *, u_int32_t);
static errno_t ifp_if_del_proto(struct ifnet *, protocol_family_t);
static errno_t ifp_if_check_multi(struct ifnet *, const struct sockaddr *);
#if !CONFIG_EMBEDDED
static errno_t ifp_if_framer(struct ifnet *, struct mbuf **,
    const struct sockaddr *, const char *, const char *,
    u_int32_t *, u_int32_t *);
#else
static errno_t ifp_if_framer(struct ifnet *, struct mbuf **,
    const struct sockaddr *, const char *, const char *);
#endif /* CONFIG_EMBEDDED */
static errno_t ifp_if_framer_extended(struct ifnet *, struct mbuf **,
    const struct sockaddr *, const char *, const char *,
    u_int32_t *, u_int32_t *);
static errno_t ifp_if_set_bpf_tap(struct ifnet *, bpf_tap_mode, bpf_packet_func);
static void ifp_if_free(struct ifnet *);
static void ifp_if_event(struct ifnet *, const struct kev_msg *);
static __inline void ifp_inc_traffic_class_in(struct ifnet *, struct mbuf *);
static __inline void ifp_inc_traffic_class_out(struct ifnet *, struct mbuf *);
static void dlil_main_input_thread_func(void *, wait_result_t);
static void dlil_main_input_thread_cont(void *, wait_result_t);

static void dlil_input_thread_func(void *, wait_result_t);
static void dlil_input_thread_cont(void *, wait_result_t);

static void dlil_rxpoll_input_thread_func(void *, wait_result_t);
static void dlil_rxpoll_input_thread_cont(void *, wait_result_t);

static int dlil_create_input_thread(ifnet_t, struct dlil_threading_info *);
static void dlil_terminate_input_thread(struct dlil_threading_info *);
static void dlil_input_stats_add(const struct ifnet_stat_increment_param *,
    struct dlil_threading_info *, struct ifnet *, boolean_t);
static boolean_t dlil_input_stats_sync(struct ifnet *,
    struct dlil_threading_info *);
static void dlil_input_packet_list_common(struct ifnet *, struct mbuf *,
    u_int32_t, ifnet_model_t, boolean_t);
static errno_t ifnet_input_common(struct ifnet *, struct mbuf *, struct mbuf *,
    const struct ifnet_stat_increment_param *, boolean_t, boolean_t);
static int dlil_is_clat_needed(protocol_family_t, mbuf_t);
static errno_t dlil_clat46(ifnet_t, protocol_family_t *, mbuf_t *);
static errno_t dlil_clat64(ifnet_t, protocol_family_t *, mbuf_t *);
#if DEBUG || DEVELOPMENT
static void dlil_verify_sum16(void);
#endif /* DEBUG || DEVELOPMENT */
static void dlil_output_cksum_dbg(struct ifnet *, struct mbuf *, uint32_t,
    protocol_family_t);
static void dlil_input_cksum_dbg(struct ifnet *, struct mbuf *, char *,
    protocol_family_t);

static void dlil_incr_pending_thread_count(void);
static void dlil_decr_pending_thread_count(void);

static void ifnet_detacher_thread_func(void *, wait_result_t);
static int ifnet_detacher_thread_cont(int);
static void ifnet_detach_final(struct ifnet *);
static void ifnet_detaching_enqueue(struct ifnet *);
static struct ifnet *ifnet_detaching_dequeue(void);

static void ifnet_start_thread_func(void *, wait_result_t);
static void ifnet_start_thread_cont(void *, wait_result_t);

static void ifnet_poll_thread_func(void *, wait_result_t);
static void ifnet_poll_thread_cont(void *, wait_result_t);

static errno_t ifnet_enqueue_common(struct ifnet *, classq_pkt_t *,
    boolean_t, boolean_t *);

static void ifp_src_route_copyout(struct ifnet *, struct route *);
static void ifp_src_route_copyin(struct ifnet *, struct route *);
static void ifp_src_route6_copyout(struct ifnet *, struct route_in6 *);
static void ifp_src_route6_copyin(struct ifnet *, struct route_in6 *);
static int sysctl_rxpoll SYSCTL_HANDLER_ARGS;
static int sysctl_rxpoll_mode_holdtime SYSCTL_HANDLER_ARGS;
static int sysctl_rxpoll_sample_holdtime SYSCTL_HANDLER_ARGS;
static int sysctl_rxpoll_interval_time SYSCTL_HANDLER_ARGS;
static int sysctl_rxpoll_wlowat SYSCTL_HANDLER_ARGS;
static int sysctl_rxpoll_whiwat SYSCTL_HANDLER_ARGS;
static int sysctl_sndq_maxlen SYSCTL_HANDLER_ARGS;
static int sysctl_rcvq_maxlen SYSCTL_HANDLER_ARGS;
static int sysctl_hwcksum_dbg_mode SYSCTL_HANDLER_ARGS;
static int sysctl_hwcksum_dbg_partial_rxoff_forced SYSCTL_HANDLER_ARGS;
static int sysctl_hwcksum_dbg_partial_rxoff_adj SYSCTL_HANDLER_ARGS;

struct chain_len_stats tx_chain_len_stats;
static int sysctl_tx_chain_len_stats SYSCTL_HANDLER_ARGS;

#if TEST_INPUT_THREAD_TERMINATION
static int sysctl_input_thread_termination_spin SYSCTL_HANDLER_ARGS;
#endif /* TEST_INPUT_THREAD_TERMINATION */
/* The following are protected by dlil_ifnet_lock */
static TAILQ_HEAD(, ifnet) ifnet_detaching_head;
static u_int32_t ifnet_detaching_cnt;
static void *ifnet_delayed_run;         /* wait channel for detaching thread */

decl_lck_mtx_data(static, ifnet_fc_lock);

static uint32_t ifnet_flowhash_seed;

struct ifnet_flowhash_key {
	char            ifk_name[IFNAMSIZ];
	uint32_t        ifk_capabilities;
	uint32_t        ifk_capenable;
	uint32_t        ifk_output_sched_model;
};
/* Flow control entry per interface */
struct ifnet_fc_entry {
	RB_ENTRY(ifnet_fc_entry) ifce_entry;
	u_int32_t       ifce_flowhash;
	struct ifnet    *ifce_ifp;
};

static uint32_t ifnet_calc_flowhash(struct ifnet *);
static int ifce_cmp(const struct ifnet_fc_entry *,
    const struct ifnet_fc_entry *);
static int ifnet_fc_add(struct ifnet *);
static struct ifnet_fc_entry *ifnet_fc_get(u_int32_t);
static void ifnet_fc_entry_free(struct ifnet_fc_entry *);

/* protected by ifnet_fc_lock */
RB_HEAD(ifnet_fc_tree, ifnet_fc_entry) ifnet_fc_tree;
RB_PROTOTYPE(ifnet_fc_tree, ifnet_fc_entry, ifce_entry, ifce_cmp);
RB_GENERATE(ifnet_fc_tree, ifnet_fc_entry, ifce_entry, ifce_cmp);

static unsigned int ifnet_fc_zone_size;         /* sizeof ifnet_fc_entry */
static struct zone *ifnet_fc_zone;              /* ifnet_fc_entry zone */

#define IFNET_FC_ZONE_NAME      "ifnet_fc_zone"
#define IFNET_FC_ZONE_MAX       32
extern void bpfdetach(struct ifnet *);
extern void proto_input_run(void);

extern uint32_t udp_count_opportunistic(unsigned int ifindex,
    u_int32_t flags);
extern uint32_t tcp_count_opportunistic(unsigned int ifindex,
    u_int32_t flags);

__private_extern__ void link_rtrequest(int, struct rtentry *,
    struct sockaddr *);
#ifdef CONFIG_EMBEDDED
int dlil_lladdr_ckreq = 1;
#else
int dlil_lladdr_ckreq = 0;
#endif /* CONFIG_EMBEDDED */

#if DEBUG
int dlil_verbose = 1;
#else
int dlil_verbose = 0;
#endif /* DEBUG */
#if IFNET_INPUT_SANITY_CHK
/* sanity checking of input packet lists received */
static u_int32_t dlil_input_sanity_check = 0;
#endif /* IFNET_INPUT_SANITY_CHK */
/* rate limit debug messages */
struct timespec dlil_dbgrate = { .tv_sec = 1, .tv_nsec = 0 };

SYSCTL_DECL(_net_link_generic_system);

SYSCTL_INT(_net_link_generic_system, OID_AUTO, dlil_verbose,
    CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_verbose, 0, "Log DLIL error messages");
#define IF_SNDQ_MINLEN  32
u_int32_t if_sndq_maxlen = IFQ_MAXLEN;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, sndq_maxlen,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_sndq_maxlen, IFQ_MAXLEN,
    sysctl_sndq_maxlen, "I", "Default transmit queue max length");

#define IF_RCVQ_MINLEN  32
#define IF_RCVQ_MAXLEN  256
u_int32_t if_rcvq_maxlen = IF_RCVQ_MAXLEN;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rcvq_maxlen,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rcvq_maxlen, IFQ_MAXLEN,
    sysctl_rcvq_maxlen, "I", "Default receive queue max length");
#define IF_RXPOLL_DECAY 2       /* ilog2 of EWMA decay rate (4) */
u_int32_t if_rxpoll_decay = IF_RXPOLL_DECAY;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_decay,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_decay, IF_RXPOLL_DECAY,
    "ilog2 of EWMA decay rate of avg inbound packets");

#define IF_RXPOLL_MODE_HOLDTIME_MIN     (10ULL * 1000 * 1000)   /* 10 ms */
#define IF_RXPOLL_MODE_HOLDTIME         (1000ULL * 1000 * 1000) /* 1 sec */
static u_int64_t if_rxpoll_mode_holdtime = IF_RXPOLL_MODE_HOLDTIME;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_freeze_time,
    CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_mode_holdtime,
    IF_RXPOLL_MODE_HOLDTIME, sysctl_rxpoll_mode_holdtime,
    "Q", "input poll mode freeze time");

#define IF_RXPOLL_SAMPLETIME_MIN        (1ULL * 1000 * 1000)    /* 1 ms */
#define IF_RXPOLL_SAMPLETIME            (10ULL * 1000 * 1000)   /* 10 ms */
static u_int64_t if_rxpoll_sample_holdtime = IF_RXPOLL_SAMPLETIME;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_sample_time,
    CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_sample_holdtime,
    IF_RXPOLL_SAMPLETIME, sysctl_rxpoll_sample_holdtime,
    "Q", "input poll sampling time");

static u_int64_t if_rxpoll_interval_time = IF_RXPOLL_INTERVALTIME;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_interval_time,
    CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_interval_time,
    IF_RXPOLL_INTERVALTIME, sysctl_rxpoll_interval_time,
    "Q", "input poll interval (time)");
#define IF_RXPOLL_INTERVAL_PKTS 0       /* 0 (disabled) */
u_int32_t if_rxpoll_interval_pkts = IF_RXPOLL_INTERVAL_PKTS;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_interval_pkts,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_interval_pkts,
    IF_RXPOLL_INTERVAL_PKTS, "input poll interval (packets)");

#define IF_RXPOLL_WLOWAT        10
static u_int32_t if_sysctl_rxpoll_wlowat = IF_RXPOLL_WLOWAT;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_wakeups_lowat,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_sysctl_rxpoll_wlowat,
    IF_RXPOLL_WLOWAT, sysctl_rxpoll_wlowat,
    "I", "input poll wakeup low watermark");

#define IF_RXPOLL_WHIWAT        100
static u_int32_t if_sysctl_rxpoll_whiwat = IF_RXPOLL_WHIWAT;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_wakeups_hiwat,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_sysctl_rxpoll_whiwat,
    IF_RXPOLL_WHIWAT, sysctl_rxpoll_whiwat,
    "I", "input poll wakeup high watermark");

static u_int32_t if_rxpoll_max = 0;     /* 0 (automatic) */
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_max,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_max, 0,
    "max packets per poll call");

u_int32_t if_rxpoll = 1;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll, 0,
    sysctl_rxpoll, "I", "enable opportunistic input polling");
#if TEST_INPUT_THREAD_TERMINATION
static u_int32_t if_input_thread_termination_spin = 0;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, input_thread_termination_spin,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_input_thread_termination_spin, 0,
    sysctl_input_thread_termination_spin,
    "I", "input thread termination spin limit");
#endif /* TEST_INPUT_THREAD_TERMINATION */

static u_int32_t cur_dlil_input_threads = 0;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, dlil_input_threads,
    CTLFLAG_RD | CTLFLAG_LOCKED, &cur_dlil_input_threads, 0,
    "Current number of DLIL input threads");

#if IFNET_INPUT_SANITY_CHK
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, dlil_input_sanity_check,
    CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_input_sanity_check, 0,
    "Turn on sanity checking in DLIL input");
#endif /* IFNET_INPUT_SANITY_CHK */

static u_int32_t if_flowadv = 1;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, flow_advisory,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_flowadv, 1,
    "enable flow-advisory mechanism");

static u_int32_t if_delaybased_queue = 1;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, delaybased_queue,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_delaybased_queue, 1,
    "enable delay based dynamic queue sizing");
static uint64_t hwcksum_in_invalidated = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_in_invalidated, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_in_invalidated, "inbound packets with invalidated hardware cksum");

uint32_t hwcksum_dbg = 0;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_dbg,
    CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg, 0,
    "enable hardware cksum debugging");

u_int32_t ifnet_start_delayed = 0;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, start_delayed,
    CTLFLAG_RW | CTLFLAG_LOCKED, &ifnet_start_delayed, 0,
    "number of times start was delayed");

u_int32_t ifnet_delay_start_disabled = 0;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, start_delay_disabled,
    CTLFLAG_RW | CTLFLAG_LOCKED, &ifnet_delay_start_disabled, 0,
    "number of times start was delayed");

#define HWCKSUM_DBG_PARTIAL_FORCED      0x1     /* forced partial checksum */
#define HWCKSUM_DBG_PARTIAL_RXOFF_ADJ   0x2     /* adjust start offset */
#define HWCKSUM_DBG_FINALIZE_FORCED     0x10    /* forced finalize */
#define HWCKSUM_DBG_MASK \
	(HWCKSUM_DBG_PARTIAL_FORCED | HWCKSUM_DBG_PARTIAL_RXOFF_ADJ |  \
	HWCKSUM_DBG_FINALIZE_FORCED)
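/*
 * Usage sketch (illustrative, not part of the original source): the debug
 * knobs below are exported under net.link.generic.system, so from user space
 *
 *	sysctl -w net.link.generic.system.hwcksum_dbg=1
 *	sysctl -w net.link.generic.system.hwcksum_dbg_mode=0x1
 *
 * enables checksum debugging and forces the partial-checksum path
 * (HWCKSUM_DBG_PARTIAL_FORCED); only bits within HWCKSUM_DBG_MASK are
 * meaningful mode flags.
 */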
static uint32_t hwcksum_dbg_mode = 0;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, hwcksum_dbg_mode,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg_mode,
    0, sysctl_hwcksum_dbg_mode, "I", "hardware cksum debugging mode");
static uint64_t hwcksum_dbg_partial_forced = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_partial_forced, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_partial_forced, "packets forced using partial cksum");

static uint64_t hwcksum_dbg_partial_forced_bytes = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_partial_forced_bytes, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_partial_forced_bytes, "bytes forced using partial cksum");

static uint32_t hwcksum_dbg_partial_rxoff_forced = 0;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_partial_rxoff_forced, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    &hwcksum_dbg_partial_rxoff_forced, 0,
    sysctl_hwcksum_dbg_partial_rxoff_forced, "I",
    "forced partial cksum rx offset");

static uint32_t hwcksum_dbg_partial_rxoff_adj = 0;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, hwcksum_dbg_partial_rxoff_adj,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg_partial_rxoff_adj,
    0, sysctl_hwcksum_dbg_partial_rxoff_adj, "I",
    "adjusted partial cksum rx offset");

static uint64_t hwcksum_dbg_verified = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_verified, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_verified, "packets verified for having good checksum");

static uint64_t hwcksum_dbg_bad_cksum = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_bad_cksum, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_bad_cksum, "packets with bad hardware calculated checksum");

static uint64_t hwcksum_dbg_bad_rxoff = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_bad_rxoff, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_bad_rxoff, "packets with invalid rxoff");

static uint64_t hwcksum_dbg_adjusted = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_adjusted, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_adjusted, "packets with rxoff adjusted");

static uint64_t hwcksum_dbg_finalized_hdr = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_finalized_hdr, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_finalized_hdr, "finalized headers");

static uint64_t hwcksum_dbg_finalized_data = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_finalized_data, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_finalized_data, "finalized payloads");
uint32_t hwcksum_tx = 1;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_tx,
    CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_tx, 0,
    "enable transmit hardware checksum offload");

uint32_t hwcksum_rx = 1;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_rx,
    CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_rx, 0,
    "enable receive hardware checksum offload");

SYSCTL_PROC(_net_link_generic_system, OID_AUTO, tx_chain_len_stats,
    CTLFLAG_RD | CTLFLAG_LOCKED, 0, 9,
    sysctl_tx_chain_len_stats, "S", "");

uint32_t tx_chain_len_count = 0;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, tx_chain_len_count,
    CTLFLAG_RW | CTLFLAG_LOCKED, &tx_chain_len_count, 0, "");

static uint32_t threshold_notify = 1;           /* enable/disable */
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, threshold_notify,
    CTLFLAG_RW | CTLFLAG_LOCKED, &threshold_notify, 0, "");

static uint32_t threshold_interval = 2;         /* in seconds */
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, threshold_interval,
    CTLFLAG_RW | CTLFLAG_LOCKED, &threshold_interval, 0, "");
#if (DEVELOPMENT || DEBUG)
static int sysctl_get_kao_frames SYSCTL_HANDLER_ARGS;
SYSCTL_NODE(_net_link_generic_system, OID_AUTO, get_kao_frames,
    CTLFLAG_RD | CTLFLAG_LOCKED, sysctl_get_kao_frames, "");
#endif /* DEVELOPMENT || DEBUG */

struct net_api_stats net_api_stats;
SYSCTL_STRUCT(_net, OID_AUTO, api_stats, CTLFLAG_RD | CTLFLAG_LOCKED,
    &net_api_stats, net_api_stats, "");

unsigned int net_rxpoll = 1;
unsigned int net_affinity = 1;
static kern_return_t dlil_affinity_set(struct thread *, u_int32_t);

extern u_int32_t inject_buckets;

static lck_grp_attr_t *dlil_grp_attributes = NULL;
static lck_attr_t *dlil_lck_attributes = NULL;

/* DLIL data threshold thread call */
static void dlil_dt_tcall_fn(thread_call_param_t, thread_call_param_t);
void
ifnet_filter_update_tso(boolean_t filter_enable)
{
	/*
	 * update filter count and route_generation ID to let TCP
	 * know it should reevaluate doing TSO or not
	 */
	OSAddAtomic(filter_enable ? 1 : -1, &dlil_filter_disable_tso_count);
	routegenid_update();
}
#define DLIL_INPUT_CHECK(m, ifp) {                                      \
	struct ifnet *_rcvif = mbuf_pkthdr_rcvif(m);                    \
	if (_rcvif == NULL || (ifp != lo_ifp && _rcvif != ifp) ||       \
	    !(mbuf_flags(m) & MBUF_PKTHDR)) {                           \
		panic_plain("%s: invalid mbuf %p\n", __func__, m);      \
	}                                                               \
}
#define DLIL_EWMA(old, new, decay) do {                                 \
	u_int32_t _avg;                                                 \
	if ((_avg = (old)) > 0)                                         \
		_avg = (((_avg << (decay)) - _avg) + (new)) >> (decay); \
	else                                                            \
		_avg = (new);                                           \
	(old) = _avg;                                                   \
} while (0)
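/*
 * Worked example (illustrative): with decay = 2, old = 40 and new = 8,
 * DLIL_EWMA computes ((40 << 2) - 40 + 8) >> 2 = 128 / 4 = 32, i.e. the
 * update is avg = (old * (2^decay - 1) + new) / 2^decay, an exponentially
 * weighted moving average that gives the newest sample a weight of
 * 1/2^decay.
 */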
#define MBPS    (1ULL * 1000 * 1000)
#define GBPS    (MBPS * 1000)

struct rxpoll_time_tbl {
	u_int64_t       speed;          /* downlink speed */
	u_int32_t       plowat;         /* packets low watermark */
	u_int32_t       phiwat;         /* packets high watermark */
	u_int32_t       blowat;         /* bytes low watermark */
	u_int32_t       bhiwat;         /* bytes high watermark */
};

static struct rxpoll_time_tbl rxpoll_tbl[] = {
	{ .speed =  10 * MBPS, .plowat =  2, .phiwat =  8, .blowat = (1 * 1024), .bhiwat =  (6 * 1024) },
	{ .speed = 100 * MBPS, .plowat = 10, .phiwat = 40, .blowat = (4 * 1024), .bhiwat = (64 * 1024) },
	{ .speed =   1 * GBPS, .plowat = 10, .phiwat = 40, .blowat = (4 * 1024), .bhiwat = (64 * 1024) },
	{ .speed =  10 * GBPS, .plowat = 10, .phiwat = 40, .blowat = (4 * 1024), .bhiwat = (64 * 1024) },
	{ .speed = 100 * GBPS, .plowat = 10, .phiwat = 40, .blowat = (4 * 1024), .bhiwat = (64 * 1024) },
	{ .speed = 0, .plowat = 0, .phiwat = 0, .blowat = 0, .bhiwat = 0 }
};
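/*
 * Illustrative note (the consuming code is not part of this excerpt): the
 * poll-parameter setup is expected to scan rxpoll_tbl from the top and keep
 * the last entry whose .speed does not exceed the interface's downlink rate,
 * with the zero-speed entry terminating the scan; a 1 Gbps link would then
 * poll with low/high watermarks of 10/40 packets and 4 KiB/64 KiB.
 */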
decl_lck_mtx_data(static, dlil_thread_sync_lock);
static uint32_t dlil_pending_thread_cnt = 0;

static void
dlil_incr_pending_thread_count(void)
{
	LCK_MTX_ASSERT(&dlil_thread_sync_lock, LCK_MTX_ASSERT_NOTOWNED);
	lck_mtx_lock(&dlil_thread_sync_lock);
	dlil_pending_thread_cnt++;
	lck_mtx_unlock(&dlil_thread_sync_lock);
}

static void
dlil_decr_pending_thread_count(void)
{
	LCK_MTX_ASSERT(&dlil_thread_sync_lock, LCK_MTX_ASSERT_NOTOWNED);
	lck_mtx_lock(&dlil_thread_sync_lock);
	VERIFY(dlil_pending_thread_cnt > 0);
	dlil_pending_thread_cnt--;
	if (dlil_pending_thread_cnt == 0) {
		wakeup(&dlil_pending_thread_cnt);
	}
	lck_mtx_unlock(&dlil_thread_sync_lock);
}
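/*
 * Illustrative note: dlil_incr_pending_thread_count() is meant to be called
 * before a DLIL worker thread is spawned and the matching
 * dlil_decr_pending_thread_count() by that thread once it is running, so a
 * waiter sleeping on &dlil_pending_thread_cnt (the wakeup() channel above)
 * is released once the pending count drains to zero.
 */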
static u_int32_t
proto_hash_value(u_int32_t protocol_family)
{
	/*
	 * dlil_proto_unplumb_all() depends on the mapping between
	 * the hash bucket index and the protocol family defined
	 * here; future changes must be applied there as well.
	 */
	switch (protocol_family) {
	case PF_INET:
		return 0;
	case PF_INET6:
		return 1;
	case PF_VLAN:
		return 2;
	case PF_802154:
		return 3;
	case PF_UNSPEC:
	default:
		return 4;
	}
}
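/*
 * Example (illustrative): find_attached_proto() below recomputes this same
 * bucket index for a lookup, so a protocol attached under PF_INET is found
 * in the same if_proto_hash[] slot the attach placed it in; families
 * without an explicit case above share the default bucket.
 */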
/*
 * Caller must already be holding ifnet lock.
 */
static struct if_proto *
find_attached_proto(struct ifnet *ifp, u_int32_t protocol_family)
{
	struct if_proto *proto = NULL;
	u_int32_t i = proto_hash_value(protocol_family);

	ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);

	if (ifp->if_proto_hash != NULL) {
		proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
	}

	while (proto != NULL && proto->protocol_family != protocol_family) {
		proto = SLIST_NEXT(proto, next_hash);
	}

	if (proto != NULL) {
		if_proto_ref(proto);
	}

	return proto;
}
static void
if_proto_ref(struct if_proto *proto)
{
	atomic_add_32(&proto->refcount, 1);
}

extern void if_rtproto_del(struct ifnet *ifp, int protocol);
static void
if_proto_free(struct if_proto *proto)
{
	u_int32_t oldval;
	struct ifnet *ifp = proto->ifp;
	u_int32_t proto_family = proto->protocol_family;
	struct kev_dl_proto_data ev_pr_data;

	oldval = atomic_add_32_ov(&proto->refcount, -1);
	if (oldval > 1) {
		return;
	}

	/* No more reference on this, protocol must have been detached */
	VERIFY(proto->detached);

	if (proto->proto_kpi == kProtoKPI_v1) {
		if (proto->kpi.v1.detached) {
			proto->kpi.v1.detached(ifp, proto->protocol_family);
		}
	}
	if (proto->proto_kpi == kProtoKPI_v2) {
		if (proto->kpi.v2.detached) {
			proto->kpi.v2.detached(ifp, proto->protocol_family);
		}
	}

	/*
	 * Cleanup routes that may still be in the routing table for that
	 * interface/protocol pair.
	 */
	if_rtproto_del(ifp, proto_family);

	/*
	 * The reserved field carries the number of protocols still attached
	 * (subject to change)
	 */
	ifnet_lock_shared(ifp);
	ev_pr_data.proto_family = proto_family;
	ev_pr_data.proto_remaining_count = dlil_ifp_protolist(ifp, NULL, 0);
	ifnet_lock_done(ifp);

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_DETACHED,
	    (struct net_event_data *)&ev_pr_data,
	    sizeof(struct kev_dl_proto_data));

	if (ev_pr_data.proto_remaining_count == 0) {
		/*
		 * The protocol count has gone to zero, mark the interface down.
		 * This used to be done by configd.KernelEventMonitor, but that
		 * is inherently prone to races (rdar://problem/30810208).
		 */
		(void) ifnet_set_flags(ifp, 0, IFF_UP);
		(void) ifnet_ioctl(ifp, 0, SIOCSIFFLAGS, NULL);
		dlil_post_sifflags_msg(ifp);
	}

	zfree(dlif_proto_zone, proto);
}
__private_extern__ void
ifnet_lock_assert(struct ifnet *ifp, ifnet_lock_assert_t what)
{
	unsigned int type = 0;

	switch (what) {
	case IFNET_LCK_ASSERT_EXCLUSIVE:
		type = LCK_RW_ASSERT_EXCLUSIVE;
		break;

	case IFNET_LCK_ASSERT_SHARED:
		type = LCK_RW_ASSERT_SHARED;
		break;

	case IFNET_LCK_ASSERT_OWNED:
		type = LCK_RW_ASSERT_HELD;
		break;

	case IFNET_LCK_ASSERT_NOTOWNED:
		/* nothing to do here for RW lock; bypass assert */
		return;

	default:
		panic("bad ifnet assert type: %d", what);
	}

	LCK_RW_ASSERT(&ifp->if_lock, type);
}
__private_extern__ void
ifnet_lock_shared(struct ifnet *ifp)
{
	lck_rw_lock_shared(&ifp->if_lock);
}

__private_extern__ void
ifnet_lock_exclusive(struct ifnet *ifp)
{
	lck_rw_lock_exclusive(&ifp->if_lock);
}

__private_extern__ void
ifnet_lock_done(struct ifnet *ifp)
{
	lck_rw_done(&ifp->if_lock);
}

__private_extern__ void
if_inetdata_lock_shared(struct ifnet *ifp)
{
	lck_rw_lock_shared(&ifp->if_inetdata_lock);
}

__private_extern__ void
if_inetdata_lock_exclusive(struct ifnet *ifp)
{
	lck_rw_lock_exclusive(&ifp->if_inetdata_lock);
}

__private_extern__ void
if_inetdata_lock_done(struct ifnet *ifp)
{
	lck_rw_done(&ifp->if_inetdata_lock);
}

__private_extern__ void
if_inet6data_lock_shared(struct ifnet *ifp)
{
	lck_rw_lock_shared(&ifp->if_inet6data_lock);
}

__private_extern__ void
if_inet6data_lock_exclusive(struct ifnet *ifp)
{
	lck_rw_lock_exclusive(&ifp->if_inet6data_lock);
}

__private_extern__ void
if_inet6data_lock_done(struct ifnet *ifp)
{
	lck_rw_done(&ifp->if_inet6data_lock);
}

__private_extern__ void
ifnet_head_lock_shared(void)
{
	lck_rw_lock_shared(&ifnet_head_lock);
}

__private_extern__ void
ifnet_head_lock_exclusive(void)
{
	lck_rw_lock_exclusive(&ifnet_head_lock);
}

__private_extern__ void
ifnet_head_done(void)
{
	lck_rw_done(&ifnet_head_lock);
}

__private_extern__ void
ifnet_head_assert_exclusive(void)
{
	LCK_RW_ASSERT(&ifnet_head_lock, LCK_RW_ASSERT_EXCLUSIVE);
}
/*
 * dlil_ifp_protolist
 * - get the list of protocols attached to the interface, or just the number
 *   of attached protocols
 * - if the number returned is greater than 'list_count', truncation occurred
 * - caller must already be holding ifnet lock.
 */
static u_int32_t
dlil_ifp_protolist(struct ifnet *ifp, protocol_family_t *list,
    u_int32_t list_count)
{
	u_int32_t count = 0;
	int i;

	ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);

	if (ifp->if_proto_hash == NULL) {
		goto done;
	}

	for (i = 0; i < PROTO_HASH_SLOTS; i++) {
		struct if_proto *proto;
		SLIST_FOREACH(proto, &ifp->if_proto_hash[i], next_hash) {
			if (list != NULL && count < list_count) {
				list[count] = proto->protocol_family;
			}
			count++;
		}
	}
done:
	return count;
}
__private_extern__ u_int32_t
if_get_protolist(struct ifnet * ifp, u_int32_t *protolist, u_int32_t count)
{
	ifnet_lock_shared(ifp);
	count = dlil_ifp_protolist(ifp, protolist, count);
	ifnet_lock_done(ifp);
	return count;
}

__private_extern__ void
if_free_protolist(u_int32_t *list)
{
	_FREE(list, M_TEMP);
}
__private_extern__ int
dlil_post_msg(struct ifnet *ifp, u_int32_t event_subclass,
    u_int32_t event_code, struct net_event_data *event_data,
    u_int32_t event_data_len)
{
	struct net_event_data ev_data;
	struct kev_msg ev_msg;

	bzero(&ev_msg, sizeof(ev_msg));
	bzero(&ev_data, sizeof(ev_data));
	/*
	 * a net event always starts with a net_event_data structure
	 * but the caller can generate a simple net event or
	 * provide a longer event structure to post
	 */
	ev_msg.vendor_code = KEV_VENDOR_APPLE;
	ev_msg.kev_class = KEV_NETWORK_CLASS;
	ev_msg.kev_subclass = event_subclass;
	ev_msg.event_code = event_code;

	if (event_data == NULL) {
		event_data = &ev_data;
		event_data_len = sizeof(struct net_event_data);
	}

	strlcpy(&event_data->if_name[0], ifp->if_name, IFNAMSIZ);
	event_data->if_family = ifp->if_family;
	event_data->if_unit = (u_int32_t)ifp->if_unit;

	ev_msg.dv[0].data_length = event_data_len;
	ev_msg.dv[0].data_ptr = event_data;
	ev_msg.dv[1].data_length = 0;

	bool update_generation = true;
	if (event_subclass == KEV_DL_SUBCLASS) {
		/* Don't update interface generation for frequent link quality and state changes */
		switch (event_code) {
		case KEV_DL_LINK_QUALITY_METRIC_CHANGED:
		case KEV_DL_RRC_STATE_CHANGED:
		case KEV_DL_NODE_PRESENCE:
		case KEV_DL_NODE_ABSENCE:
		case KEV_DL_MASTER_ELECTED:
			update_generation = false;
			break;
		default:
			break;
		}
	}

	return dlil_event_internal(ifp, &ev_msg, update_generation);
}
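/*
 * Usage sketch (illustrative): a caller that only needs to announce a simple
 * link event can pass NULL event data and let dlil_post_msg() fill in the
 * interface name, family and unit, e.g.
 *
 *	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_LINK_ON, NULL, 0);
 *
 * which posts a KEV_NETWORK_CLASS kernel event that user space receives over
 * the kernel event (PF_SYSTEM/SYSPROTO_EVENT) socket interface.
 */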
__private_extern__ int
dlil_alloc_local_stats(struct ifnet *ifp)
{
	int ret = EINVAL;
	void *buf, *base, **pbuf;

	if (ifp == NULL) {
		goto end;
	}

	if (ifp->if_tcp_stat == NULL && ifp->if_udp_stat == NULL) {
		/* allocate tcpstat_local structure */
		buf = zalloc(dlif_tcpstat_zone);
		if (buf == NULL) {
			ret = ENOBUFS;
			goto end;
		}
		bzero(buf, dlif_tcpstat_bufsize);

		/* Get the 64-bit aligned base address for this object */
		base = (void *)P2ROUNDUP((intptr_t)buf + sizeof(u_int64_t),
		    sizeof(u_int64_t));
		VERIFY(((intptr_t)base + dlif_tcpstat_size) <=
		    ((intptr_t)buf + dlif_tcpstat_bufsize));

		/*
		 * Wind back a pointer size from the aligned base and
		 * save the original address so we can free it later.
		 */
		pbuf = (void **)((intptr_t)base - sizeof(void *));
		*pbuf = buf;
		ifp->if_tcp_stat = base;

		/* allocate udpstat_local structure */
		buf = zalloc(dlif_udpstat_zone);
		if (buf == NULL) {
			ret = ENOBUFS;
			goto end;
		}
		bzero(buf, dlif_udpstat_bufsize);

		/* Get the 64-bit aligned base address for this object */
		base = (void *)P2ROUNDUP((intptr_t)buf + sizeof(u_int64_t),
		    sizeof(u_int64_t));
		VERIFY(((intptr_t)base + dlif_udpstat_size) <=
		    ((intptr_t)buf + dlif_udpstat_bufsize));

		/*
		 * Wind back a pointer size from the aligned base and
		 * save the original address so we can free it later.
		 */
		pbuf = (void **)((intptr_t)base - sizeof(void *));
		*pbuf = buf;
		ifp->if_udp_stat = base;

		VERIFY(IS_P2ALIGNED(ifp->if_tcp_stat, sizeof(u_int64_t)) &&
		    IS_P2ALIGNED(ifp->if_udp_stat, sizeof(u_int64_t)));

		ret = 0;
	}

	if (ifp->if_ipv4_stat == NULL) {
		MALLOC(ifp->if_ipv4_stat, struct if_tcp_ecn_stat *,
		    sizeof(struct if_tcp_ecn_stat), M_TEMP, M_WAITOK | M_ZERO);
		if (ifp->if_ipv4_stat == NULL) {
			ret = ENOMEM;
			goto end;
		}
	}

	if (ifp->if_ipv6_stat == NULL) {
		MALLOC(ifp->if_ipv6_stat, struct if_tcp_ecn_stat *,
		    sizeof(struct if_tcp_ecn_stat), M_TEMP, M_WAITOK | M_ZERO);
		if (ifp->if_ipv6_stat == NULL) {
			ret = ENOMEM;
			goto end;
		}
	}
end:
	if (ifp != NULL && ret != 0) {
		if (ifp->if_tcp_stat != NULL) {
			pbuf = (void **)
			    ((intptr_t)ifp->if_tcp_stat - sizeof(void *));
			zfree(dlif_tcpstat_zone, *pbuf);
			ifp->if_tcp_stat = NULL;
		}
		if (ifp->if_udp_stat != NULL) {
			pbuf = (void **)
			    ((intptr_t)ifp->if_udp_stat - sizeof(void *));
			zfree(dlif_udpstat_zone, *pbuf);
			ifp->if_udp_stat = NULL;
		}
		if (ifp->if_ipv4_stat != NULL) {
			FREE(ifp->if_ipv4_stat, M_TEMP);
			ifp->if_ipv4_stat = NULL;
		}
		if (ifp->if_ipv6_stat != NULL) {
			FREE(ifp->if_ipv6_stat, M_TEMP);
			ifp->if_ipv6_stat = NULL;
		}
	}

	return ret;
}
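/*
 * Worked example of the alignment trick above (illustrative, hypothetical
 * addresses): if zalloc() returns buf = 0x...1004, then
 * P2ROUNDUP(buf + 8, 8) yields base = 0x...1010; the original buf pointer is
 * stashed at base - sizeof(void *) = 0x...1008, still inside the extra
 * headroom, so the cleanup path can recover it and zfree() the allocation
 * even though the ifnet only keeps the aligned base pointer.
 */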
static void
dlil_reset_rxpoll_params(ifnet_t ifp)
{
	ASSERT(ifp != NULL);
	ifnet_set_poll_cycle(ifp, NULL);
	ifp->if_poll_update = 0;
	ifp->if_poll_flags = 0;
	ifp->if_poll_req = 0;
	ifp->if_poll_mode = IFNET_MODEL_INPUT_POLL_OFF;
	bzero(&ifp->if_poll_tstats, sizeof(ifp->if_poll_tstats));
	bzero(&ifp->if_poll_pstats, sizeof(ifp->if_poll_pstats));
	bzero(&ifp->if_poll_sstats, sizeof(ifp->if_poll_sstats));
	net_timerclear(&ifp->if_poll_mode_holdtime);
	net_timerclear(&ifp->if_poll_mode_lasttime);
	net_timerclear(&ifp->if_poll_sample_holdtime);
	net_timerclear(&ifp->if_poll_sample_lasttime);
	net_timerclear(&ifp->if_poll_dbg_lasttime);
}
static int
dlil_create_input_thread(ifnet_t ifp, struct dlil_threading_info *inp)
{
	boolean_t dlil_rxpoll_input;
	thread_continue_t func;
	u_int32_t limit;
	int error = 0;

	dlil_rxpoll_input = (ifp != NULL && net_rxpoll &&
	    (ifp->if_eflags & IFEF_RXPOLL) && (ifp->if_xflags & IFXF_LEGACY));

	/* NULL ifp indicates the main input thread, called at dlil_init time */
	if (ifp == NULL) {
		func = dlil_main_input_thread_func;
		VERIFY(inp == dlil_main_input_thread);
		(void) strlcat(inp->input_name,
		    "main_input", DLIL_THREADNAME_LEN);
	} else if (dlil_rxpoll_input) {
		func = dlil_rxpoll_input_thread_func;
		VERIFY(inp != dlil_main_input_thread);
		(void) snprintf(inp->input_name, DLIL_THREADNAME_LEN,
		    "%s_input_poll", if_name(ifp));
	} else {
		func = dlil_input_thread_func;
		VERIFY(inp != dlil_main_input_thread);
		(void) snprintf(inp->input_name, DLIL_THREADNAME_LEN,
		    "%s_input", if_name(ifp));
	}
	VERIFY(inp->input_thr == THREAD_NULL);

	inp->lck_grp = lck_grp_alloc_init(inp->input_name, dlil_grp_attributes);
	lck_mtx_init(&inp->input_lck, inp->lck_grp, dlil_lck_attributes);

	inp->ifp = ifp;         /* NULL for main input thread */

	/*
	 * For interfaces that support opportunistic polling, set the
	 * low and high watermarks for outstanding inbound packets/bytes.
	 * Also define freeze times for transitioning between modes
	 * and updating the average.
	 */
	if (ifp != NULL && net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) {
		limit = MAX(if_rcvq_maxlen, IF_RCVQ_MINLEN);
		if (ifp->if_xflags & IFXF_LEGACY) {
			(void) dlil_rxpoll_set_params(ifp, NULL, FALSE);
		}
	} else {
		limit = (u_int32_t)-1;
	}

	_qinit(&inp->rcvq_pkts, Q_DROPTAIL, limit, QP_MBUF);
	if (inp == dlil_main_input_thread) {
		struct dlil_main_threading_info *inpm =
		    (struct dlil_main_threading_info *)inp;
		_qinit(&inpm->lo_rcvq_pkts, Q_DROPTAIL, limit, QP_MBUF);
	}

	error = kernel_thread_start(func, inp, &inp->input_thr);
	if (error == KERN_SUCCESS) {
		ml_thread_policy(inp->input_thr, MACHINE_GROUP,
		    (MACHINE_NETWORK_GROUP | MACHINE_NETWORK_NETISR));
		/*
		 * We create an affinity set so that the matching workloop
		 * thread or the starter thread (for loopback) can be
		 * scheduled on the same processor set as the input thread.
		 */
		if (net_affinity) {
			struct thread *tp = inp->input_thr;
			u_int32_t tag;
			/*
			 * Randomize to reduce the probability
			 * of affinity tag namespace collision.
			 */
			read_frandom(&tag, sizeof(tag));
			if (dlil_affinity_set(tp, tag) == KERN_SUCCESS) {
				thread_reference(tp);
				inp->tag = tag;
				inp->net_affinity = TRUE;
			}
		}
	} else if (inp == dlil_main_input_thread) {
		panic_plain("%s: couldn't create main input thread", __func__);
	} else {
		panic_plain("%s: couldn't create %s input thread", __func__,
		    if_name(ifp));
	}
	OSAddAtomic(1, &cur_dlil_input_threads);

	return error;
}
1416 sysctl_input_thread_termination_spin SYSCTL_HANDLER_ARGS
1418 #pragma unused(arg1, arg2)
1422 i
= if_input_thread_termination_spin
;
1424 err
= sysctl_handle_int(oidp
, &i
, 0, req
);
1425 if (err
!= 0 || req
->newptr
== USER_ADDR_NULL
) {
1429 if (net_rxpoll
== 0) {
1433 if_input_thread_termination_spin
= i
;
1436 #endif /* TEST_INPUT_THREAD_TERMINATION */
void
dlil_clean_threading_info(struct dlil_threading_info *inp)
{
	lck_mtx_destroy(&inp->input_lck, inp->lck_grp);
	lck_grp_free(inp->lck_grp);

	inp->input_waiting = 0;
	bzero(inp->input_name, sizeof(inp->input_name));
	VERIFY(qhead(&inp->rcvq_pkts) == NULL && qempty(&inp->rcvq_pkts));
	qlimit(&inp->rcvq_pkts) = 0;
	bzero(&inp->stats, sizeof(inp->stats));

	VERIFY(!inp->net_affinity);
	inp->input_thr = THREAD_NULL;
	VERIFY(inp->wloop_thr == THREAD_NULL);
	VERIFY(inp->poll_thr == THREAD_NULL);
	VERIFY(inp->tag == 0);
#if IFNET_INPUT_SANITY_CHK
	inp->input_mbuf_cnt = 0;
#endif /* IFNET_INPUT_SANITY_CHK */
}
static void
dlil_terminate_input_thread(struct dlil_threading_info *inp)
{
	struct ifnet *ifp = inp->ifp;
	classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt);

	VERIFY(current_thread() == inp->input_thr);
	VERIFY(inp != dlil_main_input_thread);

	OSAddAtomic(-1, &cur_dlil_input_threads);

#if TEST_INPUT_THREAD_TERMINATION
	{ /* do something useless that won't get optimized away */
		uint32_t        v = 1;
		for (uint32_t i = 0;
		    i < if_input_thread_termination_spin;
		    i++) {
			v = (i + 1) * v;
		}
		DLIL_PRINTF("the value is %d\n", v);
	}
#endif /* TEST_INPUT_THREAD_TERMINATION */

	lck_mtx_lock_spin(&inp->input_lck);
	_getq_all(&inp->rcvq_pkts, &pkt, NULL, NULL, NULL);
	VERIFY((inp->input_waiting & DLIL_INPUT_TERMINATE) != 0);
	inp->input_waiting |= DLIL_INPUT_TERMINATE_COMPLETE;
	wakeup_one((caddr_t)&inp->input_waiting);
	lck_mtx_unlock(&inp->input_lck);

	/* free up pending packets */
	if (pkt.cp_mbuf != NULL) {
		mbuf_freem_list(pkt.cp_mbuf);
	}

	/* for the extra refcnt from kernel_thread_start() */
	thread_deallocate(current_thread());

	if (dlil_verbose) {
		DLIL_PRINTF("%s: input thread terminated\n",
		    if_name(ifp));
	}

	/* this is the end */
	thread_terminate(current_thread());
}
static kern_return_t
dlil_affinity_set(struct thread *tp, u_int32_t tag)
{
	thread_affinity_policy_data_t policy;

	bzero(&policy, sizeof(policy));
	policy.affinity_tag = tag;
	return thread_policy_set(tp, THREAD_AFFINITY_POLICY,
	    (thread_policy_t)&policy, THREAD_AFFINITY_POLICY_COUNT);
}
void
dlil_init(void)
{
	thread_t thread = THREAD_NULL;
	/*
	 * The following fields must be 64-bit aligned for atomic operations.
	 */
	IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_opackets);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_oerrors);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_collisions);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_obytes);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_imcasts);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_omcasts);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_alignerrs);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_dt_bytes);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_fpackets);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_fbytes);

	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_opackets);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_oerrors);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_collisions);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_obytes);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_imcasts);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_omcasts);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_alignerrs);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_dt_bytes);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_fpackets);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_fbytes);

	/*
	 * These IF_HWASSIST_ flags must be equal to their IFNET_* counterparts.
	 */
	_CASSERT(IF_HWASSIST_CSUM_IP == IFNET_CSUM_IP);
	_CASSERT(IF_HWASSIST_CSUM_TCP == IFNET_CSUM_TCP);
	_CASSERT(IF_HWASSIST_CSUM_UDP == IFNET_CSUM_UDP);
	_CASSERT(IF_HWASSIST_CSUM_IP_FRAGS == IFNET_CSUM_FRAGMENT);
	_CASSERT(IF_HWASSIST_CSUM_FRAGMENT == IFNET_IP_FRAGMENT);
	_CASSERT(IF_HWASSIST_CSUM_TCPIPV6 == IFNET_CSUM_TCPIPV6);
	_CASSERT(IF_HWASSIST_CSUM_UDPIPV6 == IFNET_CSUM_UDPIPV6);
	_CASSERT(IF_HWASSIST_CSUM_FRAGMENT_IPV6 == IFNET_IPV6_FRAGMENT);
	_CASSERT(IF_HWASSIST_CSUM_PARTIAL == IFNET_CSUM_PARTIAL);
	_CASSERT(IF_HWASSIST_CSUM_ZERO_INVERT == IFNET_CSUM_ZERO_INVERT);
	_CASSERT(IF_HWASSIST_VLAN_TAGGING == IFNET_VLAN_TAGGING);
	_CASSERT(IF_HWASSIST_VLAN_MTU == IFNET_VLAN_MTU);
	_CASSERT(IF_HWASSIST_TSO_V4 == IFNET_TSO_IPV4);
	_CASSERT(IF_HWASSIST_TSO_V6 == IFNET_TSO_IPV6);

	/*
	 * ... as well as the mbuf checksum flags counterparts.
	 */
	_CASSERT(CSUM_IP == IF_HWASSIST_CSUM_IP);
	_CASSERT(CSUM_TCP == IF_HWASSIST_CSUM_TCP);
	_CASSERT(CSUM_UDP == IF_HWASSIST_CSUM_UDP);
	_CASSERT(CSUM_IP_FRAGS == IF_HWASSIST_CSUM_IP_FRAGS);
	_CASSERT(CSUM_FRAGMENT == IF_HWASSIST_CSUM_FRAGMENT);
	_CASSERT(CSUM_TCPIPV6 == IF_HWASSIST_CSUM_TCPIPV6);
	_CASSERT(CSUM_UDPIPV6 == IF_HWASSIST_CSUM_UDPIPV6);
	_CASSERT(CSUM_FRAGMENT_IPV6 == IF_HWASSIST_CSUM_FRAGMENT_IPV6);
	_CASSERT(CSUM_PARTIAL == IF_HWASSIST_CSUM_PARTIAL);
	_CASSERT(CSUM_ZERO_INVERT == IF_HWASSIST_CSUM_ZERO_INVERT);
	_CASSERT(CSUM_VLAN_TAG_VALID == IF_HWASSIST_VLAN_TAGGING);

	/*
	 * Make sure we have at least IF_LLREACH_MAXLEN in the llreach info.
	 */
	_CASSERT(IF_LLREACH_MAXLEN <= IF_LLREACHINFO_ADDRLEN);
	_CASSERT(IFNET_LLREACHINFO_ADDRLEN == IF_LLREACHINFO_ADDRLEN);

	_CASSERT(IFRLOGF_DLIL == IFNET_LOGF_DLIL);
	_CASSERT(IFRLOGF_FAMILY == IFNET_LOGF_FAMILY);
	_CASSERT(IFRLOGF_DRIVER == IFNET_LOGF_DRIVER);
	_CASSERT(IFRLOGF_FIRMWARE == IFNET_LOGF_FIRMWARE);

	_CASSERT(IFRLOGCAT_CONNECTIVITY == IFNET_LOGCAT_CONNECTIVITY);
	_CASSERT(IFRLOGCAT_QUALITY == IFNET_LOGCAT_QUALITY);
	_CASSERT(IFRLOGCAT_PERFORMANCE == IFNET_LOGCAT_PERFORMANCE);

	_CASSERT(IFRTYPE_FAMILY_ANY == IFNET_FAMILY_ANY);
	_CASSERT(IFRTYPE_FAMILY_LOOPBACK == IFNET_FAMILY_LOOPBACK);
	_CASSERT(IFRTYPE_FAMILY_ETHERNET == IFNET_FAMILY_ETHERNET);
	_CASSERT(IFRTYPE_FAMILY_SLIP == IFNET_FAMILY_SLIP);
	_CASSERT(IFRTYPE_FAMILY_TUN == IFNET_FAMILY_TUN);
	_CASSERT(IFRTYPE_FAMILY_VLAN == IFNET_FAMILY_VLAN);
	_CASSERT(IFRTYPE_FAMILY_PPP == IFNET_FAMILY_PPP);
	_CASSERT(IFRTYPE_FAMILY_PVC == IFNET_FAMILY_PVC);
	_CASSERT(IFRTYPE_FAMILY_DISC == IFNET_FAMILY_DISC);
	_CASSERT(IFRTYPE_FAMILY_MDECAP == IFNET_FAMILY_MDECAP);
	_CASSERT(IFRTYPE_FAMILY_GIF == IFNET_FAMILY_GIF);
	_CASSERT(IFRTYPE_FAMILY_FAITH == IFNET_FAMILY_FAITH);
	_CASSERT(IFRTYPE_FAMILY_STF == IFNET_FAMILY_STF);
	_CASSERT(IFRTYPE_FAMILY_FIREWIRE == IFNET_FAMILY_FIREWIRE);
	_CASSERT(IFRTYPE_FAMILY_BOND == IFNET_FAMILY_BOND);
	_CASSERT(IFRTYPE_FAMILY_CELLULAR == IFNET_FAMILY_CELLULAR);
	_CASSERT(IFRTYPE_FAMILY_6LOWPAN == IFNET_FAMILY_6LOWPAN);
	_CASSERT(IFRTYPE_FAMILY_UTUN == IFNET_FAMILY_UTUN);
	_CASSERT(IFRTYPE_FAMILY_IPSEC == IFNET_FAMILY_IPSEC);

	_CASSERT(IFRTYPE_SUBFAMILY_ANY == IFNET_SUBFAMILY_ANY);
	_CASSERT(IFRTYPE_SUBFAMILY_USB == IFNET_SUBFAMILY_USB);
	_CASSERT(IFRTYPE_SUBFAMILY_BLUETOOTH == IFNET_SUBFAMILY_BLUETOOTH);
	_CASSERT(IFRTYPE_SUBFAMILY_WIFI == IFNET_SUBFAMILY_WIFI);
	_CASSERT(IFRTYPE_SUBFAMILY_THUNDERBOLT == IFNET_SUBFAMILY_THUNDERBOLT);
	_CASSERT(IFRTYPE_SUBFAMILY_RESERVED == IFNET_SUBFAMILY_RESERVED);
	_CASSERT(IFRTYPE_SUBFAMILY_INTCOPROC == IFNET_SUBFAMILY_INTCOPROC);
	_CASSERT(IFRTYPE_SUBFAMILY_QUICKRELAY == IFNET_SUBFAMILY_QUICKRELAY);
	_CASSERT(IFRTYPE_SUBFAMILY_DEFAULT == IFNET_SUBFAMILY_DEFAULT);

	_CASSERT(DLIL_MODIDLEN == IFNET_MODIDLEN);
	_CASSERT(DLIL_MODARGLEN == IFNET_MODARGLEN);
	PE_parse_boot_argn("net_affinity", &net_affinity,
	    sizeof(net_affinity));

	PE_parse_boot_argn("net_rxpoll", &net_rxpoll, sizeof(net_rxpoll));

	PE_parse_boot_argn("net_rtref", &net_rtref, sizeof(net_rtref));

	PE_parse_boot_argn("ifnet_debug", &ifnet_debug, sizeof(ifnet_debug));

	VERIFY(dlil_pending_thread_cnt == 0);
	dlif_size = (ifnet_debug == 0) ? sizeof(struct dlil_ifnet) :
	    sizeof(struct dlil_ifnet_dbg);
	/* Enforce 64-bit alignment for dlil_ifnet structure */
	dlif_bufsize = dlif_size + sizeof(void *) + sizeof(u_int64_t);
	dlif_bufsize = P2ROUNDUP(dlif_bufsize, sizeof(u_int64_t));
	dlif_zone = zinit(dlif_bufsize, DLIF_ZONE_MAX * dlif_bufsize,
	    0, DLIF_ZONE_NAME);
	if (dlif_zone == NULL) {
		panic_plain("%s: failed allocating %s", __func__,
		    DLIF_ZONE_NAME);
	}
	zone_change(dlif_zone, Z_EXPAND, TRUE);
	zone_change(dlif_zone, Z_CALLERACCT, FALSE);

	dlif_filt_size = sizeof(struct ifnet_filter);
	dlif_filt_zone = zinit(dlif_filt_size,
	    DLIF_FILT_ZONE_MAX * dlif_filt_size, 0, DLIF_FILT_ZONE_NAME);
	if (dlif_filt_zone == NULL) {
		panic_plain("%s: failed allocating %s", __func__,
		    DLIF_FILT_ZONE_NAME);
	}
	zone_change(dlif_filt_zone, Z_EXPAND, TRUE);
	zone_change(dlif_filt_zone, Z_CALLERACCT, FALSE);

	dlif_phash_size = sizeof(struct proto_hash_entry) * PROTO_HASH_SLOTS;
	dlif_phash_zone = zinit(dlif_phash_size,
	    DLIF_PHASH_ZONE_MAX * dlif_phash_size, 0, DLIF_PHASH_ZONE_NAME);
	if (dlif_phash_zone == NULL) {
		panic_plain("%s: failed allocating %s", __func__,
		    DLIF_PHASH_ZONE_NAME);
	}
	zone_change(dlif_phash_zone, Z_EXPAND, TRUE);
	zone_change(dlif_phash_zone, Z_CALLERACCT, FALSE);

	dlif_proto_size = sizeof(struct if_proto);
	dlif_proto_zone = zinit(dlif_proto_size,
	    DLIF_PROTO_ZONE_MAX * dlif_proto_size, 0, DLIF_PROTO_ZONE_NAME);
	if (dlif_proto_zone == NULL) {
		panic_plain("%s: failed allocating %s", __func__,
		    DLIF_PROTO_ZONE_NAME);
	}
	zone_change(dlif_proto_zone, Z_EXPAND, TRUE);
	zone_change(dlif_proto_zone, Z_CALLERACCT, FALSE);

	dlif_tcpstat_size = sizeof(struct tcpstat_local);
	/* Enforce 64-bit alignment for tcpstat_local structure */
	dlif_tcpstat_bufsize =
	    dlif_tcpstat_size + sizeof(void *) + sizeof(u_int64_t);
	dlif_tcpstat_bufsize =
	    P2ROUNDUP(dlif_tcpstat_bufsize, sizeof(u_int64_t));
	dlif_tcpstat_zone = zinit(dlif_tcpstat_bufsize,
	    DLIF_TCPSTAT_ZONE_MAX * dlif_tcpstat_bufsize, 0,
	    DLIF_TCPSTAT_ZONE_NAME);
	if (dlif_tcpstat_zone == NULL) {
		panic_plain("%s: failed allocating %s", __func__,
		    DLIF_TCPSTAT_ZONE_NAME);
	}
	zone_change(dlif_tcpstat_zone, Z_EXPAND, TRUE);
	zone_change(dlif_tcpstat_zone, Z_CALLERACCT, FALSE);

	dlif_udpstat_size = sizeof(struct udpstat_local);
	/* Enforce 64-bit alignment for udpstat_local structure */
	dlif_udpstat_bufsize =
	    dlif_udpstat_size + sizeof(void *) + sizeof(u_int64_t);
	dlif_udpstat_bufsize =
	    P2ROUNDUP(dlif_udpstat_bufsize, sizeof(u_int64_t));
	dlif_udpstat_zone = zinit(dlif_udpstat_bufsize,
	    DLIF_TCPSTAT_ZONE_MAX * dlif_udpstat_bufsize, 0,
	    DLIF_UDPSTAT_ZONE_NAME);
	if (dlif_udpstat_zone == NULL) {
		panic_plain("%s: failed allocating %s", __func__,
		    DLIF_UDPSTAT_ZONE_NAME);
	}
	zone_change(dlif_udpstat_zone, Z_EXPAND, TRUE);
	zone_change(dlif_udpstat_zone, Z_CALLERACCT, FALSE);
1734 ifnet_llreach_init();
1735 eventhandler_lists_ctxt_init(&ifnet_evhdlr_ctxt
);
1737 TAILQ_INIT(&dlil_ifnet_head
);
1738 TAILQ_INIT(&ifnet_head
);
1739 TAILQ_INIT(&ifnet_detaching_head
);
1740 TAILQ_INIT(&ifnet_ordered_head
);
1742 /* Setup the lock groups we will use */
1743 dlil_grp_attributes
= lck_grp_attr_alloc_init();
1745 dlil_lock_group
= lck_grp_alloc_init("DLIL internal locks",
1746 dlil_grp_attributes
);
1747 ifnet_lock_group
= lck_grp_alloc_init("ifnet locks",
1748 dlil_grp_attributes
);
1749 ifnet_head_lock_group
= lck_grp_alloc_init("ifnet head lock",
1750 dlil_grp_attributes
);
1751 ifnet_rcv_lock_group
= lck_grp_alloc_init("ifnet rcv locks",
1752 dlil_grp_attributes
);
1753 ifnet_snd_lock_group
= lck_grp_alloc_init("ifnet snd locks",
1754 dlil_grp_attributes
);
1756 /* Setup the lock attributes we will use */
1757 dlil_lck_attributes
= lck_attr_alloc_init();
1759 ifnet_lock_attr
= lck_attr_alloc_init();
1761 lck_rw_init(&ifnet_head_lock
, ifnet_head_lock_group
,
1762 dlil_lck_attributes
);
1763 lck_mtx_init(&dlil_ifnet_lock
, dlil_lock_group
, dlil_lck_attributes
);
1764 lck_mtx_init(&dlil_thread_sync_lock
, dlil_lock_group
, dlil_lck_attributes
);
1766 /* Setup interface flow control related items */
1767 lck_mtx_init(&ifnet_fc_lock
, dlil_lock_group
, dlil_lck_attributes
);
1769 ifnet_fc_zone_size
= sizeof(struct ifnet_fc_entry
);
1770 ifnet_fc_zone
= zinit(ifnet_fc_zone_size
,
1771 IFNET_FC_ZONE_MAX
* ifnet_fc_zone_size
, 0, IFNET_FC_ZONE_NAME
);
1772 if (ifnet_fc_zone
== NULL
) {
1773 panic_plain("%s: failed allocating %s", __func__
,
1774 IFNET_FC_ZONE_NAME
);
1777 zone_change(ifnet_fc_zone
, Z_EXPAND
, TRUE
);
1778 zone_change(ifnet_fc_zone
, Z_CALLERACCT
, FALSE
);
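	/*
	 * Worked example of the alignment padding used above (sizes are
	 * illustrative, not taken from a real build): with a 152-byte
	 * dlil_ifnet and 8-byte pointers, dlif_bufsize becomes
	 * P2ROUNDUP(152 + 8 + 8, 8) == 168.  The extra pointer-sized slot
	 * is presumably where the unaligned base address can be stashed
	 * just ahead of the 64-bit aligned structure handed to callers.
	 */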
	/* Initialize interface address subsystem */

	/* Initialize the packet filter */

	/* Initialize queue algorithms */

	/* Initialize packet schedulers */

	/* Initialize flow advisory subsystem */

	/* Initialize the pktap virtual interface */

	/* Initialize the service class to dscp map */

	/* Initialize the interface port list */
	if_ports_used_init();

	/* Initialize the interface low power mode event handler */
	if_low_power_evhdlr_init();

#if DEBUG || DEVELOPMENT
	/* Run self-tests */
	dlil_verify_sum16();
#endif /* DEBUG || DEVELOPMENT */

	/* Initialize link layer table */
	lltable_glbl_init();

	/*
	 * Create and start up the main DLIL input thread and the interface
	 * detacher threads once everything is initialized.
	 */
	dlil_incr_pending_thread_count();
	dlil_create_input_thread(NULL, dlil_main_input_thread);

	/*
	 * Create ifnet detacher thread.
	 * When an interface gets detached, part of the detach processing
	 * is delayed.  The interface is added to the delayed detach list
	 * and this thread is woken up to call ifnet_detach_final
	 * on these interfaces.
	 */
	dlil_incr_pending_thread_count();
	if (kernel_thread_start(ifnet_detacher_thread_func,
	    NULL, &thread) != KERN_SUCCESS) {
		panic_plain("%s: couldn't create detacher thread", __func__);
		/* NOTREACHED */
	}
	thread_deallocate(thread);

	/*
	 * Wait for the created dlil kernel threads to get scheduled
	 * and run at least once before we proceed.
	 */
	lck_mtx_lock(&dlil_thread_sync_lock);
	while (dlil_pending_thread_cnt != 0) {
		DLIL_PRINTF("%s: Waiting for all the created dlil kernel "
		    "threads to get scheduled at least once.\n", __func__);
		(void) msleep(&dlil_pending_thread_cnt, &dlil_thread_sync_lock,
		    (PZERO - 1), __func__, NULL);
		LCK_MTX_ASSERT(&dlil_thread_sync_lock, LCK_ASSERT_OWNED);
	}
	lck_mtx_unlock(&dlil_thread_sync_lock);
	DLIL_PRINTF("%s: All the created dlil kernel threads have been "
	    "scheduled at least once. Proceeding.\n", __func__);
}
static void
if_flt_monitor_busy(struct ifnet *ifp)
{
	LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);

	++ifp->if_flt_busy;
	VERIFY(ifp->if_flt_busy != 0);
}

static void
if_flt_monitor_unbusy(struct ifnet *ifp)
{
	if_flt_monitor_leave(ifp);
}

static void
if_flt_monitor_enter(struct ifnet *ifp)
{
	LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);

	while (ifp->if_flt_busy) {
		++ifp->if_flt_waiters;
		(void) msleep(&ifp->if_flt_head, &ifp->if_flt_lock,
		    (PZERO - 1), "if_flt_monitor", NULL);
	}
	if_flt_monitor_busy(ifp);
}

static void
if_flt_monitor_leave(struct ifnet *ifp)
{
	LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);

	VERIFY(ifp->if_flt_busy != 0);
	--ifp->if_flt_busy;

	if (ifp->if_flt_busy == 0 && ifp->if_flt_waiters > 0) {
		ifp->if_flt_waiters = 0;
		wakeup(&ifp->if_flt_head);
	}
}
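/*
 * Together, if_flt_monitor_enter()/if_flt_monitor_leave() implement a
 * simple monitor around ifp->if_flt_head: a thread walking or mutating
 * the filter list marks it busy, and any thread wanting exclusive access
 * sleeps on the list head until the busy count drops back to zero.
 */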
__private_extern__ int
dlil_attach_filter(struct ifnet *ifp, const struct iff_filter *if_filter,
    interface_filter_t *filter_ref, u_int32_t flags)
{
	int retval = 0;
	struct ifnet_filter *filter = NULL;

	ifnet_head_lock_shared();
	/* Check that the interface is in the global list */
	if (!ifnet_lookup(ifp)) {
		retval = ENXIO;
		goto done;
	}

	filter = zalloc(dlif_filt_zone);
	if (filter == NULL) {
		retval = ENOMEM;
		goto done;
	}
	bzero(filter, dlif_filt_size);

	/* refcnt held above during lookup */
	filter->filt_flags = flags;
	filter->filt_ifp = ifp;
	filter->filt_cookie = if_filter->iff_cookie;
	filter->filt_name = if_filter->iff_name;
	filter->filt_protocol = if_filter->iff_protocol;
	/*
	 * Do not install filter callbacks for internal coproc interface
	 */
	if (!IFNET_IS_INTCOPROC(ifp)) {
		filter->filt_input = if_filter->iff_input;
		filter->filt_output = if_filter->iff_output;
		filter->filt_event = if_filter->iff_event;
		filter->filt_ioctl = if_filter->iff_ioctl;
	}
	filter->filt_detached = if_filter->iff_detached;

	lck_mtx_lock(&ifp->if_flt_lock);
	if_flt_monitor_enter(ifp);

	LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
	TAILQ_INSERT_TAIL(&ifp->if_flt_head, filter, filt_next);

	if_flt_monitor_leave(ifp);
	lck_mtx_unlock(&ifp->if_flt_lock);

	*filter_ref = filter;

	/*
	 * Bump filter count and route_generation ID to let TCP
	 * know it shouldn't do TSO on this connection
	 */
	if ((filter->filt_flags & DLIL_IFF_TSO) == 0) {
		ifnet_filter_update_tso(TRUE);
	}
	OSIncrementAtomic64(&net_api_stats.nas_iflt_attach_count);
	INC_ATOMIC_INT64_LIM(net_api_stats.nas_iflt_attach_total);
	if ((filter->filt_flags & DLIL_IFF_INTERNAL)) {
		INC_ATOMIC_INT64_LIM(net_api_stats.nas_iflt_attach_os_total);
	}
	if (dlil_verbose) {
		DLIL_PRINTF("%s: %s filter attached\n", if_name(ifp),
		    if_filter->iff_name);
	}
done:
	ifnet_head_done();
	if (retval != 0 && ifp != NULL) {
		DLIL_PRINTF("%s: failed to attach %s (err=%d)\n",
		    if_name(ifp), if_filter->iff_name, retval);
	}
	if (retval != 0 && filter != NULL) {
		zfree(dlif_filt_zone, filter);
	}

	return retval;
}
static int
dlil_detach_filter_internal(interface_filter_t filter, int detached)
{
	int retval = 0;

	if (detached == 0) {
		ifnet_t ifp = NULL;

		ifnet_head_lock_shared();
		TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
			interface_filter_t entry = NULL;

			lck_mtx_lock(&ifp->if_flt_lock);
			TAILQ_FOREACH(entry, &ifp->if_flt_head, filt_next) {
				if (entry != filter || entry->filt_skip) {
					continue;
				}
				/*
				 * We've found a match; since it's possible
				 * that the thread gets blocked in the monitor,
				 * we do the lock dance.  Interface should
				 * not be detached since we still have a use
				 * count held during filter attach.
				 */
				entry->filt_skip = 1;	/* skip input/output */
				lck_mtx_unlock(&ifp->if_flt_lock);
				ifnet_head_done();

				lck_mtx_lock(&ifp->if_flt_lock);
				if_flt_monitor_enter(ifp);
				LCK_MTX_ASSERT(&ifp->if_flt_lock,
				    LCK_MTX_ASSERT_OWNED);

				/* Remove the filter from the list */
				TAILQ_REMOVE(&ifp->if_flt_head, filter,
				    filt_next);

				if_flt_monitor_leave(ifp);
				lck_mtx_unlock(&ifp->if_flt_lock);
				if (dlil_verbose) {
					DLIL_PRINTF("%s: %s filter detached\n",
					    if_name(ifp), filter->filt_name);
				}
				goto destroy;
			}
			lck_mtx_unlock(&ifp->if_flt_lock);
		}
		ifnet_head_done();

		/* filter parameter is not a valid filter ref */
		retval = EINVAL;
		goto done;
	}

	if (dlil_verbose) {
		DLIL_PRINTF("%s filter detached\n", filter->filt_name);
	}

destroy:

	/* Call the detached function if there is one */
	if (filter->filt_detached) {
		filter->filt_detached(filter->filt_cookie, filter->filt_ifp);
	}

	/*
	 * Decrease filter count and route_generation ID to let TCP
	 * know it should reevaluate doing TSO or not
	 */
	if ((filter->filt_flags & DLIL_IFF_TSO) == 0) {
		ifnet_filter_update_tso(FALSE);
	}

	VERIFY(OSDecrementAtomic64(&net_api_stats.nas_iflt_attach_count) > 0);

	/* Free the filter */
	zfree(dlif_filt_zone, filter);

done:
	if (retval != 0 && filter != NULL) {
		DLIL_PRINTF("failed to detach %s filter (err=%d)\n",
		    filter->filt_name, retval);
	}

	return retval;
}

__private_extern__ void
dlil_detach_filter(interface_filter_t filter)
{
	if (filter == NULL) {
		return;
	}
	dlil_detach_filter_internal(filter, 0);
}
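/*
 * Note that dlil_detach_filter() invokes the internal routine with
 * detached == 0, so the filter is first looked up on its interface's
 * list, marked with filt_skip to stop further input/output callbacks,
 * and only then removed; the iff_detached callback is the last thing
 * invoked before the zone memory is released.
 */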
__attribute__((noreturn))
static void
dlil_main_input_thread_func(void *v, wait_result_t w)
{
#pragma unused(w)
	struct dlil_threading_info *inp = v;

	VERIFY(inp == dlil_main_input_thread);
	VERIFY(inp->ifp == NULL);
	VERIFY(current_thread() == inp->input_thr);

	dlil_decr_pending_thread_count();
	lck_mtx_lock(&inp->input_lck);
	VERIFY(!(inp->input_waiting & DLIL_INPUT_RUNNING));
	(void) assert_wait(&inp->input_waiting, THREAD_UNINT);
	lck_mtx_unlock(&inp->input_lck);
	(void) thread_block_parameter(dlil_main_input_thread_cont, inp);
	/* NOTREACHED */
	__builtin_unreachable();
}
/*
 * Main input thread:
 *
 *   a) handles all inbound packets for lo0
 *   b) handles all inbound packets for interfaces with no dedicated
 *      input thread (e.g. anything but Ethernet/PDP or those that support
 *      opportunistic polling.)
 *   c) protocol registrations
 *   d) packet injections
 */
__attribute__((noreturn))
static void
dlil_main_input_thread_cont(void *v, wait_result_t wres)
{
	struct dlil_main_threading_info *inpm = v;
	struct dlil_threading_info *inp = v;

	/* main input thread is uninterruptible */
	VERIFY(wres != THREAD_INTERRUPTED);
	lck_mtx_lock_spin(&inp->input_lck);
	VERIFY(!(inp->input_waiting & (DLIL_INPUT_TERMINATE |
	    DLIL_INPUT_RUNNING)));
	inp->input_waiting |= DLIL_INPUT_RUNNING;

	while (1) {
		struct mbuf *m = NULL, *m_loop = NULL;
		u_int32_t m_cnt, m_cnt_loop;
		classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt);
		boolean_t proto_req;

		inp->input_waiting &= ~DLIL_INPUT_WAITING;

		proto_req = (inp->input_waiting &
		    (DLIL_PROTO_WAITING | DLIL_PROTO_REGISTER));

		/* Packets for non-dedicated interfaces other than lo0 */
		m_cnt = qlen(&inp->rcvq_pkts);
		_getq_all(&inp->rcvq_pkts, &pkt, NULL, NULL, NULL);
		m = pkt.cp_mbuf;

		/* Packets exclusive to lo0 */
		m_cnt_loop = qlen(&inpm->lo_rcvq_pkts);
		_getq_all(&inpm->lo_rcvq_pkts, &pkt, NULL, NULL, NULL);
		m_loop = pkt.cp_mbuf;

		inp->wtot = 0;

		lck_mtx_unlock(&inp->input_lck);

		/*
		 * NOTE warning %%% attention !!!!
		 * We should think about putting some thread starvation
		 * safeguards if we deal with long chains of packets.
		 */
		if (m_loop != NULL) {
			dlil_input_packet_list_extended(lo_ifp, m_loop,
			    m_cnt_loop, IFNET_MODEL_INPUT_POLL_OFF);
		}

		if (m != NULL) {
			dlil_input_packet_list_extended(NULL, m,
			    m_cnt, IFNET_MODEL_INPUT_POLL_OFF);
		}

		if (proto_req) {
			proto_input_run();
		}

		lck_mtx_lock_spin(&inp->input_lck);
		VERIFY(inp->input_waiting & DLIL_INPUT_RUNNING);
		/* main input thread cannot be terminated */
		VERIFY(!(inp->input_waiting & DLIL_INPUT_TERMINATE));
		if (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) {
			break;
		}
	}

	inp->input_waiting &= ~DLIL_INPUT_RUNNING;
	(void) assert_wait(&inp->input_waiting, THREAD_UNINT);
	lck_mtx_unlock(&inp->input_lck);
	(void) thread_block_parameter(dlil_main_input_thread_cont, inp);

	VERIFY(0);	/* we should never get here */
	/* NOTREACHED */
	__builtin_unreachable();
}
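/*
 * The assert_wait()/thread_block_parameter() pairing above is the
 * continuation pattern used by all DLIL worker threads: the thread never
 * returns from its entry function, it blocks with a continuation and is
 * restarted at the *_cont() routine on the next wakeup, which avoids
 * keeping a kernel stack pinned while the thread is idle.
 */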
/*
 * Input thread for interfaces with legacy input model.
 */
__attribute__((noreturn))
static void
dlil_input_thread_func(void *v, wait_result_t w)
{
#pragma unused(w)
	char thread_name[MAXTHREADNAMESIZE];
	struct dlil_threading_info *inp = v;
	struct ifnet *ifp = inp->ifp;

	VERIFY(inp != dlil_main_input_thread);
	VERIFY(ifp != NULL);
	VERIFY(!(ifp->if_eflags & IFEF_RXPOLL) || !net_rxpoll ||
	    !(ifp->if_xflags & IFXF_LEGACY));
	VERIFY(ifp->if_poll_mode == IFNET_MODEL_INPUT_POLL_OFF ||
	    !(ifp->if_xflags & IFXF_LEGACY));
	VERIFY(current_thread() == inp->input_thr);

	/* construct the name for this thread, and then apply it */
	bzero(thread_name, sizeof(thread_name));
	(void) snprintf(thread_name, sizeof(thread_name),
	    "dlil_input_%s", ifp->if_xname);
	thread_set_thread_name(inp->input_thr, thread_name);
	ifnet_decr_pending_thread_count(ifp);

	lck_mtx_lock(&inp->input_lck);
	VERIFY(!(inp->input_waiting & DLIL_INPUT_RUNNING));
	(void) assert_wait(&inp->input_waiting, THREAD_UNINT);
	lck_mtx_unlock(&inp->input_lck);
	(void) thread_block_parameter(dlil_input_thread_cont, inp);
	/* NOTREACHED */
	__builtin_unreachable();
}
__attribute__((noreturn))
static void
dlil_input_thread_cont(void *v, wait_result_t wres)
{
	struct dlil_threading_info *inp = v;
	struct ifnet *ifp = inp->ifp;

	lck_mtx_lock_spin(&inp->input_lck);
	if (__improbable(wres == THREAD_INTERRUPTED ||
	    (inp->input_waiting & DLIL_INPUT_TERMINATE))) {
		goto terminate;
	}

	VERIFY(!(inp->input_waiting & DLIL_INPUT_RUNNING));
	inp->input_waiting |= DLIL_INPUT_RUNNING;

	while (1) {
		struct mbuf *m = NULL;
		u_int32_t m_cnt;
		classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt);
		boolean_t notify = FALSE;

		inp->input_waiting &= ~DLIL_INPUT_WAITING;

		/*
		 * Protocol registration and injection must always use
		 * the main input thread; in theory the latter can utilize
		 * the corresponding input thread where the packet arrived
		 * on, but that requires our knowing the interface in advance
		 * (and the benefits might not be worth the trouble.)
		 */
		VERIFY(!(inp->input_waiting &
		    (DLIL_PROTO_WAITING | DLIL_PROTO_REGISTER)));

		/* Packets for this interface */
		m_cnt = qlen(&inp->rcvq_pkts);
		_getq_all(&inp->rcvq_pkts, &pkt, NULL, NULL, NULL);
		m = pkt.cp_mbuf;

		inp->wtot = 0;

		notify = dlil_input_stats_sync(ifp, inp);

		lck_mtx_unlock(&inp->input_lck);

		if (notify) {
			ifnet_notify_data_threshold(ifp);
		}

		/*
		 * NOTE warning %%% attention !!!!
		 * We should think about putting some thread starvation
		 * safeguards if we deal with long chains of packets.
		 */
		if (m != NULL) {
			dlil_input_packet_list_extended(NULL, m,
			    m_cnt, ifp->if_poll_mode);
		}

		lck_mtx_lock_spin(&inp->input_lck);
		VERIFY(inp->input_waiting & DLIL_INPUT_RUNNING);
		if (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) {
			break;
		}
	}

	inp->input_waiting &= ~DLIL_INPUT_RUNNING;

	if (__improbable(inp->input_waiting & DLIL_INPUT_TERMINATE)) {
terminate:
		lck_mtx_unlock(&inp->input_lck);
		dlil_terminate_input_thread(inp);
		/* NOTREACHED */
	} else {
		(void) assert_wait(&inp->input_waiting, THREAD_UNINT);
		lck_mtx_unlock(&inp->input_lck);
		(void) thread_block_parameter(dlil_input_thread_cont, inp);
		/* NOTREACHED */
	}

	VERIFY(0);	/* we should never get here */
	__builtin_unreachable();
}
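/*
 * Unlike the main input thread, per-interface input threads can be asked
 * to terminate: DLIL_INPUT_TERMINATE is checked both when the continuation
 * resumes and after each batch, and dlil_terminate_input_thread() is what
 * finally tears the thread down when its interface detaches.
 */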
/*
 * Input thread for interfaces with opportunistic polling input model.
 */
__attribute__((noreturn))
static void
dlil_rxpoll_input_thread_func(void *v, wait_result_t w)
{
#pragma unused(w)
	char thread_name[MAXTHREADNAMESIZE];
	struct dlil_threading_info *inp = v;
	struct ifnet *ifp = inp->ifp;

	VERIFY(inp != dlil_main_input_thread);
	VERIFY(ifp != NULL && (ifp->if_eflags & IFEF_RXPOLL) &&
	    (ifp->if_xflags & IFXF_LEGACY));
	VERIFY(current_thread() == inp->input_thr);

	/* construct the name for this thread, and then apply it */
	bzero(thread_name, sizeof(thread_name));
	(void) snprintf(thread_name, sizeof(thread_name),
	    "dlil_input_poll_%s", ifp->if_xname);
	thread_set_thread_name(inp->input_thr, thread_name);
	ifnet_decr_pending_thread_count(ifp);

	lck_mtx_lock(&inp->input_lck);
	VERIFY(!(inp->input_waiting & DLIL_INPUT_RUNNING));
	(void) assert_wait(&inp->input_waiting, THREAD_UNINT);
	lck_mtx_unlock(&inp->input_lck);
	(void) thread_block_parameter(dlil_rxpoll_input_thread_cont, inp);
	/* NOTREACHED */
	__builtin_unreachable();
}
2334 __attribute__((noreturn
))
2336 dlil_rxpoll_input_thread_cont(void *v
, wait_result_t wres
)
2338 struct dlil_threading_info
*inp
= v
;
2339 struct ifnet
*ifp
= inp
->ifp
;
2342 lck_mtx_lock_spin(&inp
->input_lck
);
2343 if (__improbable(wres
== THREAD_INTERRUPTED
||
2344 (inp
->input_waiting
& DLIL_INPUT_TERMINATE
))) {
2348 VERIFY(!(inp
->input_waiting
& DLIL_INPUT_RUNNING
));
2349 inp
->input_waiting
|= DLIL_INPUT_RUNNING
;
2352 struct mbuf
*m
= NULL
;
2353 u_int32_t m_cnt
, m_size
, poll_req
= 0;
2355 struct timespec now
, delta
;
2356 classq_pkt_t pkt
= CLASSQ_PKT_INITIALIZER(pkt
);
2360 inp
->input_waiting
&= ~DLIL_INPUT_WAITING
;
2362 if ((ival
= ifp
->if_rxpoll_ival
) < IF_RXPOLL_INTERVALTIME_MIN
) {
2363 ival
= IF_RXPOLL_INTERVALTIME_MIN
;
2366 /* Link parameters changed? */
2367 if (ifp
->if_poll_update
!= 0) {
2368 ifp
->if_poll_update
= 0;
2369 (void) dlil_rxpoll_set_params(ifp
, NULL
, TRUE
);
2372 /* Current operating mode */
2373 mode
= ifp
->if_poll_mode
;
2376 * Protocol registration and injection must always use
2377 * the main input thread; in theory the latter can utilize
2378 * the corresponding input thread where the packet arrived
2379 * on, but that requires our knowing the interface in advance
2380 * (and the benefits might not worth the trouble.)
2382 VERIFY(!(inp
->input_waiting
&
2383 (DLIL_PROTO_WAITING
| DLIL_PROTO_REGISTER
)));
2385 /* Total count of all packets */
2386 m_cnt
= qlen(&inp
->rcvq_pkts
);
2388 /* Total bytes of all packets */
2389 m_size
= qsize(&inp
->rcvq_pkts
);
2391 /* Packets for this interface */
2392 _getq_all(&inp
->rcvq_pkts
, &pkt
, NULL
, NULL
, NULL
);
2394 VERIFY(m
!= NULL
|| m_cnt
== 0);
2397 if (!net_timerisset(&ifp
->if_poll_sample_lasttime
)) {
2398 *(&ifp
->if_poll_sample_lasttime
) = *(&now
);
2401 net_timersub(&now
, &ifp
->if_poll_sample_lasttime
, &delta
);
2402 if (if_rxpoll
&& net_timerisset(&ifp
->if_poll_sample_holdtime
)) {
2403 u_int32_t ptot
, btot
;
2405 /* Accumulate statistics for current sampling */
2406 PKTCNTR_ADD(&ifp
->if_poll_sstats
, m_cnt
, m_size
);
2408 if (net_timercmp(&delta
, &ifp
->if_poll_sample_holdtime
, <)) {
2412 *(&ifp
->if_poll_sample_lasttime
) = *(&now
);
2414 /* Calculate min/max of inbound bytes */
2415 btot
= (u_int32_t
)ifp
->if_poll_sstats
.bytes
;
2416 if (ifp
->if_rxpoll_bmin
== 0 || ifp
->if_rxpoll_bmin
> btot
) {
2417 ifp
->if_rxpoll_bmin
= btot
;
2419 if (btot
> ifp
->if_rxpoll_bmax
) {
2420 ifp
->if_rxpoll_bmax
= btot
;
2423 /* Calculate EWMA of inbound bytes */
2424 DLIL_EWMA(ifp
->if_rxpoll_bavg
, btot
, if_rxpoll_decay
);
2426 /* Calculate min/max of inbound packets */
2427 ptot
= (u_int32_t
)ifp
->if_poll_sstats
.packets
;
2428 if (ifp
->if_rxpoll_pmin
== 0 || ifp
->if_rxpoll_pmin
> ptot
) {
2429 ifp
->if_rxpoll_pmin
= ptot
;
2431 if (ptot
> ifp
->if_rxpoll_pmax
) {
2432 ifp
->if_rxpoll_pmax
= ptot
;
2435 /* Calculate EWMA of inbound packets */
2436 DLIL_EWMA(ifp
->if_rxpoll_pavg
, ptot
, if_rxpoll_decay
);
2438 /* Reset sampling statistics */
2439 PKTCNTR_CLEAR(&ifp
->if_poll_sstats
);
2441 /* Calculate EWMA of wakeup requests */
2442 DLIL_EWMA(ifp
->if_rxpoll_wavg
, inp
->wtot
, if_rxpoll_decay
);
2446 if (!net_timerisset(&ifp
->if_poll_dbg_lasttime
)) {
2447 *(&ifp
->if_poll_dbg_lasttime
) = *(&now
);
2449 net_timersub(&now
, &ifp
->if_poll_dbg_lasttime
, &delta
);
2450 if (net_timercmp(&delta
, &dlil_dbgrate
, >=)) {
2451 *(&ifp
->if_poll_dbg_lasttime
) = *(&now
);
2452 DLIL_PRINTF("%s: [%s] pkts avg %d max %d "
2453 "limits [%d/%d], wreq avg %d "
2454 "limits [%d/%d], bytes avg %d "
2455 "limits [%d/%d]\n", if_name(ifp
),
2456 (ifp
->if_poll_mode
==
2457 IFNET_MODEL_INPUT_POLL_ON
) ?
2458 "ON" : "OFF", ifp
->if_rxpoll_pavg
,
2459 ifp
->if_rxpoll_pmax
,
2460 ifp
->if_rxpoll_plowat
,
2461 ifp
->if_rxpoll_phiwat
,
2462 ifp
->if_rxpoll_wavg
,
2463 ifp
->if_rxpoll_wlowat
,
2464 ifp
->if_rxpoll_whiwat
,
2465 ifp
->if_rxpoll_bavg
,
2466 ifp
->if_rxpoll_blowat
,
2467 ifp
->if_rxpoll_bhiwat
);
2471 /* Perform mode transition, if necessary */
2472 if (!net_timerisset(&ifp
->if_poll_mode_lasttime
)) {
2473 *(&ifp
->if_poll_mode_lasttime
) = *(&now
);
2476 net_timersub(&now
, &ifp
->if_poll_mode_lasttime
, &delta
);
2477 if (net_timercmp(&delta
, &ifp
->if_poll_mode_holdtime
, <)) {
2481 if (ifp
->if_rxpoll_pavg
<= ifp
->if_rxpoll_plowat
&&
2482 ifp
->if_rxpoll_bavg
<= ifp
->if_rxpoll_blowat
&&
2483 ifp
->if_poll_mode
!= IFNET_MODEL_INPUT_POLL_OFF
) {
2484 mode
= IFNET_MODEL_INPUT_POLL_OFF
;
2485 } else if (ifp
->if_rxpoll_pavg
>= ifp
->if_rxpoll_phiwat
&&
2486 (ifp
->if_rxpoll_bavg
>= ifp
->if_rxpoll_bhiwat
||
2487 ifp
->if_rxpoll_wavg
>= ifp
->if_rxpoll_whiwat
) &&
2488 ifp
->if_poll_mode
!= IFNET_MODEL_INPUT_POLL_ON
) {
2489 mode
= IFNET_MODEL_INPUT_POLL_ON
;
2492 if (mode
!= ifp
->if_poll_mode
) {
2493 ifp
->if_poll_mode
= mode
;
2494 *(&ifp
->if_poll_mode_lasttime
) = *(&now
);
2499 notify
= dlil_input_stats_sync(ifp
, inp
);
2501 lck_mtx_unlock(&inp
->input_lck
);
2504 ifnet_notify_data_threshold(ifp
);
2508 * If there's a mode change and interface is still attached,
2509 * perform a downcall to the driver for the new mode. Also
2510 * hold an IO refcnt on the interface to prevent it from
2511 * being detached (will be release below.)
2513 if (poll_req
!= 0 && ifnet_is_attached(ifp
, 1)) {
2514 struct ifnet_model_params p
= {
2515 .model
= mode
, .reserved
= { 0 }
2520 DLIL_PRINTF("%s: polling is now %s, "
2521 "pkts avg %d max %d limits [%d/%d], "
2522 "wreq avg %d limits [%d/%d], "
2523 "bytes avg %d limits [%d/%d]\n",
2525 (mode
== IFNET_MODEL_INPUT_POLL_ON
) ?
2526 "ON" : "OFF", ifp
->if_rxpoll_pavg
,
2527 ifp
->if_rxpoll_pmax
, ifp
->if_rxpoll_plowat
,
2528 ifp
->if_rxpoll_phiwat
, ifp
->if_rxpoll_wavg
,
2529 ifp
->if_rxpoll_wlowat
, ifp
->if_rxpoll_whiwat
,
2530 ifp
->if_rxpoll_bavg
, ifp
->if_rxpoll_blowat
,
2531 ifp
->if_rxpoll_bhiwat
);
2534 if ((err
= ((*ifp
->if_input_ctl
)(ifp
,
2535 IFNET_CTL_SET_INPUT_MODEL
, sizeof(p
), &p
))) != 0) {
2536 DLIL_PRINTF("%s: error setting polling mode "
2537 "to %s (%d)\n", if_name(ifp
),
2538 (mode
== IFNET_MODEL_INPUT_POLL_ON
) ?
2543 case IFNET_MODEL_INPUT_POLL_OFF
:
2544 ifnet_set_poll_cycle(ifp
, NULL
);
2545 ifp
->if_rxpoll_offreq
++;
2547 ifp
->if_rxpoll_offerr
++;
2551 case IFNET_MODEL_INPUT_POLL_ON
:
2552 net_nsectimer(&ival
, &ts
);
2553 ifnet_set_poll_cycle(ifp
, &ts
);
2555 ifp
->if_rxpoll_onreq
++;
2557 ifp
->if_rxpoll_onerr
++;
2566 /* Release the IO refcnt */
2567 ifnet_decr_iorefcnt(ifp
);
2571 * NOTE warning %%% attention !!!!
2572 * We should think about putting some thread starvation
2573 * safeguards if we deal with long chains of packets.
2576 dlil_input_packet_list_extended(NULL
, m
, m_cnt
, mode
);
2579 lck_mtx_lock_spin(&inp
->input_lck
);
2580 VERIFY(inp
->input_waiting
& DLIL_INPUT_RUNNING
);
2581 if (!(inp
->input_waiting
& ~DLIL_INPUT_RUNNING
)) {
2586 inp
->input_waiting
&= ~DLIL_INPUT_RUNNING
;
2588 if (__improbable(inp
->input_waiting
& DLIL_INPUT_TERMINATE
)) {
2590 lck_mtx_unlock(&inp
->input_lck
);
2591 dlil_terminate_input_thread(inp
);
2594 (void) assert_wait(&inp
->input_waiting
, THREAD_UNINT
);
2595 lck_mtx_unlock(&inp
->input_lck
);
2596 (void) thread_block_parameter(dlil_rxpoll_input_thread_cont
,
2601 VERIFY(0); /* we should never get here */
2603 __builtin_unreachable();
static int
dlil_rxpoll_validate_params(struct ifnet_poll_params *p)
{
	if (p != NULL) {
		if ((p->packets_lowat == 0 && p->packets_hiwat != 0) ||
		    (p->packets_lowat != 0 && p->packets_hiwat == 0)) {
			return EINVAL;
		}
		if (p->packets_lowat != 0 && /* hiwat must be non-zero */
		    p->packets_lowat >= p->packets_hiwat) {
			return EINVAL;
		}
		if ((p->bytes_lowat == 0 && p->bytes_hiwat != 0) ||
		    (p->bytes_lowat != 0 && p->bytes_hiwat == 0)) {
			return EINVAL;
		}
		if (p->bytes_lowat != 0 && /* hiwat must be non-zero */
		    p->bytes_lowat >= p->bytes_hiwat) {
			return EINVAL;
		}
		if (p->interval_time != 0 &&
		    p->interval_time < IF_RXPOLL_INTERVALTIME_MIN) {
			p->interval_time = IF_RXPOLL_INTERVALTIME_MIN;
		}
	}
	return 0;
}
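/*
 * In short: for both the packet and byte watermarks, the low and high
 * values must either both be zero (auto-tune) or both be non-zero with
 * lowat strictly below hiwat, and any non-zero polling interval is
 * clamped up to IF_RXPOLL_INTERVALTIME_MIN.
 */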
static void
dlil_rxpoll_update_params(struct ifnet *ifp, struct ifnet_poll_params *p)
{
	u_int64_t sample_holdtime, inbw;

	if ((inbw = ifnet_input_linkrate(ifp)) == 0 && p == NULL) {
		sample_holdtime = 0;	/* polling is disabled */
		ifp->if_rxpoll_wlowat = ifp->if_rxpoll_plowat =
		    ifp->if_rxpoll_blowat = 0;
		ifp->if_rxpoll_whiwat = ifp->if_rxpoll_phiwat =
		    ifp->if_rxpoll_bhiwat = (u_int32_t)-1;
		ifp->if_rxpoll_plim = 0;
		ifp->if_rxpoll_ival = IF_RXPOLL_INTERVALTIME_MIN;
	} else {
		u_int32_t plowat, phiwat, blowat, bhiwat, plim;
		u_int64_t ival;
		unsigned int n, i;

		for (n = 0, i = 0; rxpoll_tbl[i].speed != 0; i++) {
			if (inbw < rxpoll_tbl[i].speed) {
				break;
			}
			n = i;
		}

		/* auto-tune if caller didn't specify a value */
		plowat = ((p == NULL || p->packets_lowat == 0) ?
		    rxpoll_tbl[n].plowat : p->packets_lowat);
		phiwat = ((p == NULL || p->packets_hiwat == 0) ?
		    rxpoll_tbl[n].phiwat : p->packets_hiwat);
		blowat = ((p == NULL || p->bytes_lowat == 0) ?
		    rxpoll_tbl[n].blowat : p->bytes_lowat);
		bhiwat = ((p == NULL || p->bytes_hiwat == 0) ?
		    rxpoll_tbl[n].bhiwat : p->bytes_hiwat);
		plim = ((p == NULL || p->packets_limit == 0) ?
		    if_rxpoll_max : p->packets_limit);
		ival = ((p == NULL || p->interval_time == 0) ?
		    if_rxpoll_interval_time : p->interval_time);

		VERIFY(plowat != 0 && phiwat != 0);
		VERIFY(blowat != 0 && bhiwat != 0);
		VERIFY(ival >= IF_RXPOLL_INTERVALTIME_MIN);

		sample_holdtime = if_rxpoll_sample_holdtime;
		ifp->if_rxpoll_wlowat = if_sysctl_rxpoll_wlowat;
		ifp->if_rxpoll_whiwat = if_sysctl_rxpoll_whiwat;
		ifp->if_rxpoll_plowat = plowat;
		ifp->if_rxpoll_phiwat = phiwat;
		ifp->if_rxpoll_blowat = blowat;
		ifp->if_rxpoll_bhiwat = bhiwat;
		ifp->if_rxpoll_plim = plim;
		ifp->if_rxpoll_ival = ival;
	}

	net_nsectimer(&if_rxpoll_mode_holdtime, &ifp->if_poll_mode_holdtime);
	net_nsectimer(&sample_holdtime, &ifp->if_poll_sample_holdtime);

	if (dlil_verbose) {
		DLIL_PRINTF("%s: speed %llu bps, sample per %llu nsec, "
		    "poll interval %llu nsec, pkts per poll %u, "
		    "pkt limits [%u/%u], wreq limits [%u/%u], "
		    "bytes limits [%u/%u]\n", if_name(ifp),
		    inbw, sample_holdtime, ifp->if_rxpoll_ival,
		    ifp->if_rxpoll_plim, ifp->if_rxpoll_plowat,
		    ifp->if_rxpoll_phiwat, ifp->if_rxpoll_wlowat,
		    ifp->if_rxpoll_whiwat, ifp->if_rxpoll_blowat,
		    ifp->if_rxpoll_bhiwat);
	}
}
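/*
 * When the caller leaves a parameter at zero, dlil_rxpoll_update_params()
 * falls back to the rxpoll_tbl[] entry selected by the current input link
 * rate, so faster links get larger packet/byte watermarks, while the
 * sysctl-derived wakeup-request watermarks are applied as-is.
 */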
/*
 * Must be called on an attached ifnet (caller is expected to check.)
 * Caller may pass NULL for poll parameters to indicate "auto-tuning."
 */
errno_t
dlil_rxpoll_set_params(struct ifnet *ifp, struct ifnet_poll_params *p,
    boolean_t locked)
{
	errno_t err;
	struct dlil_threading_info *inp;

	VERIFY(ifp != NULL);
	if (!(ifp->if_eflags & IFEF_RXPOLL) || (inp = ifp->if_inp) == NULL) {
		return ENXIO;
	}
	err = dlil_rxpoll_validate_params(p);
	if (err != 0) {
		return err;
	}

	if (!locked) {
		lck_mtx_lock(&inp->input_lck);
	}
	LCK_MTX_ASSERT(&inp->input_lck, LCK_MTX_ASSERT_OWNED);
	/*
	 * Normally, we'd reset the parameters to the auto-tuned values
	 * if the input thread detects a change in link rate.  If the
	 * driver provides its own parameters right after a link rate
	 * changes, but before the input thread gets to run, we want to
	 * make sure to keep the driver's values.  Clearing if_poll_update
	 * will achieve that.
	 */
	if (p != NULL && !locked && ifp->if_poll_update != 0) {
		ifp->if_poll_update = 0;
	}
	dlil_rxpoll_update_params(ifp, p);
	if (!locked) {
		lck_mtx_unlock(&inp->input_lck);
	}
	return 0;
}
/*
 * Must be called on an attached ifnet (caller is expected to check.)
 */
errno_t
dlil_rxpoll_get_params(struct ifnet *ifp, struct ifnet_poll_params *p)
{
	struct dlil_threading_info *inp;

	VERIFY(ifp != NULL && p != NULL);
	if (!(ifp->if_eflags & IFEF_RXPOLL) || (inp = ifp->if_inp) == NULL) {
		return ENXIO;
	}

	bzero(p, sizeof(*p));

	lck_mtx_lock(&inp->input_lck);
	p->packets_limit = ifp->if_rxpoll_plim;
	p->packets_lowat = ifp->if_rxpoll_plowat;
	p->packets_hiwat = ifp->if_rxpoll_phiwat;
	p->bytes_lowat = ifp->if_rxpoll_blowat;
	p->bytes_hiwat = ifp->if_rxpoll_bhiwat;
	p->interval_time = ifp->if_rxpoll_ival;
	lck_mtx_unlock(&inp->input_lck);

	return 0;
}
errno_t
ifnet_input(struct ifnet *ifp, struct mbuf *m_head,
    const struct ifnet_stat_increment_param *s)
{
	return ifnet_input_common(ifp, m_head, NULL, s, FALSE, FALSE);
}

errno_t
ifnet_input_extended(struct ifnet *ifp, struct mbuf *m_head,
    struct mbuf *m_tail, const struct ifnet_stat_increment_param *s)
{
	return ifnet_input_common(ifp, m_head, m_tail, s, TRUE, FALSE);
}

static errno_t
ifnet_input_poll(struct ifnet *ifp, struct mbuf *m_head,
    struct mbuf *m_tail, const struct ifnet_stat_increment_param *s)
{
	return ifnet_input_common(ifp, m_head, m_tail, s,
	    (m_head != NULL), TRUE);
}
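/*
 * All three entry points above funnel into ifnet_input_common(): the plain
 * variant supplies no tail pointer and no driver statistics, the extended
 * variant requires both, and the poll variant is used by the opportunistic
 * polling path (a NULL chain simply reports "no packets" for this cycle).
 */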
2795 ifnet_input_common(struct ifnet
*ifp
, struct mbuf
*m_head
, struct mbuf
*m_tail
,
2796 const struct ifnet_stat_increment_param
*s
, boolean_t ext
, boolean_t poll
)
2798 dlil_input_func input_func
;
2799 struct ifnet_stat_increment_param _s
;
2800 u_int32_t m_cnt
= 0, m_size
= 0;
2804 if ((m_head
== NULL
&& !poll
) || (s
== NULL
&& ext
)) {
2805 if (m_head
!= NULL
) {
2806 mbuf_freem_list(m_head
);
2811 VERIFY(m_head
!= NULL
|| (s
== NULL
&& m_tail
== NULL
&& !ext
&& poll
));
2812 VERIFY(m_tail
== NULL
|| ext
);
2813 VERIFY(s
!= NULL
|| !ext
);
2816 * Drop the packet(s) if the parameters are invalid, or if the
2817 * interface is no longer attached; else hold an IO refcnt to
2818 * prevent it from being detached (will be released below.)
2820 if (ifp
== NULL
|| (ifp
!= lo_ifp
&& !ifnet_datamov_begin(ifp
))) {
2821 if (m_head
!= NULL
) {
2822 mbuf_freem_list(m_head
);
2827 input_func
= ifp
->if_input_dlil
;
2828 VERIFY(input_func
!= NULL
);
2830 if (m_tail
== NULL
) {
2832 while (m_head
!= NULL
) {
2833 #if IFNET_INPUT_SANITY_CHK
2834 if (dlil_input_sanity_check
!= 0) {
2835 DLIL_INPUT_CHECK(last
, ifp
);
2837 #endif /* IFNET_INPUT_SANITY_CHK */
2839 m_size
+= m_length(last
);
2840 if (mbuf_nextpkt(last
) == NULL
) {
2843 last
= mbuf_nextpkt(last
);
2847 #if IFNET_INPUT_SANITY_CHK
2848 if (dlil_input_sanity_check
!= 0) {
2851 DLIL_INPUT_CHECK(last
, ifp
);
2853 m_size
+= m_length(last
);
2854 if (mbuf_nextpkt(last
) == NULL
) {
2857 last
= mbuf_nextpkt(last
);
2860 m_cnt
= s
->packets_in
;
2861 m_size
= s
->bytes_in
;
2865 m_cnt
= s
->packets_in
;
2866 m_size
= s
->bytes_in
;
2868 #endif /* IFNET_INPUT_SANITY_CHK */
2871 if (last
!= m_tail
) {
2872 panic_plain("%s: invalid input packet chain for %s, "
2873 "tail mbuf %p instead of %p\n", __func__
, if_name(ifp
),
2878 * Assert packet count only for the extended variant, for backwards
2879 * compatibility, since this came directly from the device driver.
2880 * Relax this assertion for input bytes, as the driver may have
2881 * included the link-layer headers in the computation; hence
2882 * m_size is just an approximation.
2884 if (ext
&& s
->packets_in
!= m_cnt
) {
2885 panic_plain("%s: input packet count mismatch for %s, "
2886 "%d instead of %d\n", __func__
, if_name(ifp
),
2887 s
->packets_in
, m_cnt
);
2891 bzero(&_s
, sizeof(_s
));
2896 _s
.packets_in
= m_cnt
;
2897 _s
.bytes_in
= m_size
;
2899 err
= (*input_func
)(ifp
, m_head
, m_tail
, s
, poll
, current_thread());
2901 if (ifp
!= lo_ifp
) {
2902 /* Release the IO refcnt */
2903 ifnet_datamov_end(ifp
);
static errno_t
dlil_output_handler(struct ifnet *ifp, struct mbuf *m)
{
	return ifp->if_output(ifp, m);
}
static errno_t
dlil_input_handler(struct ifnet *ifp, struct mbuf *m_head,
    struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
    boolean_t poll, struct thread *tp)
{
	struct dlil_threading_info *inp;
	u_int32_t m_cnt = s->packets_in;
	u_int32_t m_size = s->bytes_in;
	boolean_t notify = FALSE;

	if ((inp = ifp->if_inp) == NULL) {
		inp = dlil_main_input_thread;
	}

	/*
	 * If there is a matching DLIL input thread associated with an
	 * affinity set, associate this thread with the same set.  We
	 * will only do this once.
	 */
	lck_mtx_lock_spin(&inp->input_lck);
	if (inp != dlil_main_input_thread && inp->net_affinity && tp != NULL &&
	    ((!poll && inp->wloop_thr == THREAD_NULL) ||
	    (poll && inp->poll_thr == THREAD_NULL))) {
		u_int32_t tag = inp->tag;

		if (poll) {
			VERIFY(inp->poll_thr == THREAD_NULL);
			inp->poll_thr = tp;
		} else {
			VERIFY(inp->wloop_thr == THREAD_NULL);
			inp->wloop_thr = tp;
		}
		lck_mtx_unlock(&inp->input_lck);

		/* Associate the current thread with the new affinity tag */
		(void) dlil_affinity_set(tp, tag);

		/*
		 * Take a reference on the current thread; during detach,
		 * we will need to refer to it in order to tear down its
		 * affinity.
		 */
		thread_reference(tp);
		lck_mtx_lock_spin(&inp->input_lck);
	}

	VERIFY(m_head != NULL || (m_tail == NULL && m_cnt == 0));

	/*
	 * Because of loopbacked multicast we cannot stuff the ifp in
	 * the rcvif of the packet header: loopback (lo0) packets use a
	 * dedicated list so that we can later associate them with lo_ifp
	 * on their way up the stack.  Packets for other interfaces without
	 * dedicated input threads go to the regular list.
	 */
	if (m_head != NULL) {
		classq_pkt_t head, tail;
		CLASSQ_PKT_INIT_MBUF(&head, m_head);
		CLASSQ_PKT_INIT_MBUF(&tail, m_tail);
		if (inp == dlil_main_input_thread && ifp == lo_ifp) {
			struct dlil_main_threading_info *inpm =
			    (struct dlil_main_threading_info *)inp;
			_addq_multi(&inpm->lo_rcvq_pkts, &head, &tail,
			    m_cnt, m_size);
		} else {
			_addq_multi(&inp->rcvq_pkts, &head, &tail,
			    m_cnt, m_size);
		}
	}

#if IFNET_INPUT_SANITY_CHK
	if (dlil_input_sanity_check != 0) {
		u_int32_t count;
		struct mbuf *m0;

		for (m0 = m_head, count = 0; m0; m0 = mbuf_nextpkt(m0)) {
			count++;
		}

		if (count != m_cnt) {
			panic_plain("%s: invalid packet count %d "
			    "(expected %d)\n", if_name(ifp),
			    count, m_cnt);
			/* NOTREACHED */
		}

		inp->input_mbuf_cnt += m_cnt;
	}
#endif /* IFNET_INPUT_SANITY_CHK */

	dlil_input_stats_add(s, inp, ifp, poll);
	/*
	 * If we're using the main input thread, synchronize the
	 * stats now since we have the interface context.  All
	 * other cases involving dedicated input threads will
	 * have their stats synchronized there.
	 */
	if (inp == dlil_main_input_thread) {
		notify = dlil_input_stats_sync(ifp, inp);
	}

	inp->input_waiting |= DLIL_INPUT_WAITING;
	if (!(inp->input_waiting & DLIL_INPUT_RUNNING)) {
		inp->wtot++;
		wakeup_one((caddr_t)&inp->input_waiting);
	}
	lck_mtx_unlock(&inp->input_lck);

	if (notify) {
		ifnet_notify_data_threshold(ifp);
	}

	return 0;
}
static void
ifnet_start_common(struct ifnet *ifp, boolean_t resetfc)
{
	if (!(ifp->if_eflags & IFEF_TXSTART)) {
		return;
	}
	/*
	 * If the starter thread is inactive, signal it to do work,
	 * unless the interface is being flow controlled from below,
	 * e.g. a virtual interface being flow controlled by a real
	 * network interface beneath it, or it's been disabled via
	 * a call to ifnet_disable_output().
	 */
	lck_mtx_lock_spin(&ifp->if_start_lock);
	if (resetfc) {
		ifp->if_start_flags &= ~IFSF_FLOW_CONTROLLED;
	} else if (ifp->if_start_flags & IFSF_FLOW_CONTROLLED) {
		lck_mtx_unlock(&ifp->if_start_lock);
		return;
	}
	ifp->if_start_req++;
	if (!ifp->if_start_active && ifp->if_start_thread != THREAD_NULL &&
	    (resetfc || !(ifp->if_eflags & IFEF_ENQUEUE_MULTI) ||
	    IFCQ_LEN(&ifp->if_snd) >= ifp->if_start_delay_qlen ||
	    ifp->if_start_delayed == 0)) {
		(void) thread_wakeup_thread((caddr_t)&ifp->if_start_thread,
		    ifp->if_start_thread);
	}
	lck_mtx_unlock(&ifp->if_start_lock);
}

void
ifnet_start(struct ifnet *ifp)
{
	ifnet_start_common(ifp, FALSE);
}
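/*
 * ifnet_start() is the driver-visible kick: it funnels into
 * ifnet_start_common() without resetting flow control, so a transmit that
 * raced with IFSF_FLOW_CONTROLLED stays parked until the lower layer
 * re-enables output.
 */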
3069 __attribute__((noreturn
))
3071 ifnet_start_thread_func(void *v
, wait_result_t w
)
3074 struct ifnet
*ifp
= v
;
3075 char thread_name
[MAXTHREADNAMESIZE
];
3077 /* Construct the name for this thread, and then apply it. */
3078 bzero(thread_name
, sizeof(thread_name
));
3079 (void) snprintf(thread_name
, sizeof(thread_name
),
3080 "ifnet_start_%s", ifp
->if_xname
);
3081 ASSERT(ifp
->if_start_thread
== current_thread());
3082 thread_set_thread_name(current_thread(), thread_name
);
3085 * Treat the dedicated starter thread for lo0 as equivalent to
3086 * the driver workloop thread; if net_affinity is enabled for
3087 * the main input thread, associate this starter thread to it
3088 * by binding them with the same affinity tag. This is done
3089 * only once (as we only have one lo_ifp which never goes away.)
3091 if (ifp
== lo_ifp
) {
3092 struct dlil_threading_info
*inp
= dlil_main_input_thread
;
3093 struct thread
*tp
= current_thread();
3095 lck_mtx_lock(&inp
->input_lck
);
3096 if (inp
->net_affinity
) {
3097 u_int32_t tag
= inp
->tag
;
3099 VERIFY(inp
->wloop_thr
== THREAD_NULL
);
3100 VERIFY(inp
->poll_thr
== THREAD_NULL
);
3101 inp
->wloop_thr
= tp
;
3102 lck_mtx_unlock(&inp
->input_lck
);
3104 /* Associate this thread with the affinity tag */
3105 (void) dlil_affinity_set(tp
, tag
);
3107 lck_mtx_unlock(&inp
->input_lck
);
3110 ifnet_decr_pending_thread_count(ifp
);
3112 lck_mtx_lock(&ifp
->if_start_lock
);
3113 VERIFY(!ifp
->if_start_active
);
3114 (void) assert_wait(&ifp
->if_start_thread
, THREAD_UNINT
);
3115 lck_mtx_unlock(&ifp
->if_start_lock
);
3116 (void) thread_block_parameter(ifnet_start_thread_cont
, ifp
);
3118 __builtin_unreachable();
3121 __attribute__((noreturn
))
3123 ifnet_start_thread_cont(void *v
, wait_result_t wres
)
3125 struct ifnet
*ifp
= v
;
3126 struct ifclassq
*ifq
= &ifp
->if_snd
;
3128 lck_mtx_lock(&ifp
->if_start_lock
);
3129 if (__improbable(wres
== THREAD_INTERRUPTED
||
3130 ifp
->if_start_thread
== THREAD_NULL
)) {
3134 ifp
->if_start_active
= 1;
3137 * Keep on servicing until no more request.
3140 u_int32_t req
= ifp
->if_start_req
;
3141 if (!IFCQ_IS_EMPTY(ifq
) &&
3142 (ifp
->if_eflags
& IFEF_ENQUEUE_MULTI
) &&
3143 ifp
->if_start_delayed
== 0 &&
3144 IFCQ_LEN(ifq
) < ifp
->if_start_delay_qlen
&&
3145 (ifp
->if_eflags
& IFEF_DELAY_START
)) {
3146 ifp
->if_start_delayed
= 1;
3147 ifnet_start_delayed
++;
3150 ifp
->if_start_delayed
= 0;
3152 lck_mtx_unlock(&ifp
->if_start_lock
);
3155 * If no longer attached, don't call start because ifp
3156 * is being destroyed; else hold an IO refcnt to
3157 * prevent the interface from being detached (will be
3160 if (!ifnet_datamov_begin(ifp
)) {
3161 lck_mtx_lock_spin(&ifp
->if_start_lock
);
3165 /* invoke the driver's start routine */
3166 ((*ifp
->if_start
)(ifp
));
3169 * Release the io ref count taken above.
3171 ifnet_datamov_end(ifp
);
3173 lck_mtx_lock_spin(&ifp
->if_start_lock
);
3176 * If there's no pending request or if the
3177 * interface has been disabled, we're done.
3179 if (req
== ifp
->if_start_req
||
3180 (ifp
->if_start_flags
& IFSF_FLOW_CONTROLLED
)) {
3185 ifp
->if_start_req
= 0;
3186 ifp
->if_start_active
= 0;
3189 if (__probable(ifp
->if_start_thread
!= THREAD_NULL
)) {
3190 uint64_t deadline
= TIMEOUT_WAIT_FOREVER
;
3191 struct timespec delay_start_ts
;
3192 struct timespec
*ts
;
3195 * Wakeup N ns from now if rate-controlled by TBR, and if
3196 * there are still packets in the send queue which haven't
3197 * been dequeued so far; else sleep indefinitely (ts = NULL)
3198 * until ifnet_start() is called again.
3200 ts
= ((IFCQ_TBR_IS_ENABLED(ifq
) && !IFCQ_IS_EMPTY(ifq
)) ?
3201 &ifp
->if_start_cycle
: NULL
);
3203 if (ts
== NULL
&& ifp
->if_start_delayed
== 1) {
3204 delay_start_ts
.tv_sec
= 0;
3205 delay_start_ts
.tv_nsec
= ifp
->if_start_delay_timeout
;
3206 ts
= &delay_start_ts
;
3209 if (ts
!= NULL
&& ts
->tv_sec
== 0 && ts
->tv_nsec
== 0) {
3213 if (__improbable(ts
!= NULL
)) {
3214 clock_interval_to_deadline((ts
->tv_nsec
+
3215 (ts
->tv_sec
* NSEC_PER_SEC
)), 1, &deadline
);
3218 (void) assert_wait_deadline(&ifp
->if_start_thread
,
3219 THREAD_UNINT
, deadline
);
3220 lck_mtx_unlock(&ifp
->if_start_lock
);
3221 (void) thread_block_parameter(ifnet_start_thread_cont
, ifp
);
3225 /* interface is detached? */
3226 ifnet_set_start_cycle(ifp
, NULL
);
3227 lck_mtx_unlock(&ifp
->if_start_lock
);
3231 DLIL_PRINTF("%s: starter thread terminated\n",
3235 /* for the extra refcnt from kernel_thread_start() */
3236 thread_deallocate(current_thread());
3237 /* this is the end */
3238 thread_terminate(current_thread());
3242 /* must never get here */
3245 __builtin_unreachable();
void
ifnet_set_start_cycle(struct ifnet *ifp, struct timespec *ts)
{
	if (ts == NULL) {
		bzero(&ifp->if_start_cycle, sizeof(ifp->if_start_cycle));
	} else {
		*(&ifp->if_start_cycle) = *ts;
	}

	if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose) {
		DLIL_PRINTF("%s: restart interval set to %lu nsec\n",
		    if_name(ifp), ts->tv_nsec);
	}
}
static void
ifnet_poll(struct ifnet *ifp)
{
	/*
	 * If the poller thread is inactive, signal it to do work.
	 */
	lck_mtx_lock_spin(&ifp->if_poll_lock);
	ifp->if_poll_req++;
	if (!(ifp->if_poll_flags & IF_POLLF_RUNNING) &&
	    ifp->if_poll_thread != THREAD_NULL) {
		wakeup_one((caddr_t)&ifp->if_poll_thread);
	}
	lck_mtx_unlock(&ifp->if_poll_lock);
}
3278 __attribute__((noreturn
))
3280 ifnet_poll_thread_func(void *v
, wait_result_t w
)
3283 char thread_name
[MAXTHREADNAMESIZE
];
3284 struct ifnet
*ifp
= v
;
3286 VERIFY(ifp
->if_eflags
& IFEF_RXPOLL
);
3287 VERIFY(current_thread() == ifp
->if_poll_thread
);
3289 /* construct the name for this thread, and then apply it */
3290 bzero(thread_name
, sizeof(thread_name
));
3291 (void) snprintf(thread_name
, sizeof(thread_name
),
3292 "ifnet_poller_%s", ifp
->if_xname
);
3293 thread_set_thread_name(ifp
->if_poll_thread
, thread_name
);
3294 ifnet_decr_pending_thread_count(ifp
);
3296 lck_mtx_lock(&ifp
->if_poll_lock
);
3297 (void) assert_wait(&ifp
->if_poll_thread
, THREAD_UNINT
);
3298 lck_mtx_unlock(&ifp
->if_poll_lock
);
3299 (void) thread_block_parameter(ifnet_poll_thread_cont
, ifp
);
3301 __builtin_unreachable();
3304 __attribute__((noreturn
))
3306 ifnet_poll_thread_cont(void *v
, wait_result_t wres
)
3308 struct dlil_threading_info
*inp
;
3309 struct ifnet
*ifp
= v
;
3310 struct ifnet_stat_increment_param s
;
3311 struct timespec start_time
;
3313 VERIFY(ifp
->if_eflags
& IFEF_RXPOLL
);
3315 bzero(&s
, sizeof(s
));
3316 net_timerclear(&start_time
);
3318 lck_mtx_lock_spin(&ifp
->if_poll_lock
);
3319 if (__improbable(wres
== THREAD_INTERRUPTED
||
3320 ifp
->if_poll_thread
== THREAD_NULL
)) {
3325 VERIFY(inp
!= NULL
);
3327 ifp
->if_poll_flags
|= IF_POLLF_RUNNING
;
3330 * Keep on servicing until no more request.
3333 struct mbuf
*m_head
, *m_tail
;
3334 u_int32_t m_lim
, m_cnt
, m_totlen
;
3335 u_int16_t req
= ifp
->if_poll_req
;
3337 m_lim
= (ifp
->if_rxpoll_plim
!= 0) ? ifp
->if_rxpoll_plim
:
3338 MAX((qlimit(&inp
->rcvq_pkts
)), (ifp
->if_rxpoll_phiwat
<< 2));
3339 lck_mtx_unlock(&ifp
->if_poll_lock
);
3342 * If no longer attached, there's nothing to do;
3343 * else hold an IO refcnt to prevent the interface
3344 * from being detached (will be released below.)
3346 if (!ifnet_is_attached(ifp
, 1)) {
3347 lck_mtx_lock_spin(&ifp
->if_poll_lock
);
3351 if (dlil_verbose
> 1) {
3352 DLIL_PRINTF("%s: polling up to %d pkts, "
3353 "pkts avg %d max %d, wreq avg %d, "
3355 if_name(ifp
), m_lim
,
3356 ifp
->if_rxpoll_pavg
, ifp
->if_rxpoll_pmax
,
3357 ifp
->if_rxpoll_wavg
, ifp
->if_rxpoll_bavg
);
3360 /* invoke the driver's input poll routine */
3361 ((*ifp
->if_input_poll
)(ifp
, 0, m_lim
, &m_head
, &m_tail
,
3362 &m_cnt
, &m_totlen
));
3364 if (m_head
!= NULL
) {
3365 VERIFY(m_tail
!= NULL
&& m_cnt
> 0);
3367 if (dlil_verbose
> 1) {
3368 DLIL_PRINTF("%s: polled %d pkts, "
3369 "pkts avg %d max %d, wreq avg %d, "
3371 if_name(ifp
), m_cnt
,
3372 ifp
->if_rxpoll_pavg
, ifp
->if_rxpoll_pmax
,
3373 ifp
->if_rxpoll_wavg
, ifp
->if_rxpoll_bavg
);
3376 /* stats are required for extended variant */
3377 s
.packets_in
= m_cnt
;
3378 s
.bytes_in
= m_totlen
;
3380 (void) ifnet_input_common(ifp
, m_head
, m_tail
,
3383 if (dlil_verbose
> 1) {
3384 DLIL_PRINTF("%s: no packets, "
3385 "pkts avg %d max %d, wreq avg %d, "
3387 if_name(ifp
), ifp
->if_rxpoll_pavg
,
3388 ifp
->if_rxpoll_pmax
, ifp
->if_rxpoll_wavg
,
3389 ifp
->if_rxpoll_bavg
);
3392 (void) ifnet_input_common(ifp
, NULL
, NULL
,
3396 /* Release the io ref count */
3397 ifnet_decr_iorefcnt(ifp
);
3399 lck_mtx_lock_spin(&ifp
->if_poll_lock
);
3401 /* if there's no pending request, we're done */
3402 if (req
== ifp
->if_poll_req
||
3403 ifp
->if_poll_thread
== THREAD_NULL
) {
3408 ifp
->if_poll_req
= 0;
3409 ifp
->if_poll_flags
&= ~IF_POLLF_RUNNING
;
3411 if (ifp
->if_poll_thread
!= THREAD_NULL
) {
3412 uint64_t deadline
= TIMEOUT_WAIT_FOREVER
;
3413 struct timespec
*ts
;
3416 * Wakeup N ns from now, else sleep indefinitely (ts = NULL)
3417 * until ifnet_poll() is called again.
3419 ts
= &ifp
->if_poll_cycle
;
3420 if (ts
->tv_sec
== 0 && ts
->tv_nsec
== 0) {
3425 clock_interval_to_deadline((ts
->tv_nsec
+
3426 (ts
->tv_sec
* NSEC_PER_SEC
)), 1, &deadline
);
3429 (void) assert_wait_deadline(&ifp
->if_poll_thread
,
3430 THREAD_UNINT
, deadline
);
3431 lck_mtx_unlock(&ifp
->if_poll_lock
);
3432 (void) thread_block_parameter(ifnet_poll_thread_cont
, ifp
);
3436 /* interface is detached (maybe while asleep)? */
3437 ifnet_set_poll_cycle(ifp
, NULL
);
3438 lck_mtx_unlock(&ifp
->if_poll_lock
);
3441 DLIL_PRINTF("%s: poller thread terminated\n",
3445 /* for the extra refcnt from kernel_thread_start() */
3446 thread_deallocate(current_thread());
3447 /* this is the end */
3448 thread_terminate(current_thread());
3452 /* must never get here */
3455 __builtin_unreachable();
void
ifnet_set_poll_cycle(struct ifnet *ifp, struct timespec *ts)
{
	if (ts == NULL) {
		bzero(&ifp->if_poll_cycle, sizeof(ifp->if_poll_cycle));
	} else {
		*(&ifp->if_poll_cycle) = *ts;
	}

	if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose) {
		DLIL_PRINTF("%s: poll interval set to %lu nsec\n",
		    if_name(ifp), ts->tv_nsec);
	}
}
3474 ifnet_purge(struct ifnet
*ifp
)
3476 if (ifp
!= NULL
&& (ifp
->if_eflags
& IFEF_TXSTART
)) {
3482 ifnet_update_sndq(struct ifclassq
*ifq
, cqev_t ev
)
3484 IFCQ_LOCK_ASSERT_HELD(ifq
);
3486 if (!(IFCQ_IS_READY(ifq
))) {
3490 if (IFCQ_TBR_IS_ENABLED(ifq
)) {
3491 struct tb_profile tb
= {
3492 .rate
= ifq
->ifcq_tbr
.tbr_rate_raw
,
3493 .percent
= ifq
->ifcq_tbr
.tbr_percent
, .depth
= 0
3495 (void) ifclassq_tbr_set(ifq
, &tb
, FALSE
);
3498 ifclassq_update(ifq
, ev
);
3502 ifnet_update_rcv(struct ifnet
*ifp
, cqev_t ev
)
3505 case CLASSQ_EV_LINK_BANDWIDTH
:
3506 if (net_rxpoll
&& (ifp
->if_eflags
& IFEF_RXPOLL
)) {
3507 ifp
->if_poll_update
++;
3517 ifnet_set_output_sched_model(struct ifnet
*ifp
, u_int32_t model
)
3519 struct ifclassq
*ifq
;
3523 if (ifp
== NULL
|| model
>= IFNET_SCHED_MODEL_MAX
) {
3525 } else if (!(ifp
->if_eflags
& IFEF_TXSTART
)) {
3531 omodel
= ifp
->if_output_sched_model
;
3532 ifp
->if_output_sched_model
= model
;
3533 if ((err
= ifclassq_pktsched_setup(ifq
)) != 0) {
3534 ifp
->if_output_sched_model
= omodel
;
3542 ifnet_set_sndq_maxlen(struct ifnet
*ifp
, u_int32_t maxqlen
)
3546 } else if (!(ifp
->if_eflags
& IFEF_TXSTART
)) {
3550 ifclassq_set_maxlen(&ifp
->if_snd
, maxqlen
);
3556 ifnet_get_sndq_maxlen(struct ifnet
*ifp
, u_int32_t
*maxqlen
)
3558 if (ifp
== NULL
|| maxqlen
== NULL
) {
3560 } else if (!(ifp
->if_eflags
& IFEF_TXSTART
)) {
3564 *maxqlen
= ifclassq_get_maxlen(&ifp
->if_snd
);
3570 ifnet_get_sndq_len(struct ifnet
*ifp
, u_int32_t
*pkts
)
3574 if (ifp
== NULL
|| pkts
== NULL
) {
3576 } else if (!(ifp
->if_eflags
& IFEF_TXSTART
)) {
3579 err
= ifclassq_get_len(&ifp
->if_snd
, MBUF_SC_UNSPEC
,
3587 ifnet_get_service_class_sndq_len(struct ifnet
*ifp
, mbuf_svc_class_t sc
,
3588 u_int32_t
*pkts
, u_int32_t
*bytes
)
3592 if (ifp
== NULL
|| !MBUF_VALID_SC(sc
) ||
3593 (pkts
== NULL
&& bytes
== NULL
)) {
3595 } else if (!(ifp
->if_eflags
& IFEF_TXSTART
)) {
3598 err
= ifclassq_get_len(&ifp
->if_snd
, sc
, pkts
, bytes
);
3605 ifnet_set_rcvq_maxlen(struct ifnet
*ifp
, u_int32_t maxqlen
)
3607 struct dlil_threading_info
*inp
;
3611 } else if (!(ifp
->if_eflags
& IFEF_RXPOLL
) || ifp
->if_inp
== NULL
) {
3616 maxqlen
= if_rcvq_maxlen
;
3617 } else if (maxqlen
< IF_RCVQ_MINLEN
) {
3618 maxqlen
= IF_RCVQ_MINLEN
;
3622 lck_mtx_lock(&inp
->input_lck
);
3623 qlimit(&inp
->rcvq_pkts
) = maxqlen
;
3624 lck_mtx_unlock(&inp
->input_lck
);
3630 ifnet_get_rcvq_maxlen(struct ifnet
*ifp
, u_int32_t
*maxqlen
)
3632 struct dlil_threading_info
*inp
;
3634 if (ifp
== NULL
|| maxqlen
== NULL
) {
3636 } else if (!(ifp
->if_eflags
& IFEF_RXPOLL
) || ifp
->if_inp
== NULL
) {
3641 lck_mtx_lock(&inp
->input_lck
);
3642 *maxqlen
= qlimit(&inp
->rcvq_pkts
);
3643 lck_mtx_unlock(&inp
->input_lck
);
void
ifnet_enqueue_multi_setup(struct ifnet *ifp, uint16_t delay_qlen,
    uint16_t delay_timeout)
{
	if (delay_qlen > 0 && delay_timeout > 0) {
		ifp->if_eflags |= IFEF_ENQUEUE_MULTI;
		ifp->if_start_delay_qlen = min(100, delay_qlen);
		ifp->if_start_delay_timeout = min(20000, delay_timeout);
		/* convert timeout to nanoseconds */
		ifp->if_start_delay_timeout *= 1000;
		kprintf("%s: forced IFEF_ENQUEUE_MULTI qlen %u timeout %u\n",
		    ifp->if_xname, (uint32_t)delay_qlen,
		    (uint32_t)delay_timeout);
	} else {
		ifp->if_eflags &= ~IFEF_ENQUEUE_MULTI;
	}
}
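/*
 * Example (values purely illustrative): a driver passing delay_qlen = 16
 * and delay_timeout = 500 gets IFEF_ENQUEUE_MULTI with a 16-packet
 * threshold and a 500 usec timeout stored as 500000 nsec; requests above
 * the clamps are capped at 100 packets and 20000 usec.
 */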
3666 * This function clears the DSCP bits in the IPV4/V6 header pointed to by buf.
3667 * While it's ok for buf to be not 32 bit aligned, the caller must ensure that
3668 * buf holds the full header.
3670 static __attribute__((noinline
)) void
3671 ifnet_mcast_clear_dscp(uint8_t *buf
, uint8_t ip_ver
)
3674 struct ip6_hdr
*ip6
;
3675 uint8_t lbuf
[64] __attribute__((aligned(8)));
3678 if (ip_ver
== IPVERSION
) {
3682 if (__improbable(!IP_HDR_ALIGNED_P(p
))) {
3683 DTRACE_IP1(not__aligned__v4
, uint8_t *, buf
);
3684 bcopy(buf
, lbuf
, sizeof(struct ip
));
3687 ip
= (struct ip
*)(void *)p
;
3688 if (__probable((ip
->ip_tos
& ~IPTOS_ECN_MASK
) == 0)) {
3692 DTRACE_IP1(clear__v4
, struct ip
*, ip
);
3693 old_tos
= ip
->ip_tos
;
3694 ip
->ip_tos
&= IPTOS_ECN_MASK
;
3695 sum
= ip
->ip_sum
+ htons(old_tos
) - htons(ip
->ip_tos
);
3696 sum
= (sum
>> 16) + (sum
& 0xffff);
3697 ip
->ip_sum
= (uint16_t)(sum
& 0xffff);
3699 if (__improbable(p
== lbuf
)) {
3700 bcopy(lbuf
, buf
, sizeof(struct ip
));
3704 ASSERT(ip_ver
== IPV6_VERSION
);
3706 if (__improbable(!IP_HDR_ALIGNED_P(p
))) {
3707 DTRACE_IP1(not__aligned__v6
, uint8_t *, buf
);
3708 bcopy(buf
, lbuf
, sizeof(struct ip6_hdr
));
3711 ip6
= (struct ip6_hdr
*)(void *)p
;
3712 flow
= ntohl(ip6
->ip6_flow
);
3713 if (__probable((flow
& IP6FLOW_DSCP_MASK
) == 0)) {
3717 DTRACE_IP1(clear__v6
, struct ip6_hdr
*, ip6
);
3718 ip6
->ip6_flow
= htonl(flow
& ~IP6FLOW_DSCP_MASK
);
3720 if (__improbable(p
== lbuf
)) {
3721 bcopy(lbuf
, buf
, sizeof(struct ip6_hdr
));
3726 static inline errno_t
3727 ifnet_enqueue_ifclassq(struct ifnet
*ifp
, classq_pkt_t
*p
, boolean_t flush
,
3730 volatile uint64_t *fg_ts
= NULL
;
3731 volatile uint64_t *rt_ts
= NULL
;
3732 struct timespec now
;
3733 u_int64_t now_nsec
= 0;
3735 uint8_t *mcast_buf
= NULL
;
3738 ASSERT(ifp
->if_eflags
& IFEF_TXSTART
);
3741 * If packet already carries a timestamp, either from dlil_output()
3742 * or from flowswitch, use it here. Otherwise, record timestamp.
3743 * PKTF_TS_VALID is always cleared prior to entering classq, i.e.
3744 * the timestamp value is used internally there.
3746 switch (p
->cp_ptype
) {
3748 ASSERT(p
->cp_mbuf
->m_flags
& M_PKTHDR
);
3749 ASSERT(p
->cp_mbuf
->m_nextpkt
== NULL
);
3751 if (!(p
->cp_mbuf
->m_pkthdr
.pkt_flags
& PKTF_TS_VALID
) ||
3752 p
->cp_mbuf
->m_pkthdr
.pkt_timestamp
== 0) {
3754 net_timernsec(&now
, &now_nsec
);
3755 p
->cp_mbuf
->m_pkthdr
.pkt_timestamp
= now_nsec
;
3757 p
->cp_mbuf
->m_pkthdr
.pkt_flags
&= ~PKTF_TS_VALID
;
3759 * If the packet service class is not background,
3760 * update the timestamp to indicate recent activity
3761 * on a foreground socket.
3763 if ((p
->cp_mbuf
->m_pkthdr
.pkt_flags
& PKTF_FLOW_ID
) &&
3764 p
->cp_mbuf
->m_pkthdr
.pkt_flowsrc
== FLOWSRC_INPCB
) {
3765 if (!(p
->cp_mbuf
->m_pkthdr
.pkt_flags
&
3766 PKTF_SO_BACKGROUND
)) {
3767 ifp
->if_fg_sendts
= _net_uptime
;
3768 if (fg_ts
!= NULL
) {
3769 *fg_ts
= _net_uptime
;
3772 if (p
->cp_mbuf
->m_pkthdr
.pkt_flags
& PKTF_SO_REALTIME
) {
3773 ifp
->if_rt_sendts
= _net_uptime
;
3774 if (rt_ts
!= NULL
) {
3775 *rt_ts
= _net_uptime
;
3781 * Some Wi-Fi AP implementations do not correctly handle
3782 * multicast IP packets with DSCP bits set (radr://9331522).
3783 * As a workaround we clear the DSCP bits and set the service
3786 if ((p
->cp_mbuf
->m_flags
& M_MCAST
) != 0 &&
3787 IFNET_IS_WIFI_INFRA(ifp
)) {
3788 size_t len
= mbuf_len(p
->cp_mbuf
), hlen
;
3789 struct ether_header
*eh
;
3790 boolean_t pullup
= FALSE
;
3793 if (__improbable(len
< sizeof(struct ether_header
))) {
3794 DTRACE_IP1(small__ether
, size_t, len
);
3795 if ((p
->cp_mbuf
= m_pullup(p
->cp_mbuf
,
3796 sizeof(struct ether_header
))) == NULL
) {
3800 eh
= (struct ether_header
*)mbuf_data(p
->cp_mbuf
);
3801 etype
= ntohs(eh
->ether_type
);
3802 if (etype
== ETHERTYPE_IP
) {
3803 hlen
= sizeof(struct ether_header
) +
3806 DTRACE_IP1(small__v4
, size_t, len
);
3810 } else if (etype
== ETHERTYPE_IPV6
) {
3811 hlen
= sizeof(struct ether_header
) +
3812 sizeof(struct ip6_hdr
);
3814 DTRACE_IP1(small__v6
, size_t, len
);
3817 ip_ver
= IPV6_VERSION
;
3819 DTRACE_IP1(invalid__etype
, uint16_t, etype
);
3823 if ((p
->cp_mbuf
= m_pullup(p
->cp_mbuf
, hlen
)) ==
3828 eh
= (struct ether_header
*)mbuf_data(
3831 mbuf_set_service_class(p
->cp_mbuf
, MBUF_SC_BE
);
3832 mcast_buf
= (uint8_t *)(eh
+ 1);
3834 * ifnet_mcast_clear_dscp() will finish the work below.
3835 * Note that the pullups above ensure that mcast_buf
3836 * points to a full IP header.
3845 __builtin_unreachable();
3848 if (mcast_buf
!= NULL
) {
3849 ifnet_mcast_clear_dscp(mcast_buf
, ip_ver
);
3852 if (ifp
->if_eflags
& IFEF_ENQUEUE_MULTI
) {
3853 if (now_nsec
== 0) {
3855 net_timernsec(&now
, &now_nsec
);
3858 * If the driver chose to delay start callback for
3859 * coalescing multiple packets, Then use the following
3860 * heuristics to make sure that start callback will
3861 * be delayed only when bulk data transfer is detected.
3862 * 1. number of packets enqueued in (delay_win * 2) is
3863 * greater than or equal to the delay qlen.
3864 * 2. If delay_start is enabled it will stay enabled for
3865 * another 10 idle windows. This is to take into account
3866 * variable RTT and burst traffic.
3867 * 3. If the time elapsed since last enqueue is more
3868 * than 200ms we disable delaying start callback. This is
3869 * is to take idle time into account.
3871 u_int64_t dwin
= (ifp
->if_start_delay_timeout
<< 1);
3872 if (ifp
->if_start_delay_swin
> 0) {
3873 if ((ifp
->if_start_delay_swin
+ dwin
) > now_nsec
) {
3874 ifp
->if_start_delay_cnt
++;
3875 } else if ((now_nsec
- ifp
->if_start_delay_swin
)
3876 >= (200 * 1000 * 1000)) {
3877 ifp
->if_start_delay_swin
= now_nsec
;
3878 ifp
->if_start_delay_cnt
= 1;
3879 ifp
->if_start_delay_idle
= 0;
3880 if (ifp
->if_eflags
& IFEF_DELAY_START
) {
3882 ~(IFEF_DELAY_START
);
3883 ifnet_delay_start_disabled
++;
3886 if (ifp
->if_start_delay_cnt
>=
3887 ifp
->if_start_delay_qlen
) {
3888 ifp
->if_eflags
|= IFEF_DELAY_START
;
3889 ifp
->if_start_delay_idle
= 0;
3891 if (ifp
->if_start_delay_idle
>= 10) {
3893 ~(IFEF_DELAY_START
);
3894 ifnet_delay_start_disabled
++;
3896 ifp
->if_start_delay_idle
++;
3899 ifp
->if_start_delay_swin
= now_nsec
;
3900 ifp
->if_start_delay_cnt
= 1;
3903 ifp
->if_start_delay_swin
= now_nsec
;
3904 ifp
->if_start_delay_cnt
= 1;
3905 ifp
->if_start_delay_idle
= 0;
3906 ifp
->if_eflags
&= ~(IFEF_DELAY_START
);
3909 ifp
->if_eflags
&= ~(IFEF_DELAY_START
);
3912 /* enqueue the packet (caller consumes object) */
3913 error
= ifclassq_enqueue(&ifp
->if_snd
, p
, pdrop
);
3916 * Tell the driver to start dequeueing; do this even when the queue
3917 * for the packet is suspended (EQSUSPENDED), as the driver could still
3918 * be dequeueing from other unsuspended queues.
3920 if (!(ifp
->if_eflags
& IFEF_ENQUEUE_MULTI
) &&
3921 ((error
== 0 && flush
) || error
== EQFULL
|| error
== EQSUSPENDED
)) {
3929 ifnet_enqueue_netem(void *handle
, pktsched_pkt_t
*pkts
, uint32_t n_pkts
)
3931 struct ifnet
*ifp
= handle
;
3932 boolean_t pdrop
; /* dummy */
3935 ASSERT(n_pkts
>= 1);
3936 for (i
= 0; i
< n_pkts
- 1; i
++) {
3937 (void) ifnet_enqueue_ifclassq(ifp
, &pkts
[i
].pktsched_pkt
,
3940 /* flush with the last packet */
3941 (void) ifnet_enqueue_ifclassq(ifp
, &pkts
[i
].pktsched_pkt
, TRUE
, &pdrop
);
3946 static inline errno_t
3947 ifnet_enqueue_common(struct ifnet
*ifp
, classq_pkt_t
*pkt
, boolean_t flush
,
3950 if (ifp
->if_output_netem
!= NULL
) {
3951 return netem_enqueue(ifp
->if_output_netem
, pkt
, pdrop
);
3953 return ifnet_enqueue_ifclassq(ifp
, pkt
, flush
, pdrop
);
3958 ifnet_enqueue(struct ifnet
*ifp
, struct mbuf
*m
)
3961 return ifnet_enqueue_mbuf(ifp
, m
, TRUE
, &pdrop
);
3965 ifnet_enqueue_mbuf(struct ifnet
*ifp
, struct mbuf
*m
, boolean_t flush
,
3970 if (ifp
== NULL
|| m
== NULL
|| !(m
->m_flags
& M_PKTHDR
) ||
3971 m
->m_nextpkt
!= NULL
) {
3977 } else if (!(ifp
->if_eflags
& IFEF_TXSTART
) ||
3978 !IF_FULLY_ATTACHED(ifp
)) {
3979 /* flag tested without lock for performance */
3983 } else if (!(ifp
->if_flags
& IFF_UP
)) {
3989 CLASSQ_PKT_INIT_MBUF(&pkt
, m
);
3990 return ifnet_enqueue_common(ifp
, &pkt
, flush
, pdrop
);
3995 ifnet_dequeue(struct ifnet
*ifp
, struct mbuf
**mp
)
3998 classq_pkt_t pkt
= CLASSQ_PKT_INITIALIZER(pkt
);
4000 if (ifp
== NULL
|| mp
== NULL
) {
4002 } else if (!(ifp
->if_eflags
& IFEF_TXSTART
) ||
4003 ifp
->if_output_sched_model
>= IFNET_SCHED_MODEL_MAX
) {
4006 if (!ifnet_is_attached(ifp
, 1)) {
4010 rc
= ifclassq_dequeue(&ifp
->if_snd
, 1, CLASSQ_DEQUEUE_MAX_BYTE_LIMIT
,
4011 &pkt
, NULL
, NULL
, NULL
);
4012 VERIFY((pkt
.cp_ptype
== QP_MBUF
) || (pkt
.cp_mbuf
== NULL
));
4013 ifnet_decr_iorefcnt(ifp
);
4019 ifnet_dequeue_service_class(struct ifnet
*ifp
, mbuf_svc_class_t sc
,
4023 classq_pkt_t pkt
= CLASSQ_PKT_INITIALIZER(pkt
);
4025 if (ifp
== NULL
|| mp
== NULL
|| !MBUF_VALID_SC(sc
)) {
4027 } else if (!(ifp
->if_eflags
& IFEF_TXSTART
) ||
4028 ifp
->if_output_sched_model
>= IFNET_SCHED_MODEL_MAX
) {
4031 if (!ifnet_is_attached(ifp
, 1)) {
4035 rc
= ifclassq_dequeue_sc(&ifp
->if_snd
, sc
, 1,
4036 CLASSQ_DEQUEUE_MAX_BYTE_LIMIT
, &pkt
, NULL
, NULL
, NULL
);
4037 VERIFY((pkt
.cp_ptype
== QP_MBUF
) || (pkt
.cp_mbuf
== NULL
));
4038 ifnet_decr_iorefcnt(ifp
);
4044 ifnet_dequeue_multi(struct ifnet
*ifp
, u_int32_t pkt_limit
,
4045 struct mbuf
**head
, struct mbuf
**tail
, u_int32_t
*cnt
, u_int32_t
*len
)
4048 classq_pkt_t pkt_head
= CLASSQ_PKT_INITIALIZER(pkt_head
);
4049 classq_pkt_t pkt_tail
= CLASSQ_PKT_INITIALIZER(pkt_tail
);
4051 if (ifp
== NULL
|| head
== NULL
|| pkt_limit
< 1) {
4053 } else if (!(ifp
->if_eflags
& IFEF_TXSTART
) ||
4054 ifp
->if_output_sched_model
>= IFNET_SCHED_MODEL_MAX
) {
4057 if (!ifnet_is_attached(ifp
, 1)) {
4061 rc
= ifclassq_dequeue(&ifp
->if_snd
, pkt_limit
,
4062 CLASSQ_DEQUEUE_MAX_BYTE_LIMIT
, &pkt_head
, &pkt_tail
, cnt
, len
);
4063 VERIFY((pkt_head
.cp_ptype
== QP_MBUF
) || (pkt_head
.cp_mbuf
== NULL
));
4064 ifnet_decr_iorefcnt(ifp
);
4065 *head
= pkt_head
.cp_mbuf
;
4067 *tail
= pkt_tail
.cp_mbuf
;
4073 ifnet_dequeue_multi_bytes(struct ifnet
*ifp
, u_int32_t byte_limit
,
4074 struct mbuf
**head
, struct mbuf
**tail
, u_int32_t
*cnt
, u_int32_t
*len
)
4077 classq_pkt_t pkt_head
= CLASSQ_PKT_INITIALIZER(pkt_head
);
4078 classq_pkt_t pkt_tail
= CLASSQ_PKT_INITIALIZER(pkt_tail
);
4080 if (ifp
== NULL
|| head
== NULL
|| byte_limit
< 1) {
4082 } else if (!(ifp
->if_eflags
& IFEF_TXSTART
) ||
4083 ifp
->if_output_sched_model
>= IFNET_SCHED_MODEL_MAX
) {
4086 if (!ifnet_is_attached(ifp
, 1)) {
4090 rc
= ifclassq_dequeue(&ifp
->if_snd
, CLASSQ_DEQUEUE_MAX_PKT_LIMIT
,
4091 byte_limit
, &pkt_head
, &pkt_tail
, cnt
, len
);
4092 VERIFY((pkt_head
.cp_ptype
== QP_MBUF
) || (pkt_head
.cp_mbuf
== NULL
));
4093 ifnet_decr_iorefcnt(ifp
);
4094 *head
= pkt_head
.cp_mbuf
;
4096 *tail
= pkt_tail
.cp_mbuf
;
4102 ifnet_dequeue_service_class_multi(struct ifnet
*ifp
, mbuf_svc_class_t sc
,
4103 u_int32_t pkt_limit
, struct mbuf
**head
, struct mbuf
**tail
, u_int32_t
*cnt
,
4107 classq_pkt_t pkt_head
= CLASSQ_PKT_INITIALIZER(pkt_head
);
4108 classq_pkt_t pkt_tail
= CLASSQ_PKT_INITIALIZER(pkt_tail
);
4110 if (ifp
== NULL
|| head
== NULL
|| pkt_limit
< 1 ||
4111 !MBUF_VALID_SC(sc
)) {
4113 } else if (!(ifp
->if_eflags
& IFEF_TXSTART
) ||
4114 ifp
->if_output_sched_model
>= IFNET_SCHED_MODEL_MAX
) {
4117 if (!ifnet_is_attached(ifp
, 1)) {
4121 rc
= ifclassq_dequeue_sc(&ifp
->if_snd
, sc
, pkt_limit
,
4122 CLASSQ_DEQUEUE_MAX_BYTE_LIMIT
, &pkt_head
, &pkt_tail
,
4124 VERIFY((pkt_head
.cp_ptype
== QP_MBUF
) || (pkt_head
.cp_mbuf
== NULL
));
4125 ifnet_decr_iorefcnt(ifp
);
4126 *head
= pkt_head
.cp_mbuf
;
4128 *tail
= pkt_tail
.cp_mbuf
;
4133 #if !CONFIG_EMBEDDED
4135 ifnet_framer_stub(struct ifnet
*ifp
, struct mbuf
**m
,
4136 const struct sockaddr
*dest
, const char *dest_linkaddr
,
4137 const char *frame_type
, u_int32_t
*pre
, u_int32_t
*post
)
4146 return ifp
->if_framer_legacy(ifp
, m
, dest
, dest_linkaddr
, frame_type
);
4148 #endif /* !CONFIG_EMBEDDED */
4151 packet_has_vlan_tag(struct mbuf
* m
)
4155 if ((m
->m_pkthdr
.csum_flags
& CSUM_VLAN_TAG_VALID
) != 0) {
4156 tag
= EVL_VLANOFTAG(m
->m_pkthdr
.vlan_tag
);
4158 /* the packet is just priority-tagged, clear the bit */
4159 m
->m_pkthdr
.csum_flags
&= ~CSUM_VLAN_TAG_VALID
;
4166 dlil_interface_filters_input(struct ifnet
*ifp
, struct mbuf
**m_p
,
4167 char **frame_header_p
, protocol_family_t protocol_family
)
4169 boolean_t is_vlan_packet
= FALSE
;
4170 struct ifnet_filter
*filter
;
4171 struct mbuf
*m
= *m_p
;
4173 is_vlan_packet
= packet_has_vlan_tag(m
);
4176 * Pass the inbound packet to the interface filters
4178 lck_mtx_lock_spin(&ifp
->if_flt_lock
);
4179 /* prevent filter list from changing in case we drop the lock */
4180 if_flt_monitor_busy(ifp
);
4181 TAILQ_FOREACH(filter
, &ifp
->if_flt_head
, filt_next
) {
4184 /* exclude VLAN packets from external filters PR-3586856 */
4185 if (is_vlan_packet
&&
4186 (filter
->filt_flags
& DLIL_IFF_INTERNAL
) == 0) {
4190 if (!filter
->filt_skip
&& filter
->filt_input
!= NULL
&&
4191 (filter
->filt_protocol
== 0 ||
4192 filter
->filt_protocol
== protocol_family
)) {
4193 lck_mtx_unlock(&ifp
->if_flt_lock
);
4195 result
= (*filter
->filt_input
)(filter
->filt_cookie
,
4196 ifp
, protocol_family
, m_p
, frame_header_p
);
4198 lck_mtx_lock_spin(&ifp
->if_flt_lock
);
4200 /* we're done with the filter list */
4201 if_flt_monitor_unbusy(ifp
);
4202 lck_mtx_unlock(&ifp
->if_flt_lock
);
4207 /* we're done with the filter list */
4208 if_flt_monitor_unbusy(ifp
);
4209 lck_mtx_unlock(&ifp
->if_flt_lock
);
4212 * Strip away M_PROTO1 bit prior to sending packet up the stack as
4213 * it is meant to be local to a subsystem -- if_bridge for M_PROTO1
4216 (*m_p
)->m_flags
&= ~M_PROTO1
;
4223 dlil_interface_filters_output(struct ifnet
*ifp
, struct mbuf
**m_p
,
4224 protocol_family_t protocol_family
)
4226 boolean_t is_vlan_packet
;
4227 struct ifnet_filter
*filter
;
4228 struct mbuf
*m
= *m_p
;
4230 is_vlan_packet
= packet_has_vlan_tag(m
);
4233 * Pass the outbound packet to the interface filters
4235 lck_mtx_lock_spin(&ifp
->if_flt_lock
);
4236 /* prevent filter list from changing in case we drop the lock */
4237 if_flt_monitor_busy(ifp
);
4238 TAILQ_FOREACH(filter
, &ifp
->if_flt_head
, filt_next
) {
4241 /* exclude VLAN packets from external filters PR-3586856 */
4242 if (is_vlan_packet
&&
4243 (filter
->filt_flags
& DLIL_IFF_INTERNAL
) == 0) {
4247 if (!filter
->filt_skip
&& filter
->filt_output
!= NULL
&&
4248 (filter
->filt_protocol
== 0 ||
4249 filter
->filt_protocol
== protocol_family
)) {
4250 lck_mtx_unlock(&ifp
->if_flt_lock
);
4252 result
= filter
->filt_output(filter
->filt_cookie
, ifp
,
4253 protocol_family
, m_p
);
4255 lck_mtx_lock_spin(&ifp
->if_flt_lock
);
4257 /* we're done with the filter list */
4258 if_flt_monitor_unbusy(ifp
);
4259 lck_mtx_unlock(&ifp
->if_flt_lock
);
4264 /* we're done with the filter list */
4265 if_flt_monitor_unbusy(ifp
);
4266 lck_mtx_unlock(&ifp
->if_flt_lock
);
4272 dlil_ifproto_input(struct if_proto
* ifproto
, mbuf_t m
)
4276 if (ifproto
->proto_kpi
== kProtoKPI_v1
) {
4277 /* Version 1 protocols get one packet at a time */
4279 char * frame_header
;
4282 next_packet
= m
->m_nextpkt
;
4283 m
->m_nextpkt
= NULL
;
4284 frame_header
= m
->m_pkthdr
.pkt_hdr
;
4285 m
->m_pkthdr
.pkt_hdr
= NULL
;
4286 error
= (*ifproto
->kpi
.v1
.input
)(ifproto
->ifp
,
4287 ifproto
->protocol_family
, m
, frame_header
);
4288 if (error
!= 0 && error
!= EJUSTRETURN
) {
4293 } else if (ifproto
->proto_kpi
== kProtoKPI_v2
) {
4294 /* Version 2 protocols support packet lists */
4295 error
= (*ifproto
->kpi
.v2
.input
)(ifproto
->ifp
,
4296 ifproto
->protocol_family
, m
);
4297 if (error
!= 0 && error
!= EJUSTRETURN
) {
4304 dlil_input_stats_add(const struct ifnet_stat_increment_param
*s
,
4305 struct dlil_threading_info
*inp
, struct ifnet
*ifp
, boolean_t poll
)
4307 struct ifnet_stat_increment_param
*d
= &inp
->stats
;
4309 if (s
->packets_in
!= 0) {
4310 d
->packets_in
+= s
->packets_in
;
4312 if (s
->bytes_in
!= 0) {
4313 d
->bytes_in
+= s
->bytes_in
;
4315 if (s
->errors_in
!= 0) {
4316 d
->errors_in
+= s
->errors_in
;
4319 if (s
->packets_out
!= 0) {
4320 d
->packets_out
+= s
->packets_out
;
4322 if (s
->bytes_out
!= 0) {
4323 d
->bytes_out
+= s
->bytes_out
;
4325 if (s
->errors_out
!= 0) {
4326 d
->errors_out
+= s
->errors_out
;
4329 if (s
->collisions
!= 0) {
4330 d
->collisions
+= s
->collisions
;
4332 if (s
->dropped
!= 0) {
4333 d
->dropped
+= s
->dropped
;
4337 PKTCNTR_ADD(&ifp
->if_poll_tstats
, s
->packets_in
, s
->bytes_in
);
4342 dlil_input_stats_sync(struct ifnet
*ifp
, struct dlil_threading_info
*inp
)
4344 struct ifnet_stat_increment_param
*s
= &inp
->stats
;
4347 * Use of atomic operations is unavoidable here because
4348 * these stats may also be incremented elsewhere via KPIs.
4350 if (s
->packets_in
!= 0) {
4351 atomic_add_64(&ifp
->if_data
.ifi_ipackets
, s
->packets_in
);
4354 if (s
->bytes_in
!= 0) {
4355 atomic_add_64(&ifp
->if_data
.ifi_ibytes
, s
->bytes_in
);
4358 if (s
->errors_in
!= 0) {
4359 atomic_add_64(&ifp
->if_data
.ifi_ierrors
, s
->errors_in
);
4363 if (s
->packets_out
!= 0) {
4364 atomic_add_64(&ifp
->if_data
.ifi_opackets
, s
->packets_out
);
4367 if (s
->bytes_out
!= 0) {
4368 atomic_add_64(&ifp
->if_data
.ifi_obytes
, s
->bytes_out
);
4371 if (s
->errors_out
!= 0) {
4372 atomic_add_64(&ifp
->if_data
.ifi_oerrors
, s
->errors_out
);
4376 if (s
->collisions
!= 0) {
4377 atomic_add_64(&ifp
->if_data
.ifi_collisions
, s
->collisions
);
4380 if (s
->dropped
!= 0) {
4381 atomic_add_64(&ifp
->if_data
.ifi_iqdrops
, s
->dropped
);
4386 * No need for atomic operations as they are modified here
4387 * only from within the DLIL input thread context.
4389 if (ifp
->if_poll_tstats
.packets
!= 0) {
4390 ifp
->if_poll_pstats
.ifi_poll_packets
+= ifp
->if_poll_tstats
.packets
;
4391 ifp
->if_poll_tstats
.packets
= 0;
4393 if (ifp
->if_poll_tstats
.bytes
!= 0) {
4394 ifp
->if_poll_pstats
.ifi_poll_bytes
+= ifp
->if_poll_tstats
.bytes
;
4395 ifp
->if_poll_tstats
.bytes
= 0;
4398 return ifp
->if_data_threshold
!= 0;
4401 __private_extern__
void
4402 dlil_input_packet_list(struct ifnet
*ifp
, struct mbuf
*m
)
4404 return dlil_input_packet_list_common(ifp
, m
, 0,
4405 IFNET_MODEL_INPUT_POLL_OFF
, FALSE
);
4408 __private_extern__
void
4409 dlil_input_packet_list_extended(struct ifnet
*ifp
, struct mbuf
*m
,
4410 u_int32_t cnt
, ifnet_model_t mode
)
4412 return dlil_input_packet_list_common(ifp
, m
, cnt
, mode
, TRUE
);
4416 dlil_input_packet_list_common(struct ifnet
*ifp_param
, struct mbuf
*m
,
4417 u_int32_t cnt
, ifnet_model_t mode
, boolean_t ext
)
4420 protocol_family_t protocol_family
;
4422 ifnet_t ifp
= ifp_param
;
4423 char *frame_header
= NULL
;
4424 struct if_proto
*last_ifproto
= NULL
;
4425 mbuf_t pkt_first
= NULL
;
4426 mbuf_t
*pkt_next
= NULL
;
4427 u_int32_t poll_thresh
= 0, poll_ival
= 0;
4429 KERNEL_DEBUG(DBG_FNC_DLIL_INPUT
| DBG_FUNC_START
, 0, 0, 0, 0, 0);
4431 if (ext
&& mode
== IFNET_MODEL_INPUT_POLL_ON
&& cnt
> 1 &&
4432 (poll_ival
= if_rxpoll_interval_pkts
) > 0) {
4437 struct if_proto
*ifproto
= NULL
;
4439 uint32_t pktf_mask
; /* pkt flags to preserve */
4441 if (ifp_param
== NULL
) {
4442 ifp
= m
->m_pkthdr
.rcvif
;
4445 if ((ifp
->if_eflags
& IFEF_RXPOLL
) &&
4446 (ifp
->if_xflags
& IFXF_LEGACY
) && poll_thresh
!= 0 &&
4447 poll_ival
> 0 && (--poll_thresh
% poll_ival
) == 0) {
4451 /* Check if this mbuf looks valid */
4452 MBUF_INPUT_CHECK(m
, ifp
);
4454 next_packet
= m
->m_nextpkt
;
4455 m
->m_nextpkt
= NULL
;
4456 frame_header
= m
->m_pkthdr
.pkt_hdr
;
4457 m
->m_pkthdr
.pkt_hdr
= NULL
;
4460 * Get an IO reference count if the interface is not
4461 * loopback (lo0) and it is attached; lo0 never goes
4462 * away, so optimize for that.
4464 if (ifp
!= lo_ifp
) {
4465 if (!ifnet_datamov_begin(ifp
)) {
4471 * Preserve the time stamp if it was set.
4473 pktf_mask
= PKTF_TS_VALID
;
4476 * If this arrived on lo0, preserve interface addr
4477 * info to allow for connectivity between loopback
4478 * and local interface addresses.
4480 pktf_mask
= (PKTF_LOOP
| PKTF_IFAINFO
);
4483 /* make sure packet comes in clean */
4484 m_classifier_init(m
, pktf_mask
);
4486 ifp_inc_traffic_class_in(ifp
, m
);
4488 /* find which protocol family this packet is for */
4489 ifnet_lock_shared(ifp
);
4490 error
= (*ifp
->if_demux
)(ifp
, m
, frame_header
,
4492 ifnet_lock_done(ifp
);
4494 if (error
== EJUSTRETURN
) {
4497 protocol_family
= 0;
4500 pktap_input(ifp
, protocol_family
, m
, frame_header
);
4502 /* Drop v4 packets received on CLAT46 enabled interface */
4503 if (protocol_family
== PF_INET
&& IS_INTF_CLAT46(ifp
)) {
4505 ip6stat
.ip6s_clat464_in_v4_drop
++;
4509 /* Translate the packet if it is received on CLAT interface */
4510 if (protocol_family
== PF_INET6
&& IS_INTF_CLAT46(ifp
)
4511 && dlil_is_clat_needed(protocol_family
, m
)) {
4513 struct ether_header eh
;
4514 struct ether_header
*ehp
= NULL
;
4516 if (ifp
->if_type
== IFT_ETHER
) {
4517 ehp
= (struct ether_header
*)(void *)frame_header
;
4518 /* Skip RX Ethernet packets if they are not IPV6 */
4519 if (ntohs(ehp
->ether_type
) != ETHERTYPE_IPV6
) {
4523 /* Keep a copy of frame_header for Ethernet packets */
4524 bcopy(frame_header
, (caddr_t
)&eh
, ETHER_HDR_LEN
);
4526 error
= dlil_clat64(ifp
, &protocol_family
, &m
);
4527 data
= (char *) mbuf_data(m
);
4530 ip6stat
.ip6s_clat464_in_drop
++;
4533 /* Native v6 should be No-op */
4534 if (protocol_family
!= PF_INET
) {
4538 /* Do this only for translated v4 packets. */
4539 switch (ifp
->if_type
) {
4541 frame_header
= data
;
4545 * Drop if the mbuf doesn't have enough
4546 * space for Ethernet header
4548 if (M_LEADINGSPACE(m
) < ETHER_HDR_LEN
) {
4550 ip6stat
.ip6s_clat464_in_drop
++;
4554 * Set the frame_header ETHER_HDR_LEN bytes
4555 * preceeding the data pointer. Change
4556 * the ether_type too.
4558 frame_header
= data
- ETHER_HDR_LEN
;
4559 eh
.ether_type
= htons(ETHERTYPE_IP
);
4560 bcopy((caddr_t
)&eh
, frame_header
, ETHER_HDR_LEN
);
4565 if (hwcksum_dbg
!= 0 && !(ifp
->if_flags
& IFF_LOOPBACK
) &&
4566 !(m
->m_pkthdr
.pkt_flags
& PKTF_LOOP
)) {
4567 dlil_input_cksum_dbg(ifp
, m
, frame_header
,
4571 * For partial checksum offload, we expect the driver to
4572 * set the start offset indicating the start of the span
4573 * that is covered by the hardware-computed checksum;
4574 * adjust this start offset accordingly because the data
4575 * pointer has been advanced beyond the link-layer header.
4577 * Virtual lan types (bridge, vlan, bond) can call
4578 * dlil_input_packet_list() with the same packet with the
4579 * checksum flags set. Set a flag indicating that the
4580 * adjustment has already been done.
4582 if ((m
->m_pkthdr
.csum_flags
& CSUM_ADJUST_DONE
) != 0) {
4583 /* adjustment has already been done */
4584 } else if ((m
->m_pkthdr
.csum_flags
&
4585 (CSUM_DATA_VALID
| CSUM_PARTIAL
)) ==
4586 (CSUM_DATA_VALID
| CSUM_PARTIAL
)) {
4588 if (frame_header
== NULL
||
4589 frame_header
< (char *)mbuf_datastart(m
) ||
4590 frame_header
> (char *)m
->m_data
||
4591 (adj
= (m
->m_data
- frame_header
)) >
4592 m
->m_pkthdr
.csum_rx_start
) {
4593 m
->m_pkthdr
.csum_data
= 0;
4594 m
->m_pkthdr
.csum_flags
&= ~CSUM_DATA_VALID
;
4595 hwcksum_in_invalidated
++;
4597 m
->m_pkthdr
.csum_rx_start
-= adj
;
4599 /* make sure we don't adjust more than once */
4600 m
->m_pkthdr
.csum_flags
|= CSUM_ADJUST_DONE
;
4603 pktap_input(ifp
, protocol_family
, m
, frame_header
);
4606 if (m
->m_flags
& (M_BCAST
| M_MCAST
)) {
4607 atomic_add_64(&ifp
->if_imcasts
, 1);
4610 /* run interface filters */
4611 error
= dlil_interface_filters_input(ifp
, &m
,
4612 &frame_header
, protocol_family
);
4614 if (error
!= EJUSTRETURN
) {
4619 if ((m
->m_flags
& M_PROMISC
) != 0) {
4624 /* Lookup the protocol attachment to this interface */
4625 if (protocol_family
== 0) {
4627 } else if (last_ifproto
!= NULL
&& last_ifproto
->ifp
== ifp
&&
4628 (last_ifproto
->protocol_family
== protocol_family
)) {
4629 VERIFY(ifproto
== NULL
);
4630 ifproto
= last_ifproto
;
4631 if_proto_ref(last_ifproto
);
4633 VERIFY(ifproto
== NULL
);
4634 ifnet_lock_shared(ifp
);
4635 /* callee holds a proto refcnt upon success */
4636 ifproto
= find_attached_proto(ifp
, protocol_family
);
4637 ifnet_lock_done(ifp
);
4639 if (ifproto
== NULL
) {
4640 /* no protocol for this packet, discard */
4644 if (ifproto
!= last_ifproto
) {
4645 if (last_ifproto
!= NULL
) {
4646 /* pass up the list for the previous protocol */
4647 dlil_ifproto_input(last_ifproto
, pkt_first
);
4649 if_proto_free(last_ifproto
);
4651 last_ifproto
= ifproto
;
4652 if_proto_ref(ifproto
);
4654 /* extend the list */
4655 m
->m_pkthdr
.pkt_hdr
= frame_header
;
4656 if (pkt_first
== NULL
) {
4661 pkt_next
= &m
->m_nextpkt
;
4664 if (next_packet
== NULL
&& last_ifproto
!= NULL
) {
4665 /* pass up the last list of packets */
4666 dlil_ifproto_input(last_ifproto
, pkt_first
);
4667 if_proto_free(last_ifproto
);
4668 last_ifproto
= NULL
;
4670 if (ifproto
!= NULL
) {
4671 if_proto_free(ifproto
);
4677 /* update the driver's multicast filter, if needed */
4678 if (ifp
->if_updatemcasts
> 0 && if_mcasts_update(ifp
) == 0) {
4679 ifp
->if_updatemcasts
= 0;
4681 if (iorefcnt
== 1) {
4682 ifnet_datamov_end(ifp
);
4686 KERNEL_DEBUG(DBG_FNC_DLIL_INPUT
| DBG_FUNC_END
, 0, 0, 0, 0, 0);
4690 if_mcasts_update(struct ifnet
*ifp
)
4694 err
= ifnet_ioctl(ifp
, 0, SIOCADDMULTI
, NULL
);
4695 if (err
== EAFNOSUPPORT
) {
4698 DLIL_PRINTF("%s: %s %d suspended link-layer multicast membership(s) "
4699 "(err=%d)\n", if_name(ifp
),
4700 (err
== 0 ? "successfully restored" : "failed to restore"),
4701 ifp
->if_updatemcasts
, err
);
4703 /* just return success */
4707 /* If ifp is set, we will increment the generation for the interface */
4709 dlil_post_complete_msg(struct ifnet
*ifp
, struct kev_msg
*event
)
4712 ifnet_increment_generation(ifp
);
4716 necp_update_all_clients();
4719 return kev_post_msg(event
);
4722 __private_extern__
void
4723 dlil_post_sifflags_msg(struct ifnet
* ifp
)
4725 struct kev_msg ev_msg
;
4726 struct net_event_data ev_data
;
4728 bzero(&ev_data
, sizeof(ev_data
));
4729 bzero(&ev_msg
, sizeof(ev_msg
));
4730 ev_msg
.vendor_code
= KEV_VENDOR_APPLE
;
4731 ev_msg
.kev_class
= KEV_NETWORK_CLASS
;
4732 ev_msg
.kev_subclass
= KEV_DL_SUBCLASS
;
4733 ev_msg
.event_code
= KEV_DL_SIFFLAGS
;
4734 strlcpy(&ev_data
.if_name
[0], ifp
->if_name
, IFNAMSIZ
);
4735 ev_data
.if_family
= ifp
->if_family
;
4736 ev_data
.if_unit
= (u_int32_t
) ifp
->if_unit
;
4737 ev_msg
.dv
[0].data_length
= sizeof(struct net_event_data
);
4738 ev_msg
.dv
[0].data_ptr
= &ev_data
;
4739 ev_msg
.dv
[1].data_length
= 0;
4740 dlil_post_complete_msg(ifp
, &ev_msg
);
4743 #define TMP_IF_PROTO_ARR_SIZE 10
4745 dlil_event_internal(struct ifnet
*ifp
, struct kev_msg
*event
, bool update_generation
)
4747 struct ifnet_filter
*filter
= NULL
;
4748 struct if_proto
*proto
= NULL
;
4749 int if_proto_count
= 0;
4750 struct if_proto
**tmp_ifproto_arr
= NULL
;
4751 struct if_proto
*tmp_ifproto_stack_arr
[TMP_IF_PROTO_ARR_SIZE
] = {NULL
};
4752 int tmp_ifproto_arr_idx
= 0;
4753 bool tmp_malloc
= false;
4756 * Pass the event to the interface filters
4758 lck_mtx_lock_spin(&ifp
->if_flt_lock
);
4759 /* prevent filter list from changing in case we drop the lock */
4760 if_flt_monitor_busy(ifp
);
4761 TAILQ_FOREACH(filter
, &ifp
->if_flt_head
, filt_next
) {
4762 if (filter
->filt_event
!= NULL
) {
4763 lck_mtx_unlock(&ifp
->if_flt_lock
);
4765 filter
->filt_event(filter
->filt_cookie
, ifp
,
4766 filter
->filt_protocol
, event
);
4768 lck_mtx_lock_spin(&ifp
->if_flt_lock
);
4771 /* we're done with the filter list */
4772 if_flt_monitor_unbusy(ifp
);
4773 lck_mtx_unlock(&ifp
->if_flt_lock
);
4775 /* Get an io ref count if the interface is attached */
4776 if (!ifnet_is_attached(ifp
, 1)) {
4781 * An embedded tmp_list_entry in if_proto may still get
4782 * over-written by another thread after giving up ifnet lock,
4783 * therefore we are avoiding embedded pointers here.
4785 ifnet_lock_shared(ifp
);
4786 if_proto_count
= dlil_ifp_protolist(ifp
, NULL
, 0);
4787 if (if_proto_count
) {
4789 VERIFY(ifp
->if_proto_hash
!= NULL
);
4790 if (if_proto_count
<= TMP_IF_PROTO_ARR_SIZE
) {
4791 tmp_ifproto_arr
= tmp_ifproto_stack_arr
;
4793 MALLOC(tmp_ifproto_arr
, struct if_proto
**,
4794 sizeof(*tmp_ifproto_arr
) * if_proto_count
,
4796 if (tmp_ifproto_arr
== NULL
) {
4797 ifnet_lock_done(ifp
);
4803 for (i
= 0; i
< PROTO_HASH_SLOTS
; i
++) {
4804 SLIST_FOREACH(proto
, &ifp
->if_proto_hash
[i
],
4806 if_proto_ref(proto
);
4807 tmp_ifproto_arr
[tmp_ifproto_arr_idx
] = proto
;
4808 tmp_ifproto_arr_idx
++;
4811 VERIFY(if_proto_count
== tmp_ifproto_arr_idx
);
4813 ifnet_lock_done(ifp
);
4815 for (tmp_ifproto_arr_idx
= 0; tmp_ifproto_arr_idx
< if_proto_count
;
4816 tmp_ifproto_arr_idx
++) {
4817 proto
= tmp_ifproto_arr
[tmp_ifproto_arr_idx
];
4818 VERIFY(proto
!= NULL
);
4819 proto_media_event eventp
=
4820 (proto
->proto_kpi
== kProtoKPI_v1
?
4821 proto
->kpi
.v1
.event
:
4822 proto
->kpi
.v2
.event
);
4824 if (eventp
!= NULL
) {
4825 eventp(ifp
, proto
->protocol_family
,
4828 if_proto_free(proto
);
4833 FREE(tmp_ifproto_arr
, M_TEMP
);
4836 /* Pass the event to the interface */
4837 if (ifp
->if_event
!= NULL
) {
4838 ifp
->if_event(ifp
, event
);
4841 /* Release the io ref count */
4842 ifnet_decr_iorefcnt(ifp
);
4844 return dlil_post_complete_msg(update_generation
? ifp
: NULL
, event
);
4848 ifnet_event(ifnet_t ifp
, struct kern_event_msg
*event
)
4850 struct kev_msg kev_msg
;
4853 if (ifp
== NULL
|| event
== NULL
) {
4857 bzero(&kev_msg
, sizeof(kev_msg
));
4858 kev_msg
.vendor_code
= event
->vendor_code
;
4859 kev_msg
.kev_class
= event
->kev_class
;
4860 kev_msg
.kev_subclass
= event
->kev_subclass
;
4861 kev_msg
.event_code
= event
->event_code
;
4862 kev_msg
.dv
[0].data_ptr
= &event
->event_data
[0];
4863 kev_msg
.dv
[0].data_length
= event
->total_size
- KEV_MSG_HEADER_SIZE
;
4864 kev_msg
.dv
[1].data_length
= 0;
4866 result
= dlil_event_internal(ifp
, &kev_msg
, TRUE
);
4872 #include <netinet/ip6.h>
4873 #include <netinet/ip.h>
4875 dlil_get_socket_type(struct mbuf
**mp
, int family
, int raw
)
4879 struct ip6_hdr
*ip6
;
4880 int type
= SOCK_RAW
;
4885 m
= m_pullup(*mp
, sizeof(struct ip
));
4890 ip
= mtod(m
, struct ip
*);
4891 if (ip
->ip_p
== IPPROTO_TCP
) {
4893 } else if (ip
->ip_p
== IPPROTO_UDP
) {
4898 m
= m_pullup(*mp
, sizeof(struct ip6_hdr
));
4903 ip6
= mtod(m
, struct ip6_hdr
*);
4904 if (ip6
->ip6_nxt
== IPPROTO_TCP
) {
4906 } else if (ip6
->ip6_nxt
== IPPROTO_UDP
) {
4918 dlil_count_chain_len(mbuf_t m
, struct chain_len_stats
*cls
)
4931 atomic_add_64(&cls
->cls_one
, 1);
4934 atomic_add_64(&cls
->cls_two
, 1);
4937 atomic_add_64(&cls
->cls_three
, 1);
4940 atomic_add_64(&cls
->cls_four
, 1);
4944 atomic_add_64(&cls
->cls_five_or_more
, 1);
4952 * Caller should have a lock on the protocol domain if the protocol
4953 * doesn't support finer grained locking. In most cases, the lock
4954 * will be held from the socket layer and won't be released until
4955 * we return back to the socket layer.
4957 * This does mean that we must take a protocol lock before we take
4958 * an interface lock if we're going to take both. This makes sense
4959 * because a protocol is likely to interact with an ifp while it
4960 * is under the protocol lock.
4962 * An advisory code will be returned if adv is not null. This
4963 * can be used to provide feedback about interface queues to the
4967 dlil_output(ifnet_t ifp
, protocol_family_t proto_family
, mbuf_t packetlist
,
4968 void *route
, const struct sockaddr
*dest
, int raw
, struct flowadv
*adv
)
4970 char *frame_type
= NULL
;
4971 char *dst_linkaddr
= NULL
;
4973 char frame_type_buffer
[MAX_FRAME_TYPE_SIZE
* 4];
4974 char dst_linkaddr_buffer
[MAX_LINKADDR
* 4];
4975 struct if_proto
*proto
= NULL
;
4977 mbuf_t send_head
= NULL
;
4978 mbuf_t
*send_tail
= &send_head
;
4980 u_int32_t pre
= 0, post
= 0;
4981 u_int32_t fpkts
= 0, fbytes
= 0;
4983 struct timespec now
;
4985 boolean_t did_clat46
= FALSE
;
4986 protocol_family_t old_proto_family
= proto_family
;
4987 struct sockaddr_in6 dest6
;
4988 struct rtentry
*rt
= NULL
;
4989 u_int32_t m_loop_set
= 0;
4991 KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT
| DBG_FUNC_START
, 0, 0, 0, 0, 0);
4994 * Get an io refcnt if the interface is attached to prevent ifnet_detach
4995 * from happening while this operation is in progress
4997 if (!ifnet_datamov_begin(ifp
)) {
5003 VERIFY(ifp
->if_output_dlil
!= NULL
);
5005 /* update the driver's multicast filter, if needed */
5006 if (ifp
->if_updatemcasts
> 0 && if_mcasts_update(ifp
) == 0) {
5007 ifp
->if_updatemcasts
= 0;
5010 frame_type
= frame_type_buffer
;
5011 dst_linkaddr
= dst_linkaddr_buffer
;
5014 ifnet_lock_shared(ifp
);
5015 /* callee holds a proto refcnt upon success */
5016 proto
= find_attached_proto(ifp
, proto_family
);
5017 if (proto
== NULL
) {
5018 ifnet_lock_done(ifp
);
5022 ifnet_lock_done(ifp
);
5026 if (packetlist
== NULL
) {
5031 packetlist
= packetlist
->m_nextpkt
;
5032 m
->m_nextpkt
= NULL
;
5035 * Perform address family translation for the first
5036 * packet outside the loop in order to perform address
5037 * lookup for the translated proto family.
5039 if (proto_family
== PF_INET
&& IS_INTF_CLAT46(ifp
) &&
5040 (ifp
->if_type
== IFT_CELLULAR
||
5041 dlil_is_clat_needed(proto_family
, m
))) {
5042 retval
= dlil_clat46(ifp
, &proto_family
, &m
);
5044 * Go to the next packet if translation fails
5049 ip6stat
.ip6s_clat464_out_drop
++;
5050 /* Make sure that the proto family is PF_INET */
5051 ASSERT(proto_family
== PF_INET
);
5055 * Free the old one and make it point to the IPv6 proto structure.
5057 * Change proto for the first time we have successfully
5058 * performed address family translation.
5060 if (!did_clat46
&& proto_family
== PF_INET6
) {
5063 if (proto
!= NULL
) {
5064 if_proto_free(proto
);
5066 ifnet_lock_shared(ifp
);
5067 /* callee holds a proto refcnt upon success */
5068 proto
= find_attached_proto(ifp
, proto_family
);
5069 if (proto
== NULL
) {
5070 ifnet_lock_done(ifp
);
5076 ifnet_lock_done(ifp
);
5077 if (ifp
->if_type
== IFT_ETHER
) {
5078 /* Update the dest to translated v6 address */
5079 dest6
.sin6_len
= sizeof(struct sockaddr_in6
);
5080 dest6
.sin6_family
= AF_INET6
;
5081 dest6
.sin6_addr
= (mtod(m
, struct ip6_hdr
*))->ip6_dst
;
5082 dest
= (const struct sockaddr
*)&dest6
;
5085 * Lookup route to the translated destination
5086 * Free this route ref during cleanup
5088 rt
= rtalloc1_scoped((struct sockaddr
*)&dest6
,
5089 0, 0, ifp
->if_index
);
5097 * This path gets packet chain going to the same destination.
5098 * The pre output routine is used to either trigger resolution of
5099 * the next hop or retreive the next hop's link layer addressing.
5100 * For ex: ether_inet(6)_pre_output routine.
5102 * If the routine returns EJUSTRETURN, it implies that packet has
5103 * been queued, and therefore we have to call preout_again for the
5104 * following packet in the chain.
5106 * For errors other than EJUSTRETURN, the current packet is freed
5107 * and the rest of the chain (pointed by packetlist is freed as
5110 * Else if there is no error the retrieved information is used for
5111 * all the packets in the chain.
5114 proto_media_preout preoutp
= (proto
->proto_kpi
== kProtoKPI_v1
?
5115 proto
->kpi
.v1
.pre_output
: proto
->kpi
.v2
.pre_output
);
5117 if (preoutp
!= NULL
) {
5118 retval
= preoutp(ifp
, proto_family
, &m
, dest
, route
,
5119 frame_type
, dst_linkaddr
);
5122 if (retval
== EJUSTRETURN
) {
5133 retval
= mac_ifnet_check_transmit(ifp
, m
, proto_family
,
5134 dlil_get_socket_type(&m
, proto_family
, raw
));
5143 * Perform address family translation if needed.
5144 * For now we only support stateless 4 to 6 translation
5147 * The routine below translates IP header, updates protocol
5148 * checksum and also translates ICMP.
5150 * We skip the first packet as it is already translated and
5151 * the proto family is set to PF_INET6.
5153 if (proto_family
== PF_INET
&& IS_INTF_CLAT46(ifp
) &&
5154 (ifp
->if_type
== IFT_CELLULAR
||
5155 dlil_is_clat_needed(proto_family
, m
))) {
5156 retval
= dlil_clat46(ifp
, &proto_family
, &m
);
5157 /* Goto the next packet if the translation fails */
5161 ip6stat
.ip6s_clat464_out_drop
++;
5167 if (!raw
&& proto_family
== PF_INET
) {
5168 struct ip
*ip
= mtod(m
, struct ip
*);
5169 DTRACE_IP6(send
, struct mbuf
*, m
, struct inpcb
*, NULL
,
5170 struct ip
*, ip
, struct ifnet
*, ifp
,
5171 struct ip
*, ip
, struct ip6_hdr
*, NULL
);
5172 } else if (!raw
&& proto_family
== PF_INET6
) {
5173 struct ip6_hdr
*ip6
= mtod(m
, struct ip6_hdr
*);
5174 DTRACE_IP6(send
, struct mbuf
*, m
, struct inpcb
*, NULL
,
5175 struct ip6_hdr
*, ip6
, struct ifnet
*, ifp
,
5176 struct ip
*, NULL
, struct ip6_hdr
*, ip6
);
5178 #endif /* CONFIG_DTRACE */
5180 if (raw
== 0 && ifp
->if_framer
!= NULL
) {
5184 * If this is a broadcast packet that needs to be
5185 * looped back into the system, set the inbound ifp
5186 * to that of the outbound ifp. This will allow
5187 * us to determine that it is a legitimate packet
5188 * for the system. Only set the ifp if it's not
5189 * already set, just to be safe.
5191 if ((m
->m_flags
& (M_BCAST
| M_LOOP
)) &&
5192 m
->m_pkthdr
.rcvif
== NULL
) {
5193 m
->m_pkthdr
.rcvif
= ifp
;
5196 m_loop_set
= m
->m_flags
& M_LOOP
;
5197 retval
= ifp
->if_framer(ifp
, &m
, dest
, dst_linkaddr
,
5198 frame_type
, &pre
, &post
);
5200 if (retval
!= EJUSTRETURN
) {
5207 * For partial checksum offload, adjust the start
5208 * and stuff offsets based on the prepended header.
5210 if ((m
->m_pkthdr
.csum_flags
&
5211 (CSUM_DATA_VALID
| CSUM_PARTIAL
)) ==
5212 (CSUM_DATA_VALID
| CSUM_PARTIAL
)) {
5213 m
->m_pkthdr
.csum_tx_stuff
+= pre
;
5214 m
->m_pkthdr
.csum_tx_start
+= pre
;
5217 if (hwcksum_dbg
!= 0 && !(ifp
->if_flags
& IFF_LOOPBACK
)) {
5218 dlil_output_cksum_dbg(ifp
, m
, pre
,
5223 * Clear the ifp if it was set above, and to be
5224 * safe, only if it is still the same as the
5225 * outbound ifp we have in context. If it was
5226 * looped back, then a copy of it was sent to the
5227 * loopback interface with the rcvif set, and we
5228 * are clearing the one that will go down to the
5231 if (rcvif_set
&& m
->m_pkthdr
.rcvif
== ifp
) {
5232 m
->m_pkthdr
.rcvif
= NULL
;
5237 * Let interface filters (if any) do their thing ...
5239 retval
= dlil_interface_filters_output(ifp
, &m
, proto_family
);
5241 if (retval
!= EJUSTRETURN
) {
5247 * Strip away M_PROTO1 bit prior to sending packet
5248 * to the driver as this field may be used by the driver
5250 m
->m_flags
&= ~M_PROTO1
;
5253 * If the underlying interface is not capable of handling a
5254 * packet whose data portion spans across physically disjoint
5255 * pages, we need to "normalize" the packet so that we pass
5256 * down a chain of mbufs where each mbuf points to a span that
5257 * resides in the system page boundary. If the packet does
5258 * not cross page(s), the following is a no-op.
5260 if (!(ifp
->if_hwassist
& IFNET_MULTIPAGES
)) {
5261 if ((m
= m_normalize(m
)) == NULL
) {
5267 * If this is a TSO packet, make sure the interface still
5268 * advertise TSO capability.
5270 if (TSO_IPV4_NOTOK(ifp
, m
) || TSO_IPV6_NOTOK(ifp
, m
)) {
5276 ifp_inc_traffic_class_out(ifp
, m
);
5277 pktap_output(ifp
, proto_family
, m
, pre
, post
);
5280 * Count the number of elements in the mbuf chain
5282 if (tx_chain_len_count
) {
5283 dlil_count_chain_len(m
, &tx_chain_len_stats
);
5287 * Record timestamp; ifnet_enqueue() will use this info
5288 * rather than redoing the work. An optimization could
5289 * involve doing this just once at the top, if there are
5290 * no interface filters attached, but that's probably
5294 net_timernsec(&now
, &now_nsec
);
5295 (void) mbuf_set_timestamp(m
, now_nsec
, TRUE
);
5298 * Discard partial sum information if this packet originated
5299 * from another interface; the packet would already have the
5300 * final checksum and we shouldn't recompute it.
5302 if ((m
->m_pkthdr
.pkt_flags
& PKTF_FORWARDED
) &&
5303 (m
->m_pkthdr
.csum_flags
& (CSUM_DATA_VALID
| CSUM_PARTIAL
)) ==
5304 (CSUM_DATA_VALID
| CSUM_PARTIAL
)) {
5305 m
->m_pkthdr
.csum_flags
&= ~CSUM_TX_FLAGS
;
5306 m
->m_pkthdr
.csum_data
= 0;
5310 * Finally, call the driver.
5312 if (ifp
->if_eflags
& (IFEF_SENDLIST
| IFEF_ENQUEUE_MULTI
)) {
5313 if (m
->m_pkthdr
.pkt_flags
& PKTF_FORWARDED
) {
5314 flen
+= (m_pktlen(m
) - (pre
+ post
));
5315 m
->m_pkthdr
.pkt_flags
&= ~PKTF_FORWARDED
;
5318 send_tail
= &m
->m_nextpkt
;
5320 if (m
->m_pkthdr
.pkt_flags
& PKTF_FORWARDED
) {
5321 flen
= (m_pktlen(m
) - (pre
+ post
));
5322 m
->m_pkthdr
.pkt_flags
&= ~PKTF_FORWARDED
;
5326 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT
| DBG_FUNC_START
,
5328 retval
= (*ifp
->if_output_dlil
)(ifp
, m
);
5329 if (retval
== EQFULL
|| retval
== EQSUSPENDED
) {
5330 if (adv
!= NULL
&& adv
->code
== FADV_SUCCESS
) {
5331 adv
->code
= (retval
== EQFULL
?
5332 FADV_FLOW_CONTROLLED
:
5337 if (retval
== 0 && flen
> 0) {
5341 if (retval
!= 0 && dlil_verbose
) {
5342 DLIL_PRINTF("%s: output error on %s retval = %d\n",
5343 __func__
, if_name(ifp
),
5346 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT
| DBG_FUNC_END
,
5349 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT
| DBG_FUNC_END
, 0, 0, 0, 0, 0);
5354 m
->m_flags
|= m_loop_set
;
5355 packetlist
= packetlist
->m_nextpkt
;
5356 m
->m_nextpkt
= NULL
;
5358 /* Reset the proto family to old proto family for CLAT */
5360 proto_family
= old_proto_family
;
5362 } while (m
!= NULL
);
5364 if (send_head
!= NULL
) {
5365 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT
| DBG_FUNC_START
,
5367 if (ifp
->if_eflags
& IFEF_SENDLIST
) {
5368 retval
= (*ifp
->if_output_dlil
)(ifp
, send_head
);
5369 if (retval
== EQFULL
|| retval
== EQSUSPENDED
) {
5371 adv
->code
= (retval
== EQFULL
?
5372 FADV_FLOW_CONTROLLED
:
5377 if (retval
== 0 && flen
> 0) {
5381 if (retval
!= 0 && dlil_verbose
) {
5382 DLIL_PRINTF("%s: output error on %s retval = %d\n",
5383 __func__
, if_name(ifp
), retval
);
5386 struct mbuf
*send_m
;
5388 VERIFY(ifp
->if_eflags
& IFEF_ENQUEUE_MULTI
);
5389 while (send_head
!= NULL
) {
5391 send_head
= send_m
->m_nextpkt
;
5392 send_m
->m_nextpkt
= NULL
;
5393 retval
= (*ifp
->if_output_dlil
)(ifp
, send_m
);
5394 if (retval
== EQFULL
|| retval
== EQSUSPENDED
) {
5396 adv
->code
= (retval
== EQFULL
?
5397 FADV_FLOW_CONTROLLED
:
5408 if (retval
!= 0 && dlil_verbose
) {
5409 DLIL_PRINTF("%s: output error on %s "
5411 __func__
, if_name(ifp
), retval
);
5419 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT
| DBG_FUNC_END
, 0, 0, 0, 0, 0);
5422 KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT
| DBG_FUNC_END
, 0, 0, 0, 0, 0);
5426 ifp
->if_fbytes
+= fbytes
;
5429 ifp
->if_fpackets
+= fpkts
;
5431 if (proto
!= NULL
) {
5432 if_proto_free(proto
);
5434 if (packetlist
) { /* if any packets are left, clean up */
5435 mbuf_freem_list(packetlist
);
5437 if (retval
== EJUSTRETURN
) {
5440 if (iorefcnt
== 1) {
5441 ifnet_datamov_end(ifp
);
5452 * This routine checks if the destination address is not a loopback, link-local,
5453 * multicast or broadcast address.
5456 dlil_is_clat_needed(protocol_family_t proto_family
, mbuf_t m
)
5459 switch (proto_family
) {
5461 struct ip
*iph
= mtod(m
, struct ip
*);
5462 if (CLAT46_NEEDED(ntohl(iph
->ip_dst
.s_addr
))) {
5468 struct ip6_hdr
*ip6h
= mtod(m
, struct ip6_hdr
*);
5469 if ((size_t)m_pktlen(m
) >= sizeof(struct ip6_hdr
) &&
5470 CLAT64_NEEDED(&ip6h
->ip6_dst
)) {
5480 * @brief This routine translates IPv4 packet to IPv6 packet,
5481 * updates protocol checksum and also translates ICMP for code
5482 * along with inner header translation.
5484 * @param ifp Pointer to the interface
5485 * @param proto_family pointer to protocol family. It is updated if function
5486 * performs the translation successfully.
5487 * @param m Pointer to the pointer pointing to the packet. Needed because this
5488 * routine can end up changing the mbuf to a different one.
5490 * @return 0 on success or else a negative value.
5493 dlil_clat46(ifnet_t ifp
, protocol_family_t
*proto_family
, mbuf_t
*m
)
5495 VERIFY(*proto_family
== PF_INET
);
5496 VERIFY(IS_INTF_CLAT46(ifp
));
5498 pbuf_t pbuf_store
, *pbuf
= NULL
;
5499 struct ip
*iph
= NULL
;
5500 struct in_addr osrc
, odst
;
5502 struct in6_ifaddr
*ia6_clat_src
= NULL
;
5503 struct in6_addr
*src
= NULL
;
5504 struct in6_addr dst
;
5507 uint64_t tot_len
= 0;
5508 uint16_t ip_id_val
= 0;
5509 uint16_t ip_frag_off
= 0;
5511 boolean_t is_frag
= FALSE
;
5512 boolean_t is_first_frag
= TRUE
;
5513 boolean_t is_last_frag
= TRUE
;
5515 pbuf_init_mbuf(&pbuf_store
, *m
, ifp
);
5517 iph
= pbuf
->pb_data
;
5522 off
= iph
->ip_hl
<< 2;
5523 ip_id_val
= iph
->ip_id
;
5524 ip_frag_off
= ntohs(iph
->ip_off
) & IP_OFFMASK
;
5526 tot_len
= ntohs(iph
->ip_len
);
5529 * For packets that are not first frags
5530 * we only need to adjust CSUM.
5531 * For 4 to 6, Fragmentation header gets appended
5532 * after proto translation.
5534 if (ntohs(iph
->ip_off
) & ~(IP_DF
| IP_RF
)) {
5537 /* If the offset is not zero, it is not first frag */
5538 if (ip_frag_off
!= 0) {
5539 is_first_frag
= FALSE
;
5542 /* If IP_MF is set, then it is not last frag */
5543 if (ntohs(iph
->ip_off
) & IP_MF
) {
5544 is_last_frag
= FALSE
;
5549 * Retrive the local IPv6 CLAT46 address reserved for stateless
5552 ia6_clat_src
= in6ifa_ifpwithflag(ifp
, IN6_IFF_CLAT46
);
5553 if (ia6_clat_src
== NULL
) {
5554 ip6stat
.ip6s_clat464_out_nov6addr_drop
++;
5559 src
= &ia6_clat_src
->ia_addr
.sin6_addr
;
5562 * Translate IPv4 destination to IPv6 destination by using the
5563 * prefixes learned through prior PLAT discovery.
5565 if ((error
= nat464_synthesize_ipv6(ifp
, &odst
, &dst
)) != 0) {
5566 ip6stat
.ip6s_clat464_out_v6synthfail_drop
++;
5570 /* Translate the IP header part first */
5571 error
= (nat464_translate_46(pbuf
, off
, iph
->ip_tos
, iph
->ip_p
,
5572 iph
->ip_ttl
, *src
, dst
, tot_len
) == NT_NAT64
) ? 0 : -1;
5574 iph
= NULL
; /* Invalidate iph as pbuf has been modified */
5577 ip6stat
.ip6s_clat464_out_46transfail_drop
++;
5582 * Translate protocol header, update checksum, checksum flags
5583 * and related fields.
5585 error
= (nat464_translate_proto(pbuf
, (struct nat464_addr
*)&osrc
, (struct nat464_addr
*)&odst
,
5586 proto
, PF_INET
, PF_INET6
, NT_OUT
, !is_first_frag
) == NT_NAT64
) ? 0 : -1;
5589 ip6stat
.ip6s_clat464_out_46proto_transfail_drop
++;
5593 /* Now insert the IPv6 fragment header */
5595 error
= nat464_insert_frag46(pbuf
, ip_id_val
, ip_frag_off
, is_last_frag
);
5598 ip6stat
.ip6s_clat464_out_46frag_transfail_drop
++;
5604 if (ia6_clat_src
!= NULL
) {
5605 IFA_REMREF(&ia6_clat_src
->ia_ifa
);
5608 if (pbuf_is_valid(pbuf
)) {
5610 pbuf
->pb_mbuf
= NULL
;
5614 ip6stat
.ip6s_clat464_out_invalpbuf_drop
++;
5618 *proto_family
= PF_INET6
;
5619 ip6stat
.ip6s_clat464_out_success
++;
5626 * @brief This routine translates incoming IPv6 to IPv4 packet,
5627 * updates protocol checksum and also translates ICMPv6 outer
5630 * @return 0 on success or else a negative value.
5633 dlil_clat64(ifnet_t ifp
, protocol_family_t
*proto_family
, mbuf_t
*m
)
5635 VERIFY(*proto_family
== PF_INET6
);
5636 VERIFY(IS_INTF_CLAT46(ifp
));
5638 struct ip6_hdr
*ip6h
= NULL
;
5639 struct in6_addr osrc
, odst
;
5641 struct in6_ifaddr
*ia6_clat_dst
= NULL
;
5642 struct in_ifaddr
*ia4_clat_dst
= NULL
;
5643 struct in_addr
*dst
= NULL
;
5647 u_int64_t tot_len
= 0;
5649 boolean_t is_first_frag
= TRUE
;
5651 /* Incoming mbuf does not contain valid IP6 header */
5652 if ((size_t)(*m
)->m_pkthdr
.len
< sizeof(struct ip6_hdr
) ||
5653 ((size_t)(*m
)->m_len
< sizeof(struct ip6_hdr
) &&
5654 (*m
= m_pullup(*m
, sizeof(struct ip6_hdr
))) == NULL
)) {
5655 ip6stat
.ip6s_clat464_in_tooshort_drop
++;
5659 ip6h
= mtod(*m
, struct ip6_hdr
*);
5660 /* Validate that mbuf contains IP payload equal to ip6_plen */
5661 if ((size_t)(*m
)->m_pkthdr
.len
< ntohs(ip6h
->ip6_plen
) + sizeof(struct ip6_hdr
)) {
5662 ip6stat
.ip6s_clat464_in_tooshort_drop
++;
5666 osrc
= ip6h
->ip6_src
;
5667 odst
= ip6h
->ip6_dst
;
5670 * Retrieve the local CLAT46 reserved IPv6 address.
5671 * Let the packet pass if we don't find one, as the flag
5672 * may get set before IPv6 configuration has taken place.
5674 ia6_clat_dst
= in6ifa_ifpwithflag(ifp
, IN6_IFF_CLAT46
);
5675 if (ia6_clat_dst
== NULL
) {
5680 * Check if the original dest in the packet is same as the reserved
5681 * CLAT46 IPv6 address
5683 if (IN6_ARE_ADDR_EQUAL(&odst
, &ia6_clat_dst
->ia_addr
.sin6_addr
)) {
5684 pbuf_t pbuf_store
, *pbuf
= NULL
;
5685 pbuf_init_mbuf(&pbuf_store
, *m
, ifp
);
5689 * Retrive the local CLAT46 IPv4 address reserved for stateless
5692 ia4_clat_dst
= inifa_ifpclatv4(ifp
);
5693 if (ia4_clat_dst
== NULL
) {
5694 IFA_REMREF(&ia6_clat_dst
->ia_ifa
);
5695 ip6stat
.ip6s_clat464_in_nov4addr_drop
++;
5699 IFA_REMREF(&ia6_clat_dst
->ia_ifa
);
5701 /* Translate IPv6 src to IPv4 src by removing the NAT64 prefix */
5702 dst
= &ia4_clat_dst
->ia_addr
.sin_addr
;
5703 if ((error
= nat464_synthesize_ipv4(ifp
, &osrc
, &src
)) != 0) {
5704 ip6stat
.ip6s_clat464_in_v4synthfail_drop
++;
5709 ip6h
= pbuf
->pb_data
;
5710 off
= sizeof(struct ip6_hdr
);
5711 proto
= ip6h
->ip6_nxt
;
5712 tos
= (ntohl(ip6h
->ip6_flow
) >> 20) & 0xff;
5713 tot_len
= ntohs(ip6h
->ip6_plen
) + sizeof(struct ip6_hdr
);
5716 * Translate the IP header and update the fragmentation
5719 error
= (nat464_translate_64(pbuf
, off
, tos
, &proto
,
5720 ip6h
->ip6_hlim
, src
, *dst
, tot_len
, &is_first_frag
) == NT_NAT64
) ?
5723 ip6h
= NULL
; /* Invalidate ip6h as pbuf has been changed */
5726 ip6stat
.ip6s_clat464_in_64transfail_drop
++;
5731 * Translate protocol header, update checksum, checksum flags
5732 * and related fields.
5734 error
= (nat464_translate_proto(pbuf
, (struct nat464_addr
*)&osrc
,
5735 (struct nat464_addr
*)&odst
, proto
, PF_INET6
, PF_INET
,
5736 NT_IN
, !is_first_frag
) == NT_NAT64
) ? 0 : -1;
5739 ip6stat
.ip6s_clat464_in_64proto_transfail_drop
++;
5744 if (ia4_clat_dst
!= NULL
) {
5745 IFA_REMREF(&ia4_clat_dst
->ia_ifa
);
5748 if (pbuf_is_valid(pbuf
)) {
5750 pbuf
->pb_mbuf
= NULL
;
5754 ip6stat
.ip6s_clat464_in_invalpbuf_drop
++;
5758 *proto_family
= PF_INET
;
5759 ip6stat
.ip6s_clat464_in_success
++;
5761 } /* CLAT traffic */
5768 ifnet_ioctl(ifnet_t ifp
, protocol_family_t proto_fam
, u_long ioctl_code
,
5771 struct ifnet_filter
*filter
;
5772 int retval
= EOPNOTSUPP
;
5775 if (ifp
== NULL
|| ioctl_code
== 0) {
5779 /* Get an io ref count if the interface is attached */
5780 if (!ifnet_is_attached(ifp
, 1)) {
5785 * Run the interface filters first.
5786 * We want to run all filters before calling the protocol,
5787 * interface family, or interface.
5789 lck_mtx_lock_spin(&ifp
->if_flt_lock
);
5790 /* prevent filter list from changing in case we drop the lock */
5791 if_flt_monitor_busy(ifp
);
5792 TAILQ_FOREACH(filter
, &ifp
->if_flt_head
, filt_next
) {
5793 if (filter
->filt_ioctl
!= NULL
&& (filter
->filt_protocol
== 0 ||
5794 filter
->filt_protocol
== proto_fam
)) {
5795 lck_mtx_unlock(&ifp
->if_flt_lock
);
5797 result
= filter
->filt_ioctl(filter
->filt_cookie
, ifp
,
5798 proto_fam
, ioctl_code
, ioctl_arg
);
5800 lck_mtx_lock_spin(&ifp
->if_flt_lock
);
5802 /* Only update retval if no one has handled the ioctl */
5803 if (retval
== EOPNOTSUPP
|| result
== EJUSTRETURN
) {
5804 if (result
== ENOTSUP
) {
5805 result
= EOPNOTSUPP
;
5808 if (retval
!= 0 && retval
!= EOPNOTSUPP
) {
5809 /* we're done with the filter list */
5810 if_flt_monitor_unbusy(ifp
);
5811 lck_mtx_unlock(&ifp
->if_flt_lock
);
5817 /* we're done with the filter list */
5818 if_flt_monitor_unbusy(ifp
);
5819 lck_mtx_unlock(&ifp
->if_flt_lock
);
5821 /* Allow the protocol to handle the ioctl */
5822 if (proto_fam
!= 0) {
5823 struct if_proto
*proto
;
5825 /* callee holds a proto refcnt upon success */
5826 ifnet_lock_shared(ifp
);
5827 proto
= find_attached_proto(ifp
, proto_fam
);
5828 ifnet_lock_done(ifp
);
5829 if (proto
!= NULL
) {
5830 proto_media_ioctl ioctlp
=
5831 (proto
->proto_kpi
== kProtoKPI_v1
?
5832 proto
->kpi
.v1
.ioctl
: proto
->kpi
.v2
.ioctl
);
5833 result
= EOPNOTSUPP
;
5834 if (ioctlp
!= NULL
) {
5835 result
= ioctlp(ifp
, proto_fam
, ioctl_code
,
5838 if_proto_free(proto
);
5840 /* Only update retval if no one has handled the ioctl */
5841 if (retval
== EOPNOTSUPP
|| result
== EJUSTRETURN
) {
5842 if (result
== ENOTSUP
) {
5843 result
= EOPNOTSUPP
;
5846 if (retval
&& retval
!= EOPNOTSUPP
) {
5853 /* retval is either 0 or EOPNOTSUPP */
5856 * Let the interface handle this ioctl.
5857 * If it returns EOPNOTSUPP, ignore that, we may have
5858 * already handled this in the protocol or family.
5860 if (ifp
->if_ioctl
) {
5861 result
= (*ifp
->if_ioctl
)(ifp
, ioctl_code
, ioctl_arg
);
5864 /* Only update retval if no one has handled the ioctl */
5865 if (retval
== EOPNOTSUPP
|| result
== EJUSTRETURN
) {
5866 if (result
== ENOTSUP
) {
5867 result
= EOPNOTSUPP
;
5870 if (retval
&& retval
!= EOPNOTSUPP
) {
5876 if (retval
== EJUSTRETURN
) {
5880 ifnet_decr_iorefcnt(ifp
);
5885 __private_extern__ errno_t
5886 dlil_set_bpf_tap(ifnet_t ifp
, bpf_tap_mode mode
, bpf_packet_func callback
)
5891 if (ifp
->if_set_bpf_tap
) {
5892 /* Get an io reference on the interface if it is attached */
5893 if (!ifnet_is_attached(ifp
, 1)) {
5896 error
= ifp
->if_set_bpf_tap(ifp
, mode
, callback
);
5897 ifnet_decr_iorefcnt(ifp
);
5903 dlil_resolve_multi(struct ifnet
*ifp
, const struct sockaddr
*proto_addr
,
5904 struct sockaddr
*ll_addr
, size_t ll_len
)
5906 errno_t result
= EOPNOTSUPP
;
5907 struct if_proto
*proto
;
5908 const struct sockaddr
*verify
;
5909 proto_media_resolve_multi resolvep
;
5911 if (!ifnet_is_attached(ifp
, 1)) {
5915 bzero(ll_addr
, ll_len
);
5917 /* Call the protocol first; callee holds a proto refcnt upon success */
5918 ifnet_lock_shared(ifp
);
5919 proto
= find_attached_proto(ifp
, proto_addr
->sa_family
);
5920 ifnet_lock_done(ifp
);
5921 if (proto
!= NULL
) {
5922 resolvep
= (proto
->proto_kpi
== kProtoKPI_v1
?
5923 proto
->kpi
.v1
.resolve_multi
: proto
->kpi
.v2
.resolve_multi
);
5924 if (resolvep
!= NULL
) {
5925 result
= resolvep(ifp
, proto_addr
,
5926 (struct sockaddr_dl
*)(void *)ll_addr
, ll_len
);
5928 if_proto_free(proto
);
5931 /* Let the interface verify the multicast address */
5932 if ((result
== EOPNOTSUPP
|| result
== 0) && ifp
->if_check_multi
) {
5936 verify
= proto_addr
;
5938 result
= ifp
->if_check_multi(ifp
, verify
);
5941 ifnet_decr_iorefcnt(ifp
);
5945 __private_extern__ errno_t
5946 dlil_send_arp_internal(ifnet_t ifp
, u_short arpop
,
5947 const struct sockaddr_dl
*sender_hw
, const struct sockaddr
*sender_proto
,
5948 const struct sockaddr_dl
*target_hw
, const struct sockaddr
*target_proto
)
5950 struct if_proto
*proto
;
5953 /* callee holds a proto refcnt upon success */
5954 ifnet_lock_shared(ifp
);
5955 proto
= find_attached_proto(ifp
, target_proto
->sa_family
);
5956 ifnet_lock_done(ifp
);
5957 if (proto
== NULL
) {
5960 proto_media_send_arp arpp
;
5961 arpp
= (proto
->proto_kpi
== kProtoKPI_v1
?
5962 proto
->kpi
.v1
.send_arp
: proto
->kpi
.v2
.send_arp
);
5968 arpstat
.txrequests
++;
5969 if (target_hw
!= NULL
) {
5970 arpstat
.txurequests
++;
5974 arpstat
.txreplies
++;
5977 result
= arpp(ifp
, arpop
, sender_hw
, sender_proto
,
5978 target_hw
, target_proto
);
5980 if_proto_free(proto
);
5986 struct net_thread_marks
{ };
5987 static const struct net_thread_marks net_thread_marks_base
= { };
5989 __private_extern__
const net_thread_marks_t net_thread_marks_none
=
5990 &net_thread_marks_base
;
5992 __private_extern__ net_thread_marks_t
5993 net_thread_marks_push(u_int32_t push
)
5995 static const char *const base
= (const void*)&net_thread_marks_base
;
5999 struct uthread
*uth
= get_bsdthread_info(current_thread());
6001 pop
= push
& ~uth
->uu_network_marks
;
6003 uth
->uu_network_marks
|= pop
;
6007 return (net_thread_marks_t
)&base
[pop
];
6010 __private_extern__ net_thread_marks_t
6011 net_thread_unmarks_push(u_int32_t unpush
)
6013 static const char *const base
= (const void*)&net_thread_marks_base
;
6014 u_int32_t unpop
= 0;
6017 struct uthread
*uth
= get_bsdthread_info(current_thread());
6019 unpop
= unpush
& uth
->uu_network_marks
;
6021 uth
->uu_network_marks
&= ~unpop
;
6025 return (net_thread_marks_t
)&base
[unpop
];
6028 __private_extern__
void
6029 net_thread_marks_pop(net_thread_marks_t popx
)
6031 static const char *const base
= (const void*)&net_thread_marks_base
;
6032 const ptrdiff_t pop
= (const char *)popx
- (const char *)base
;
6035 static const ptrdiff_t ones
= (ptrdiff_t)(u_int32_t
)~0U;
6036 struct uthread
*uth
= get_bsdthread_info(current_thread());
6038 VERIFY((pop
& ones
) == pop
);
6039 VERIFY((ptrdiff_t)(uth
->uu_network_marks
& pop
) == pop
);
6040 uth
->uu_network_marks
&= ~pop
;
6044 __private_extern__
void
6045 net_thread_unmarks_pop(net_thread_marks_t unpopx
)
6047 static const char *const base
= (const void*)&net_thread_marks_base
;
6048 ptrdiff_t unpop
= (const char *)unpopx
- (const char *)base
;
6051 static const ptrdiff_t ones
= (ptrdiff_t)(u_int32_t
)~0U;
6052 struct uthread
*uth
= get_bsdthread_info(current_thread());
6054 VERIFY((unpop
& ones
) == unpop
);
6055 VERIFY((ptrdiff_t)(uth
->uu_network_marks
& unpop
) == 0);
6056 uth
->uu_network_marks
|= unpop
;
6060 __private_extern__ u_int32_t
6061 net_thread_is_marked(u_int32_t check
)
6064 struct uthread
*uth
= get_bsdthread_info(current_thread());
6065 return uth
->uu_network_marks
& check
;
6071 __private_extern__ u_int32_t
6072 net_thread_is_unmarked(u_int32_t check
)
6075 struct uthread
*uth
= get_bsdthread_info(current_thread());
6076 return ~uth
->uu_network_marks
& check
;
6082 static __inline__
int
6083 _is_announcement(const struct sockaddr_in
* sender_sin
,
6084 const struct sockaddr_in
* target_sin
)
6086 if (target_sin
== NULL
|| sender_sin
== NULL
) {
6090 return sender_sin
->sin_addr
.s_addr
== target_sin
->sin_addr
.s_addr
;
6093 __private_extern__ errno_t
6094 dlil_send_arp(ifnet_t ifp
, u_short arpop
, const struct sockaddr_dl
*sender_hw
,
6095 const struct sockaddr
*sender_proto
, const struct sockaddr_dl
*target_hw
,
6096 const struct sockaddr
*target_proto0
, u_int32_t rtflags
)
6099 const struct sockaddr_in
* sender_sin
;
6100 const struct sockaddr_in
* target_sin
;
6101 struct sockaddr_inarp target_proto_sinarp
;
6102 struct sockaddr
*target_proto
= (void *)(uintptr_t)target_proto0
;
6104 if (target_proto
== NULL
|| sender_proto
== NULL
) {
6108 if (sender_proto
->sa_family
!= target_proto
->sa_family
) {
6113 * If the target is a (default) router, provide that
6114 * information to the send_arp callback routine.
6116 if (rtflags
& RTF_ROUTER
) {
6117 bcopy(target_proto
, &target_proto_sinarp
,
6118 sizeof(struct sockaddr_in
));
6119 target_proto_sinarp
.sin_other
|= SIN_ROUTER
;
6120 target_proto
= (struct sockaddr
*)&target_proto_sinarp
;
6124 * If this is an ARP request and the target IP is IPv4LL,
6125 * send the request on all interfaces. The exception is
6126 * an announcement, which must only appear on the specific
6129 sender_sin
= (struct sockaddr_in
*)(void *)(uintptr_t)sender_proto
;
6130 target_sin
= (struct sockaddr_in
*)(void *)(uintptr_t)target_proto
;
6131 if (target_proto
->sa_family
== AF_INET
&&
6132 IN_LINKLOCAL(ntohl(target_sin
->sin_addr
.s_addr
)) &&
6133 ipv4_ll_arp_aware
!= 0 && arpop
== ARPOP_REQUEST
&&
6134 !_is_announcement(sender_sin
, target_sin
)) {
6141 if (ifnet_list_get(IFNET_FAMILY_ANY
, &ifp_list
, &count
) == 0) {
6142 for (ifp_on
= 0; ifp_on
< count
; ifp_on
++) {
6144 ifaddr_t source_hw
= NULL
;
6145 ifaddr_t source_ip
= NULL
;
6146 struct sockaddr_in source_ip_copy
;
6147 struct ifnet
*cur_ifp
= ifp_list
[ifp_on
];
6150 * Only arp on interfaces marked for IPv4LL
6151 * ARPing. This may mean that we don't ARP on
6152 * the interface the subnet route points to.
6154 if (!(cur_ifp
->if_eflags
& IFEF_ARPLL
)) {
6158 /* Find the source IP address */
6159 ifnet_lock_shared(cur_ifp
);
6160 source_hw
= cur_ifp
->if_lladdr
;
6161 TAILQ_FOREACH(source_ip
, &cur_ifp
->if_addrhead
,
6163 IFA_LOCK(source_ip
);
6164 if (source_ip
->ifa_addr
!= NULL
&&
6165 source_ip
->ifa_addr
->sa_family
==
6167 /* Copy the source IP address */
6169 *(struct sockaddr_in
*)
6170 (void *)source_ip
->ifa_addr
;
6171 IFA_UNLOCK(source_ip
);
6174 IFA_UNLOCK(source_ip
);
6177 /* No IP Source, don't arp */
6178 if (source_ip
== NULL
) {
6179 ifnet_lock_done(cur_ifp
);
6183 IFA_ADDREF(source_hw
);
6184 ifnet_lock_done(cur_ifp
);
6187 new_result
= dlil_send_arp_internal(cur_ifp
,
6188 arpop
, (struct sockaddr_dl
*)(void *)
6189 source_hw
->ifa_addr
,
6190 (struct sockaddr
*)&source_ip_copy
, NULL
,
6193 IFA_REMREF(source_hw
);
6194 if (result
== ENOTSUP
) {
6195 result
= new_result
;
6198 ifnet_list_free(ifp_list
);
6201 result
= dlil_send_arp_internal(ifp
, arpop
, sender_hw
,
6202 sender_proto
, target_hw
, target_proto
);
6209 * Caller must hold ifnet head lock.
6212 ifnet_lookup(struct ifnet
*ifp
)
6216 LCK_RW_ASSERT(&ifnet_head_lock
, LCK_RW_ASSERT_HELD
);
6217 TAILQ_FOREACH(_ifp
, &ifnet_head
, if_link
) {
6222 return _ifp
!= NULL
;
6226 * Caller has to pass a non-zero refio argument to get a
6227 * IO reference count. This will prevent ifnet_detach from
6228 * being called when there are outstanding io reference counts.
6231 ifnet_is_attached(struct ifnet
*ifp
, int refio
)
6235 lck_mtx_lock_spin(&ifp
->if_ref_lock
);
6236 if ((ret
= IF_FULLY_ATTACHED(ifp
))) {
6241 lck_mtx_unlock(&ifp
->if_ref_lock
);
6247 ifnet_incr_pending_thread_count(struct ifnet
*ifp
)
6249 lck_mtx_lock_spin(&ifp
->if_ref_lock
);
6250 ifp
->if_threads_pending
++;
6251 lck_mtx_unlock(&ifp
->if_ref_lock
);
6255 ifnet_decr_pending_thread_count(struct ifnet
*ifp
)
6257 lck_mtx_lock_spin(&ifp
->if_ref_lock
);
6258 VERIFY(ifp
->if_threads_pending
> 0);
6259 ifp
->if_threads_pending
--;
6260 if (ifp
->if_threads_pending
== 0) {
6261 wakeup(&ifp
->if_threads_pending
);
6263 lck_mtx_unlock(&ifp
->if_ref_lock
);
6267 * Caller must ensure the interface is attached; the assumption is that
6268 * there is at least an outstanding IO reference count held already.
6269 * Most callers would call ifnet_is_{attached,data_ready}() instead.
6272 ifnet_incr_iorefcnt(struct ifnet
*ifp
)
6274 lck_mtx_lock_spin(&ifp
->if_ref_lock
);
6275 VERIFY(IF_FULLY_ATTACHED(ifp
));
6276 VERIFY(ifp
->if_refio
> 0);
6278 lck_mtx_unlock(&ifp
->if_ref_lock
);
__attribute__((always_inline))
static void
ifnet_decr_iorefcnt_locked(struct ifnet *ifp)
{
	LCK_MTX_ASSERT(&ifp->if_ref_lock, LCK_MTX_ASSERT_OWNED);

	VERIFY(ifp->if_refio > 0);
	VERIFY(ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING));

	ifp->if_refio--;
	VERIFY(ifp->if_refio != 0 || ifp->if_datamov == 0);

	/*
	 * if there are no more outstanding io references, wakeup the
	 * ifnet_detach thread if detaching flag is set.
	 */
	if (ifp->if_refio == 0 && (ifp->if_refflags & IFRF_DETACHING)) {
		wakeup(&(ifp->if_refio));
	}
}

void
ifnet_decr_iorefcnt(struct ifnet *ifp)
{
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	ifnet_decr_iorefcnt_locked(ifp);
	lck_mtx_unlock(&ifp->if_ref_lock);
}
boolean_t
ifnet_datamov_begin(struct ifnet *ifp)
{
	boolean_t ret;

	lck_mtx_lock_spin(&ifp->if_ref_lock);
	if ((ret = IF_FULLY_ATTACHED_AND_READY(ifp))) {
		ifp->if_refio++;
		ifp->if_datamov++;
	}
	lck_mtx_unlock(&ifp->if_ref_lock);

	return ret;
}

void
ifnet_datamov_end(struct ifnet *ifp)
{
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	VERIFY(ifp->if_datamov > 0);
	/*
	 * if there's no more thread moving data, wakeup any
	 * drainers that are blocked waiting for this.
	 */
	if (--ifp->if_datamov == 0 && ifp->if_drainers > 0) {
		wakeup(&(ifp->if_datamov));
	}
	ifnet_decr_iorefcnt_locked(ifp);
	lck_mtx_unlock(&ifp->if_ref_lock);
}
void
ifnet_datamov_suspend(struct ifnet *ifp)
{
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	VERIFY(ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING));
	ifp->if_refio++;
	if (ifp->if_suspend++ == 0) {
		VERIFY(ifp->if_refflags & IFRF_READY);
		ifp->if_refflags &= ~IFRF_READY;
	}
	lck_mtx_unlock(&ifp->if_ref_lock);
}

void
ifnet_datamov_drain(struct ifnet *ifp)
{
	lck_mtx_lock(&ifp->if_ref_lock);
	VERIFY(ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING));
	/* data movement must already be suspended */
	VERIFY(ifp->if_suspend > 0);
	VERIFY(!(ifp->if_refflags & IFRF_READY));
	ifp->if_drainers++;
	while (ifp->if_datamov != 0) {
		(void) msleep(&(ifp->if_datamov), &ifp->if_ref_lock,
		    (PZERO - 1), __func__, NULL);
	}
	VERIFY(!(ifp->if_refflags & IFRF_READY));
	VERIFY(ifp->if_drainers > 0);
	ifp->if_drainers--;
	lck_mtx_unlock(&ifp->if_ref_lock);

	/* purge the interface queues */
	if ((ifp->if_eflags & IFEF_TXSTART) != 0) {
		if_qflush(ifp, 0);
	}
}

void
ifnet_datamov_resume(struct ifnet *ifp)
{
	lck_mtx_lock(&ifp->if_ref_lock);
	/* data movement must already be suspended */
	VERIFY(ifp->if_suspend > 0);
	if (--ifp->if_suspend == 0) {
		VERIFY(!(ifp->if_refflags & IFRF_READY));
		ifp->if_refflags |= IFRF_READY;
	}
	ifnet_decr_iorefcnt_locked(ifp);
	lck_mtx_unlock(&ifp->if_ref_lock);
}
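/*
 * Illustrative sketch, not part of the original source: the suspend, drain
 * and resume calls above are intended to bracket work that must not race
 * with the data path.  A hypothetical caller would do:
 *
 *	ifnet_datamov_suspend(ifp);   // new ifnet_datamov_begin() calls fail
 *	ifnet_datamov_drain(ifp);     // wait for in-flight data movers
 *	// ... reconfigure the interface safely ...
 *	ifnet_datamov_resume(ifp);    // mark the interface ready again
 */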
static void
dlil_if_trace(struct dlil_ifnet *dl_if, int refhold)
{
	struct dlil_ifnet_dbg *dl_if_dbg = (struct dlil_ifnet_dbg *)dl_if;
	ctrace_t *tr;
	u_int32_t idx;
	u_int16_t *cnt;

	if (!(dl_if->dl_if_flags & DLIF_DEBUG)) {
		panic("%s: dl_if %p has no debug structure", __func__, dl_if);
		/* NOTREACHED */
	}

	if (refhold) {
		cnt = &dl_if_dbg->dldbg_if_refhold_cnt;
		tr = dl_if_dbg->dldbg_if_refhold;
	} else {
		cnt = &dl_if_dbg->dldbg_if_refrele_cnt;
		tr = dl_if_dbg->dldbg_if_refrele;
	}

	idx = atomic_add_16_ov(cnt, 1) % IF_REF_TRACE_HIST_SIZE;
	ctrace_record(&tr[idx]);
}
errno_t
dlil_if_ref(struct ifnet *ifp)
{
	struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;

	if (dl_if == NULL) {
		return EINVAL;
	}

	lck_mtx_lock_spin(&dl_if->dl_if_lock);
	++dl_if->dl_if_refcnt;
	if (dl_if->dl_if_refcnt == 0) {
		panic("%s: wraparound refcnt for ifp=%p", __func__, ifp);
		/* NOTREACHED */
	}
	if (dl_if->dl_if_trace != NULL) {
		(*dl_if->dl_if_trace)(dl_if, TRUE);
	}
	lck_mtx_unlock(&dl_if->dl_if_lock);

	return 0;
}
errno_t
dlil_if_free(struct ifnet *ifp)
{
	struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
	bool need_release = FALSE;

	if (dl_if == NULL) {
		return EINVAL;
	}

	lck_mtx_lock_spin(&dl_if->dl_if_lock);
	switch (dl_if->dl_if_refcnt) {
	case 0:
		panic("%s: negative refcnt for ifp=%p", __func__, ifp);
		/* NOTREACHED */
		break;
	case 1:
		if ((ifp->if_refflags & IFRF_EMBRYONIC) != 0) {
			need_release = TRUE;
		}
		break;
	default:
		break;
	}
	--dl_if->dl_if_refcnt;
	if (dl_if->dl_if_trace != NULL) {
		(*dl_if->dl_if_trace)(dl_if, FALSE);
	}
	lck_mtx_unlock(&dl_if->dl_if_lock);
	if (need_release) {
		dlil_if_release(ifp);
	}
	return 0;
}
static errno_t
dlil_attach_protocol_internal(struct if_proto *proto,
    const struct ifnet_demux_desc *demux_list, u_int32_t demux_count,
    uint32_t * proto_count)
{
	struct kev_dl_proto_data ev_pr_data;
	struct ifnet *ifp = proto->ifp;
	int retval = 0;
	u_int32_t hash_value = proto_hash_value(proto->protocol_family);
	struct if_proto *prev_proto;
	struct if_proto *_proto;

	/* callee holds a proto refcnt upon success */
	ifnet_lock_exclusive(ifp);
	_proto = find_attached_proto(ifp, proto->protocol_family);
	if (_proto != NULL) {
		ifnet_lock_done(ifp);
		if_proto_free(_proto);
		return EEXIST;
	}

	/*
	 * Call family module add_proto routine so it can refine the
	 * demux descriptors as it wishes.
	 */
	retval = ifp->if_add_proto(ifp, proto->protocol_family, demux_list,
	    demux_count);
	if (retval) {
		ifnet_lock_done(ifp);
		return retval;
	}

	/*
	 * Insert the protocol in the hash
	 */
	prev_proto = SLIST_FIRST(&ifp->if_proto_hash[hash_value]);
	while (prev_proto != NULL && SLIST_NEXT(prev_proto, next_hash) != NULL) {
		prev_proto = SLIST_NEXT(prev_proto, next_hash);
	}
	if (prev_proto) {
		SLIST_INSERT_AFTER(prev_proto, proto, next_hash);
	} else {
		SLIST_INSERT_HEAD(&ifp->if_proto_hash[hash_value],
		    proto, next_hash);
	}

	/* hold a proto refcnt for attach */
	if_proto_ref(proto);

	/*
	 * The reserved field carries the number of protocols still attached
	 * (subject to change)
	 */
	ev_pr_data.proto_family = proto->protocol_family;
	ev_pr_data.proto_remaining_count = dlil_ifp_protolist(ifp, NULL, 0);

	ifnet_lock_done(ifp);

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_ATTACHED,
	    (struct net_event_data *)&ev_pr_data,
	    sizeof(struct kev_dl_proto_data));
	if (proto_count != NULL) {
		*proto_count = ev_pr_data.proto_remaining_count;
	}
	return retval;
}
errno_t
ifnet_attach_protocol(ifnet_t ifp, protocol_family_t protocol,
    const struct ifnet_attach_proto_param *proto_details)
{
	int retval = 0;
	struct if_proto *ifproto = NULL;
	uint32_t proto_count = 0;

	ifnet_head_lock_shared();
	if (ifp == NULL || protocol == 0 || proto_details == NULL) {
		retval = EINVAL;
		goto end;
	}
	/* Check that the interface is in the global list */
	if (!ifnet_lookup(ifp)) {
		retval = ENXIO;
		goto end;
	}

	ifproto = zalloc(dlif_proto_zone);
	if (ifproto == NULL) {
		retval = ENOMEM;
		goto end;
	}
	bzero(ifproto, dlif_proto_size);

	/* refcnt held above during lookup */
	ifproto->ifp = ifp;
	ifproto->protocol_family = protocol;
	ifproto->proto_kpi = kProtoKPI_v1;
	ifproto->kpi.v1.input = proto_details->input;
	ifproto->kpi.v1.pre_output = proto_details->pre_output;
	ifproto->kpi.v1.event = proto_details->event;
	ifproto->kpi.v1.ioctl = proto_details->ioctl;
	ifproto->kpi.v1.detached = proto_details->detached;
	ifproto->kpi.v1.resolve_multi = proto_details->resolve;
	ifproto->kpi.v1.send_arp = proto_details->send_arp;

	retval = dlil_attach_protocol_internal(ifproto,
	    proto_details->demux_list, proto_details->demux_count,
	    &proto_count);

end:
	if (retval != 0 && retval != EEXIST) {
		DLIL_PRINTF("%s: failed to attach v1 protocol %d (err=%d)\n",
		    ifp != NULL ? if_name(ifp) : "N/A", protocol, retval);
	} else {
		if (dlil_verbose) {
			DLIL_PRINTF("%s: attached v1 protocol %d (count = %d)\n",
			    ifp != NULL ? if_name(ifp) : "N/A",
			    protocol, proto_count);
		}
	}
	ifnet_head_done();
	if (retval == 0) {
		/*
		 * A protocol has been attached, mark the interface up.
		 * This used to be done by configd.KernelEventMonitor, but that
		 * is inherently prone to races (rdar://problem/30810208).
		 */
		(void) ifnet_set_flags(ifp, IFF_UP, IFF_UP);
		(void) ifnet_ioctl(ifp, 0, SIOCSIFFLAGS, NULL);
		dlil_post_sifflags_msg(ifp);
	} else if (ifproto != NULL) {
		zfree(dlif_proto_zone, ifproto);
	}
	return retval;
}
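/*
 * Illustrative sketch, not part of the original source: a protocol module
 * fills in a struct ifnet_attach_proto_param and calls
 * ifnet_attach_protocol().  The callback names (my_proto_input and so on)
 * are hypothetical.
 *
 *	struct ifnet_attach_proto_param pr;
 *	bzero(&pr, sizeof(pr));
 *	pr.input = my_proto_input;
 *	pr.pre_output = my_proto_pre_output;
 *	pr.event = my_proto_event;
 *	errno_t err = ifnet_attach_protocol(ifp, PF_INET, &pr);
 *	// err == EEXIST if a protocol of that family is already attached
 */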
errno_t
ifnet_attach_protocol_v2(ifnet_t ifp, protocol_family_t protocol,
    const struct ifnet_attach_proto_param_v2 *proto_details)
{
	int retval = 0;
	struct if_proto *ifproto = NULL;
	uint32_t proto_count = 0;

	ifnet_head_lock_shared();
	if (ifp == NULL || protocol == 0 || proto_details == NULL) {
		retval = EINVAL;
		goto end;
	}
	/* Check that the interface is in the global list */
	if (!ifnet_lookup(ifp)) {
		retval = ENXIO;
		goto end;
	}

	ifproto = zalloc(dlif_proto_zone);
	if (ifproto == NULL) {
		retval = ENOMEM;
		goto end;
	}
	bzero(ifproto, sizeof(*ifproto));

	/* refcnt held above during lookup */
	ifproto->ifp = ifp;
	ifproto->protocol_family = protocol;
	ifproto->proto_kpi = kProtoKPI_v2;
	ifproto->kpi.v2.input = proto_details->input;
	ifproto->kpi.v2.pre_output = proto_details->pre_output;
	ifproto->kpi.v2.event = proto_details->event;
	ifproto->kpi.v2.ioctl = proto_details->ioctl;
	ifproto->kpi.v2.detached = proto_details->detached;
	ifproto->kpi.v2.resolve_multi = proto_details->resolve;
	ifproto->kpi.v2.send_arp = proto_details->send_arp;

	retval = dlil_attach_protocol_internal(ifproto,
	    proto_details->demux_list, proto_details->demux_count,
	    &proto_count);

end:
	if (retval != 0 && retval != EEXIST) {
		DLIL_PRINTF("%s: failed to attach v2 protocol %d (err=%d)\n",
		    ifp != NULL ? if_name(ifp) : "N/A", protocol, retval);
	} else {
		if (dlil_verbose) {
			DLIL_PRINTF("%s: attached v2 protocol %d (count = %d)\n",
			    ifp != NULL ? if_name(ifp) : "N/A",
			    protocol, proto_count);
		}
	}
	ifnet_head_done();
	if (retval == 0) {
		/*
		 * A protocol has been attached, mark the interface up.
		 * This used to be done by configd.KernelEventMonitor, but that
		 * is inherently prone to races (rdar://problem/30810208).
		 */
		(void) ifnet_set_flags(ifp, IFF_UP, IFF_UP);
		(void) ifnet_ioctl(ifp, 0, SIOCSIFFLAGS, NULL);
		dlil_post_sifflags_msg(ifp);
	} else if (ifproto != NULL) {
		zfree(dlif_proto_zone, ifproto);
	}
	return retval;
}
errno_t
ifnet_detach_protocol(ifnet_t ifp, protocol_family_t proto_family)
{
	struct if_proto *proto = NULL;
	int retval = 0;

	if (ifp == NULL || proto_family == 0) {
		retval = EINVAL;
		goto end;
	}

	ifnet_lock_exclusive(ifp);
	/* callee holds a proto refcnt upon success */
	proto = find_attached_proto(ifp, proto_family);
	if (proto == NULL) {
		retval = ENXIO;
		ifnet_lock_done(ifp);
		goto end;
	}

	/* call family module del_proto */
	if (ifp->if_del_proto) {
		ifp->if_del_proto(ifp, proto->protocol_family);
	}

	SLIST_REMOVE(&ifp->if_proto_hash[proto_hash_value(proto_family)],
	    proto, if_proto, next_hash);

	if (proto->proto_kpi == kProtoKPI_v1) {
		proto->kpi.v1.input = ifproto_media_input_v1;
		proto->kpi.v1.pre_output = ifproto_media_preout;
		proto->kpi.v1.event = ifproto_media_event;
		proto->kpi.v1.ioctl = ifproto_media_ioctl;
		proto->kpi.v1.resolve_multi = ifproto_media_resolve_multi;
		proto->kpi.v1.send_arp = ifproto_media_send_arp;
	} else {
		proto->kpi.v2.input = ifproto_media_input_v2;
		proto->kpi.v2.pre_output = ifproto_media_preout;
		proto->kpi.v2.event = ifproto_media_event;
		proto->kpi.v2.ioctl = ifproto_media_ioctl;
		proto->kpi.v2.resolve_multi = ifproto_media_resolve_multi;
		proto->kpi.v2.send_arp = ifproto_media_send_arp;
	}
	proto->detached = 1;
	ifnet_lock_done(ifp);

	if (dlil_verbose) {
		DLIL_PRINTF("%s: detached %s protocol %d\n", if_name(ifp),
		    (proto->proto_kpi == kProtoKPI_v1) ?
		    "v1" : "v2", proto_family);
	}

	/* release proto refcnt held during protocol attach */
	if_proto_free(proto);

	/*
	 * Release proto refcnt held during lookup; the rest of
	 * protocol detach steps will happen when the last proto
	 * reference is released.
	 */
	if_proto_free(proto);

end:
	return retval;
}
static errno_t
ifproto_media_input_v1(struct ifnet *ifp, protocol_family_t protocol,
    struct mbuf *packet, char *header)
{
#pragma unused(ifp, protocol, packet, header)
	return ENXIO;
}

static errno_t
ifproto_media_input_v2(struct ifnet *ifp, protocol_family_t protocol,
    struct mbuf *packet)
{
#pragma unused(ifp, protocol, packet)
	return ENXIO;
}

static errno_t
ifproto_media_preout(struct ifnet *ifp, protocol_family_t protocol,
    mbuf_t *packet, const struct sockaddr *dest, void *route, char *frame_type,
    char *link_layer_dest)
{
#pragma unused(ifp, protocol, packet, dest, route, frame_type, link_layer_dest)
	return ENXIO;
}

static void
ifproto_media_event(struct ifnet *ifp, protocol_family_t protocol,
    const struct kev_msg *event)
{
#pragma unused(ifp, protocol, event)
}

static errno_t
ifproto_media_ioctl(struct ifnet *ifp, protocol_family_t protocol,
    unsigned long command, void *argument)
{
#pragma unused(ifp, protocol, command, argument)
	return ENXIO;
}

static errno_t
ifproto_media_resolve_multi(ifnet_t ifp, const struct sockaddr *proto_addr,
    struct sockaddr_dl *out_ll, size_t ll_len)
{
#pragma unused(ifp, proto_addr, out_ll, ll_len)
	return ENXIO;
}

static errno_t
ifproto_media_send_arp(struct ifnet *ifp, u_short arpop,
    const struct sockaddr_dl *sender_hw, const struct sockaddr *sender_proto,
    const struct sockaddr_dl *target_hw, const struct sockaddr *target_proto)
{
#pragma unused(ifp, arpop, sender_hw, sender_proto, target_hw, target_proto)
	return ENXIO;
}
extern int if_next_index(void);
extern int tcp_ecn_outbound;

errno_t
ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr)
{
	struct ifnet *tmp_if;
	ifaddr_t ifa;
	struct if_data_internal if_data_saved;
	struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
	struct dlil_threading_info *dl_inp;
	u_int32_t sflags = 0;
	int err;

	if (ifp == NULL) {
		return EINVAL;
	}

	/*
	 * Serialize ifnet attach using dlil_ifnet_lock, in order to
	 * prevent the interface from being configured while it is
	 * embryonic, as ifnet_head_lock is dropped and reacquired
	 * below prior to marking the ifnet with IFRF_ATTACHED.
	 */
	dlil_if_lock();
	ifnet_head_lock_exclusive();
	/* Verify we aren't already on the list */
	TAILQ_FOREACH(tmp_if, &ifnet_head, if_link) {
		if (tmp_if == ifp) {
			ifnet_head_done();
			dlil_if_unlock();
			return EEXIST;
		}
	}

	lck_mtx_lock_spin(&ifp->if_ref_lock);
	if (!(ifp->if_refflags & IFRF_EMBRYONIC)) {
		panic_plain("%s: flags mismatch (embryonic not set) ifp=%p",
		    __func__, ifp);
		/* NOTREACHED */
	}
	lck_mtx_unlock(&ifp->if_ref_lock);

	ifnet_lock_exclusive(ifp);

	/* Sanity check */
	VERIFY(ifp->if_detaching_link.tqe_next == NULL);
	VERIFY(ifp->if_detaching_link.tqe_prev == NULL);
	VERIFY(ifp->if_threads_pending == 0);

	if (ll_addr != NULL) {
		if (ifp->if_addrlen == 0) {
			ifp->if_addrlen = ll_addr->sdl_alen;
		} else if (ll_addr->sdl_alen != ifp->if_addrlen) {
			ifnet_lock_done(ifp);
			ifnet_head_done();
			dlil_if_unlock();
			return EINVAL;
		}
	}

	/*
	 * Allow interfaces without protocol families to attach
	 * only if they have the necessary fields filled out.
	 */
	if (ifp->if_add_proto == NULL || ifp->if_del_proto == NULL) {
		DLIL_PRINTF("%s: Attempt to attach interface without "
		    "family module - %d\n", __func__, ifp->if_family);
		ifnet_lock_done(ifp);
		ifnet_head_done();
		dlil_if_unlock();
		return ENODEV;
	}

	/* Allocate protocol hash table */
	VERIFY(ifp->if_proto_hash == NULL);
	ifp->if_proto_hash = zalloc(dlif_phash_zone);
	if (ifp->if_proto_hash == NULL) {
		ifnet_lock_done(ifp);
		ifnet_head_done();
		dlil_if_unlock();
		return ENOBUFS;
	}
	bzero(ifp->if_proto_hash, dlif_phash_size);

	lck_mtx_lock_spin(&ifp->if_flt_lock);
	VERIFY(TAILQ_EMPTY(&ifp->if_flt_head));
	TAILQ_INIT(&ifp->if_flt_head);
	VERIFY(ifp->if_flt_busy == 0);
	VERIFY(ifp->if_flt_waiters == 0);
	lck_mtx_unlock(&ifp->if_flt_lock);

	if (!(dl_if->dl_if_flags & DLIF_REUSE)) {
		VERIFY(LIST_EMPTY(&ifp->if_multiaddrs));
		LIST_INIT(&ifp->if_multiaddrs);
	}

	VERIFY(ifp->if_allhostsinm == NULL);
	VERIFY(TAILQ_EMPTY(&ifp->if_addrhead));
	TAILQ_INIT(&ifp->if_addrhead);

	if (ifp->if_index == 0) {
		int idx = if_next_index();
		if (idx == -1) {
			ifp->if_index = 0;
			ifnet_lock_done(ifp);
			ifnet_head_done();
			dlil_if_unlock();
			return ENOBUFS;
		}
		ifp->if_index = idx;
	}
	/* There should not be anything occupying this slot */
	VERIFY(ifindex2ifnet[ifp->if_index] == NULL);

	/* allocate (if needed) and initialize a link address */
	ifa = dlil_alloc_lladdr(ifp, ll_addr);
	if (ifa == NULL) {
		ifnet_lock_done(ifp);
		ifnet_head_done();
		dlil_if_unlock();
		return ENOBUFS;
	}

	VERIFY(ifnet_addrs[ifp->if_index - 1] == NULL);
	ifnet_addrs[ifp->if_index - 1] = ifa;

	/* make this address the first on the list */
	IFA_LOCK(ifa);
	/* hold a reference for ifnet_addrs[] */
	IFA_ADDREF_LOCKED(ifa);
	/* if_attach_link_ifa() holds a reference for ifa_link */
	if_attach_link_ifa(ifp, ifa);
	IFA_UNLOCK(ifa);

	mac_ifnet_label_associate(ifp);

	TAILQ_INSERT_TAIL(&ifnet_head, ifp, if_link);
	ifindex2ifnet[ifp->if_index] = ifp;

	/* Hold a reference to the underlying dlil_ifnet */
	ifnet_reference(ifp);

	/* Clear stats (save and restore other fields that we care) */
	if_data_saved = ifp->if_data;
	bzero(&ifp->if_data, sizeof(ifp->if_data));
	ifp->if_data.ifi_type = if_data_saved.ifi_type;
	ifp->if_data.ifi_typelen = if_data_saved.ifi_typelen;
	ifp->if_data.ifi_physical = if_data_saved.ifi_physical;
	ifp->if_data.ifi_addrlen = if_data_saved.ifi_addrlen;
	ifp->if_data.ifi_hdrlen = if_data_saved.ifi_hdrlen;
	ifp->if_data.ifi_mtu = if_data_saved.ifi_mtu;
	ifp->if_data.ifi_baudrate = if_data_saved.ifi_baudrate;
	ifp->if_data.ifi_hwassist = if_data_saved.ifi_hwassist;
	ifp->if_data.ifi_tso_v4_mtu = if_data_saved.ifi_tso_v4_mtu;
	ifp->if_data.ifi_tso_v6_mtu = if_data_saved.ifi_tso_v6_mtu;
	ifnet_touch_lastchange(ifp);

	VERIFY(ifp->if_output_sched_model == IFNET_SCHED_MODEL_NORMAL ||
	    ifp->if_output_sched_model == IFNET_SCHED_MODEL_DRIVER_MANAGED ||
	    ifp->if_output_sched_model == IFNET_SCHED_MODEL_FQ_CODEL);

	/* By default, use SFB and enable flow advisory */
	sflags = PKTSCHEDF_QALG_SFB;
	if (if_flowadv) {
		sflags |= PKTSCHEDF_QALG_FLOWCTL;
	}

	if (if_delaybased_queue) {
		sflags |= PKTSCHEDF_QALG_DELAYBASED;
	}

	if (ifp->if_output_sched_model ==
	    IFNET_SCHED_MODEL_DRIVER_MANAGED) {
		sflags |= PKTSCHEDF_QALG_DRIVER_MANAGED;
	}

	/* Initialize transmit queue(s) */
	err = ifclassq_setup(ifp, sflags, (dl_if->dl_if_flags & DLIF_REUSE));
	if (err != 0) {
		panic_plain("%s: ifp=%p couldn't initialize transmit queue; "
		    "err=%d", __func__, ifp, err);
		/* NOTREACHED */
	}

	/* Sanity checks on the input thread storage */
	dl_inp = &dl_if->dl_if_inpstorage;
	bzero(&dl_inp->stats, sizeof(dl_inp->stats));
	VERIFY(dl_inp->input_waiting == 0);
	VERIFY(dl_inp->wtot == 0);
	VERIFY(dl_inp->ifp == NULL);
	VERIFY(qhead(&dl_inp->rcvq_pkts) == NULL && qempty(&dl_inp->rcvq_pkts));
	VERIFY(qlimit(&dl_inp->rcvq_pkts) == 0);
	VERIFY(!dl_inp->net_affinity);
	VERIFY(ifp->if_inp == NULL);
	VERIFY(dl_inp->input_thr == THREAD_NULL);
	VERIFY(dl_inp->wloop_thr == THREAD_NULL);
	VERIFY(dl_inp->poll_thr == THREAD_NULL);
	VERIFY(dl_inp->tag == 0);

#if IFNET_INPUT_SANITY_CHK
	VERIFY(dl_inp->input_mbuf_cnt == 0);
#endif /* IFNET_INPUT_SANITY_CHK */

	VERIFY(ifp->if_poll_thread == THREAD_NULL);
	dlil_reset_rxpoll_params(ifp);
	/*
	 * A specific DLIL input thread is created per non-loopback interface.
	 */
	if (ifp->if_family != IFNET_FAMILY_LOOPBACK) {
		ifp->if_inp = dl_inp;
		ifnet_incr_pending_thread_count(ifp);
		err = dlil_create_input_thread(ifp, ifp->if_inp);
		if (err != 0) {
			panic_plain("%s: ifp=%p couldn't get an input thread; "
			    "err=%d", __func__, ifp, err);
			/* NOTREACHED */
		}
	}
	/*
	 * If the driver supports the new transmit model, calculate flow hash
	 * and create a workloop starter thread to invoke the if_start callback
	 * where the packets may be dequeued and transmitted.
	 */
	if (ifp->if_eflags & IFEF_TXSTART) {
		ifp->if_flowhash = ifnet_calc_flowhash(ifp);
		VERIFY(ifp->if_flowhash != 0);
		VERIFY(ifp->if_start_thread == THREAD_NULL);

		ifnet_set_start_cycle(ifp, NULL);
		ifp->if_start_active = 0;
		ifp->if_start_req = 0;
		ifp->if_start_flags = 0;
		VERIFY(ifp->if_start != NULL);
		ifnet_incr_pending_thread_count(ifp);
		if ((err = kernel_thread_start(ifnet_start_thread_func,
		    ifp, &ifp->if_start_thread)) != KERN_SUCCESS) {
			panic_plain("%s: "
			    "ifp=%p couldn't get a start thread; "
			    "err=%d", __func__, ifp, err);
			/* NOTREACHED */
		}
		ml_thread_policy(ifp->if_start_thread, MACHINE_GROUP,
		    (MACHINE_NETWORK_GROUP | MACHINE_NETWORK_WORKLOOP));
	} else {
		ifp->if_flowhash = 0;
	}

	/* Reset polling parameters */
	ifnet_set_poll_cycle(ifp, NULL);
	ifp->if_poll_update = 0;
	ifp->if_poll_flags = 0;
	ifp->if_poll_req = 0;
	VERIFY(ifp->if_poll_thread == THREAD_NULL);

	/*
	 * If the driver supports the new receive model, create a poller
	 * thread to invoke if_input_poll callback where the packets may
	 * be dequeued from the driver and processed for reception.
	 * if the interface is netif compat then the poller thread is
	 * managed by netif.
	 */
	if (net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL) &&
	    (ifp->if_xflags & IFXF_LEGACY)) {
		VERIFY(ifp->if_input_poll != NULL);
		VERIFY(ifp->if_input_ctl != NULL);
		ifnet_incr_pending_thread_count(ifp);
		if ((err = kernel_thread_start(ifnet_poll_thread_func, ifp,
		    &ifp->if_poll_thread)) != KERN_SUCCESS) {
			panic_plain("%s: ifp=%p couldn't get a poll thread; "
			    "err=%d", __func__, ifp, err);
			/* NOTREACHED */
		}
		ml_thread_policy(ifp->if_poll_thread, MACHINE_GROUP,
		    (MACHINE_NETWORK_GROUP | MACHINE_NETWORK_WORKLOOP));
	}

	VERIFY(ifp->if_desc.ifd_maxlen == IF_DESCSIZE);
	VERIFY(ifp->if_desc.ifd_len == 0);
	VERIFY(ifp->if_desc.ifd_desc != NULL);

	/* Record attach PC stacktrace */
	ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_attach);

	ifp->if_updatemcasts = 0;
	if (!LIST_EMPTY(&ifp->if_multiaddrs)) {
		struct ifmultiaddr *ifma;
		LIST_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
			IFMA_LOCK(ifma);
			if (ifma->ifma_addr->sa_family == AF_LINK ||
			    ifma->ifma_addr->sa_family == AF_UNSPEC) {
				ifp->if_updatemcasts++;
			}
			IFMA_UNLOCK(ifma);
		}

		DLIL_PRINTF("%s: attached with %d suspended link-layer multicast "
		    "membership(s)\n", if_name(ifp),
		    ifp->if_updatemcasts);
	}

	/* Clear logging parameters */
	bzero(&ifp->if_log, sizeof(ifp->if_log));

	/* Clear foreground/realtime activity timestamps */
	ifp->if_fg_sendts = 0;
	ifp->if_rt_sendts = 0;

	VERIFY(ifp->if_delegated.ifp == NULL);
	VERIFY(ifp->if_delegated.type == 0);
	VERIFY(ifp->if_delegated.family == 0);
	VERIFY(ifp->if_delegated.subfamily == 0);
	VERIFY(ifp->if_delegated.expensive == 0);
	VERIFY(ifp->if_delegated.constrained == 0);

	VERIFY(ifp->if_agentids == NULL);
	VERIFY(ifp->if_agentcount == 0);

	/* Reset interface state */
	bzero(&ifp->if_interface_state, sizeof(ifp->if_interface_state));
	ifp->if_interface_state.valid_bitmask |=
	    IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
	ifp->if_interface_state.interface_availability =
	    IF_INTERFACE_STATE_INTERFACE_AVAILABLE;

	/* Initialize Link Quality Metric (loopback [lo0] is always good) */
	if (ifp == lo_ifp) {
		ifp->if_interface_state.lqm_state = IFNET_LQM_THRESH_GOOD;
		ifp->if_interface_state.valid_bitmask |=
		    IF_INTERFACE_STATE_LQM_STATE_VALID;
	} else {
		ifp->if_interface_state.lqm_state = IFNET_LQM_THRESH_UNKNOWN;
	}

	/*
	 * Enable ECN capability on this interface depending on the
	 * value of ECN global setting
	 */
	if (tcp_ecn_outbound == 2 && !IFNET_IS_CELLULAR(ifp)) {
		ifp->if_eflags |= IFEF_ECN_ENABLE;
		ifp->if_eflags &= ~IFEF_ECN_DISABLE;
	}

	/*
	 * Built-in Cyclops always on policy for WiFi infra
	 */
	if (IFNET_IS_WIFI_INFRA(ifp) && net_qos_policy_wifi_enabled != 0) {
		errno_t error;

		error = if_set_qosmarking_mode(ifp,
		    IFRTYPE_QOSMARKING_FASTLANE);
		if (error != 0) {
			DLIL_PRINTF("%s if_set_qosmarking_mode(%s) error %d\n",
			    __func__, ifp->if_xname, error);
		} else {
			ifp->if_eflags |= IFEF_QOSMARKING_ENABLED;
#if (DEVELOPMENT || DEBUG)
			DLIL_PRINTF("%s fastlane enabled on %s\n",
			    __func__, ifp->if_xname);
#endif /* (DEVELOPMENT || DEBUG) */
		}
	}

	ifnet_lock_done(ifp);
	ifnet_head_done();

	lck_mtx_lock(&ifp->if_cached_route_lock);
	/* Enable forwarding cached route */
	ifp->if_fwd_cacheok = 1;
	/* Clean up any existing cached routes */
	ROUTE_RELEASE(&ifp->if_fwd_route);
	bzero(&ifp->if_fwd_route, sizeof(ifp->if_fwd_route));
	ROUTE_RELEASE(&ifp->if_src_route);
	bzero(&ifp->if_src_route, sizeof(ifp->if_src_route));
	ROUTE_RELEASE(&ifp->if_src_route6);
	bzero(&ifp->if_src_route6, sizeof(ifp->if_src_route6));
	lck_mtx_unlock(&ifp->if_cached_route_lock);

	ifnet_llreach_ifattach(ifp, (dl_if->dl_if_flags & DLIF_REUSE));

	/*
	 * Allocate and attach IGMPv3/MLDv2 interface specific variables
	 * and trees; do this before the ifnet is marked as attached.
	 * The ifnet keeps the reference to the info structures even after
	 * the ifnet is detached, since the network-layer records still
	 * refer to the info structures even after that. This also
	 * makes it possible for them to still function after the ifnet
	 * is recycled or reattached.
	 */
	if (IGMP_IFINFO(ifp) == NULL) {
		IGMP_IFINFO(ifp) = igmp_domifattach(ifp, M_WAITOK);
		VERIFY(IGMP_IFINFO(ifp) != NULL);
	} else {
		VERIFY(IGMP_IFINFO(ifp)->igi_ifp == ifp);
		igmp_domifreattach(IGMP_IFINFO(ifp));
	}

	if (MLD_IFINFO(ifp) == NULL) {
		MLD_IFINFO(ifp) = mld_domifattach(ifp, M_WAITOK);
		VERIFY(MLD_IFINFO(ifp) != NULL);
	} else {
		VERIFY(MLD_IFINFO(ifp)->mli_ifp == ifp);
		mld_domifreattach(MLD_IFINFO(ifp));
	}

	VERIFY(ifp->if_data_threshold == 0);
	VERIFY(ifp->if_dt_tcall != NULL);

	/*
	 * Wait for the created kernel threads for I/O to get
	 * scheduled and run at least once before we proceed
	 * to mark interface as attached.
	 */
	lck_mtx_lock(&ifp->if_ref_lock);
	while (ifp->if_threads_pending != 0) {
		DLIL_PRINTF("%s: Waiting for all kernel threads created for "
		    "interface %s to get scheduled at least once.\n",
		    __func__, ifp->if_xname);
		(void) msleep(&ifp->if_threads_pending, &ifp->if_ref_lock, (PZERO - 1),
		    __func__, NULL);
		LCK_MTX_ASSERT(&ifp->if_ref_lock, LCK_ASSERT_OWNED);
	}
	lck_mtx_unlock(&ifp->if_ref_lock);
	DLIL_PRINTF("%s: All kernel threads created for interface %s have been scheduled "
	    "at least once. Proceeding.\n", __func__, ifp->if_xname);

	/* Finally, mark this ifnet as attached. */
	lck_mtx_lock(rnh_lock);
	ifnet_lock_exclusive(ifp);
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	ifp->if_refflags = (IFRF_ATTACHED | IFRF_READY); /* clears embryonic */
	lck_mtx_unlock(&ifp->if_ref_lock);
	if (net_rtref) {
		/* boot-args override; enable idle notification */
		(void) ifnet_set_idle_flags_locked(ifp, IFRF_IDLE_NOTIFY,
		    IFRF_IDLE_NOTIFY);
	} else {
		/* apply previous request(s) to set the idle flags, if any */
		(void) ifnet_set_idle_flags_locked(ifp, ifp->if_idle_new_flags,
		    ifp->if_idle_new_flags_mask);
	}
	ifnet_lock_done(ifp);
	lck_mtx_unlock(rnh_lock);
	dlil_if_unlock();

	/*
	 * Attach packet filter to this interface, if enabled.
	 */
	pf_ifnet_hook(ifp, 1);

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_ATTACHED, NULL, 0);

	if (dlil_verbose) {
		DLIL_PRINTF("%s: attached%s\n", if_name(ifp),
		    (dl_if->dl_if_flags & DLIF_REUSE) ? " (recycled)" : "");
	}

	return 0;
}
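/*
 * Illustrative sketch, not part of the original source: a driver typically
 * obtains its ifnet (via the allocation KPI backed by dlil_if_acquire())
 * and then calls ifnet_attach() with its link-layer address.  The variable
 * name my_lladdr is hypothetical.
 *
 *	struct sockaddr_dl *my_lladdr;   // AF_LINK, sdl_alen == if_addrlen
 *	errno_t err = ifnet_attach(ifp, my_lladdr);
 *	if (err != 0) {
 *		// EEXIST if already attached, EINVAL on address length mismatch
 *	}
 */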
/*
 * Prepare the storage for the first/permanent link address, which must
 * have the same lifetime as the ifnet itself. Although the link
 * address gets removed from if_addrhead and ifnet_addrs[] at detach time,
 * its location in memory must never change as it may still be referred
 * to by some parts of the system afterwards (unfortunate implementation
 * artifacts inherited from BSD.)
 *
 * Caller must hold ifnet lock as writer.
 */
static struct ifaddr *
dlil_alloc_lladdr(struct ifnet *ifp, const struct sockaddr_dl *ll_addr)
{
	struct ifaddr *ifa, *oifa;
	struct sockaddr_dl *asdl, *msdl;
	char workbuf[IFNAMSIZ * 2];
	int namelen, masklen, socksize;
	struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;

	ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_EXCLUSIVE);
	VERIFY(ll_addr == NULL || ll_addr->sdl_alen == ifp->if_addrlen);

	namelen = scnprintf(workbuf, sizeof(workbuf), "%s",
	    if_name(ifp));
	masklen = offsetof(struct sockaddr_dl, sdl_data[0])
	    + ((namelen > 0) ? namelen : 0);
	socksize = masklen + ifp->if_addrlen;
#define ROUNDUP(a) (1 + (((a) - 1) | (sizeof (u_int32_t) - 1)))
	if ((u_int32_t)socksize < sizeof(struct sockaddr_dl)) {
		socksize = sizeof(struct sockaddr_dl);
	}
	socksize = ROUNDUP(socksize);
#undef ROUNDUP

	ifa = ifp->if_lladdr;
	if (socksize > DLIL_SDLMAXLEN ||
	    (ifa != NULL && ifa != &dl_if->dl_if_lladdr.ifa)) {
		/*
		 * Rare, but in the event that the link address requires
		 * more storage space than DLIL_SDLMAXLEN, allocate the
		 * largest possible storages for address and mask, such
		 * that we can reuse the same space when if_addrlen grows.
		 * This same space will be used when if_addrlen shrinks.
		 */
		if (ifa == NULL || ifa == &dl_if->dl_if_lladdr.ifa) {
			int ifasize = sizeof(*ifa) + 2 * SOCK_MAXADDRLEN;
			ifa = _MALLOC(ifasize, M_IFADDR, M_WAITOK | M_ZERO);
			if (ifa == NULL) {
				return NULL;
			}
			/* Don't set IFD_ALLOC, as this is permanent */
			ifa->ifa_debug = IFD_LINK;
		}
		IFA_LOCK(ifa);
		/* address and mask sockaddr_dl locations */
		asdl = (struct sockaddr_dl *)(ifa + 1);
		bzero(asdl, SOCK_MAXADDRLEN);
		msdl = (struct sockaddr_dl *)(void *)
		    ((char *)asdl + SOCK_MAXADDRLEN);
		bzero(msdl, SOCK_MAXADDRLEN);
	} else {
		VERIFY(ifa == NULL || ifa == &dl_if->dl_if_lladdr.ifa);
		/*
		 * Use the storage areas for address and mask within the
		 * dlil_ifnet structure. This is the most common case.
		 */
		if (ifa == NULL) {
			ifa = &dl_if->dl_if_lladdr.ifa;
			/* Don't set IFD_ALLOC, as this is permanent */
			ifa->ifa_debug = IFD_LINK;
		}
		IFA_LOCK(ifa);
		/* address and mask sockaddr_dl locations */
		asdl = (struct sockaddr_dl *)(void *)&dl_if->dl_if_lladdr.asdl;
		bzero(asdl, sizeof(dl_if->dl_if_lladdr.asdl));
		msdl = (struct sockaddr_dl *)(void *)&dl_if->dl_if_lladdr.msdl;
		bzero(msdl, sizeof(dl_if->dl_if_lladdr.msdl));
	}

	/* hold a permanent reference for the ifnet itself */
	IFA_ADDREF_LOCKED(ifa);
	oifa = ifp->if_lladdr;
	ifp->if_lladdr = ifa;

	VERIFY(ifa->ifa_debug == IFD_LINK);
	ifa->ifa_ifp = ifp;
	ifa->ifa_rtrequest = link_rtrequest;
	ifa->ifa_addr = (struct sockaddr *)asdl;
	asdl->sdl_len = socksize;
	asdl->sdl_family = AF_LINK;
	if (namelen > 0) {
		bcopy(workbuf, asdl->sdl_data, min(namelen,
		    sizeof(asdl->sdl_data)));
		asdl->sdl_nlen = namelen;
	} else {
		asdl->sdl_nlen = 0;
	}
	asdl->sdl_index = ifp->if_index;
	asdl->sdl_type = ifp->if_type;
	if (ll_addr != NULL) {
		asdl->sdl_alen = ll_addr->sdl_alen;
		bcopy(CONST_LLADDR(ll_addr), LLADDR(asdl), asdl->sdl_alen);
	} else {
		asdl->sdl_alen = 0;
	}
	ifa->ifa_netmask = (struct sockaddr *)msdl;
	msdl->sdl_len = masklen;
	while (namelen > 0) {
		msdl->sdl_data[--namelen] = 0xff;
	}
	IFA_UNLOCK(ifa);

	if (oifa != NULL) {
		IFA_REMREF(oifa);
	}

	return ifa;
}
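/*
 * Worked example, not part of the original source, of the sizing done in
 * dlil_alloc_lladdr() above: for a hypothetical interface named "en0" with
 * a 6-byte link-layer address, namelen = 3,
 * masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + 3, and
 * socksize = masklen + 6, raised to at least sizeof(struct sockaddr_dl)
 * and then rounded up to a multiple of sizeof(u_int32_t) by ROUNDUP().
 */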
void
if_purgeaddrs(struct ifnet *ifp)
{
	in_purgeaddrs(ifp);
	in6_purgeaddrs(ifp);
}
errno_t
ifnet_detach(ifnet_t ifp)
{
	struct ifnet *delegated_ifp;
	struct nd_ifinfo *ndi = NULL;

	if (ifp == NULL) {
		return EINVAL;
	}

	ndi = ND_IFINFO(ifp);
	if (ndi != NULL) {
		ndi->cga_initialized = FALSE;
	}

	lck_mtx_lock(rnh_lock);
	ifnet_head_lock_exclusive();
	ifnet_lock_exclusive(ifp);

	if (ifp->if_output_netem != NULL) {
		netem_destroy(ifp->if_output_netem);
		ifp->if_output_netem = NULL;
	}

	/*
	 * Check to see if this interface has previously triggered
	 * aggressive protocol draining; if so, decrement the global
	 * refcnt and clear PR_AGGDRAIN on the route domain if
	 * there are no more of such an interface around.
	 */
	(void) ifnet_set_idle_flags_locked(ifp, 0, ~0);

	lck_mtx_lock_spin(&ifp->if_ref_lock);
	if (!(ifp->if_refflags & IFRF_ATTACHED)) {
		lck_mtx_unlock(&ifp->if_ref_lock);
		ifnet_lock_done(ifp);
		ifnet_head_done();
		lck_mtx_unlock(rnh_lock);
		return ENXIO;
	} else if (ifp->if_refflags & IFRF_DETACHING) {
		/* Interface has already been detached */
		lck_mtx_unlock(&ifp->if_ref_lock);
		ifnet_lock_done(ifp);
		ifnet_head_done();
		lck_mtx_unlock(rnh_lock);
		return ENXIO;
	}
	VERIFY(!(ifp->if_refflags & IFRF_EMBRYONIC));
	/* Indicate this interface is being detached */
	ifp->if_refflags &= ~IFRF_ATTACHED;
	ifp->if_refflags |= IFRF_DETACHING;
	lck_mtx_unlock(&ifp->if_ref_lock);

	if (dlil_verbose) {
		DLIL_PRINTF("%s: detaching\n", if_name(ifp));
	}

	/* clean up flow control entry object if there's any */
	if (ifp->if_eflags & IFEF_TXSTART) {
		ifnet_flowadv(ifp->if_flowhash);
	}

	/* Reset ECN enable/disable flags */
	ifp->if_eflags &= ~IFEF_ECN_DISABLE;
	ifp->if_eflags &= ~IFEF_ECN_ENABLE;

	/* Reset CLAT46 flag */
	ifp->if_eflags &= ~IFEF_CLAT46;

	/*
	 * We do not reset the TCP keep alive counters in case
	 * a TCP connection stays connected after the interface
	 * is detached.
	 */
	if (ifp->if_tcp_kao_cnt > 0) {
		os_log(OS_LOG_DEFAULT, "%s %s tcp_kao_cnt %u not zero",
		    __func__, if_name(ifp), ifp->if_tcp_kao_cnt);
	}
	ifp->if_tcp_kao_max = 0;

	/*
	 * Remove ifnet from the ifnet_head, ifindex2ifnet[]; it will
	 * no longer be visible during lookups from this point.
	 */
	VERIFY(ifindex2ifnet[ifp->if_index] == ifp);
	TAILQ_REMOVE(&ifnet_head, ifp, if_link);
	ifp->if_link.tqe_next = NULL;
	ifp->if_link.tqe_prev = NULL;
	if (ifp->if_ordered_link.tqe_next != NULL ||
	    ifp->if_ordered_link.tqe_prev != NULL) {
		ifnet_remove_from_ordered_list(ifp);
	}
	ifindex2ifnet[ifp->if_index] = NULL;

	/* 18717626 - reset IFEF_IPV4_ROUTER and IFEF_IPV6_ROUTER */
	ifp->if_eflags &= ~(IFEF_IPV4_ROUTER | IFEF_IPV6_ROUTER);

	/* Record detach PC stacktrace */
	ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_detach);

	/* Clear logging parameters */
	bzero(&ifp->if_log, sizeof(ifp->if_log));

	/* Clear delegated interface info (reference released below) */
	delegated_ifp = ifp->if_delegated.ifp;
	bzero(&ifp->if_delegated, sizeof(ifp->if_delegated));

	/* Reset interface state */
	bzero(&ifp->if_interface_state, sizeof(ifp->if_interface_state));

	ifnet_lock_done(ifp);
	ifnet_head_done();
	lck_mtx_unlock(rnh_lock);

	/* Release reference held on the delegated interface */
	if (delegated_ifp != NULL) {
		ifnet_release(delegated_ifp);
	}

	/* Reset Link Quality Metric (unless loopback [lo0]) */
	if (ifp != lo_ifp) {
		if_lqm_update(ifp, IFNET_LQM_THRESH_OFF, 0);
	}

	/* Reset TCP local statistics */
	if (ifp->if_tcp_stat != NULL) {
		bzero(ifp->if_tcp_stat, sizeof(*ifp->if_tcp_stat));
	}

	/* Reset UDP local statistics */
	if (ifp->if_udp_stat != NULL) {
		bzero(ifp->if_udp_stat, sizeof(*ifp->if_udp_stat));
	}

	/* Reset ifnet IPv4 stats */
	if (ifp->if_ipv4_stat != NULL) {
		bzero(ifp->if_ipv4_stat, sizeof(*ifp->if_ipv4_stat));
	}

	/* Reset ifnet IPv6 stats */
	if (ifp->if_ipv6_stat != NULL) {
		bzero(ifp->if_ipv6_stat, sizeof(*ifp->if_ipv6_stat));
	}

	/* Release memory held for interface link status report */
	if (ifp->if_link_status != NULL) {
		FREE(ifp->if_link_status, M_TEMP);
		ifp->if_link_status = NULL;
	}

	/* Clear agent IDs */
	if (ifp->if_agentids != NULL) {
		FREE(ifp->if_agentids, M_NETAGENT);
		ifp->if_agentids = NULL;
	}
	ifp->if_agentcount = 0;

	/* Let BPF know we're detaching */
	bpfdetach(ifp);

	/* Mark the interface as DOWN */
	if_down(ifp);

	/* Disable forwarding cached route */
	lck_mtx_lock(&ifp->if_cached_route_lock);
	ifp->if_fwd_cacheok = 0;
	lck_mtx_unlock(&ifp->if_cached_route_lock);

	/* Disable data threshold and wait for any pending event posting */
	ifp->if_data_threshold = 0;
	VERIFY(ifp->if_dt_tcall != NULL);
	(void) thread_call_cancel_wait(ifp->if_dt_tcall);

	/*
	 * Drain any deferred IGMPv3/MLDv2 query responses, but keep the
	 * references to the info structures and leave them attached to
	 * this ifnet.
	 */
	igmp_domifdetach(ifp);
	mld_domifdetach(ifp);

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHING, NULL, 0);

	/* Let worker thread take care of the rest, to avoid reentrancy */
	dlil_if_lock();
	ifnet_detaching_enqueue(ifp);
	dlil_if_unlock();

	return 0;
}
static void
ifnet_detaching_enqueue(struct ifnet *ifp)
{
	dlil_if_lock_assert();

	++ifnet_detaching_cnt;
	VERIFY(ifnet_detaching_cnt != 0);
	TAILQ_INSERT_TAIL(&ifnet_detaching_head, ifp, if_detaching_link);
	wakeup((caddr_t)&ifnet_delayed_run);
}

static struct ifnet *
ifnet_detaching_dequeue(void)
{
	struct ifnet *ifp;

	dlil_if_lock_assert();

	ifp = TAILQ_FIRST(&ifnet_detaching_head);
	VERIFY(ifnet_detaching_cnt != 0 || ifp == NULL);
	if (ifp != NULL) {
		VERIFY(ifnet_detaching_cnt != 0);
		--ifnet_detaching_cnt;
		TAILQ_REMOVE(&ifnet_detaching_head, ifp, if_detaching_link);
		ifp->if_detaching_link.tqe_next = NULL;
		ifp->if_detaching_link.tqe_prev = NULL;
	}
	return ifp;
}

static void
ifnet_detacher_thread_cont(int err)
{
#pragma unused(err)
	struct ifnet *ifp;

	for (;;) {
		dlil_if_lock_assert();
		while (ifnet_detaching_cnt == 0) {
			(void) msleep0(&ifnet_delayed_run, &dlil_ifnet_lock,
			    (PZERO - 1), "ifnet_detacher_cont", 0,
			    ifnet_detacher_thread_cont);
			/* NOTREACHED */
		}

		net_update_uptime();

		VERIFY(TAILQ_FIRST(&ifnet_detaching_head) != NULL);

		/* Take care of detaching ifnet */
		ifp = ifnet_detaching_dequeue();
		if (ifp != NULL) {
			dlil_if_unlock();
			ifnet_detach_final(ifp);
			dlil_if_lock();
		}
	}
}

static void
ifnet_detacher_thread_func(void *v, wait_result_t w)
{
#pragma unused(v, w)
	dlil_decr_pending_thread_count();
	dlil_if_lock();
	(void) msleep0(&ifnet_delayed_run, &dlil_ifnet_lock,
	    (PZERO - 1), "ifnet_detacher", 0, ifnet_detacher_thread_cont);
	/*
	 * msleep0() shouldn't have returned as PCATCH was not set;
	 * therefore assert in this case.
	 */
	dlil_if_unlock();
	VERIFY(0);
}
static void
ifnet_detach_final(struct ifnet *ifp)
{
	struct ifnet_filter *filter, *filter_next;
	struct ifnet_filter_head fhead;
	struct dlil_threading_info *inp;
	struct ifaddr *ifa;
	ifnet_detached_func if_free;
	int i;

	lck_mtx_lock(&ifp->if_ref_lock);
	if (!(ifp->if_refflags & IFRF_DETACHING)) {
		panic("%s: flags mismatch (detaching not set) ifp=%p",
		    __func__, ifp);
		/* NOTREACHED */
	}

	/*
	 * Wait until the existing IO references get released
	 * before we proceed with ifnet_detach. This is not a
	 * common case, so block without using a continuation.
	 */
	while (ifp->if_refio > 0) {
		DLIL_PRINTF("%s: Waiting for IO references on %s interface "
		    "to be released\n", __func__, if_name(ifp));
		(void) msleep(&(ifp->if_refio), &ifp->if_ref_lock,
		    (PZERO - 1), "ifnet_ioref_wait", NULL);
	}

	VERIFY(ifp->if_datamov == 0);
	VERIFY(ifp->if_drainers == 0);
	VERIFY(ifp->if_suspend == 0);
	ifp->if_refflags &= ~IFRF_READY;
	lck_mtx_unlock(&ifp->if_ref_lock);

	/* Drain and destroy send queue */
	ifclassq_teardown(ifp);

	/* Detach interface filters */
	lck_mtx_lock(&ifp->if_flt_lock);
	if_flt_monitor_enter(ifp);

	LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
	fhead = ifp->if_flt_head;
	TAILQ_INIT(&ifp->if_flt_head);

	for (filter = TAILQ_FIRST(&fhead); filter; filter = filter_next) {
		filter_next = TAILQ_NEXT(filter, filt_next);
		lck_mtx_unlock(&ifp->if_flt_lock);

		dlil_detach_filter_internal(filter, 1);
		lck_mtx_lock(&ifp->if_flt_lock);
	}
	if_flt_monitor_leave(ifp);
	lck_mtx_unlock(&ifp->if_flt_lock);

	/* Tell upper layers to drop their network addresses */
	if_purgeaddrs(ifp);

	ifnet_lock_exclusive(ifp);

	/* Unplumb all protocols */
	for (i = 0; i < PROTO_HASH_SLOTS; i++) {
		struct if_proto *proto;

		proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
		while (proto != NULL) {
			protocol_family_t family = proto->protocol_family;
			ifnet_lock_done(ifp);
			proto_unplumb(family, ifp);
			ifnet_lock_exclusive(ifp);
			proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
		}
		/* There should not be any protocols left */
		VERIFY(SLIST_EMPTY(&ifp->if_proto_hash[i]));
	}
	zfree(dlif_phash_zone, ifp->if_proto_hash);
	ifp->if_proto_hash = NULL;

	/* Detach (permanent) link address from if_addrhead */
	ifa = TAILQ_FIRST(&ifp->if_addrhead);
	VERIFY(ifnet_addrs[ifp->if_index - 1] == ifa);
	IFA_LOCK(ifa);
	if_detach_link_ifa(ifp, ifa);
	IFA_UNLOCK(ifa);

	/* Remove (permanent) link address from ifnet_addrs[] */
	IFA_REMREF(ifa);
	ifnet_addrs[ifp->if_index - 1] = NULL;

	/* This interface should not be on {ifnet_head,detaching} */
	VERIFY(ifp->if_link.tqe_next == NULL);
	VERIFY(ifp->if_link.tqe_prev == NULL);
	VERIFY(ifp->if_detaching_link.tqe_next == NULL);
	VERIFY(ifp->if_detaching_link.tqe_prev == NULL);
	VERIFY(ifp->if_ordered_link.tqe_next == NULL);
	VERIFY(ifp->if_ordered_link.tqe_prev == NULL);

	/* The slot should have been emptied */
	VERIFY(ifindex2ifnet[ifp->if_index] == NULL);

	/* There should not be any addresses left */
	VERIFY(TAILQ_EMPTY(&ifp->if_addrhead));

	/*
	 * Signal the starter thread to terminate itself.
	 */
	if (ifp->if_start_thread != THREAD_NULL) {
		lck_mtx_lock_spin(&ifp->if_start_lock);
		ifp->if_start_flags = 0;
		ifp->if_start_thread = THREAD_NULL;
		wakeup_one((caddr_t)&ifp->if_start_thread);
		lck_mtx_unlock(&ifp->if_start_lock);
	}

	/*
	 * Signal the poller thread to terminate itself.
	 */
	if (ifp->if_poll_thread != THREAD_NULL) {
		lck_mtx_lock_spin(&ifp->if_poll_lock);
		ifp->if_poll_thread = THREAD_NULL;
		wakeup_one((caddr_t)&ifp->if_poll_thread);
		lck_mtx_unlock(&ifp->if_poll_lock);
	}

	/*
	 * If thread affinity was set for the workloop thread, we will need
	 * to tear down the affinity and release the extra reference count
	 * taken at attach time. Does not apply to lo0 or other interfaces
	 * without dedicated input threads.
	 */
	if ((inp = ifp->if_inp) != NULL) {
		VERIFY(inp != dlil_main_input_thread);

		if (inp->net_affinity) {
			struct thread *tp, *wtp, *ptp;

			lck_mtx_lock_spin(&inp->input_lck);
			wtp = inp->wloop_thr;
			inp->wloop_thr = THREAD_NULL;
			ptp = inp->poll_thr;
			inp->poll_thr = THREAD_NULL;
			tp = inp->input_thr;    /* don't nullify now */
			inp->net_affinity = FALSE;
			lck_mtx_unlock(&inp->input_lck);

			/* Tear down poll thread affinity */
			if (ptp != NULL) {
				VERIFY(ifp->if_eflags & IFEF_RXPOLL);
				VERIFY(ifp->if_xflags & IFXF_LEGACY);
				(void) dlil_affinity_set(ptp,
				    THREAD_AFFINITY_TAG_NULL);
				thread_deallocate(ptp);
			}

			/* Tear down workloop thread affinity */
			if (wtp != NULL) {
				(void) dlil_affinity_set(wtp,
				    THREAD_AFFINITY_TAG_NULL);
				thread_deallocate(wtp);
			}

			/* Tear down DLIL input thread affinity */
			(void) dlil_affinity_set(tp, THREAD_AFFINITY_TAG_NULL);
			thread_deallocate(tp);
		}

		/* disassociate ifp DLIL input thread */
		ifp->if_inp = NULL;

		/* tell the input thread to terminate */
		lck_mtx_lock_spin(&inp->input_lck);
		inp->input_waiting |= DLIL_INPUT_TERMINATE;
		if (!(inp->input_waiting & DLIL_INPUT_RUNNING)) {
			wakeup_one((caddr_t)&inp->input_waiting);
		}
		lck_mtx_unlock(&inp->input_lck);
		ifnet_lock_done(ifp);

		/* wait for the input thread to terminate */
		lck_mtx_lock_spin(&inp->input_lck);
		while ((inp->input_waiting & DLIL_INPUT_TERMINATE_COMPLETE)
		    == 0) {
			(void) msleep(&inp->input_waiting, &inp->input_lck,
			    (PZERO - 1) | PSPIN, inp->input_name, NULL);
		}
		lck_mtx_unlock(&inp->input_lck);
		ifnet_lock_exclusive(ifp);

		/* clean-up input thread state */
		dlil_clean_threading_info(inp);
		/* clean-up poll parameters */
		VERIFY(ifp->if_poll_thread == THREAD_NULL);
		dlil_reset_rxpoll_params(ifp);
	}

	/* The driver might unload, so point these to ourselves */
	if_free = ifp->if_free;
	ifp->if_output_dlil = ifp_if_output;
	ifp->if_output = ifp_if_output;
	ifp->if_pre_enqueue = ifp_if_output;
	ifp->if_start = ifp_if_start;
	ifp->if_output_ctl = ifp_if_ctl;
	ifp->if_input_dlil = ifp_if_input;
	ifp->if_input_poll = ifp_if_input_poll;
	ifp->if_input_ctl = ifp_if_ctl;
	ifp->if_ioctl = ifp_if_ioctl;
	ifp->if_set_bpf_tap = ifp_if_set_bpf_tap;
	ifp->if_free = ifp_if_free;
	ifp->if_demux = ifp_if_demux;
	ifp->if_event = ifp_if_event;
	ifp->if_framer_legacy = ifp_if_framer;
	ifp->if_framer = ifp_if_framer_extended;
	ifp->if_add_proto = ifp_if_add_proto;
	ifp->if_del_proto = ifp_if_del_proto;
	ifp->if_check_multi = ifp_if_check_multi;

	/* wipe out interface description */
	VERIFY(ifp->if_desc.ifd_maxlen == IF_DESCSIZE);
	ifp->if_desc.ifd_len = 0;
	VERIFY(ifp->if_desc.ifd_desc != NULL);
	bzero(ifp->if_desc.ifd_desc, IF_DESCSIZE);

	/* there shouldn't be any delegation by now */
	VERIFY(ifp->if_delegated.ifp == NULL);
	VERIFY(ifp->if_delegated.type == 0);
	VERIFY(ifp->if_delegated.family == 0);
	VERIFY(ifp->if_delegated.subfamily == 0);
	VERIFY(ifp->if_delegated.expensive == 0);
	VERIFY(ifp->if_delegated.constrained == 0);

	/* QoS marking get cleared */
	ifp->if_eflags &= ~IFEF_QOSMARKING_ENABLED;
	if_set_qosmarking_mode(ifp, IFRTYPE_QOSMARKING_MODE_NONE);

	ifnet_lock_done(ifp);

	/*
	 * Detach this interface from packet filter, if enabled.
	 */
	pf_ifnet_hook(ifp, 0);

	/* Filter list should be empty */
	lck_mtx_lock_spin(&ifp->if_flt_lock);
	VERIFY(TAILQ_EMPTY(&ifp->if_flt_head));
	VERIFY(ifp->if_flt_busy == 0);
	VERIFY(ifp->if_flt_waiters == 0);
	lck_mtx_unlock(&ifp->if_flt_lock);

	/* Last chance to drain send queue */
	if_qflush(ifp, 0);

	/* Last chance to cleanup any cached route */
	lck_mtx_lock(&ifp->if_cached_route_lock);
	VERIFY(!ifp->if_fwd_cacheok);
	ROUTE_RELEASE(&ifp->if_fwd_route);
	bzero(&ifp->if_fwd_route, sizeof(ifp->if_fwd_route));
	ROUTE_RELEASE(&ifp->if_src_route);
	bzero(&ifp->if_src_route, sizeof(ifp->if_src_route));
	ROUTE_RELEASE(&ifp->if_src_route6);
	bzero(&ifp->if_src_route6, sizeof(ifp->if_src_route6));
	lck_mtx_unlock(&ifp->if_cached_route_lock);

	VERIFY(ifp->if_data_threshold == 0);
	VERIFY(ifp->if_dt_tcall != NULL);
	VERIFY(!thread_call_isactive(ifp->if_dt_tcall));

	ifnet_llreach_ifdetach(ifp);

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHED, NULL, 0);

	/*
	 * Finally, mark this ifnet as detached.
	 */
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	if (!(ifp->if_refflags & IFRF_DETACHING)) {
		panic("%s: flags mismatch (detaching not set) ifp=%p",
		    __func__, ifp);
		/* NOTREACHED */
	}
	ifp->if_refflags &= ~IFRF_DETACHING;
	lck_mtx_unlock(&ifp->if_ref_lock);
	if (if_free != NULL) {
		if_free(ifp);
	}

	if (dlil_verbose) {
		DLIL_PRINTF("%s: detached\n", if_name(ifp));
	}

	/* Release reference held during ifnet attach */
	ifnet_release(ifp);
}
static errno_t
ifp_if_output(struct ifnet *ifp, struct mbuf *m)
{
#pragma unused(ifp)
	m_freem_list(m);
	return 0;
}

static void
ifp_if_start(struct ifnet *ifp)
{
#pragma unused(ifp)
}

static errno_t
ifp_if_input(struct ifnet *ifp, struct mbuf *m_head,
    struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
    boolean_t poll, struct thread *tp)
{
#pragma unused(ifp, m_tail, s, poll, tp)
	m_freem_list(m_head);
	return ENXIO;
}

static void
ifp_if_input_poll(struct ifnet *ifp, u_int32_t flags, u_int32_t max_cnt,
    struct mbuf **m_head, struct mbuf **m_tail, u_int32_t *cnt, u_int32_t *len)
{
#pragma unused(ifp, flags, max_cnt)
	if (m_head != NULL) {
		*m_head = NULL;
	}
	if (m_tail != NULL) {
		*m_tail = NULL;
	}
	if (cnt != NULL) {
		*cnt = 0;
	}
	if (len != NULL) {
		*len = 0;
	}
}

static errno_t
ifp_if_ctl(struct ifnet *ifp, ifnet_ctl_cmd_t cmd, u_int32_t arglen, void *arg)
{
#pragma unused(ifp, cmd, arglen, arg)
	return EOPNOTSUPP;
}

static errno_t
ifp_if_demux(struct ifnet *ifp, struct mbuf *m, char *fh, protocol_family_t *pf)
{
#pragma unused(ifp, fh, pf)
	m_freem(m);
	return EJUSTRETURN;
}

static errno_t
ifp_if_add_proto(struct ifnet *ifp, protocol_family_t pf,
    const struct ifnet_demux_desc *da, u_int32_t dc)
{
#pragma unused(ifp, pf, da, dc)
	return EINVAL;
}

static errno_t
ifp_if_del_proto(struct ifnet *ifp, protocol_family_t pf)
{
#pragma unused(ifp, pf)
	return EINVAL;
}

static errno_t
ifp_if_check_multi(struct ifnet *ifp, const struct sockaddr *sa)
{
#pragma unused(ifp, sa)
	return EOPNOTSUPP;
}

#if CONFIG_EMBEDDED
static errno_t
ifp_if_framer(struct ifnet *ifp, struct mbuf **m,
    const struct sockaddr *sa, const char *ll, const char *t,
    u_int32_t *pre, u_int32_t *post)
#else
static errno_t
ifp_if_framer(struct ifnet *ifp, struct mbuf **m,
    const struct sockaddr *sa, const char *ll, const char *t)
#endif /* !CONFIG_EMBEDDED */
{
#pragma unused(ifp, m, sa, ll, t)
#if CONFIG_EMBEDDED
	return ifp_if_framer_extended(ifp, m, sa, ll, t, pre, post);
#else
	return ifp_if_framer_extended(ifp, m, sa, ll, t, NULL, NULL);
#endif /* !CONFIG_EMBEDDED */
}

static errno_t
ifp_if_framer_extended(struct ifnet *ifp, struct mbuf **m,
    const struct sockaddr *sa, const char *ll, const char *t,
    u_int32_t *pre, u_int32_t *post)
{
#pragma unused(ifp, sa, ll, t)
	m_freem(*m);
	*m = NULL;

	if (pre != NULL) {
		*pre = 0;
	}
	if (post != NULL) {
		*post = 0;
	}

	return EJUSTRETURN;
}

static errno_t
ifp_if_ioctl(struct ifnet *ifp, unsigned long cmd, void *arg)
{
#pragma unused(ifp, cmd, arg)
	return EOPNOTSUPP;
}

static errno_t
ifp_if_set_bpf_tap(struct ifnet *ifp, bpf_tap_mode tm, bpf_packet_func f)
{
#pragma unused(ifp, tm, f)
	/* XXX not sure what to do here */
	return 0;
}

static void
ifp_if_free(struct ifnet *ifp)
{
#pragma unused(ifp)
}

static void
ifp_if_event(struct ifnet *ifp, const struct kev_msg *e)
{
#pragma unused(ifp, e)
}
int
dlil_if_acquire(u_int32_t family, const void *uniqueid,
    size_t uniqueid_len, const char *ifxname, struct ifnet **ifp)
{
	struct ifnet *ifp1 = NULL;
	struct dlil_ifnet *dlifp1 = NULL;
	struct dlil_ifnet *dlifp1_saved = NULL;
	void *buf, *base, **pbuf;
	int ret = 0;

	VERIFY(*ifp == NULL);
	dlil_if_lock();
	/*
	 * We absolutely can't have an interface with the same name
	 * in use by another interface.
	 * To make sure of that, the list has to be traversed completely.
	 */
	TAILQ_FOREACH(dlifp1, &dlil_ifnet_head, dl_if_link) {
		ifp1 = (struct ifnet *)dlifp1;

		if (ifp1->if_family != family) {
			continue;
		}

		/*
		 * If interface is in use, return EBUSY if either unique id
		 * or interface extended names are the same
		 */
		lck_mtx_lock(&dlifp1->dl_if_lock);
		if (strncmp(ifxname, ifp1->if_xname, IFXNAMSIZ) == 0) {
			if (dlifp1->dl_if_flags & DLIF_INUSE) {
				lck_mtx_unlock(&dlifp1->dl_if_lock);
				ret = EBUSY;
				goto end;
			}
		}

		if (uniqueid_len) {
			if (uniqueid_len == dlifp1->dl_if_uniqueid_len &&
			    bcmp(uniqueid, dlifp1->dl_if_uniqueid, uniqueid_len) == 0) {
				if (dlifp1->dl_if_flags & DLIF_INUSE) {
					lck_mtx_unlock(&dlifp1->dl_if_lock);
					ret = EBUSY;
					goto end;
				} else {
					/* Cache the first interface that can be recycled */
					if (*ifp == NULL) {
						*ifp = ifp1;
						dlifp1_saved = dlifp1;
					}
					/*
					 * XXX Do not break or jump to end as we have to traverse
					 * the whole list to ensure there are no name collisions
					 */
				}
			}
		}
		lck_mtx_unlock(&dlifp1->dl_if_lock);
	}

	/* If there's an interface that can be recycled, use that */
	if (*ifp != NULL) {
		if (dlifp1_saved != NULL) {
			lck_mtx_lock(&dlifp1_saved->dl_if_lock);
			dlifp1_saved->dl_if_flags |= (DLIF_INUSE | DLIF_REUSE);
			lck_mtx_unlock(&dlifp1_saved->dl_if_lock);
			dlifp1_saved = NULL;
		}
		goto end;
	}

	/* no interface found, allocate a new one */
	buf = zalloc(dlif_zone);
	if (buf == NULL) {
		ret = ENOMEM;
		goto end;
	}
	bzero(buf, dlif_bufsize);

	/* Get the 64-bit aligned base address for this object */
	base = (void *)P2ROUNDUP((intptr_t)buf + sizeof(u_int64_t),
	    sizeof(u_int64_t));
	VERIFY(((intptr_t)base + dlif_size) <= ((intptr_t)buf + dlif_bufsize));

	/*
	 * Wind back a pointer size from the aligned base and
	 * save the original address so we can free it later.
	 */
	pbuf = (void **)((intptr_t)base - sizeof(void *));
	*pbuf = buf;
	dlifp1 = base;

	if (uniqueid_len) {
		MALLOC(dlifp1->dl_if_uniqueid, void *, uniqueid_len,
		    M_NKE, M_WAITOK);
		if (dlifp1->dl_if_uniqueid == NULL) {
			zfree(dlif_zone, buf);
			ret = ENOMEM;
			goto end;
		}
		bcopy(uniqueid, dlifp1->dl_if_uniqueid, uniqueid_len);
		dlifp1->dl_if_uniqueid_len = uniqueid_len;
	}

	ifp1 = (struct ifnet *)dlifp1;
	dlifp1->dl_if_flags = DLIF_INUSE;
	if (ifnet_debug) {
		dlifp1->dl_if_flags |= DLIF_DEBUG;
		dlifp1->dl_if_trace = dlil_if_trace;
	}
	ifp1->if_name = dlifp1->dl_if_namestorage;
	ifp1->if_xname = dlifp1->dl_if_xnamestorage;

	/* initialize interface description */
	ifp1->if_desc.ifd_maxlen = IF_DESCSIZE;
	ifp1->if_desc.ifd_len = 0;
	ifp1->if_desc.ifd_desc = dlifp1->dl_if_descstorage;

	mac_ifnet_label_init(ifp1);

	if ((ret = dlil_alloc_local_stats(ifp1)) != 0) {
		DLIL_PRINTF("%s: failed to allocate if local stats, "
		    "error: %d\n", __func__, ret);
		/* This probably shouldn't be fatal */
		ret = 0;
	}

	lck_mtx_init(&dlifp1->dl_if_lock, ifnet_lock_group, ifnet_lock_attr);
	lck_rw_init(&ifp1->if_lock, ifnet_lock_group, ifnet_lock_attr);
	lck_mtx_init(&ifp1->if_ref_lock, ifnet_lock_group, ifnet_lock_attr);
	lck_mtx_init(&ifp1->if_flt_lock, ifnet_lock_group, ifnet_lock_attr);
	lck_mtx_init(&ifp1->if_addrconfig_lock, ifnet_lock_group,
	    ifnet_lock_attr);
	lck_rw_init(&ifp1->if_llreach_lock, ifnet_lock_group, ifnet_lock_attr);
	lck_rw_init(&ifp1->if_inetdata_lock, ifnet_lock_group,
	    ifnet_lock_attr);
	ifp1->if_inetdata = NULL;
	lck_rw_init(&ifp1->if_inet6data_lock, ifnet_lock_group,
	    ifnet_lock_attr);
	ifp1->if_inet6data = NULL;
	lck_rw_init(&ifp1->if_link_status_lock, ifnet_lock_group,
	    ifnet_lock_attr);
	ifp1->if_link_status = NULL;

	/* for send data paths */
	lck_mtx_init(&ifp1->if_start_lock, ifnet_snd_lock_group,
	    ifnet_lock_attr);
	lck_mtx_init(&ifp1->if_cached_route_lock, ifnet_snd_lock_group,
	    ifnet_lock_attr);
	lck_mtx_init(&ifp1->if_snd.ifcq_lock, ifnet_snd_lock_group,
	    ifnet_lock_attr);

	/* for receive data paths */
	lck_mtx_init(&ifp1->if_poll_lock, ifnet_rcv_lock_group,
	    ifnet_lock_attr);

	/* thread call allocation is done with sleeping zalloc */
	ifp1->if_dt_tcall = thread_call_allocate_with_options(dlil_dt_tcall_fn,
	    ifp1, THREAD_CALL_PRIORITY_KERNEL, THREAD_CALL_OPTIONS_ONCE);
	if (ifp1->if_dt_tcall == NULL) {
		panic_plain("%s: couldn't create if_dt_tcall", __func__);
		/* NOTREACHED */
	}

	TAILQ_INSERT_TAIL(&dlil_ifnet_head, dlifp1, dl_if_link);

	*ifp = ifp1;

end:
	dlil_if_unlock();

	VERIFY(dlifp1 == NULL || (IS_P2ALIGNED(dlifp1, sizeof(u_int64_t)) &&
	    IS_P2ALIGNED(&ifp1->if_data, sizeof(u_int64_t))));

	return ret;
}
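/*
 * Worked example, not part of the original source, of the alignment scheme
 * used by dlil_if_acquire() above: if zalloc() were to return
 * buf = 0x...1004, the base is rounded up past buf + 8 to the next 8-byte
 * boundary (0x...1010), and the original buf pointer is stashed at
 * base - sizeof(void *) so the allocation can be freed later given only
 * the aligned object address.
 */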
__private_extern__ void
dlil_if_release(ifnet_t ifp)
{
	struct dlil_ifnet *dlifp = (struct dlil_ifnet *)ifp;

	VERIFY(OSDecrementAtomic64(&net_api_stats.nas_ifnet_alloc_count) > 0);
	if (!(ifp->if_xflags & IFXF_ALLOC_KPI)) {
		VERIFY(OSDecrementAtomic64(&net_api_stats.nas_ifnet_alloc_os_count) > 0);
	}

	ifnet_lock_exclusive(ifp);
	lck_mtx_lock(&dlifp->dl_if_lock);
	dlifp->dl_if_flags &= ~DLIF_INUSE;
	strlcpy(dlifp->dl_if_namestorage, ifp->if_name, IFNAMSIZ);
	ifp->if_name = dlifp->dl_if_namestorage;
	/* Reset external name (name + unit) */
	ifp->if_xname = dlifp->dl_if_xnamestorage;
	snprintf(__DECONST(char *, ifp->if_xname), IFXNAMSIZ,
	    "%s?", ifp->if_name);
	lck_mtx_unlock(&dlifp->dl_if_lock);
	/*
	 * We can either recycle the MAC label here or in dlil_if_acquire().
	 * It seems logical to do it here but this means that anything that
	 * still has a handle on ifp will now see it as unlabeled.
	 * Since the interface is "dead" that may be OK. Revisit later.
	 */
	mac_ifnet_label_recycle(ifp);
	ifnet_lock_done(ifp);
}
__private_extern__ void
dlil_if_lock(void)
{
	lck_mtx_lock(&dlil_ifnet_lock);
}

__private_extern__ void
dlil_if_unlock(void)
{
	lck_mtx_unlock(&dlil_ifnet_lock);
}

__private_extern__ void
dlil_if_lock_assert(void)
{
	LCK_MTX_ASSERT(&dlil_ifnet_lock, LCK_MTX_ASSERT_OWNED);
}

__private_extern__ void
dlil_proto_unplumb_all(struct ifnet *ifp)
{
	/*
	 * if_proto_hash[0-2] are for PF_INET, PF_INET6 and PF_VLAN, where
	 * each bucket contains exactly one entry; PF_VLAN does not need an
	 * explicit unplumb.
	 *
	 * if_proto_hash[3] is for other protocols; we expect anything
	 * in this bucket to respond to the DETACHING event (which would
	 * have happened by now) and do the unplumb then.
	 */
	(void) proto_unplumb(PF_INET, ifp);
	(void) proto_unplumb(PF_INET6, ifp);
}
static void
ifp_src_route_copyout(struct ifnet *ifp, struct route *dst)
{
	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
	lck_mtx_convert_spin(&ifp->if_cached_route_lock);

	route_copyout(dst, &ifp->if_src_route, sizeof(*dst));

	lck_mtx_unlock(&ifp->if_cached_route_lock);
}

static void
ifp_src_route_copyin(struct ifnet *ifp, struct route *src)
{
	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
	lck_mtx_convert_spin(&ifp->if_cached_route_lock);

	if (ifp->if_fwd_cacheok) {
		route_copyin(src, &ifp->if_src_route, sizeof(*src));
	} else {
		ROUTE_RELEASE(src);
	}
	lck_mtx_unlock(&ifp->if_cached_route_lock);
}

static void
ifp_src_route6_copyout(struct ifnet *ifp, struct route_in6 *dst)
{
	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
	lck_mtx_convert_spin(&ifp->if_cached_route_lock);

	route_copyout((struct route *)dst, (struct route *)&ifp->if_src_route6,
	    sizeof(*dst));

	lck_mtx_unlock(&ifp->if_cached_route_lock);
}

static void
ifp_src_route6_copyin(struct ifnet *ifp, struct route_in6 *src)
{
	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
	lck_mtx_convert_spin(&ifp->if_cached_route_lock);

	if (ifp->if_fwd_cacheok) {
		route_copyin((struct route *)src,
		    (struct route *)&ifp->if_src_route6, sizeof(*src));
	} else {
		ROUTE_RELEASE(src);
	}
	lck_mtx_unlock(&ifp->if_cached_route_lock);
}
static struct rtentry *
ifnet_cached_rtlookup_inet(struct ifnet *ifp, struct in_addr src_ip)
{
	struct route src_rt;
	struct sockaddr_in *dst;

	dst = (struct sockaddr_in *)(void *)(&src_rt.ro_dst);

	ifp_src_route_copyout(ifp, &src_rt);

	if (ROUTE_UNUSABLE(&src_rt) || src_ip.s_addr != dst->sin_addr.s_addr) {
		ROUTE_RELEASE(&src_rt);
		if (dst->sin_family != AF_INET) {
			bzero(&src_rt.ro_dst, sizeof(src_rt.ro_dst));
			dst->sin_len = sizeof(src_rt.ro_dst);
			dst->sin_family = AF_INET;
		}
		dst->sin_addr = src_ip;

		VERIFY(src_rt.ro_rt == NULL);
		src_rt.ro_rt = rtalloc1_scoped((struct sockaddr *)dst,
		    0, 0, ifp->if_index);

		if (src_rt.ro_rt != NULL) {
			/* retain a ref, copyin consumes one */
			struct rtentry *rte = src_rt.ro_rt;
			RT_ADDREF(rte);

			ifp_src_route_copyin(ifp, &src_rt);
			src_rt.ro_rt = rte;
		}
	}

	return src_rt.ro_rt;
}
static struct rtentry *
ifnet_cached_rtlookup_inet6(struct ifnet *ifp, struct in6_addr *src_ip6)
{
	struct route_in6 src_rt;

	ifp_src_route6_copyout(ifp, &src_rt);

	if (ROUTE_UNUSABLE(&src_rt) ||
	    !IN6_ARE_ADDR_EQUAL(src_ip6, &src_rt.ro_dst.sin6_addr)) {
		ROUTE_RELEASE(&src_rt);
		if (src_rt.ro_dst.sin6_family != AF_INET6) {
			bzero(&src_rt.ro_dst, sizeof(src_rt.ro_dst));
			src_rt.ro_dst.sin6_len = sizeof(src_rt.ro_dst);
			src_rt.ro_dst.sin6_family = AF_INET6;
		}
		src_rt.ro_dst.sin6_scope_id = in6_addr2scopeid(ifp, src_ip6);
		bcopy(src_ip6, &src_rt.ro_dst.sin6_addr,
		    sizeof(src_rt.ro_dst.sin6_addr));

		if (src_rt.ro_rt == NULL) {
			src_rt.ro_rt = rtalloc1_scoped(
				(struct sockaddr *)&src_rt.ro_dst, 0, 0,
				ifp->if_index);

			if (src_rt.ro_rt != NULL) {
				/* retain a ref, copyin consumes one */
				struct rtentry *rte = src_rt.ro_rt;
				RT_ADDREF(rte);

				ifp_src_route6_copyin(ifp, &src_rt);
				src_rt.ro_rt = rte;
			}
		}
	}

	return src_rt.ro_rt;
}
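
/*
 * Usage note for the two cached-route lookups above: the cached entry in
 * ifp->if_src_route{,6} is copied out under if_cached_route_lock, revalidated
 * against the requested source address, and refreshed via rtalloc1_scoped()
 * when stale.  The extra RT_ADDREF() before ifp_src_route{,6}_copyin() is what
 * lets the function hand a referenced rtentry back to the caller while the
 * cache keeps its own reference; the caller is expected to drop its reference
 * (rtfree()) when done.  (That caller obligation is inferred from the
 * reference counting here, not spelled out elsewhere in this excerpt.)
 */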
void
if_lqm_update(struct ifnet *ifp, int lqm, int locked)
{
	struct kev_dl_link_quality_metric_data ev_lqm_data;

	VERIFY(lqm >= IFNET_LQM_MIN && lqm <= IFNET_LQM_MAX);

	/* Normalize to edge */
	if (lqm >= 0 && lqm <= IFNET_LQM_THRESH_ABORT) {
		lqm = IFNET_LQM_THRESH_ABORT;
		atomic_bitset_32(&tcbinfo.ipi_flags,
		    INPCBINFO_HANDLE_LQM_ABORT);
		inpcb_timer_sched(&tcbinfo, INPCB_TIMER_FAST);
	} else if (lqm > IFNET_LQM_THRESH_ABORT &&
	    lqm <= IFNET_LQM_THRESH_MINIMALLY_VIABLE) {
		lqm = IFNET_LQM_THRESH_MINIMALLY_VIABLE;
	} else if (lqm > IFNET_LQM_THRESH_MINIMALLY_VIABLE &&
	    lqm <= IFNET_LQM_THRESH_POOR) {
		lqm = IFNET_LQM_THRESH_POOR;
	} else if (lqm > IFNET_LQM_THRESH_POOR &&
	    lqm <= IFNET_LQM_THRESH_GOOD) {
		lqm = IFNET_LQM_THRESH_GOOD;
	}

	/*
	 * Take the lock if needed
	 */
	if (!locked) {
		ifnet_lock_exclusive(ifp);
	}

	if (lqm == ifp->if_interface_state.lqm_state &&
	    (ifp->if_interface_state.valid_bitmask &
	    IF_INTERFACE_STATE_LQM_STATE_VALID)) {
		/*
		 * Release the lock if it was not held by the caller
		 */
		if (!locked) {
			ifnet_lock_done(ifp);
		}
		return;         /* nothing to update */
	}
	ifp->if_interface_state.valid_bitmask |=
	    IF_INTERFACE_STATE_LQM_STATE_VALID;
	ifp->if_interface_state.lqm_state = lqm;

	/*
	 * Don't want to hold the lock when issuing kernel events
	 */
	ifnet_lock_done(ifp);

	bzero(&ev_lqm_data, sizeof(ev_lqm_data));
	ev_lqm_data.link_quality_metric = lqm;

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_LINK_QUALITY_METRIC_CHANGED,
	    (struct net_event_data *)&ev_lqm_data, sizeof(ev_lqm_data));

	/*
	 * Reacquire the lock for the caller
	 */
	if (locked) {
		ifnet_lock_exclusive(ifp);
	}
}
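
/*
 * Worked example of the "normalize to edge" step above: any raw lqm in
 * [0, IFNET_LQM_THRESH_ABORT] is reported as IFNET_LQM_THRESH_ABORT (and kicks
 * the TCP fast timer so aborts happen promptly); a value in
 * (IFNET_LQM_THRESH_ABORT, IFNET_LQM_THRESH_MINIMALLY_VIABLE] becomes
 * IFNET_LQM_THRESH_MINIMALLY_VIABLE, and so on up to IFNET_LQM_THRESH_GOOD.
 * Consumers of KEV_DL_LINK_QUALITY_METRIC_CHANGED therefore only ever see the
 * discrete threshold values, never the raw metric.
 */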
static void
if_rrc_state_update(struct ifnet *ifp, unsigned int rrc_state)
{
	struct kev_dl_rrc_state kev;

	if (rrc_state == ifp->if_interface_state.rrc_state &&
	    (ifp->if_interface_state.valid_bitmask &
	    IF_INTERFACE_STATE_RRC_STATE_VALID)) {
		return;
	}

	ifp->if_interface_state.valid_bitmask |=
	    IF_INTERFACE_STATE_RRC_STATE_VALID;
	ifp->if_interface_state.rrc_state = rrc_state;

	/*
	 * Don't want to hold the lock when issuing kernel events
	 */
	ifnet_lock_done(ifp);

	bzero(&kev, sizeof(struct kev_dl_rrc_state));
	kev.rrc_state = rrc_state;

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_RRC_STATE_CHANGED,
	    (struct net_event_data *)&kev, sizeof(struct kev_dl_rrc_state));

	ifnet_lock_exclusive(ifp);
}
void
if_state_update(struct ifnet *ifp,
    struct if_interface_state *if_interface_state)
{
	u_short if_index_available = 0;

	ifnet_lock_exclusive(ifp);

	if ((ifp->if_type != IFT_CELLULAR) &&
	    (if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_RRC_STATE_VALID)) {
		ifnet_lock_done(ifp);
		return;
	}
	if ((if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_LQM_STATE_VALID) &&
	    (if_interface_state->lqm_state < IFNET_LQM_MIN ||
	    if_interface_state->lqm_state > IFNET_LQM_MAX)) {
		ifnet_lock_done(ifp);
		return;
	}
	if ((if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_RRC_STATE_VALID) &&
	    if_interface_state->rrc_state !=
	    IF_INTERFACE_STATE_RRC_STATE_IDLE &&
	    if_interface_state->rrc_state !=
	    IF_INTERFACE_STATE_RRC_STATE_CONNECTED) {
		ifnet_lock_done(ifp);
		return;
	}

	if (if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_LQM_STATE_VALID) {
		if_lqm_update(ifp, if_interface_state->lqm_state, 1);
	}
	if (if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_RRC_STATE_VALID) {
		if_rrc_state_update(ifp, if_interface_state->rrc_state);
	}
	if (if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID) {
		ifp->if_interface_state.valid_bitmask |=
		    IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
		ifp->if_interface_state.interface_availability =
		    if_interface_state->interface_availability;

		if (ifp->if_interface_state.interface_availability ==
		    IF_INTERFACE_STATE_INTERFACE_AVAILABLE) {
			os_log(OS_LOG_DEFAULT, "%s: interface %s (%u) available\n",
			    __func__, if_name(ifp), ifp->if_index);
			if_index_available = ifp->if_index;
		} else {
			os_log(OS_LOG_DEFAULT, "%s: interface %s (%u) unavailable\n",
			    __func__, if_name(ifp), ifp->if_index);
		}
	}
	ifnet_lock_done(ifp);

	/*
	 * Check if the TCP connections going on this interface should be
	 * forced to send probe packets instead of waiting for TCP timers
	 * to fire. This is done on an explicit notification such as
	 * SIOCSIFINTERFACESTATE which marks the interface as available.
	 */
	if (if_index_available > 0) {
		tcp_interface_send_probe(if_index_available);
	}
}
void
if_get_state(struct ifnet *ifp,
    struct if_interface_state *if_interface_state)
{
	ifnet_lock_shared(ifp);

	if_interface_state->valid_bitmask = 0;

	if (ifp->if_interface_state.valid_bitmask &
	    IF_INTERFACE_STATE_RRC_STATE_VALID) {
		if_interface_state->valid_bitmask |=
		    IF_INTERFACE_STATE_RRC_STATE_VALID;
		if_interface_state->rrc_state =
		    ifp->if_interface_state.rrc_state;
	}
	if (ifp->if_interface_state.valid_bitmask &
	    IF_INTERFACE_STATE_LQM_STATE_VALID) {
		if_interface_state->valid_bitmask |=
		    IF_INTERFACE_STATE_LQM_STATE_VALID;
		if_interface_state->lqm_state =
		    ifp->if_interface_state.lqm_state;
	}
	if (ifp->if_interface_state.valid_bitmask &
	    IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID) {
		if_interface_state->valid_bitmask |=
		    IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
		if_interface_state->interface_availability =
		    ifp->if_interface_state.interface_availability;
	}

	ifnet_lock_done(ifp);
}
errno_t
if_probe_connectivity(struct ifnet *ifp, u_int32_t conn_probe)
{
	ifnet_lock_exclusive(ifp);
	if (conn_probe > 1) {
		ifnet_lock_done(ifp);
		return EINVAL;
	}
	if (conn_probe == 0) {
		ifp->if_eflags &= ~IFEF_PROBE_CONNECTIVITY;
	} else {
		ifp->if_eflags |= IFEF_PROBE_CONNECTIVITY;
	}
	ifnet_lock_done(ifp);

	necp_update_all_clients();

	tcp_probe_connectivity(ifp, conn_probe);
	return 0;
}
static int
get_ether_index(int * ret_other_index)
{
	struct ifnet *ifp;
	int en0_index = 0;
	int other_en_index = 0;
	int any_ether_index = 0;
	short best_unit = 0;

	*ret_other_index = 0;
	TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
		/*
		 * find en0, or if not en0, the lowest unit en*, and if not
		 * that, any ethernet
		 */
		ifnet_lock_shared(ifp);
		if (strcmp(ifp->if_name, "en") == 0) {
			if (ifp->if_unit == 0) {
				/* found en0, we're done */
				en0_index = ifp->if_index;
				ifnet_lock_done(ifp);
				break;
			}
			if (other_en_index == 0 || ifp->if_unit < best_unit) {
				other_en_index = ifp->if_index;
				best_unit = ifp->if_unit;
			}
		} else if (ifp->if_type == IFT_ETHER && any_ether_index == 0) {
			any_ether_index = ifp->if_index;
		}
		ifnet_lock_done(ifp);
	}
	if (en0_index == 0) {
		if (other_en_index != 0) {
			*ret_other_index = other_en_index;
		} else if (any_ether_index != 0) {
			*ret_other_index = any_ether_index;
		}
	}
	return en0_index;
}
int
uuid_get_ethernet(u_int8_t *node)
{
	static int en0_index;
	struct ifnet *ifp;
	int other_index = 0;
	int the_index = 0;
	int ret;

	ifnet_head_lock_shared();
	if (en0_index == 0 || ifindex2ifnet[en0_index] == NULL) {
		en0_index = get_ether_index(&other_index);
	}
	if (en0_index != 0) {
		the_index = en0_index;
	} else if (other_index != 0) {
		the_index = other_index;
	}
	if (the_index != 0) {
		ifp = ifindex2ifnet[the_index];
		VERIFY(ifp != NULL);
		memcpy(node, IF_LLADDR(ifp), ETHER_ADDR_LEN);
		ret = 0;
	} else {
		ret = -1;
	}
	ifnet_head_done();

	return ret;
}
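
/*
 * Selection order used by get_ether_index()/uuid_get_ethernet() above:
 * en0 wins outright; failing that, the en* interface with the lowest unit
 * number; failing that, any IFT_ETHER interface.  uuid_get_ethernet() then
 * copies that interface's ETHER_ADDR_LEN link-layer address into 'node',
 * giving callers (the name suggests UUID generation) a stable hardware
 * address to derive an identifier from.
 */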
static int
sysctl_rxpoll SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	uint32_t i;
	int err;

	i = if_rxpoll;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL) {
		return err;
	}

	if (net_rxpoll == 0) {
		return ENXIO;
	}

	if_rxpoll = i;
	return err;
}

static int
sysctl_rxpoll_mode_holdtime SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	uint64_t q;
	int err;

	q = if_rxpoll_mode_holdtime;

	err = sysctl_handle_quad(oidp, &q, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL) {
		return err;
	}

	if (q < IF_RXPOLL_MODE_HOLDTIME_MIN) {
		q = IF_RXPOLL_MODE_HOLDTIME_MIN;
	}

	if_rxpoll_mode_holdtime = q;

	return err;
}

static int
sysctl_rxpoll_sample_holdtime SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	uint64_t q;
	int err;

	q = if_rxpoll_sample_holdtime;

	err = sysctl_handle_quad(oidp, &q, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL) {
		return err;
	}

	if (q < IF_RXPOLL_SAMPLETIME_MIN) {
		q = IF_RXPOLL_SAMPLETIME_MIN;
	}

	if_rxpoll_sample_holdtime = q;

	return err;
}

static int
sysctl_rxpoll_interval_time SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	uint64_t q;
	int err;

	q = if_rxpoll_interval_time;

	err = sysctl_handle_quad(oidp, &q, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL) {
		return err;
	}

	if (q < IF_RXPOLL_INTERVALTIME_MIN) {
		q = IF_RXPOLL_INTERVALTIME_MIN;
	}

	if_rxpoll_interval_time = q;

	return err;
}
static int
sysctl_rxpoll_wlowat SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	uint32_t i;
	int err;

	i = if_sysctl_rxpoll_wlowat;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL) {
		return err;
	}

	if (i == 0 || i >= if_sysctl_rxpoll_whiwat) {
		return EINVAL;
	}

	if_sysctl_rxpoll_wlowat = i;
	return err;
}

static int
sysctl_rxpoll_whiwat SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	uint32_t i;
	int err;

	i = if_sysctl_rxpoll_whiwat;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL) {
		return err;
	}

	if (i <= if_sysctl_rxpoll_wlowat) {
		return EINVAL;
	}

	if_sysctl_rxpoll_whiwat = i;
	return err;
}

static int
sysctl_sndq_maxlen SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	uint32_t i;
	int err;

	i = if_sndq_maxlen;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL) {
		return err;
	}

	if (i < IF_SNDQ_MINLEN) {
		i = IF_SNDQ_MINLEN;
	}

	if_sndq_maxlen = i;
	return err;
}

static int
sysctl_rcvq_maxlen SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	uint32_t i;
	int err;

	i = if_rcvq_maxlen;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL) {
		return err;
	}

	if (i < IF_RCVQ_MINLEN) {
		i = IF_RCVQ_MINLEN;
	}

	if_rcvq_maxlen = i;
	return err;
}
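
/*
 * Illustrative user-space sketch for the handlers above (a minimal example,
 * assuming the usual "net.link.generic.system.*" OID names; the SYSCTL_PROC
 * declarations themselves are not part of this excerpt):
 *
 *	#include <sys/sysctl.h>
 *
 *	uint32_t lowat = 8;
 *	// Raise the poll-mode low watermark; the handler rejects 0 or any
 *	// value >= the current high watermark with EINVAL.
 *	sysctlbyname("net.link.generic.system.rxpoll_wlowat",
 *	    NULL, NULL, &lowat, sizeof(lowat));
 *
 * Each handler follows the same shape: copy the current value, let
 * sysctl_handle_int()/sysctl_handle_quad() do the user copyin/copyout, then
 * clamp or validate before publishing the new value.
 */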
errno_t
dlil_node_present(struct ifnet *ifp, struct sockaddr *sa,
    int32_t rssi, int lqm, int npm, u_int8_t srvinfo[48])
{
	struct kev_dl_node_presence kev;
	struct sockaddr_dl *sdl;
	struct sockaddr_in6 *sin6;
	int ret = 0;

	VERIFY(sa->sa_family == AF_LINK || sa->sa_family == AF_INET6);

	bzero(&kev, sizeof(kev));
	sin6 = &kev.sin6_node_address;
	sdl = &kev.sdl_node_address;
	nd6_alt_node_addr_decompose(ifp, sa, sdl, sin6);
	kev.rssi = rssi;
	kev.link_quality_metric = lqm;
	kev.node_proximity_metric = npm;
	bcopy(srvinfo, kev.node_service_info, sizeof(kev.node_service_info));

	ret = nd6_alt_node_present(ifp, sin6, sdl, rssi, lqm, npm);
	if (ret == 0) {
		int err = dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_PRESENCE,
		    &kev.link_data, sizeof(kev));
		if (err != 0) {
			log(LOG_ERR, "%s: Post DL_NODE_PRESENCE failed with "
			    "error %d\n", __func__, err);
		}
	}
	return ret;
}
void
dlil_node_absent(struct ifnet *ifp, struct sockaddr *sa)
{
	struct kev_dl_node_absence kev = {};
	struct sockaddr_in6 *kev_sin6 = NULL;
	struct sockaddr_dl *kev_sdl = NULL;

	VERIFY(ifp != NULL);
	VERIFY(sa->sa_family == AF_LINK || sa->sa_family == AF_INET6);

	kev_sin6 = &kev.sin6_node_address;
	kev_sdl = &kev.sdl_node_address;

	if (sa->sa_family == AF_INET6) {
		/*
		 * If IPv6 address is given, get the link layer
		 * address from what was cached in the neighbor cache
		 */
		VERIFY(sa->sa_len <= sizeof(*kev_sin6));
		bcopy(sa, kev_sin6, sa->sa_len);
		nd6_alt_node_absent(ifp, kev_sin6, kev_sdl);
	} else {
		/*
		 * If passed address is AF_LINK type, derive the address
		 * based on the link address.
		 */
		nd6_alt_node_addr_decompose(ifp, sa, kev_sdl, kev_sin6);
		nd6_alt_node_absent(ifp, kev_sin6, NULL);
	}

	kev_sdl->sdl_type = ifp->if_type;
	kev_sdl->sdl_index = ifp->if_index;

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_ABSENCE,
	    &kev.link_data, sizeof(kev));
}
errno_t
dlil_node_present_v2(struct ifnet *ifp, struct sockaddr *sa, struct sockaddr_dl *sdl,
    int32_t rssi, int lqm, int npm, u_int8_t srvinfo[48])
{
	struct kev_dl_node_presence kev = {};
	struct sockaddr_dl *kev_sdl = NULL;
	struct sockaddr_in6 *kev_sin6 = NULL;
	int ret = 0;

	VERIFY(ifp != NULL);
	VERIFY(sa != NULL && sdl != NULL);
	VERIFY(sa->sa_family == AF_INET6 && sdl->sdl_family == AF_LINK);

	kev_sin6 = &kev.sin6_node_address;
	kev_sdl = &kev.sdl_node_address;

	VERIFY(sdl->sdl_len <= sizeof(*kev_sdl));
	bcopy(sdl, kev_sdl, sdl->sdl_len);
	kev_sdl->sdl_type = ifp->if_type;
	kev_sdl->sdl_index = ifp->if_index;

	VERIFY(sa->sa_len <= sizeof(*kev_sin6));
	bcopy(sa, kev_sin6, sa->sa_len);

	kev.rssi = rssi;
	kev.link_quality_metric = lqm;
	kev.node_proximity_metric = npm;
	bcopy(srvinfo, kev.node_service_info, sizeof(kev.node_service_info));

	ret = nd6_alt_node_present(ifp, SIN6(sa), sdl, rssi, lqm, npm);
	if (ret == 0) {
		int err = dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_PRESENCE,
		    &kev.link_data, sizeof(kev));
		if (err != 0) {
			log(LOG_ERR, "%s: Post DL_NODE_PRESENCE failed with "
			    "error %d\n", __func__, err);
		}
	}
	return ret;
}
static const void *
dlil_ifaddr_bytes(const struct sockaddr_dl *sdl, size_t *sizep,
    kauth_cred_t *credp)
{
	const u_int8_t *bytes;
	size_t size = 0;

	bytes = CONST_LLADDR(sdl);
	size = sdl->sdl_alen;

#if CONFIG_MACF
	if (dlil_lladdr_ckreq) {
		switch (sdl->sdl_type) {
		case IFT_ETHER:
		case IFT_IEEE1394:
			break;
		default:
			credp = NULL;
			break;
		}

		if (credp && mac_system_check_info(*credp, "net.link.addr")) {
			static const u_int8_t unspec[FIREWIRE_EUI64_LEN] = {
				[0] = 2
			};

			bytes = unspec;
		}
	}
#else
#pragma unused(credp)
#endif

	if (sizep != NULL) {
		*sizep = size;
	}
	return bytes;
}
void
dlil_report_issues(struct ifnet *ifp, u_int8_t modid[DLIL_MODIDLEN],
    u_int8_t info[DLIL_MODARGLEN])
{
	struct kev_dl_issues kev;
	struct timeval tv;

	VERIFY(ifp != NULL);
	VERIFY(modid != NULL);
	_CASSERT(sizeof(kev.modid) == DLIL_MODIDLEN);
	_CASSERT(sizeof(kev.info) == DLIL_MODARGLEN);

	bzero(&kev, sizeof(kev));

	microtime(&tv);
	kev.timestamp = tv.tv_sec;
	bcopy(modid, &kev.modid, DLIL_MODIDLEN);
	if (info != NULL) {
		bcopy(info, &kev.info, DLIL_MODARGLEN);
	}

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_ISSUES,
	    &kev.link_data, sizeof(kev));
}
errno_t
ifnet_getset_opportunistic(ifnet_t ifp, u_long cmd, struct ifreq *ifr,
    struct proc *p)
{
	u_int32_t level = IFNET_THROTTLE_OFF;
	errno_t result = 0;

	VERIFY(cmd == SIOCSIFOPPORTUNISTIC || cmd == SIOCGIFOPPORTUNISTIC);

	if (cmd == SIOCSIFOPPORTUNISTIC) {
		/*
		 * XXX: Use priv_check_cred() instead of root check?
		 */
		if ((result = proc_suser(p)) != 0) {
			return result;
		}

		if (ifr->ifr_opportunistic.ifo_flags ==
		    IFRIFOF_BLOCK_OPPORTUNISTIC) {
			level = IFNET_THROTTLE_OPPORTUNISTIC;
		} else if (ifr->ifr_opportunistic.ifo_flags == 0) {
			level = IFNET_THROTTLE_OFF;
		} else {
			result = EINVAL;
		}

		if (result == 0) {
			result = ifnet_set_throttle(ifp, level);
		}
	} else if ((result = ifnet_get_throttle(ifp, &level)) == 0) {
		ifr->ifr_opportunistic.ifo_flags = 0;
		if (level == IFNET_THROTTLE_OPPORTUNISTIC) {
			ifr->ifr_opportunistic.ifo_flags |=
			    IFRIFOF_BLOCK_OPPORTUNISTIC;
		}
	}

	/*
	 * Return the count of current opportunistic connections
	 * over the interface.
	 */
	if (result == 0) {
		uint32_t flags = 0;

		flags |= (cmd == SIOCSIFOPPORTUNISTIC) ?
		    INPCB_OPPORTUNISTIC_SETCMD : 0;
		flags |= (level == IFNET_THROTTLE_OPPORTUNISTIC) ?
		    INPCB_OPPORTUNISTIC_THROTTLEON : 0;
		ifr->ifr_opportunistic.ifo_inuse =
		    udp_count_opportunistic(ifp->if_index, flags) +
		    tcp_count_opportunistic(ifp->if_index, flags);
	}

	if (result == EALREADY) {
		result = 0;
	}

	return result;
}
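
/*
 * Illustrative caller-side sketch (hypothetical snippet, not from this file):
 * the SIOCSIFOPPORTUNISTIC path above is typically driven from user space as
 *
 *	struct ifreq ifr = {};
 *	strlcpy(ifr.ifr_name, "en0", sizeof(ifr.ifr_name));
 *	ifr.ifr_opportunistic.ifo_flags = IFRIFOF_BLOCK_OPPORTUNISTIC;
 *	ioctl(s, SIOCSIFOPPORTUNISTIC, &ifr);   // s: an AF_INET datagram socket
 *
 * which lands here with cmd == SIOCSIFOPPORTUNISTIC and translates the flag
 * into IFNET_THROTTLE_OPPORTUNISTIC; the reply's ifo_inuse reports how many
 * TCP/UDP flows on the interface are currently marked opportunistic.
 */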
errno_t
ifnet_get_throttle(struct ifnet *ifp, u_int32_t *level)
{
	struct ifclassq *ifq;
	int err = 0;

	if (!(ifp->if_eflags & IFEF_TXSTART)) {
		return ENXIO;
	}

	*level = IFNET_THROTTLE_OFF;

	ifq = &ifp->if_snd;
	IFCQ_LOCK(ifq);
	/* Throttling works only for IFCQ, not ALTQ instances */
	if (IFCQ_IS_ENABLED(ifq)) {
		IFCQ_GET_THROTTLE(ifq, *level, err);
	}
	IFCQ_UNLOCK(ifq);

	return err;
}

errno_t
ifnet_set_throttle(struct ifnet *ifp, u_int32_t level)
{
	struct ifclassq *ifq;
	int err = 0;

	if (!(ifp->if_eflags & IFEF_TXSTART)) {
		return ENXIO;
	}

	ifq = &ifp->if_snd;

	switch (level) {
	case IFNET_THROTTLE_OFF:
	case IFNET_THROTTLE_OPPORTUNISTIC:
		break;
	default:
		return EINVAL;
	}

	IFCQ_LOCK(ifq);
	if (IFCQ_IS_ENABLED(ifq)) {
		IFCQ_SET_THROTTLE(ifq, level, err);
	}
	IFCQ_UNLOCK(ifq);

	if (err == 0) {
		DLIL_PRINTF("%s: throttling level set to %d\n", if_name(ifp),
		    level);
		necp_update_all_clients();
		if (level == IFNET_THROTTLE_OFF) {
			ifnet_start(ifp);
		}
	}

	return err;
}
int
ifnet_getset_log(ifnet_t ifp, u_long cmd, struct ifreq *ifr,
    struct proc *p)
{
#pragma unused(p)
	int result = 0;
	uint32_t flags;
	int level, category, subcategory;

	VERIFY(cmd == SIOCSIFLOG || cmd == SIOCGIFLOG);

	if (cmd == SIOCSIFLOG) {
		if ((result = priv_check_cred(kauth_cred_get(),
		    PRIV_NET_INTERFACE_CONTROL, 0)) != 0) {
			return result;
		}
		level = ifr->ifr_log.ifl_level;
		if (level < IFNET_LOG_MIN || level > IFNET_LOG_MAX) {
			result = EINVAL;
		}
		flags = ifr->ifr_log.ifl_flags;
		if ((flags &= IFNET_LOGF_MASK) == 0) {
			result = EINVAL;
		}
		category = ifr->ifr_log.ifl_category;
		subcategory = ifr->ifr_log.ifl_subcategory;

		if (result == 0) {
			result = ifnet_set_log(ifp, level, flags,
			    category, subcategory);
		}
	} else {
		result = ifnet_get_log(ifp, &level, &flags, &category,
		    &subcategory);
		if (result == 0) {
			ifr->ifr_log.ifl_level = level;
			ifr->ifr_log.ifl_flags = flags;
			ifr->ifr_log.ifl_category = category;
			ifr->ifr_log.ifl_subcategory = subcategory;
		}
	}

	return result;
}
int
ifnet_set_log(struct ifnet *ifp, int32_t level, uint32_t flags,
    int32_t category, int32_t subcategory)
{
	int err = 0;

	VERIFY(level >= IFNET_LOG_MIN && level <= IFNET_LOG_MAX);
	VERIFY(flags & IFNET_LOGF_MASK);

	/*
	 * The logging level applies to all facilities; make sure to
	 * update them all with the most current level.
	 */
	flags |= ifp->if_log.flags;

	if (ifp->if_output_ctl != NULL) {
		struct ifnet_log_params l;

		bzero(&l, sizeof(l));
		l.level = level;
		l.flags = flags;
		l.flags &= ~IFNET_LOGF_DLIL;
		l.category = category;
		l.subcategory = subcategory;

		/* Send this request to lower layers */
		if (l.flags != 0) {
			err = ifp->if_output_ctl(ifp, IFNET_CTL_SET_LOG,
			    sizeof(l), &l);
		}
	} else if ((flags & ~IFNET_LOGF_DLIL) && ifp->if_output_ctl == NULL) {
		/*
		 * If targeted to the lower layers without an output
		 * control callback registered on the interface, just
		 * silently ignore facilities other than ours.
		 */
		flags &= IFNET_LOGF_DLIL;
		if (flags == 0 && (!(ifp->if_log.flags & IFNET_LOGF_DLIL))) {
			level = 0;
		}
	}

	if (err == 0) {
		if ((ifp->if_log.level = level) == IFNET_LOG_DEFAULT) {
			ifp->if_log.flags = 0;
		} else {
			ifp->if_log.flags |= flags;
		}

		log(LOG_INFO, "%s: logging level set to %d flags=%b "
		    "arg=%b, category=%d subcategory=%d\n", if_name(ifp),
		    ifp->if_log.level, ifp->if_log.flags,
		    IFNET_LOGF_BITS, flags, IFNET_LOGF_BITS,
		    category, subcategory);
	}

	return err;
}
int
ifnet_get_log(struct ifnet *ifp, int32_t *level, uint32_t *flags,
    int32_t *category, int32_t *subcategory)
{
	if (level != NULL) {
		*level = ifp->if_log.level;
	}
	if (flags != NULL) {
		*flags = ifp->if_log.flags;
	}
	if (category != NULL) {
		*category = ifp->if_log.category;
	}
	if (subcategory != NULL) {
		*subcategory = ifp->if_log.subcategory;
	}

	return 0;
}
errno_t
ifnet_notify_address(struct ifnet *ifp, int af)
{
	struct ifnet_notify_address_params na;

	(void) pf_ifaddr_hook(ifp);

	if (ifp->if_output_ctl == NULL) {
		return EOPNOTSUPP;
	}

	bzero(&na, sizeof(na));
	na.address_family = af;

	return ifp->if_output_ctl(ifp, IFNET_CTL_NOTIFY_ADDRESS,
	    sizeof(na), &na);
}
errno_t
ifnet_flowid(struct ifnet *ifp, uint32_t *flowid)
{
	if (ifp == NULL || flowid == NULL) {
		return EINVAL;
	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    !IF_FULLY_ATTACHED(ifp)) {
		return ENXIO;
	}

	*flowid = ifp->if_flowhash;

	return 0;
}
errno_t
ifnet_disable_output(struct ifnet *ifp)
{
	int err;

	if (ifp == NULL) {
		return EINVAL;
	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    !IF_FULLY_ATTACHED(ifp)) {
		return ENXIO;
	}

	if ((err = ifnet_fc_add(ifp)) == 0) {
		lck_mtx_lock_spin(&ifp->if_start_lock);
		ifp->if_start_flags |= IFSF_FLOW_CONTROLLED;
		lck_mtx_unlock(&ifp->if_start_lock);
	}
	return err;
}

errno_t
ifnet_enable_output(struct ifnet *ifp)
{
	if (ifp == NULL) {
		return EINVAL;
	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    !IF_FULLY_ATTACHED(ifp)) {
		return ENXIO;
	}

	ifnet_start_common(ifp, TRUE);
	return 0;
}
void
ifnet_flowadv(uint32_t flowhash)
{
	struct ifnet_fc_entry *ifce;
	struct ifnet *ifp;

	ifce = ifnet_fc_get(flowhash);
	if (ifce == NULL) {
		return;
	}

	VERIFY(ifce->ifce_ifp != NULL);
	ifp = ifce->ifce_ifp;

	/* flow hash gets recalculated per attach, so check */
	if (ifnet_is_attached(ifp, 1)) {
		if (ifp->if_flowhash == flowhash) {
			(void) ifnet_enable_output(ifp);
		}
		ifnet_decr_iorefcnt(ifp);
	}
	ifnet_fc_entry_free(ifce);
}
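
/*
 * Flow-advisory round trip, as implemented by the helpers around this
 * function: ifnet_disable_output() -> ifnet_fc_add() records the interface's
 * flow hash in ifnet_fc_tree and marks the send path IFSF_FLOW_CONTROLLED;
 * when the driver/classq layer later signals the same 32-bit hash through
 * ifnet_flowadv(), ifnet_fc_get() removes the entry and, provided the hash
 * still matches ifp->if_flowhash (it is recomputed on every attach),
 * ifnet_enable_output() restarts transmission.
 */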
/*
 * Function to compare ifnet_fc_entries in ifnet flow control tree
 */
static inline int
ifce_cmp(const struct ifnet_fc_entry *fc1, const struct ifnet_fc_entry *fc2)
{
	return fc1->ifce_flowhash - fc2->ifce_flowhash;
}
static int
ifnet_fc_add(struct ifnet *ifp)
{
	struct ifnet_fc_entry keyfc, *ifce;
	uint32_t flowhash;

	VERIFY(ifp != NULL && (ifp->if_eflags & IFEF_TXSTART));
	VERIFY(ifp->if_flowhash != 0);
	flowhash = ifp->if_flowhash;

	bzero(&keyfc, sizeof(keyfc));
	keyfc.ifce_flowhash = flowhash;

	lck_mtx_lock_spin(&ifnet_fc_lock);
	ifce = RB_FIND(ifnet_fc_tree, &ifnet_fc_tree, &keyfc);
	if (ifce != NULL && ifce->ifce_ifp == ifp) {
		/* Entry is already in ifnet_fc_tree, return */
		lck_mtx_unlock(&ifnet_fc_lock);
		return 0;
	}

	if (ifce != NULL) {
		/*
		 * There is a different fc entry with the same flow hash
		 * but different ifp pointer.  There can be a collision
		 * on flow hash but the probability is low.  Let's just
		 * avoid adding a second one when there is a collision.
		 */
		lck_mtx_unlock(&ifnet_fc_lock);
		return EAGAIN;
	}

	/* become regular mutex */
	lck_mtx_convert_spin(&ifnet_fc_lock);

	ifce = zalloc(ifnet_fc_zone);
	if (ifce == NULL) {
		/* memory allocation failed */
		lck_mtx_unlock(&ifnet_fc_lock);
		return ENOMEM;
	}
	bzero(ifce, ifnet_fc_zone_size);

	ifce->ifce_flowhash = flowhash;
	ifce->ifce_ifp = ifp;

	RB_INSERT(ifnet_fc_tree, &ifnet_fc_tree, ifce);
	lck_mtx_unlock(&ifnet_fc_lock);
	return 0;
}
static struct ifnet_fc_entry *
ifnet_fc_get(uint32_t flowhash)
{
	struct ifnet_fc_entry keyfc, *ifce;
	struct ifnet *ifp;

	bzero(&keyfc, sizeof(keyfc));
	keyfc.ifce_flowhash = flowhash;

	lck_mtx_lock_spin(&ifnet_fc_lock);
	ifce = RB_FIND(ifnet_fc_tree, &ifnet_fc_tree, &keyfc);
	if (ifce == NULL) {
		/* Entry is not present in ifnet_fc_tree, return */
		lck_mtx_unlock(&ifnet_fc_lock);
		return NULL;
	}

	RB_REMOVE(ifnet_fc_tree, &ifnet_fc_tree, ifce);

	VERIFY(ifce->ifce_ifp != NULL);
	ifp = ifce->ifce_ifp;

	/* become regular mutex */
	lck_mtx_convert_spin(&ifnet_fc_lock);

	if (!ifnet_is_attached(ifp, 0)) {
		/*
		 * This ifp is not attached or in the process of being
		 * detached; just don't process it.
		 */
		ifnet_fc_entry_free(ifce);
		ifce = NULL;
	}
	lck_mtx_unlock(&ifnet_fc_lock);

	return ifce;
}
static void
ifnet_fc_entry_free(struct ifnet_fc_entry *ifce)
{
	zfree(ifnet_fc_zone, ifce);
}
static uint32_t
ifnet_calc_flowhash(struct ifnet *ifp)
{
	struct ifnet_flowhash_key fh __attribute__((aligned(8)));
	uint32_t flowhash = 0;

	if (ifnet_flowhash_seed == 0) {
		ifnet_flowhash_seed = RandomULong();
	}

	bzero(&fh, sizeof(fh));

	(void) snprintf(fh.ifk_name, sizeof(fh.ifk_name), "%s", ifp->if_name);
	fh.ifk_unit = ifp->if_unit;
	fh.ifk_flags = ifp->if_flags;
	fh.ifk_eflags = ifp->if_eflags;
	fh.ifk_capabilities = ifp->if_capabilities;
	fh.ifk_capenable = ifp->if_capenable;
	fh.ifk_output_sched_model = ifp->if_output_sched_model;
	fh.ifk_rand1 = RandomULong();
	fh.ifk_rand2 = RandomULong();

try_again:
	flowhash = net_flowhash(&fh, sizeof(fh), ifnet_flowhash_seed);
	if (flowhash == 0) {
		/* try to get a non-zero flowhash */
		ifnet_flowhash_seed = RandomULong();
		goto try_again;
	}

	return flowhash;
}
int
ifnet_set_netsignature(struct ifnet *ifp, uint8_t family, uint8_t len,
    uint16_t flags, uint8_t *data)
{
#pragma unused(flags)
	int error = 0;

	switch (family) {
	case AF_INET:
		if_inetdata_lock_exclusive(ifp);
		if (IN_IFEXTRA(ifp) != NULL) {
			if (len == 0) {
				/* Allow clearing the signature */
				IN_IFEXTRA(ifp)->netsig_len = 0;
				bzero(IN_IFEXTRA(ifp)->netsig,
				    sizeof(IN_IFEXTRA(ifp)->netsig));
				if_inetdata_lock_done(ifp);
				break;
			} else if (len > sizeof(IN_IFEXTRA(ifp)->netsig)) {
				error = EINVAL;
				if_inetdata_lock_done(ifp);
				break;
			}
			IN_IFEXTRA(ifp)->netsig_len = len;
			bcopy(data, IN_IFEXTRA(ifp)->netsig, len);
		} else {
			error = ENOMEM;
		}
		if_inetdata_lock_done(ifp);
		break;

	case AF_INET6:
		if_inet6data_lock_exclusive(ifp);
		if (IN6_IFEXTRA(ifp) != NULL) {
			if (len == 0) {
				/* Allow clearing the signature */
				IN6_IFEXTRA(ifp)->netsig_len = 0;
				bzero(IN6_IFEXTRA(ifp)->netsig,
				    sizeof(IN6_IFEXTRA(ifp)->netsig));
				if_inet6data_lock_done(ifp);
				break;
			} else if (len > sizeof(IN6_IFEXTRA(ifp)->netsig)) {
				error = EINVAL;
				if_inet6data_lock_done(ifp);
				break;
			}
			IN6_IFEXTRA(ifp)->netsig_len = len;
			bcopy(data, IN6_IFEXTRA(ifp)->netsig, len);
		} else {
			error = ENOMEM;
		}
		if_inet6data_lock_done(ifp);
		break;

	default:
		error = EINVAL;
		break;
	}

	return error;
}
int
ifnet_get_netsignature(struct ifnet *ifp, uint8_t family, uint8_t *len,
    uint16_t *flags, uint8_t *data)
{
	int error = 0;

	if (ifp == NULL || len == NULL || data == NULL) {
		return EINVAL;
	}

	switch (family) {
	case AF_INET:
		if_inetdata_lock_shared(ifp);
		if (IN_IFEXTRA(ifp) != NULL) {
			if (*len == 0 || *len < IN_IFEXTRA(ifp)->netsig_len) {
				error = EINVAL;
				if_inetdata_lock_done(ifp);
				break;
			}
			if ((*len = IN_IFEXTRA(ifp)->netsig_len) > 0) {
				bcopy(IN_IFEXTRA(ifp)->netsig, data, *len);
			} else {
				error = ENOENT;
			}
		} else {
			error = ENOMEM;
		}
		if_inetdata_lock_done(ifp);
		break;

	case AF_INET6:
		if_inet6data_lock_shared(ifp);
		if (IN6_IFEXTRA(ifp) != NULL) {
			if (*len == 0 || *len < IN6_IFEXTRA(ifp)->netsig_len) {
				error = EINVAL;
				if_inet6data_lock_done(ifp);
				break;
			}
			if ((*len = IN6_IFEXTRA(ifp)->netsig_len) > 0) {
				bcopy(IN6_IFEXTRA(ifp)->netsig, data, *len);
			} else {
				error = ENOENT;
			}
		} else {
			error = ENOMEM;
		}
		if_inet6data_lock_done(ifp);
		break;

	default:
		error = EINVAL;
		break;
	}

	if (error == 0 && flags != NULL) {
		*flags = 0;
	}

	return error;
}
int
ifnet_set_nat64prefix(struct ifnet *ifp, struct ipv6_prefix *prefixes)
{
	int i, error = 0, one_set = 0;

	if_inet6data_lock_exclusive(ifp);

	if (IN6_IFEXTRA(ifp) == NULL) {
		error = ENOMEM;
		goto out;
	}

	for (i = 0; i < NAT64_MAX_NUM_PREFIXES; i++) {
		uint32_t prefix_len =
		    prefixes[i].prefix_len;
		struct in6_addr *prefix =
		    &prefixes[i].ipv6_prefix;

		if (prefix_len == 0) {
			clat_log0((LOG_DEBUG,
			    "NAT64 prefixes purged from Interface %s\n",
			    if_name(ifp)));
			/* Allow clearing the signature */
			IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len = 0;
			bzero(&IN6_IFEXTRA(ifp)->nat64_prefixes[i].ipv6_prefix,
			    sizeof(struct in6_addr));
			continue;
		} else if (prefix_len != NAT64_PREFIX_LEN_32 &&
		    prefix_len != NAT64_PREFIX_LEN_40 &&
		    prefix_len != NAT64_PREFIX_LEN_48 &&
		    prefix_len != NAT64_PREFIX_LEN_56 &&
		    prefix_len != NAT64_PREFIX_LEN_64 &&
		    prefix_len != NAT64_PREFIX_LEN_96) {
			clat_log0((LOG_DEBUG,
			    "NAT64 prefixlen is incorrect %d\n", prefix_len));
			error = EINVAL;
			goto out;
		}

		if (IN6_IS_SCOPE_EMBED(prefix)) {
			clat_log0((LOG_DEBUG,
			    "NAT64 prefix has interface/link local scope.\n"));
			error = EINVAL;
			goto out;
		}

		IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len = prefix_len;
		bcopy(prefix, &IN6_IFEXTRA(ifp)->nat64_prefixes[i].ipv6_prefix,
		    sizeof(struct in6_addr));
		clat_log0((LOG_DEBUG,
		    "NAT64 prefix set to %s with prefixlen: %d\n",
		    ip6_sprintf(prefix), prefix_len));
		one_set = 1;
	}

out:
	if_inet6data_lock_done(ifp);

	if (error == 0 && one_set != 0) {
		necp_update_all_clients();
	}

	return error;
}
int
ifnet_get_nat64prefix(struct ifnet *ifp, struct ipv6_prefix *prefixes)
{
	int i, found_one = 0, error = 0;

	if (ifp == NULL) {
		return EINVAL;
	}

	if_inet6data_lock_shared(ifp);

	if (IN6_IFEXTRA(ifp) == NULL) {
		error = ENOMEM;
		goto out;
	}

	for (i = 0; i < NAT64_MAX_NUM_PREFIXES; i++) {
		if (IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len != 0) {
			found_one++;
		}
	}

	if (found_one == 0) {
		error = ENOENT;
		goto out;
	}

	if (prefixes != NULL) {
		bcopy(IN6_IFEXTRA(ifp)->nat64_prefixes, prefixes,
		    sizeof(IN6_IFEXTRA(ifp)->nat64_prefixes));
	}

out:
	if_inet6data_lock_done(ifp);

	return error;
}
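
/*
 * Example for the prefix validation above: the well-known NAT64 prefix
 * 64:ff9b::/96 is accepted (prefix_len == NAT64_PREFIX_LEN_96 and the prefix
 * carries no embedded scope), whereas a link-local-scoped prefix such as one
 * under fe80::/10 is rejected by the IN6_IS_SCOPE_EMBED() check, and a zero
 * prefix_len simply clears that slot.
 */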
static void
dlil_output_cksum_dbg(struct ifnet *ifp, struct mbuf *m, uint32_t hoff,
    protocol_family_t pf)
{
	uint32_t did_sw;

	if (!(hwcksum_dbg_mode & HWCKSUM_DBG_FINALIZE_FORCED) ||
	    (m->m_pkthdr.csum_flags & (CSUM_TSO_IPV4 | CSUM_TSO_IPV6))) {
		return;
	}

	switch (pf) {
	case PF_INET:
		did_sw = in_finalize_cksum(m, hoff, m->m_pkthdr.csum_flags);
		if (did_sw & CSUM_DELAY_IP) {
			hwcksum_dbg_finalized_hdr++;
		}
		if (did_sw & CSUM_DELAY_DATA) {
			hwcksum_dbg_finalized_data++;
		}
		break;
	case PF_INET6:
		/*
		 * Checksum offload should not have been enabled when
		 * extension headers exist; that also means that we
		 * cannot force-finalize packets with extension headers.
		 * Indicate to the callee should it skip such case by
		 * setting optlen to -1.
		 */
		did_sw = in6_finalize_cksum(m, hoff, -1, -1,
		    m->m_pkthdr.csum_flags);
		if (did_sw & CSUM_DELAY_IPV6_DATA) {
			hwcksum_dbg_finalized_data++;
		}
		break;
	default:
		break;
	}
}
static void
dlil_input_cksum_dbg(struct ifnet *ifp, struct mbuf *m, char *frame_header,
    protocol_family_t pf)
{
	uint16_t sum = 0;
	uint32_t hlen;

	if (frame_header == NULL ||
	    frame_header < (char *)mbuf_datastart(m) ||
	    frame_header > (char *)m->m_data) {
		DLIL_PRINTF("%s: frame header pointer 0x%llx out of range "
		    "[0x%llx,0x%llx] for mbuf 0x%llx\n", if_name(ifp),
		    (uint64_t)VM_KERNEL_ADDRPERM(frame_header),
		    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_datastart(m)),
		    (uint64_t)VM_KERNEL_ADDRPERM(m->m_data),
		    (uint64_t)VM_KERNEL_ADDRPERM(m));
		return;
	}
	hlen = (m->m_data - frame_header);

	switch (pf) {
	case PF_INET:
	case PF_INET6:
		break;
	default:
		return;
	}

	/*
	 * Force partial checksum offload; useful to simulate cases
	 * where the hardware does not support partial checksum offload,
	 * in order to validate correctness throughout the layers above.
	 */
	if (hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED) {
		uint32_t foff = hwcksum_dbg_partial_rxoff_forced;

		if (foff > (uint32_t)m->m_pkthdr.len) {
			return;
		}

		m->m_pkthdr.csum_flags &= ~CSUM_RX_FLAGS;

		/* Compute 16-bit 1's complement sum from forced offset */
		sum = m_sum16(m, foff, (m->m_pkthdr.len - foff));

		m->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PARTIAL);
		m->m_pkthdr.csum_rx_val = sum;
		m->m_pkthdr.csum_rx_start = (foff + hlen);

		hwcksum_dbg_partial_forced++;
		hwcksum_dbg_partial_forced_bytes += m->m_pkthdr.len;
	}

	/*
	 * Partial checksum offload verification (and adjustment);
	 * useful to validate and test cases where the hardware
	 * supports partial checksum offload.
	 */
	if ((m->m_pkthdr.csum_flags &
	    (CSUM_DATA_VALID | CSUM_PARTIAL | CSUM_PSEUDO_HDR)) ==
	    (CSUM_DATA_VALID | CSUM_PARTIAL)) {
		uint32_t rxoff;

		/* Start offset must begin after frame header */
		rxoff = m->m_pkthdr.csum_rx_start;
		if (hlen > rxoff) {
			hwcksum_dbg_bad_rxoff++;

			if (dlil_verbose) {
				DLIL_PRINTF("%s: partial cksum start offset %d "
				    "is less than frame header length %d for "
				    "mbuf 0x%llx\n", if_name(ifp), rxoff, hlen,
				    (uint64_t)VM_KERNEL_ADDRPERM(m));
			}
			return;
		}

		if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED)) {
			/*
			 * Compute the expected 16-bit 1's complement sum;
			 * skip this if we've already computed it above
			 * when partial checksum offload is forced.
			 */
			sum = m_sum16(m, rxoff, (m->m_pkthdr.len - rxoff));

			/* Hardware or driver is buggy */
			if (sum != m->m_pkthdr.csum_rx_val) {
				hwcksum_dbg_bad_cksum++;

				if (dlil_verbose) {
					DLIL_PRINTF("%s: bad partial cksum value "
					    "0x%x (expected 0x%x) for mbuf "
					    "0x%llx [rx_start %d]\n",
					    if_name(ifp),
					    m->m_pkthdr.csum_rx_val, sum,
					    (uint64_t)VM_KERNEL_ADDRPERM(m),
					    m->m_pkthdr.csum_rx_start);
				}
				return;
			}
		}
		hwcksum_dbg_verified++;

		/*
		 * This code allows us to emulate various hardware
		 * implementations that perform the 16-bit 1's complement
		 * sum beginning at various start offset values.
		 */
		if (hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_RXOFF_ADJ) {
			uint32_t aoff = hwcksum_dbg_partial_rxoff_adj;

			if (aoff == rxoff || aoff > (uint32_t)m->m_pkthdr.len) {
				return;
			}

			sum = m_adj_sum16(m, rxoff, aoff,
			    m_pktlen(m) - aoff, sum);

			m->m_pkthdr.csum_rx_val = sum;
			m->m_pkthdr.csum_rx_start = (aoff + hlen);

			hwcksum_dbg_adjusted++;
		}
	}
}
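
/*
 * Summary of the hwcksum_dbg_mode bits exercised above:
 *  - HWCKSUM_DBG_FINALIZE_FORCED: finalize delayed checksums in software on
 *    output (dlil_output_cksum_dbg) as if no offload existed.
 *  - HWCKSUM_DBG_PARTIAL_FORCED: on input, discard any hardware result and
 *    compute a fresh 16-bit 1's complement sum from a forced start offset.
 *  - HWCKSUM_DBG_PARTIAL_RXOFF_ADJ: re-anchor a partial sum at a different
 *    start offset with m_adj_sum16(), emulating hardware that sums from an
 *    offset other than the one the stack expects.
 * The hwcksum_dbg_* counters record how often each path fired or found a
 * mismatch, and are what the corresponding sysctl handlers below gate on.
 */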
static int
sysctl_hwcksum_dbg_mode SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	u_int32_t i;
	int err;

	i = hwcksum_dbg_mode;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL) {
		return err;
	}

	if (hwcksum_dbg == 0) {
		return ENODEV;
	}

	if ((i & ~HWCKSUM_DBG_MASK) != 0) {
		return EINVAL;
	}

	hwcksum_dbg_mode = (i & HWCKSUM_DBG_MASK);

	return err;
}

static int
sysctl_hwcksum_dbg_partial_rxoff_forced SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	u_int32_t i;
	int err;

	i = hwcksum_dbg_partial_rxoff_forced;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL) {
		return err;
	}

	if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED)) {
		return ENODEV;
	}

	hwcksum_dbg_partial_rxoff_forced = i;

	return err;
}

static int
sysctl_hwcksum_dbg_partial_rxoff_adj SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	u_int32_t i;
	int err;

	i = hwcksum_dbg_partial_rxoff_adj;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL) {
		return err;
	}

	if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_RXOFF_ADJ)) {
		return ENODEV;
	}

	hwcksum_dbg_partial_rxoff_adj = i;

	return err;
}
static int
sysctl_tx_chain_len_stats SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
	int err;

	if (req->oldptr == USER_ADDR_NULL) {
	}
	if (req->newptr != USER_ADDR_NULL) {
		return EPERM;
	}
	err = SYSCTL_OUT(req, &tx_chain_len_stats,
	    sizeof(struct chain_len_stats));

	return err;
}
#if DEBUG || DEVELOPMENT
/* Blob for sum16 verification */
static uint8_t sumdata[] = {
	0x1f, 0x8b, 0x08, 0x08, 0x4c, 0xe5, 0x9a, 0x4f, 0x00, 0x03,
	0x5f, 0x00, 0x5d, 0x91, 0x41, 0x4e, 0xc4, 0x30, 0x0c, 0x45,
	0xf7, 0x9c, 0xc2, 0x07, 0x18, 0xf5, 0x0e, 0xb0, 0xe2, 0x00,
	0x48, 0x88, 0xa5, 0xdb, 0xba, 0x49, 0x34, 0x69, 0xdc, 0x71,
	0x92, 0xa9, 0xc2, 0x8a, 0x6b, 0x70, 0x3d, 0x4e, 0x82, 0x93,
	0xb4, 0x08, 0xd8, 0xc5, 0xb1, 0xfd, 0xff, 0xb3, 0xfd, 0x4c,
	0x42, 0x5f, 0x1f, 0x9f, 0x11, 0x12, 0x43, 0xb2, 0x04, 0x93,
	0xe0, 0x7b, 0x01, 0x0e, 0x14, 0x07, 0x78, 0xd1, 0x78, 0x75,
	0x71, 0x71, 0xe9, 0x08, 0x84, 0x46, 0xf2, 0xc7, 0x3b, 0x09,
	0xe7, 0xd1, 0xd3, 0x8a, 0x57, 0x92, 0x33, 0xcd, 0x39, 0xcc,
	0xb0, 0x91, 0x89, 0xe0, 0x42, 0x53, 0x8b, 0xb7, 0x8c, 0x42,
	0x60, 0xd9, 0x9f, 0x7a, 0x55, 0x19, 0x76, 0xcb, 0x10, 0x49,
	0x35, 0xac, 0x0b, 0x5a, 0x3c, 0xbb, 0x65, 0x51, 0x8c, 0x90,
	0x7c, 0x69, 0x45, 0x45, 0x81, 0xb4, 0x2b, 0x70, 0x82, 0x85,
	0x55, 0x91, 0x17, 0x90, 0xdc, 0x14, 0x1e, 0x35, 0x52, 0xdd,
	0x02, 0x16, 0xef, 0xb5, 0x40, 0x89, 0xe2, 0x46, 0x53, 0xad,
	0x93, 0x6e, 0x98, 0x30, 0xe5, 0x08, 0xb7, 0xcc, 0x03, 0xbc,
	0x71, 0x86, 0x09, 0x43, 0x0d, 0x52, 0xf5, 0xa2, 0xf5, 0xa2,
	0x56, 0x11, 0x8d, 0xa8, 0xf5, 0xee, 0x92, 0x3d, 0xfe, 0x8c,
	0x67, 0x71, 0x8b, 0x0e, 0x2d, 0x70, 0x77, 0xbe, 0xbe, 0xea,
	0xbf, 0x9a, 0x8d, 0x9c, 0x53, 0x53, 0xe5, 0xe0, 0x4b, 0x87,
	0x85, 0xd2, 0x45, 0x95, 0x30, 0xc1, 0xcc, 0xe0, 0x74, 0x54,
	0x13, 0x58, 0xe8, 0xe8, 0x79, 0xa2, 0x09, 0x73, 0xa4, 0x0e,
	0x39, 0x59, 0x0c, 0xe6, 0x9c, 0xb2, 0x4f, 0x06, 0x5b, 0x8e,
	0xcd, 0x17, 0x6c, 0x5e, 0x95, 0x4d, 0x70, 0xa2, 0x0a, 0xbf,
	0xa3, 0xcc, 0x03, 0xbc, 0x5a, 0xe7, 0x75, 0x06, 0x5e, 0x75,
	0xef, 0x58, 0x8e, 0x15, 0xd1, 0x0a, 0x18, 0xff, 0xdd, 0xe6,
	0x02, 0x3b, 0xb5, 0xb4, 0xa1, 0xe0, 0x72, 0xfc, 0xe3, 0xab,
	0x07, 0xe0, 0x4d, 0x65, 0xea, 0x92, 0xeb, 0xf2, 0x7b, 0x17,
	0x05, 0xce, 0xc6, 0xf6, 0x2b, 0xbb, 0x70, 0x3d, 0x00, 0x95,
	0xe0, 0x07, 0x52, 0x3b, 0x58, 0xfc, 0x7c, 0x69, 0x4d, 0xe9,
	0xf7, 0xa9, 0x66, 0x1e, 0x1e, 0xbe, 0x01, 0x69, 0x98, 0xfe,
	0xc8, 0x28, 0x02, 0x00, 0x00
};

/* Precomputed 16-bit 1's complement sums for various spans of the above data */
static struct {
	boolean_t init;
	uint16_t len;
	uint16_t sumr;          /* reference */
	uint16_t sumrp;         /* reference, precomputed */
} sumtbl[] = {
	{ FALSE, 0, 0, 0x0000 },
	{ FALSE, 1, 0, 0x001f },
	{ FALSE, 2, 0, 0x8b1f },
	{ FALSE, 3, 0, 0x8b27 },
	{ FALSE, 7, 0, 0x790e },
	{ FALSE, 11, 0, 0xcb6d },
	{ FALSE, 20, 0, 0x20dd },
	{ FALSE, 27, 0, 0xbabd },
	{ FALSE, 32, 0, 0xf3e8 },
	{ FALSE, 37, 0, 0x197d },
	{ FALSE, 43, 0, 0x9eae },
	{ FALSE, 64, 0, 0x4678 },
	{ FALSE, 127, 0, 0x9399 },
	{ FALSE, 256, 0, 0xd147 },
	{ FALSE, 325, 0, 0x0358 },
};
#define SUMTBL_MAX ((int)sizeof (sumtbl) / (int)sizeof (sumtbl[0]))
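
/*
 * Worked example of the precomputed sums above (a sanity check derived from
 * the first bytes of sumdata): m_sum16()/b_sum16() add the data as 16-bit
 * words in host byte order, pad a trailing odd byte with a zero high byte,
 * and fold any carry back in (end-around carry).  For len == 2 the bytes
 * 0x1f, 0x8b form the word 0x8b1f, matching the table entry; for len == 3 the
 * trailing byte 0x08 contributes 0x0008, and 0x8b1f + 0x0008 = 0x8b27, again
 * matching the table.  dlil_verify_sum16() below re-derives the reference
 * sums on DEBUG/DEVELOPMENT kernels and panics on any mismatch.
 */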
static void
dlil_verify_sum16(void)
{
	struct mbuf *m;
	uint8_t *buf;
	int n;

	/* Make sure test data plus extra room for alignment fits in cluster */
	_CASSERT((sizeof(sumdata) + (sizeof(uint64_t) * 2)) <= MCLBYTES);

	kprintf("DLIL: running SUM16 self-tests ... ");

	m = m_getcl(M_WAITOK, MT_DATA, M_PKTHDR);
	m_align(m, sizeof(sumdata) + (sizeof(uint64_t) * 2));

	buf = mtod(m, uint8_t *);               /* base address */

	for (n = 0; n < SUMTBL_MAX; n++) {
		uint16_t len = sumtbl[n].len;
		int i;

		/* Verify for all possible alignments */
		for (i = 0; i < (int)sizeof(uint64_t); i++) {
			uint16_t sum, sumr;
			uint8_t *c;

			/* Copy over test data to mbuf */
			VERIFY(len <= sizeof(sumdata));
			c = buf + i;
			bcopy(sumdata, c, len);

			/* Zero-offset test (align by data pointer) */
			m->m_data = (caddr_t)c;
			m->m_len = len;
			sum = m_sum16(m, 0, len);

			if (!sumtbl[n].init) {
				sumr = in_cksum_mbuf_ref(m, len, 0, 0);
				sumtbl[n].sumr = sumr;
				sumtbl[n].init = TRUE;
			} else {
				sumr = sumtbl[n].sumr;
			}

			/* Something is horribly broken; stop now */
			if (sumr != sumtbl[n].sumrp) {
				panic_plain("\n%s: broken in_cksum_mbuf_ref() "
				    "for len=%d align=%d sum=0x%04x "
				    "[expected=0x%04x]\n", __func__,
				    len, i, sum, sumr);
				/* NOTREACHED */
			} else if (sum != sumr) {
				panic_plain("\n%s: broken m_sum16() for len=%d "
				    "align=%d sum=0x%04x [expected=0x%04x]\n",
				    __func__, len, i, sum, sumr);
				/* NOTREACHED */
			}

			/* Alignment test by offset (fixed data pointer) */
			m->m_data = (caddr_t)buf;
			m->m_len = i + len;
			sum = m_sum16(m, i, len);

			/* Something is horribly broken; stop now */
			if (sum != sumr) {
				panic_plain("\n%s: broken m_sum16() for len=%d "
				    "offset=%d sum=0x%04x [expected=0x%04x]\n",
				    __func__, len, i, sum, sumr);
				/* NOTREACHED */
			}

			/* Simple sum16 contiguous buffer test by alignment */
			sum = b_sum16(c, len);

			/* Something is horribly broken; stop now */
			if (sum != sumr) {
				panic_plain("\n%s: broken b_sum16() for len=%d "
				    "align=%d sum=0x%04x [expected=0x%04x]\n",
				    __func__, len, i, sum, sumr);
				/* NOTREACHED */
			}
		}
	}

	m_freem(m);

	kprintf("PASSED\n");
}
#endif /* DEBUG || DEVELOPMENT */
#define CASE_STRINGIFY(x) case x: return #x

__private_extern__ const char *
dlil_kev_dl_code_str(u_int32_t event_code)
{
	switch (event_code) {
		CASE_STRINGIFY(KEV_DL_SIFFLAGS);
		CASE_STRINGIFY(KEV_DL_SIFMETRICS);
		CASE_STRINGIFY(KEV_DL_SIFMTU);
		CASE_STRINGIFY(KEV_DL_SIFPHYS);
		CASE_STRINGIFY(KEV_DL_SIFMEDIA);
		CASE_STRINGIFY(KEV_DL_SIFGENERIC);
		CASE_STRINGIFY(KEV_DL_ADDMULTI);
		CASE_STRINGIFY(KEV_DL_DELMULTI);
		CASE_STRINGIFY(KEV_DL_IF_ATTACHED);
		CASE_STRINGIFY(KEV_DL_IF_DETACHING);
		CASE_STRINGIFY(KEV_DL_IF_DETACHED);
		CASE_STRINGIFY(KEV_DL_LINK_OFF);
		CASE_STRINGIFY(KEV_DL_LINK_ON);
		CASE_STRINGIFY(KEV_DL_PROTO_ATTACHED);
		CASE_STRINGIFY(KEV_DL_PROTO_DETACHED);
		CASE_STRINGIFY(KEV_DL_LINK_ADDRESS_CHANGED);
		CASE_STRINGIFY(KEV_DL_WAKEFLAGS_CHANGED);
		CASE_STRINGIFY(KEV_DL_IF_IDLE_ROUTE_REFCNT);
		CASE_STRINGIFY(KEV_DL_IFCAP_CHANGED);
		CASE_STRINGIFY(KEV_DL_LINK_QUALITY_METRIC_CHANGED);
		CASE_STRINGIFY(KEV_DL_NODE_PRESENCE);
		CASE_STRINGIFY(KEV_DL_NODE_ABSENCE);
		CASE_STRINGIFY(KEV_DL_MASTER_ELECTED);
		CASE_STRINGIFY(KEV_DL_ISSUES);
		CASE_STRINGIFY(KEV_DL_IFDELEGATE_CHANGED);
	default:
		break;
	}
	return "";
}
static void
dlil_dt_tcall_fn(thread_call_param_t arg0, thread_call_param_t arg1)
{
#pragma unused(arg1)
	struct ifnet *ifp = arg0;

	if (ifnet_is_attached(ifp, 1)) {
		nstat_ifnet_threshold_reached(ifp->if_index);
		ifnet_decr_iorefcnt(ifp);
	}
}
void
ifnet_notify_data_threshold(struct ifnet *ifp)
{
	uint64_t bytes = (ifp->if_ibytes + ifp->if_obytes);
	uint64_t oldbytes = ifp->if_dt_bytes;

	ASSERT(ifp->if_dt_tcall != NULL);

	/*
	 * If we went over the threshold, notify NetworkStatistics.
	 * We rate-limit it based on the threshold interval value.
	 */
	if (threshold_notify && (bytes - oldbytes) > ifp->if_data_threshold &&
	    OSCompareAndSwap64(oldbytes, bytes, &ifp->if_dt_bytes) &&
	    !thread_call_isactive(ifp->if_dt_tcall)) {
		uint64_t tival = (threshold_interval * NSEC_PER_SEC);
		uint64_t now = mach_absolute_time(), deadline = now;
		uint64_t ival;

		if (tival != 0) {
			nanoseconds_to_absolutetime(tival, &ival);
			clock_deadline_for_periodic_event(ival, now, &deadline);
			(void) thread_call_enter_delayed(ifp->if_dt_tcall,
			    deadline);
		} else {
			(void) thread_call_enter(ifp->if_dt_tcall);
		}
	}
}
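
/*
 * Example of the rate limiting above: with threshold_interval expressed in
 * seconds, tival = threshold_interval * NSEC_PER_SEC is converted to absolute
 * time and used as the period for clock_deadline_for_periodic_event(), so the
 * nstat notification thread call fires at most once per interval even when
 * the byte counters keep crossing if_data_threshold; a zero interval falls
 * through to an immediate thread_call_enter().
 */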
#if (DEVELOPMENT || DEBUG)
/*
 * The sysctl variable name contains the input parameters of
 * ifnet_get_keepalive_offload_frames()
 *	ifp (interface index): name[0]
 *	frames_array_count: name[1]
 *	frame_data_offset: name[2]
 * The return length gives used_frames_count
 */
static int
sysctl_get_kao_frames SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp)
	int *name = (int *)arg1;
	u_int namelen = arg2;
	int idx;
	ifnet_t ifp = NULL;
	u_int32_t frames_array_count;
	size_t frame_data_offset;
	u_int32_t used_frames_count;
	struct ifnet_keepalive_offload_frame *frames_array = NULL;
	int error = 0;
	u_int32_t i;

	/*
	 * Only root can look at other people's TCP frames
	 */
	error = proc_suser(current_proc());
	if (error != 0) {
		goto done;
	}
	/*
	 * Validate the input parameters
	 */
	if (req->newptr != USER_ADDR_NULL) {
		error = EPERM;
		goto done;
	}
	if (namelen != 3) {
		error = EINVAL;
		goto done;
	}
	if (req->oldptr == USER_ADDR_NULL) {
		error = EINVAL;
		goto done;
	}
	if (req->oldlen == 0) {
		error = EINVAL;
		goto done;
	}
	frames_array_count = name[1];
	frame_data_offset = name[2];

	/* Make sure the passed buffer is large enough */
	if (frames_array_count * sizeof(struct ifnet_keepalive_offload_frame) >
	    req->oldlen) {
		error = ENOMEM;
		goto done;
	}

	idx = name[0];
	ifnet_head_lock_shared();
	if (!IF_INDEX_IN_RANGE(idx)) {
		ifnet_head_done();
		error = ENOENT;
		goto done;
	}
	ifp = ifindex2ifnet[idx];
	ifnet_head_done();

	frames_array = _MALLOC(frames_array_count *
	    sizeof(struct ifnet_keepalive_offload_frame), M_TEMP, M_WAITOK);
	if (frames_array == NULL) {
		error = ENOMEM;
		goto done;
	}

	error = ifnet_get_keepalive_offload_frames(ifp, frames_array,
	    frames_array_count, frame_data_offset, &used_frames_count);
	if (error != 0) {
		DLIL_PRINTF("%s: ifnet_get_keepalive_offload_frames error %d\n",
		    __func__, error);
		goto done;
	}

	for (i = 0; i < used_frames_count; i++) {
		error = SYSCTL_OUT(req, frames_array + i,
		    sizeof(struct ifnet_keepalive_offload_frame));
		if (error != 0) {
			goto done;
		}
	}
done:
	if (frames_array != NULL) {
		_FREE(frames_array, M_TEMP);
	}
	return error;
}
#endif /* DEVELOPMENT || DEBUG */
void
ifnet_update_stats_per_flow(struct ifnet_stats_per_flow *ifs,
    struct ifnet *ifp)
{
	tcp_update_stats_per_flow(ifs, ifp);
}