/*
 * Copyright (c) 1999-2017 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
 * support for mandatory and extensible security protections.  This notice
 * is included in support of clause 2.2 (b) of the Apple Public Source
 * License, Version 2.0.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/domain.h>
#include <sys/user.h>
#include <sys/random.h>
#include <sys/socketvar.h>
#include <net/if_dl.h>
#include <net/if.h>
#include <net/route.h>
#include <net/if_var.h>
#include <net/dlil.h>
#include <net/if_arp.h>
#include <net/iptap.h>
#include <net/pktap.h>
#include <sys/kern_event.h>
#include <sys/kdebug.h>
#include <sys/mcache.h>
#include <sys/syslog.h>
#include <sys/protosw.h>
#include <sys/priv.h>

#include <kern/assert.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/sched_prim.h>
#include <kern/locks.h>
#include <kern/zalloc.h>

#include <net/kpi_protocol.h>
#include <net/if_types.h>
#include <net/if_llreach.h>
#include <net/kpi_interfacefilter.h>
#include <net/classq/classq.h>
#include <net/classq/classq_sfb.h>
#include <net/flowhash.h>
#include <net/ntstat.h>
#include <net/if_llatbl.h>
#include <net/net_api_stats.h>

#include <netinet/in_var.h>
#include <netinet/igmp_var.h>
#include <netinet/ip_var.h>
#include <netinet/tcp.h>
#include <netinet/tcp_var.h>
#include <netinet/udp.h>
#include <netinet/udp_var.h>
#include <netinet/if_ether.h>
#include <netinet/in_pcb.h>
#include <netinet/in_tclass.h>

#if INET6
#include <netinet6/in6_var.h>
#include <netinet6/nd6.h>
#include <netinet6/mld6_var.h>
#include <netinet6/scope6_var.h>
#endif /* INET6 */

#include <libkern/OSAtomic.h>
#include <libkern/tree.h>

#include <dev/random/randomdev.h>
#include <machine/machine_routines.h>

#include <mach/thread_act.h>
#include <mach/sdt.h>

#if CONFIG_MACF
#include <sys/kauth.h>
#include <security/mac_framework.h>
#include <net/ethernet.h>
#include <net/firewire.h>
#endif

#if PF
#include <net/pfvar.h>
#endif /* PF */
#include <net/pktsched/pktsched.h>

#if NECP
#include <net/necp.h>
#endif /* NECP */

#define	DBG_LAYER_BEG		DLILDBG_CODE(DBG_DLIL_STATIC, 0)
#define	DBG_LAYER_END		DLILDBG_CODE(DBG_DLIL_STATIC, 2)
#define	DBG_FNC_DLIL_INPUT	DLILDBG_CODE(DBG_DLIL_STATIC, (1 << 8))
#define	DBG_FNC_DLIL_OUTPUT	DLILDBG_CODE(DBG_DLIL_STATIC, (2 << 8))
#define	DBG_FNC_DLIL_IFOUT	DLILDBG_CODE(DBG_DLIL_STATIC, (3 << 8))

#define	MAX_FRAME_TYPE_SIZE	4 /* LONGWORDS */
#define	MAX_LINKADDR		4 /* LONGWORDS */
#define	M_NKE			M_IFADDR

#if 1
#define	DLIL_PRINTF	printf
#else
#define	DLIL_PRINTF	kprintf
#endif

#define	IF_DATA_REQUIRE_ALIGNED_64(f)	\
	_CASSERT(!(offsetof(struct if_data_internal, f) % sizeof (u_int64_t)))

#define	IFNET_IF_DATA_REQUIRE_ALIGNED_64(f)	\
	_CASSERT(!(offsetof(struct ifnet, if_data.f) % sizeof (u_int64_t)))
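
/*
 * Example (for illustration): IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes) expands
 * to a compile-time assertion that offsetof(struct if_data_internal,
 * ifi_ibytes) is a multiple of sizeof (u_int64_t).  A field reshuffle that
 * breaks 64-bit alignment then fails the build here, instead of making the
 * atomic 64-bit updates on these counters subtly unsafe at run time.
 */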

/*
 * List of if_proto structures in if_proto_hash[] is protected by
 * the ifnet lock. The rest of the fields are initialized at protocol
 * attach time and never change, thus no lock required as long as
 * a reference to it is valid, via if_proto_ref().
 */
struct if_proto {
	SLIST_ENTRY(if_proto)	next_hash;
	u_int32_t		refcount;
	u_int32_t		detached;
	struct ifnet		*ifp;
	protocol_family_t	protocol_family;
	int			proto_kpi;
	union {
		struct {
			proto_media_input	input;
			proto_media_preout	pre_output;
			proto_media_event	event;
			proto_media_ioctl	ioctl;
			proto_media_detached	detached;
			proto_media_resolve_multi resolve_multi;
			proto_media_send_arp	send_arp;
		} v1;
		struct {
			proto_media_input_v2	input;
			proto_media_preout	pre_output;
			proto_media_event	event;
			proto_media_ioctl	ioctl;
			proto_media_detached	detached;
			proto_media_resolve_multi resolve_multi;
			proto_media_send_arp	send_arp;
		} v2;
	} kpi;
};
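
/*
 * Typical lifetime, as sketched by the routines further below:
 * find_attached_proto() looks an entry up under the ifnet lock and returns
 * it with a reference taken via if_proto_ref(); the caller eventually drops
 * that reference with if_proto_free(), which tears the protocol down once
 * the refcount reaches zero.
 */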

SLIST_HEAD(proto_hash_entry, if_proto);

#define	DLIL_SDLDATALEN \
	(DLIL_SDLMAXLEN - offsetof(struct sockaddr_dl, sdl_data[0]))
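
/*
 * Worked example: DLIL_SDLDATALEN is the room left for sdl_data[] once the
 * fixed header of struct sockaddr_dl is accounted for.  With a hypothetical
 * DLIL_SDLMAXLEN of 64 bytes and sdl_data starting at offset 8, that leaves
 * 56 bytes for the link-layer address and mask storage used below.
 */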

struct dlil_ifnet {
	struct ifnet	dl_if;			/* public ifnet */
	/*
	 * DLIL private fields, protected by dl_if_lock
	 */
	decl_lck_mtx_data(, dl_if_lock);
	TAILQ_ENTRY(dlil_ifnet) dl_if_link;	/* dlil_ifnet link */
	u_int32_t dl_if_flags;			/* flags (below) */
	u_int32_t dl_if_refcnt;			/* refcnt */
	void (*dl_if_trace)(struct dlil_ifnet *, int); /* ref trace callback */
	void	*dl_if_uniqueid;		/* unique interface id */
	size_t	dl_if_uniqueid_len;		/* length of the unique id */
	char	dl_if_namestorage[IFNAMSIZ];	/* interface name storage */
	char	dl_if_xnamestorage[IFXNAMSIZ];	/* external name storage */
	struct {
		struct ifaddr	ifa;		/* lladdr ifa */
		u_int8_t	asdl[DLIL_SDLMAXLEN]; /* addr storage */
		u_int8_t	msdl[DLIL_SDLMAXLEN]; /* mask storage */
	} dl_if_lladdr;
	u_int8_t dl_if_descstorage[IF_DESCSIZE]; /* desc storage */
	struct dlil_threading_info dl_if_inpstorage; /* input thread storage */
	ctrace_t	dl_if_attach;		/* attach PC stacktrace */
	ctrace_t	dl_if_detach;		/* detach PC stacktrace */
};

/* Values for dl_if_flags (private to DLIL) */
#define	DLIF_INUSE	0x1	/* DLIL ifnet recycler, ifnet in use */
#define	DLIF_REUSE	0x2	/* DLIL ifnet recycles, ifnet is not new */
#define	DLIF_DEBUG	0x4	/* has debugging info */

#define	IF_REF_TRACE_HIST_SIZE	8	/* size of ref trace history */

__private_extern__ unsigned int if_ref_trace_hist_size = IF_REF_TRACE_HIST_SIZE;

struct dlil_ifnet_dbg {
	struct dlil_ifnet	dldbg_dlif;		/* dlil_ifnet */
	u_int16_t		dldbg_if_refhold_cnt;	/* # ifnet references */
	u_int16_t		dldbg_if_refrele_cnt;	/* # ifnet releases */
	/*
	 * Circular lists of ifnet_{reference,release} callers.
	 */
	ctrace_t		dldbg_if_refhold[IF_REF_TRACE_HIST_SIZE];
	ctrace_t		dldbg_if_refrele[IF_REF_TRACE_HIST_SIZE];
};

#define	DLIL_TO_IFP(s)	(&s->dl_if)
#define	IFP_TO_DLIL(s)	((struct dlil_ifnet *)s)

struct ifnet_filter {
	TAILQ_ENTRY(ifnet_filter) filt_next;
	u_int32_t	filt_skip;
	u_int32_t	filt_flags;
	ifnet_t		filt_ifp;
	const char	*filt_name;
	void		*filt_cookie;
	protocol_family_t filt_protocol;
	iff_input_func	filt_input;
	iff_output_func	filt_output;
	iff_event_func	filt_event;
	iff_ioctl_func	filt_ioctl;
	iff_detached_func filt_detached;
};

struct proto_input_entry;

static TAILQ_HEAD(, dlil_ifnet) dlil_ifnet_head;
static lck_grp_t *dlil_lock_group;
lck_grp_t *ifnet_lock_group;
static lck_grp_t *ifnet_head_lock_group;
static lck_grp_t *ifnet_snd_lock_group;
static lck_grp_t *ifnet_rcv_lock_group;
lck_attr_t *ifnet_lock_attr;
decl_lck_rw_data(static, ifnet_head_lock);
decl_lck_mtx_data(static, dlil_ifnet_lock);
u_int32_t dlil_filter_disable_tso_count = 0;

#if IFNET_DEBUG
static unsigned int ifnet_debug = 1;	/* debugging (enabled) */
#else
static unsigned int ifnet_debug;	/* debugging (disabled) */
#endif /* !IFNET_DEBUG */

static unsigned int dlif_size;		/* size of dlil_ifnet to allocate */
static unsigned int dlif_bufsize;	/* size of dlif_size + headroom */
static struct zone *dlif_zone;		/* zone for dlil_ifnet */

#define	DLIF_ZONE_MAX		64		/* maximum elements in zone */
#define	DLIF_ZONE_NAME		"ifnet"		/* zone name */

static unsigned int dlif_filt_size;	/* size of ifnet_filter */
static struct zone *dlif_filt_zone;	/* zone for ifnet_filter */

#define	DLIF_FILT_ZONE_MAX	8		/* maximum elements in zone */
#define	DLIF_FILT_ZONE_NAME	"ifnet_filter"	/* zone name */

static unsigned int dlif_phash_size;	/* size of ifnet proto hash table */
static struct zone *dlif_phash_zone;	/* zone for ifnet proto hash table */

#define	DLIF_PHASH_ZONE_MAX	DLIF_ZONE_MAX	/* maximum elements in zone */
#define	DLIF_PHASH_ZONE_NAME	"ifnet_proto_hash" /* zone name */

static unsigned int dlif_proto_size;	/* size of if_proto */
static struct zone *dlif_proto_zone;	/* zone for if_proto */

#define	DLIF_PROTO_ZONE_MAX	(DLIF_ZONE_MAX*2)	/* maximum elements in zone */
#define	DLIF_PROTO_ZONE_NAME	"ifnet_proto"	/* zone name */

static unsigned int dlif_tcpstat_size;	/* size of tcpstat_local to allocate */
static unsigned int dlif_tcpstat_bufsize; /* size of dlif_tcpstat_size + headroom */
static struct zone *dlif_tcpstat_zone;	/* zone for tcpstat_local */

#define	DLIF_TCPSTAT_ZONE_MAX	1		/* maximum elements in zone */
#define	DLIF_TCPSTAT_ZONE_NAME	"ifnet_tcpstat"	/* zone name */

static unsigned int dlif_udpstat_size;	/* size of udpstat_local to allocate */
static unsigned int dlif_udpstat_bufsize; /* size of dlif_udpstat_size + headroom */
static struct zone *dlif_udpstat_zone;	/* zone for udpstat_local */

#define	DLIF_UDPSTAT_ZONE_MAX	1		/* maximum elements in zone */
#define	DLIF_UDPSTAT_ZONE_NAME	"ifnet_udpstat"	/* zone name */

static u_int32_t net_rtref;

static struct dlil_main_threading_info dlil_main_input_thread_info;
__private_extern__ struct dlil_threading_info *dlil_main_input_thread =
    (struct dlil_threading_info *)&dlil_main_input_thread_info;

static int dlil_event_internal(struct ifnet *ifp, struct kev_msg *msg, bool update_generation);
static int dlil_detach_filter_internal(interface_filter_t filter, int detached);
static void dlil_if_trace(struct dlil_ifnet *, int);
static void if_proto_ref(struct if_proto *);
static void if_proto_free(struct if_proto *);
static struct if_proto *find_attached_proto(struct ifnet *, u_int32_t);
static int dlil_ifp_proto_count(struct ifnet *);
static void if_flt_monitor_busy(struct ifnet *);
static void if_flt_monitor_unbusy(struct ifnet *);
static void if_flt_monitor_enter(struct ifnet *);
static void if_flt_monitor_leave(struct ifnet *);
static int dlil_interface_filters_input(struct ifnet *, struct mbuf **,
    char **, protocol_family_t);
static int dlil_interface_filters_output(struct ifnet *, struct mbuf **,
    protocol_family_t);
static struct ifaddr *dlil_alloc_lladdr(struct ifnet *,
    const struct sockaddr_dl *);
static int ifnet_lookup(struct ifnet *);
static void if_purgeaddrs(struct ifnet *);

static errno_t ifproto_media_input_v1(struct ifnet *, protocol_family_t,
    struct mbuf *, char *);
static errno_t ifproto_media_input_v2(struct ifnet *, protocol_family_t,
    struct mbuf *);
static errno_t ifproto_media_preout(struct ifnet *, protocol_family_t,
    mbuf_t *, const struct sockaddr *, void *, char *, char *);
static void ifproto_media_event(struct ifnet *, protocol_family_t,
    const struct kev_msg *);
static errno_t ifproto_media_ioctl(struct ifnet *, protocol_family_t,
    unsigned long, void *);
static errno_t ifproto_media_resolve_multi(ifnet_t, const struct sockaddr *,
    struct sockaddr_dl *, size_t);
static errno_t ifproto_media_send_arp(struct ifnet *, u_short,
    const struct sockaddr_dl *, const struct sockaddr *,
    const struct sockaddr_dl *, const struct sockaddr *);

static errno_t ifp_if_input(struct ifnet *ifp, struct mbuf *m_head,
    struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
    boolean_t poll, struct thread *tp);
static void ifp_if_input_poll(struct ifnet *, u_int32_t, u_int32_t,
    struct mbuf **, struct mbuf **, u_int32_t *, u_int32_t *);
static errno_t ifp_if_ctl(struct ifnet *, ifnet_ctl_cmd_t, u_int32_t, void *);
static errno_t ifp_if_demux(struct ifnet *, struct mbuf *, char *,
    protocol_family_t *);
static errno_t ifp_if_add_proto(struct ifnet *, protocol_family_t,
    const struct ifnet_demux_desc *, u_int32_t);
static errno_t ifp_if_del_proto(struct ifnet *, protocol_family_t);
static errno_t ifp_if_check_multi(struct ifnet *, const struct sockaddr *);
#if CONFIG_EMBEDDED
static errno_t ifp_if_framer(struct ifnet *, struct mbuf **,
    const struct sockaddr *, const char *, const char *,
    u_int32_t *, u_int32_t *);
#else
static errno_t ifp_if_framer(struct ifnet *, struct mbuf **,
    const struct sockaddr *, const char *, const char *);
#endif /* CONFIG_EMBEDDED */
static errno_t ifp_if_framer_extended(struct ifnet *, struct mbuf **,
    const struct sockaddr *, const char *, const char *,
    u_int32_t *, u_int32_t *);
static errno_t ifp_if_set_bpf_tap(struct ifnet *, bpf_tap_mode, bpf_packet_func);
static void ifp_if_free(struct ifnet *);
static void ifp_if_event(struct ifnet *, const struct kev_msg *);
static __inline void ifp_inc_traffic_class_in(struct ifnet *, struct mbuf *);
static __inline void ifp_inc_traffic_class_out(struct ifnet *, struct mbuf *);

static void dlil_main_input_thread_func(void *, wait_result_t);
static void dlil_input_thread_func(void *, wait_result_t);
static void dlil_rxpoll_input_thread_func(void *, wait_result_t);
static int dlil_create_input_thread(ifnet_t, struct dlil_threading_info *);
static void dlil_terminate_input_thread(struct dlil_threading_info *);
static void dlil_input_stats_add(const struct ifnet_stat_increment_param *,
    struct dlil_threading_info *, boolean_t);
static void dlil_input_stats_sync(struct ifnet *, struct dlil_threading_info *);
static void dlil_input_packet_list_common(struct ifnet *, struct mbuf *,
    u_int32_t, ifnet_model_t, boolean_t);
static errno_t ifnet_input_common(struct ifnet *, struct mbuf *, struct mbuf *,
    const struct ifnet_stat_increment_param *, boolean_t, boolean_t);

#if DEBUG || DEVELOPMENT
static void dlil_verify_sum16(void);
#endif /* DEBUG || DEVELOPMENT */
static void dlil_output_cksum_dbg(struct ifnet *, struct mbuf *, uint32_t,
    protocol_family_t);
static void dlil_input_cksum_dbg(struct ifnet *, struct mbuf *, char *,
    protocol_family_t);

static void ifnet_detacher_thread_func(void *, wait_result_t);
static int ifnet_detacher_thread_cont(int);
static void ifnet_detach_final(struct ifnet *);
static void ifnet_detaching_enqueue(struct ifnet *);
static struct ifnet *ifnet_detaching_dequeue(void);

static void ifnet_start_thread_fn(void *, wait_result_t);
static void ifnet_poll_thread_fn(void *, wait_result_t);
static void ifnet_poll(struct ifnet *);
static errno_t ifnet_enqueue_common(struct ifnet *, void *,
    classq_pkt_type_t, boolean_t, boolean_t *);

static void ifp_src_route_copyout(struct ifnet *, struct route *);
static void ifp_src_route_copyin(struct ifnet *, struct route *);
#if INET6
static void ifp_src_route6_copyout(struct ifnet *, struct route_in6 *);
static void ifp_src_route6_copyin(struct ifnet *, struct route_in6 *);
#endif /* INET6 */

static int sysctl_rxpoll SYSCTL_HANDLER_ARGS;
static int sysctl_rxpoll_mode_holdtime SYSCTL_HANDLER_ARGS;
static int sysctl_rxpoll_sample_holdtime SYSCTL_HANDLER_ARGS;
static int sysctl_rxpoll_interval_time SYSCTL_HANDLER_ARGS;
static int sysctl_rxpoll_wlowat SYSCTL_HANDLER_ARGS;
static int sysctl_rxpoll_whiwat SYSCTL_HANDLER_ARGS;
static int sysctl_sndq_maxlen SYSCTL_HANDLER_ARGS;
static int sysctl_rcvq_maxlen SYSCTL_HANDLER_ARGS;
static int sysctl_hwcksum_dbg_mode SYSCTL_HANDLER_ARGS;
static int sysctl_hwcksum_dbg_partial_rxoff_forced SYSCTL_HANDLER_ARGS;
static int sysctl_hwcksum_dbg_partial_rxoff_adj SYSCTL_HANDLER_ARGS;
static int sysctl_get_ports_used SYSCTL_HANDLER_ARGS;

struct chain_len_stats tx_chain_len_stats;
static int sysctl_tx_chain_len_stats SYSCTL_HANDLER_ARGS;

#if TEST_INPUT_THREAD_TERMINATION
static int sysctl_input_thread_termination_spin SYSCTL_HANDLER_ARGS;
#endif /* TEST_INPUT_THREAD_TERMINATION */

/* The following are protected by dlil_ifnet_lock */
static TAILQ_HEAD(, ifnet) ifnet_detaching_head;
static u_int32_t ifnet_detaching_cnt;
static void *ifnet_delayed_run;	/* wait channel for detaching thread */

decl_lck_mtx_data(static, ifnet_fc_lock);

static uint32_t ifnet_flowhash_seed;

struct ifnet_flowhash_key {
	char		ifk_name[IFNAMSIZ];
	uint32_t	ifk_unit;
	uint32_t	ifk_flags;
	uint32_t	ifk_eflags;
	uint32_t	ifk_capabilities;
	uint32_t	ifk_capenable;
	uint32_t	ifk_output_sched_model;
	uint32_t	ifk_rand1;
	uint32_t	ifk_rand2;
};

/* Flow control entry per interface */
struct ifnet_fc_entry {
	RB_ENTRY(ifnet_fc_entry) ifce_entry;
	u_int32_t	ifce_flowhash;
	struct ifnet	*ifce_ifp;
};

static uint32_t ifnet_calc_flowhash(struct ifnet *);
static int ifce_cmp(const struct ifnet_fc_entry *,
    const struct ifnet_fc_entry *);
static int ifnet_fc_add(struct ifnet *);
static struct ifnet_fc_entry *ifnet_fc_get(u_int32_t);
static void ifnet_fc_entry_free(struct ifnet_fc_entry *);

/* protected by ifnet_fc_lock */
RB_HEAD(ifnet_fc_tree, ifnet_fc_entry) ifnet_fc_tree;
RB_PROTOTYPE(ifnet_fc_tree, ifnet_fc_entry, ifce_entry, ifce_cmp);
RB_GENERATE(ifnet_fc_tree, ifnet_fc_entry, ifce_entry, ifce_cmp);

static unsigned int ifnet_fc_zone_size;	/* sizeof ifnet_fc_entry */
static struct zone *ifnet_fc_zone;	/* ifnet_fc_entry zone */

#define	IFNET_FC_ZONE_NAME	"ifnet_fc_zone"
#define	IFNET_FC_ZONE_MAX	32

extern void bpfdetach(struct ifnet *);
extern void proto_input_run(void);

extern uint32_t udp_count_opportunistic(unsigned int ifindex,
    u_int32_t flags);
extern uint32_t tcp_count_opportunistic(unsigned int ifindex,
    u_int32_t flags);

__private_extern__ void link_rtrequest(int, struct rtentry *, struct sockaddr *);

#if CONFIG_MACF
#ifdef CONFIG_EMBEDDED
int dlil_lladdr_ckreq = 1;
#else
int dlil_lladdr_ckreq = 0;
#endif
#endif /* CONFIG_MACF */

#if DEBUG
int dlil_verbose = 1;
#else
int dlil_verbose = 0;
#endif /* DEBUG */
#if IFNET_INPUT_SANITY_CHK
/* sanity checking of input packet lists received */
static u_int32_t dlil_input_sanity_check = 0;
#endif /* IFNET_INPUT_SANITY_CHK */
/* rate limit debug messages */
struct timespec dlil_dbgrate = { 1, 0 };

SYSCTL_DECL(_net_link_generic_system);

#if CONFIG_MACF
SYSCTL_INT(_net_link_generic_system, OID_AUTO, dlil_lladdr_ckreq,
    CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_lladdr_ckreq, 0,
    "Require MACF system info check to expose link-layer address");
#endif

SYSCTL_INT(_net_link_generic_system, OID_AUTO, dlil_verbose,
    CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_verbose, 0, "Log DLIL error messages");

#define	IF_SNDQ_MINLEN	32
u_int32_t if_sndq_maxlen = IFQ_MAXLEN;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, sndq_maxlen,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_sndq_maxlen, IFQ_MAXLEN,
    sysctl_sndq_maxlen, "I", "Default transmit queue max length");

#define	IF_RCVQ_MINLEN	32
#define	IF_RCVQ_MAXLEN	256
u_int32_t if_rcvq_maxlen = IF_RCVQ_MAXLEN;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rcvq_maxlen,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rcvq_maxlen, IFQ_MAXLEN,
    sysctl_rcvq_maxlen, "I", "Default receive queue max length");

#define	IF_RXPOLL_DECAY	2	/* ilog2 of EWMA decay rate (4) */
static u_int32_t if_rxpoll_decay = IF_RXPOLL_DECAY;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_decay,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_decay, IF_RXPOLL_DECAY,
    "ilog2 of EWMA decay rate of avg inbound packets");

#define	IF_RXPOLL_MODE_HOLDTIME_MIN	(10ULL * 1000 * 1000)	/* 10 ms */
#define	IF_RXPOLL_MODE_HOLDTIME		(1000ULL * 1000 * 1000)	/* 1 sec */
static u_int64_t if_rxpoll_mode_holdtime = IF_RXPOLL_MODE_HOLDTIME;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_freeze_time,
    CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_mode_holdtime,
    IF_RXPOLL_MODE_HOLDTIME, sysctl_rxpoll_mode_holdtime,
    "Q", "input poll mode freeze time");

#define	IF_RXPOLL_SAMPLETIME_MIN	(1ULL * 1000 * 1000)	/* 1 ms */
#define	IF_RXPOLL_SAMPLETIME		(10ULL * 1000 * 1000)	/* 10 ms */
static u_int64_t if_rxpoll_sample_holdtime = IF_RXPOLL_SAMPLETIME;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_sample_time,
    CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_sample_holdtime,
    IF_RXPOLL_SAMPLETIME, sysctl_rxpoll_sample_holdtime,
    "Q", "input poll sampling time");

#define	IF_RXPOLL_INTERVALTIME_MIN	(1ULL * 1000)		/* 1 us */
#define	IF_RXPOLL_INTERVALTIME		(1ULL * 1000 * 1000)	/* 1 ms */
static u_int64_t if_rxpoll_interval_time = IF_RXPOLL_INTERVALTIME;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_interval_time,
    CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_interval_time,
    IF_RXPOLL_INTERVALTIME, sysctl_rxpoll_interval_time,
    "Q", "input poll interval (time)");

#define	IF_RXPOLL_INTERVAL_PKTS	0	/* 0 (disabled) */
static u_int32_t if_rxpoll_interval_pkts = IF_RXPOLL_INTERVAL_PKTS;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_interval_pkts,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_interval_pkts,
    IF_RXPOLL_INTERVAL_PKTS, "input poll interval (packets)");

#define	IF_RXPOLL_WLOWAT	10
static u_int32_t if_rxpoll_wlowat = IF_RXPOLL_WLOWAT;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_wakeups_lowat,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_wlowat,
    IF_RXPOLL_WLOWAT, sysctl_rxpoll_wlowat,
    "I", "input poll wakeup low watermark");

#define	IF_RXPOLL_WHIWAT	100
static u_int32_t if_rxpoll_whiwat = IF_RXPOLL_WHIWAT;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_wakeups_hiwat,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_whiwat,
    IF_RXPOLL_WHIWAT, sysctl_rxpoll_whiwat,
    "I", "input poll wakeup high watermark");

static u_int32_t if_rxpoll_max = 0;	/* 0 (automatic) */
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_max,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_max, 0,
    "max packets per poll call");

static u_int32_t if_rxpoll = 1;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll, 0,
    sysctl_rxpoll, "I", "enable opportunistic input polling");

#if TEST_INPUT_THREAD_TERMINATION
static u_int32_t if_input_thread_termination_spin = 0;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, input_thread_termination_spin,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_input_thread_termination_spin, 0,
    sysctl_input_thread_termination_spin,
    "I", "input thread termination spin limit");
#endif /* TEST_INPUT_THREAD_TERMINATION */

static u_int32_t cur_dlil_input_threads = 0;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, dlil_input_threads,
    CTLFLAG_RD | CTLFLAG_LOCKED, &cur_dlil_input_threads, 0,
    "Current number of DLIL input threads");

#if IFNET_INPUT_SANITY_CHK
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, dlil_input_sanity_check,
    CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_input_sanity_check, 0,
    "Turn on sanity checking in DLIL input");
#endif /* IFNET_INPUT_SANITY_CHK */

static u_int32_t if_flowadv = 1;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, flow_advisory,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_flowadv, 1,
    "enable flow-advisory mechanism");

static u_int32_t if_delaybased_queue = 1;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, delaybased_queue,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_delaybased_queue, 1,
    "enable delay based dynamic queue sizing");

static uint64_t hwcksum_in_invalidated = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_in_invalidated, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_in_invalidated, "inbound packets with invalidated hardware cksum");

uint32_t hwcksum_dbg = 0;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_dbg,
    CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg, 0,
    "enable hardware cksum debugging");

u_int32_t ifnet_start_delayed = 0;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, start_delayed,
    CTLFLAG_RW | CTLFLAG_LOCKED, &ifnet_start_delayed, 0,
    "number of times start was delayed");

u_int32_t ifnet_delay_start_disabled = 0;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, start_delay_disabled,
    CTLFLAG_RW | CTLFLAG_LOCKED, &ifnet_delay_start_disabled, 0,
    "number of times start was delayed");

#define	HWCKSUM_DBG_PARTIAL_FORCED	0x1	/* forced partial checksum */
#define	HWCKSUM_DBG_PARTIAL_RXOFF_ADJ	0x2	/* adjust start offset */
#define	HWCKSUM_DBG_FINALIZE_FORCED	0x10	/* forced finalize */
#define	HWCKSUM_DBG_MASK \
	(HWCKSUM_DBG_PARTIAL_FORCED | HWCKSUM_DBG_PARTIAL_RXOFF_ADJ | \
	HWCKSUM_DBG_FINALIZE_FORCED)

static uint32_t hwcksum_dbg_mode = 0;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, hwcksum_dbg_mode,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg_mode,
    0, sysctl_hwcksum_dbg_mode, "I", "hardware cksum debugging mode");

static uint64_t hwcksum_dbg_partial_forced = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_partial_forced, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_partial_forced, "packets forced using partial cksum");

static uint64_t hwcksum_dbg_partial_forced_bytes = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_partial_forced_bytes, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_partial_forced_bytes, "bytes forced using partial cksum");

static uint32_t hwcksum_dbg_partial_rxoff_forced = 0;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_partial_rxoff_forced, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    &hwcksum_dbg_partial_rxoff_forced, 0,
    sysctl_hwcksum_dbg_partial_rxoff_forced, "I",
    "forced partial cksum rx offset");

static uint32_t hwcksum_dbg_partial_rxoff_adj = 0;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, hwcksum_dbg_partial_rxoff_adj,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg_partial_rxoff_adj,
    0, sysctl_hwcksum_dbg_partial_rxoff_adj, "I",
    "adjusted partial cksum rx offset");

static uint64_t hwcksum_dbg_verified = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_verified, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_verified, "packets verified for having good checksum");

static uint64_t hwcksum_dbg_bad_cksum = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_bad_cksum, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_bad_cksum, "packets with bad hardware calculated checksum");

static uint64_t hwcksum_dbg_bad_rxoff = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_bad_rxoff, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_bad_rxoff, "packets with invalid rxoff");

static uint64_t hwcksum_dbg_adjusted = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_adjusted, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_adjusted, "packets with rxoff adjusted");

static uint64_t hwcksum_dbg_finalized_hdr = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_finalized_hdr, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_finalized_hdr, "finalized headers");

static uint64_t hwcksum_dbg_finalized_data = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_finalized_data, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_finalized_data, "finalized payloads");

uint32_t hwcksum_tx = 1;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_tx,
    CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_tx, 0,
    "enable transmit hardware checksum offload");

uint32_t hwcksum_rx = 1;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_rx,
    CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_rx, 0,
    "enable receive hardware checksum offload");

SYSCTL_PROC(_net_link_generic_system, OID_AUTO, tx_chain_len_stats,
    CTLFLAG_RD | CTLFLAG_LOCKED, 0, 9,
    sysctl_tx_chain_len_stats, "S", "");

uint32_t tx_chain_len_count = 0;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, tx_chain_len_count,
    CTLFLAG_RW | CTLFLAG_LOCKED, &tx_chain_len_count, 0, "");

SYSCTL_NODE(_net_link_generic_system, OID_AUTO, get_ports_used,
    CTLFLAG_RD | CTLFLAG_LOCKED, sysctl_get_ports_used, "");

static uint32_t threshold_notify = 1;		/* enable/disable */
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, threshold_notify,
    CTLFLAG_RW | CTLFLAG_LOCKED, &threshold_notify, 0, "");

static uint32_t threshold_interval = 2;		/* in seconds */
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, threshold_interval,
    CTLFLAG_RW | CTLFLAG_LOCKED, &threshold_interval, 0, "");

#if (DEVELOPMENT || DEBUG)
static int sysctl_get_kao_frames SYSCTL_HANDLER_ARGS;
SYSCTL_NODE(_net_link_generic_system, OID_AUTO, get_kao_frames,
    CTLFLAG_RD | CTLFLAG_LOCKED, sysctl_get_kao_frames, "");
#endif /* DEVELOPMENT || DEBUG */

struct net_api_stats net_api_stats;
SYSCTL_STRUCT(_net, OID_AUTO, api_stats, CTLFLAG_RD|CTLFLAG_LOCKED,
    &net_api_stats, net_api_stats, "");

unsigned int net_rxpoll = 1;
unsigned int net_affinity = 1;
static kern_return_t dlil_affinity_set(struct thread *, u_int32_t);

extern u_int32_t	inject_buckets;

static	lck_grp_attr_t	*dlil_grp_attributes = NULL;
static	lck_attr_t	*dlil_lck_attributes = NULL;

/* DLIL data threshold thread call */
static void dlil_dt_tcall_fn(thread_call_param_t, thread_call_param_t);

static void dlil_mit_tcall_fn(thread_call_param_t, thread_call_param_t);

uint32_t dlil_rcv_mit_pkts_min = 5;
uint32_t dlil_rcv_mit_pkts_max = 64;
uint32_t dlil_rcv_mit_interval = (500 * 1000);

#if (DEVELOPMENT || DEBUG)
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rcv_mit_pkts_min,
    CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_rcv_mit_pkts_min, 0, "");
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rcv_mit_pkts_max,
    CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_rcv_mit_pkts_max, 0, "");
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rcv_mit_interval,
    CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_rcv_mit_interval, 0, "");
#endif /* DEVELOPMENT || DEBUG */

#define	DLIL_INPUT_CHECK(m, ifp) { \
	struct ifnet *_rcvif = mbuf_pkthdr_rcvif(m); \
	if (_rcvif == NULL || (ifp != lo_ifp && _rcvif != ifp) || \
	    !(mbuf_flags(m) & MBUF_PKTHDR)) { \
		panic_plain("%s: invalid mbuf %p\n", __func__, m); \
		/* NOTREACHED */ \
	} \
}

#define	DLIL_EWMA(old, new, decay) do { \
	u_int32_t _avg; \
	if ((_avg = (old)) > 0) \
		_avg = (((_avg << (decay)) - _avg) + (new)) >> (decay); \
	else \
		_avg = (new); \
	(old) = _avg; \
} while (0)
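
/*
 * Worked example: with decay = IF_RXPOLL_DECAY (2), the update above
 * reduces to
 *
 *	avg' = ((avg << 2) - avg + new) >> 2 = (3 * avg + new) / 4
 *
 * i.e. the previous average keeps 3/4 of its weight and the new sample
 * contributes 1/4; when avg is still 0 the macro seeds it with the first
 * sample directly.
 */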

#define	MBPS	(1ULL * 1000 * 1000)
#define	GBPS	(MBPS * 1000)

struct rxpoll_time_tbl {
	u_int64_t	speed;		/* downlink speed */
	u_int32_t	plowat;		/* packets low watermark */
	u_int32_t	phiwat;		/* packets high watermark */
	u_int32_t	blowat;		/* bytes low watermark */
	u_int32_t	bhiwat;		/* bytes high watermark */
};

static struct rxpoll_time_tbl rxpoll_tbl[] = {
	{ 10 * MBPS,	2,	8,	(1 * 1024),	(6 * 1024)	},
	{ 100 * MBPS,	10,	40,	(4 * 1024),	(64 * 1024)	},
	{ 1 * GBPS,	10,	40,	(4 * 1024),	(64 * 1024)	},
	{ 10 * GBPS,	10,	40,	(4 * 1024),	(64 * 1024)	},
	{ 100 * GBPS,	10,	40,	(4 * 1024),	(64 * 1024)	},
	{ 0, 0, 0, 0, 0 }
};
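
/*
 * Reading the table: a link whose downlink speed lands in the 100 Mbps row,
 * for instance, uses a 10/40 packet and 4 KB/64 KB byte watermark pair to
 * decide when to switch the input thread between interrupt and polling
 * mode; presumably dlil_rxpoll_set_params(), invoked below when an
 * RXPOLL-capable interface is set up, is what consults these rows.
 */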

static u_int32_t
proto_hash_value(u_int32_t protocol_family)
{
	/*
	 * dlil_proto_unplumb_all() depends on the mapping between
	 * the hash bucket index and the protocol family defined
	 * here; future changes must be applied there as well.
	 */
	switch (protocol_family) {
	case PF_INET:
		return (0);
	case PF_INET6:
		return (1);
	case PF_VLAN:
		return (2);
	case PF_UNSPEC:
	default:
		return (3);
	}
}

/*
 * Caller must already be holding ifnet lock.
 */
static struct if_proto *
find_attached_proto(struct ifnet *ifp, u_int32_t protocol_family)
{
	struct if_proto *proto = NULL;
	u_int32_t i = proto_hash_value(protocol_family);

	ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);

	if (ifp->if_proto_hash != NULL)
		proto = SLIST_FIRST(&ifp->if_proto_hash[i]);

	while (proto != NULL && proto->protocol_family != protocol_family)
		proto = SLIST_NEXT(proto, next_hash);

	if (proto != NULL)
		if_proto_ref(proto);

	return (proto);
}

static void
if_proto_ref(struct if_proto *proto)
{
	atomic_add_32(&proto->refcount, 1);
}

extern void if_rtproto_del(struct ifnet *ifp, int protocol);

static void
if_proto_free(struct if_proto *proto)
{
	u_int32_t oldval;
	struct ifnet *ifp = proto->ifp;
	u_int32_t proto_family = proto->protocol_family;
	struct kev_dl_proto_data ev_pr_data;

	oldval = atomic_add_32_ov(&proto->refcount, -1);
	if (oldval > 1) { /* still in use */
		return;
	}

	/* No more reference on this, protocol must have been detached */
	VERIFY(proto->detached);

	if (proto->proto_kpi == kProtoKPI_v1) {
		if (proto->kpi.v1.detached)
			proto->kpi.v1.detached(ifp, proto->protocol_family);
	}
	if (proto->proto_kpi == kProtoKPI_v2) {
		if (proto->kpi.v2.detached)
			proto->kpi.v2.detached(ifp, proto->protocol_family);
	}

	/*
	 * Cleanup routes that may still be in the routing table for that
	 * interface/protocol pair.
	 */
	if_rtproto_del(ifp, proto_family);

	/*
	 * The reserved field carries the number of protocols still attached
	 * (subject to change)
	 */
	ifnet_lock_shared(ifp);
	ev_pr_data.proto_family = proto_family;
	ev_pr_data.proto_remaining_count = dlil_ifp_proto_count(ifp);
	ifnet_lock_done(ifp);

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_DETACHED,
	    (struct net_event_data *)&ev_pr_data,
	    sizeof (struct kev_dl_proto_data));

	zfree(dlif_proto_zone, proto);
}

__private_extern__ void
ifnet_lock_assert(struct ifnet *ifp, ifnet_lock_assert_t what)
{
#if !MACH_ASSERT
#pragma unused(ifp)
#endif
	unsigned int type = 0;
	int ass = 1;

	switch (what) {
	case IFNET_LCK_ASSERT_EXCLUSIVE:
		type = LCK_RW_ASSERT_EXCLUSIVE;
		break;

	case IFNET_LCK_ASSERT_SHARED:
		type = LCK_RW_ASSERT_SHARED;
		break;

	case IFNET_LCK_ASSERT_OWNED:
		type = LCK_RW_ASSERT_HELD;
		break;

	case IFNET_LCK_ASSERT_NOTOWNED:
		/* nothing to do here for RW lock; bypass assert */
		ass = 0;
		break;

	default:
		panic("bad ifnet assert type: %d", what);
		/* NOTREACHED */
	}
	if (ass)
		LCK_RW_ASSERT(&ifp->if_lock, type);
}

__private_extern__ void
ifnet_lock_shared(struct ifnet *ifp)
{
	lck_rw_lock_shared(&ifp->if_lock);
}

__private_extern__ void
ifnet_lock_exclusive(struct ifnet *ifp)
{
	lck_rw_lock_exclusive(&ifp->if_lock);
}

__private_extern__ void
ifnet_lock_done(struct ifnet *ifp)
{
	lck_rw_done(&ifp->if_lock);
}

#if INET
__private_extern__ void
if_inetdata_lock_shared(struct ifnet *ifp)
{
	lck_rw_lock_shared(&ifp->if_inetdata_lock);
}

__private_extern__ void
if_inetdata_lock_exclusive(struct ifnet *ifp)
{
	lck_rw_lock_exclusive(&ifp->if_inetdata_lock);
}

__private_extern__ void
if_inetdata_lock_done(struct ifnet *ifp)
{
	lck_rw_done(&ifp->if_inetdata_lock);
}
#endif

#if INET6
__private_extern__ void
if_inet6data_lock_shared(struct ifnet *ifp)
{
	lck_rw_lock_shared(&ifp->if_inet6data_lock);
}

__private_extern__ void
if_inet6data_lock_exclusive(struct ifnet *ifp)
{
	lck_rw_lock_exclusive(&ifp->if_inet6data_lock);
}

__private_extern__ void
if_inet6data_lock_done(struct ifnet *ifp)
{
	lck_rw_done(&ifp->if_inet6data_lock);
}
#endif

__private_extern__ void
ifnet_head_lock_shared(void)
{
	lck_rw_lock_shared(&ifnet_head_lock);
}

__private_extern__ void
ifnet_head_lock_exclusive(void)
{
	lck_rw_lock_exclusive(&ifnet_head_lock);
}

__private_extern__ void
ifnet_head_done(void)
{
	lck_rw_done(&ifnet_head_lock);
}

__private_extern__ void
ifnet_head_assert_exclusive(void)
{
	LCK_RW_ASSERT(&ifnet_head_lock, LCK_RW_ASSERT_EXCLUSIVE);
}

/*
 * Caller must already be holding ifnet lock.
 */
static int
dlil_ifp_proto_count(struct ifnet *ifp)
{
	int i, count = 0;

	ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);

	if (ifp->if_proto_hash == NULL)
		goto done;

	for (i = 0; i < PROTO_HASH_SLOTS; i++) {
		struct if_proto *proto;
		SLIST_FOREACH(proto, &ifp->if_proto_hash[i], next_hash) {
			count++;
		}
	}
done:
	return (count);
}

__private_extern__ void
dlil_post_msg(struct ifnet *ifp, u_int32_t event_subclass,
    u_int32_t event_code, struct net_event_data *event_data,
    u_int32_t event_data_len)
{
	struct net_event_data ev_data;
	struct kev_msg ev_msg;

	bzero(&ev_msg, sizeof (ev_msg));
	bzero(&ev_data, sizeof (ev_data));
	/*
	 * A net event always starts with a net_event_data structure,
	 * but the caller can generate a simple net event or
	 * provide a longer event structure to post.
	 */
	ev_msg.vendor_code	= KEV_VENDOR_APPLE;
	ev_msg.kev_class	= KEV_NETWORK_CLASS;
	ev_msg.kev_subclass	= event_subclass;
	ev_msg.event_code	= event_code;

	if (event_data == NULL) {
		event_data = &ev_data;
		event_data_len = sizeof (struct net_event_data);
	}

	strlcpy(&event_data->if_name[0], ifp->if_name, IFNAMSIZ);
	event_data->if_family = ifp->if_family;
	event_data->if_unit = (u_int32_t)ifp->if_unit;

	ev_msg.dv[0].data_length = event_data_len;
	ev_msg.dv[0].data_ptr = event_data;
	ev_msg.dv[1].data_length = 0;

	/* Don't update interface generation for quality and RRC state changes */
	bool update_generation = (event_subclass != KEV_DL_SUBCLASS ||
	    (event_code != KEV_DL_LINK_QUALITY_METRIC_CHANGED &&
	    event_code != KEV_DL_RRC_STATE_CHANGED));

	dlil_event_internal(ifp, &ev_msg, update_generation);
}
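
/*
 * Minimal usage sketch: passing a NULL event_data posts a bare event
 * carrying only the interface identity filled in above, e.g.
 *
 *	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHED, NULL, 0);
 *
 * whereas a caller like if_proto_free() hands in a larger structure
 * (struct kev_dl_proto_data) whose first member is the net_event_data
 * header this routine fills in.
 */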

__private_extern__ int
dlil_alloc_local_stats(struct ifnet *ifp)
{
	int ret = EINVAL;
	void *buf, *base, **pbuf;

	if (ifp == NULL)
		goto end;

	if (ifp->if_tcp_stat == NULL && ifp->if_udp_stat == NULL) {
		/* allocate tcpstat_local structure */
		buf = zalloc(dlif_tcpstat_zone);
		if (buf == NULL) {
			ret = ENOMEM;
			goto end;
		}
		bzero(buf, dlif_tcpstat_bufsize);

		/* Get the 64-bit aligned base address for this object */
		base = (void *)P2ROUNDUP((intptr_t)buf + sizeof (u_int64_t),
		    sizeof (u_int64_t));
		VERIFY(((intptr_t)base + dlif_tcpstat_size) <=
		    ((intptr_t)buf + dlif_tcpstat_bufsize));

		/*
		 * Wind back a pointer size from the aligned base and
		 * save the original address so we can free it later.
		 */
		pbuf = (void **)((intptr_t)base - sizeof (void *));
		*pbuf = buf;
		ifp->if_tcp_stat = base;

		/* allocate udpstat_local structure */
		buf = zalloc(dlif_udpstat_zone);
		if (buf == NULL) {
			ret = ENOMEM;
			goto end;
		}
		bzero(buf, dlif_udpstat_bufsize);

		/* Get the 64-bit aligned base address for this object */
		base = (void *)P2ROUNDUP((intptr_t)buf + sizeof (u_int64_t),
		    sizeof (u_int64_t));
		VERIFY(((intptr_t)base + dlif_udpstat_size) <=
		    ((intptr_t)buf + dlif_udpstat_bufsize));

		/*
		 * Wind back a pointer size from the aligned base and
		 * save the original address so we can free it later.
		 */
		pbuf = (void **)((intptr_t)base - sizeof (void *));
		*pbuf = buf;
		ifp->if_udp_stat = base;

		VERIFY(IS_P2ALIGNED(ifp->if_tcp_stat, sizeof (u_int64_t)) &&
		    IS_P2ALIGNED(ifp->if_udp_stat, sizeof (u_int64_t)));

		ret = 0;
	}

	if (ifp->if_ipv4_stat == NULL) {
		MALLOC(ifp->if_ipv4_stat, struct if_tcp_ecn_stat *,
		    sizeof (struct if_tcp_ecn_stat), M_TEMP, M_WAITOK|M_ZERO);
		if (ifp->if_ipv4_stat == NULL) {
			ret = ENOMEM;
			goto end;
		}
	}

	if (ifp->if_ipv6_stat == NULL) {
		MALLOC(ifp->if_ipv6_stat, struct if_tcp_ecn_stat *,
		    sizeof (struct if_tcp_ecn_stat), M_TEMP, M_WAITOK|M_ZERO);
		if (ifp->if_ipv6_stat == NULL) {
			ret = ENOMEM;
			goto end;
		}
	}
end:
	if (ret != 0) {
		if (ifp->if_tcp_stat != NULL) {
			pbuf = (void **)
			    ((intptr_t)ifp->if_tcp_stat - sizeof (void *));
			zfree(dlif_tcpstat_zone, *pbuf);
			ifp->if_tcp_stat = NULL;
		}
		if (ifp->if_udp_stat != NULL) {
			pbuf = (void **)
			    ((intptr_t)ifp->if_udp_stat - sizeof (void *));
			zfree(dlif_udpstat_zone, *pbuf);
			ifp->if_udp_stat = NULL;
		}
		if (ifp->if_ipv4_stat != NULL) {
			FREE(ifp->if_ipv4_stat, M_TEMP);
			ifp->if_ipv4_stat = NULL;
		}
		if (ifp->if_ipv6_stat != NULL) {
			FREE(ifp->if_ipv6_stat, M_TEMP);
			ifp->if_ipv6_stat = NULL;
		}
	}

	return (ret);
}
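
/*
 * Layout sketch for the aligned zone allocations above (sizes illustrative):
 *
 *	buf                       base = P2ROUNDUP(buf + 8, 8)
 *	|                         |
 *	v                         v
 *	+-------+-----------------+----------------------------+
 *	| slack |  saved buf ptr  |   64-bit aligned object    |
 *	+-------+-----------------+----------------------------+
 *	         ^
 *	         pbuf = base - sizeof (void *)
 *
 * The sizeof (void *) + sizeof (u_int64_t) headroom built into
 * dlif_tcpstat_bufsize and dlif_udpstat_bufsize at zone setup guarantees
 * that both the rounded-up base and the stashed original pointer fit.
 */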

static int
dlil_create_input_thread(ifnet_t ifp, struct dlil_threading_info *inp)
{
	thread_continue_t func;
	u_int32_t limit;
	int error;

	/* NULL ifp indicates the main input thread, called at dlil_init time */
	if (ifp == NULL) {
		func = dlil_main_input_thread_func;
		VERIFY(inp == dlil_main_input_thread);
		(void) strlcat(inp->input_name,
		    "main_input", DLIL_THREADNAME_LEN);
	} else if (net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) {
		func = dlil_rxpoll_input_thread_func;
		VERIFY(inp != dlil_main_input_thread);
		(void) snprintf(inp->input_name, DLIL_THREADNAME_LEN,
		    "%s_input_poll", if_name(ifp));
	} else {
		func = dlil_input_thread_func;
		VERIFY(inp != dlil_main_input_thread);
		(void) snprintf(inp->input_name, DLIL_THREADNAME_LEN,
		    "%s_input", if_name(ifp));
	}
	VERIFY(inp->input_thr == THREAD_NULL);

	inp->lck_grp = lck_grp_alloc_init(inp->input_name, dlil_grp_attributes);
	lck_mtx_init(&inp->input_lck, inp->lck_grp, dlil_lck_attributes);

	inp->mode = IFNET_MODEL_INPUT_POLL_OFF;
	inp->ifp = ifp;		/* NULL for main input thread */

	net_timerclear(&inp->mode_holdtime);
	net_timerclear(&inp->mode_lasttime);
	net_timerclear(&inp->sample_holdtime);
	net_timerclear(&inp->sample_lasttime);
	net_timerclear(&inp->dbg_lasttime);

	/*
	 * For interfaces that support opportunistic polling, set the
	 * low and high watermarks for outstanding inbound packets/bytes.
	 * Also define freeze times for transitioning between modes
	 * and updating the average.
	 */
	if (ifp != NULL && net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) {
		limit = MAX(if_rcvq_maxlen, IF_RCVQ_MINLEN);
		(void) dlil_rxpoll_set_params(ifp, NULL, FALSE);
	} else {
		limit = (u_int32_t)-1;
	}

	_qinit(&inp->rcvq_pkts, Q_DROPTAIL, limit, QP_MBUF);
	if (inp == dlil_main_input_thread) {
		struct dlil_main_threading_info *inpm =
		    (struct dlil_main_threading_info *)inp;
		_qinit(&inpm->lo_rcvq_pkts, Q_DROPTAIL, limit, QP_MBUF);
	}

	error = kernel_thread_start(func, inp, &inp->input_thr);
	if (error == KERN_SUCCESS) {
		ml_thread_policy(inp->input_thr, MACHINE_GROUP,
		    (MACHINE_NETWORK_GROUP|MACHINE_NETWORK_NETISR));
		/*
		 * We create an affinity set so that the matching workloop
		 * thread or the starter thread (for loopback) can be
		 * scheduled on the same processor set as the input thread.
		 */
		if (net_affinity) {
			struct thread *tp = inp->input_thr;
			u_int32_t tag;
			/*
			 * Randomize to reduce the probability
			 * of affinity tag namespace collision.
			 */
			read_frandom(&tag, sizeof (tag));
			if (dlil_affinity_set(tp, tag) == KERN_SUCCESS) {
				thread_reference(tp);
				inp->tag = tag;
				inp->net_affinity = TRUE;
			}
		}
	} else if (inp == dlil_main_input_thread) {
		panic_plain("%s: couldn't create main input thread", __func__);
		/* NOTREACHED */
	} else {
		panic_plain("%s: couldn't create %s input thread", __func__,
		    if_name(ifp));
		/* NOTREACHED */
	}
	OSAddAtomic(1, &cur_dlil_input_threads);

	return (error);
}

#if TEST_INPUT_THREAD_TERMINATION
static int
sysctl_input_thread_termination_spin SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	uint32_t i;
	int err;

	i = if_input_thread_termination_spin;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (net_rxpoll == 0)
		return (ENXIO);

	if_input_thread_termination_spin = i;
	return (err);
}
#endif /* TEST_INPUT_THREAD_TERMINATION */

static void
dlil_clean_threading_info(struct dlil_threading_info *inp)
{
	lck_mtx_destroy(&inp->input_lck, inp->lck_grp);
	lck_grp_free(inp->lck_grp);

	inp->input_waiting = 0;
	inp->wtot = 0;
	bzero(inp->input_name, sizeof (inp->input_name));
	inp->ifp = NULL;
	VERIFY(qhead(&inp->rcvq_pkts) == NULL && qempty(&inp->rcvq_pkts));
	qlimit(&inp->rcvq_pkts) = 0;
	bzero(&inp->stats, sizeof (inp->stats));

	VERIFY(!inp->net_affinity);
	inp->input_thr = THREAD_NULL;
	VERIFY(inp->wloop_thr == THREAD_NULL);
	VERIFY(inp->poll_thr == THREAD_NULL);
	VERIFY(inp->tag == 0);

	inp->mode = IFNET_MODEL_INPUT_POLL_OFF;
	bzero(&inp->tstats, sizeof (inp->tstats));
	bzero(&inp->pstats, sizeof (inp->pstats));
	bzero(&inp->sstats, sizeof (inp->sstats));

	net_timerclear(&inp->mode_holdtime);
	net_timerclear(&inp->mode_lasttime);
	net_timerclear(&inp->sample_holdtime);
	net_timerclear(&inp->sample_lasttime);
	net_timerclear(&inp->dbg_lasttime);

#if IFNET_INPUT_SANITY_CHK
	inp->input_mbuf_cnt = 0;
#endif /* IFNET_INPUT_SANITY_CHK */
}

static void
dlil_terminate_input_thread(struct dlil_threading_info *inp)
{
	struct ifnet *ifp = inp->ifp;

	VERIFY(current_thread() == inp->input_thr);
	VERIFY(inp != dlil_main_input_thread);

	OSAddAtomic(-1, &cur_dlil_input_threads);

#if TEST_INPUT_THREAD_TERMINATION
	{ /* do something useless that won't get optimized away */
		uint32_t v = 1;
		for (uint32_t i = 0;
		    i < if_input_thread_termination_spin;
		    i++) {
			v = (i + 1) * v;
		}
		printf("the value is %d\n", v);
	}
#endif /* TEST_INPUT_THREAD_TERMINATION */

	lck_mtx_lock_spin(&inp->input_lck);
	VERIFY((inp->input_waiting & DLIL_INPUT_TERMINATE) != 0);
	inp->input_waiting |= DLIL_INPUT_TERMINATE_COMPLETE;
	wakeup_one((caddr_t)&inp->input_waiting);
	lck_mtx_unlock(&inp->input_lck);

	/* for the extra refcnt from kernel_thread_start() */
	thread_deallocate(current_thread());

	if (dlil_verbose) {
		printf("%s: input thread terminated\n",
		    if_name(ifp));
	}

	/* this is the end */
	thread_terminate(current_thread());
	/* NOTREACHED */
}

static kern_return_t
dlil_affinity_set(struct thread *tp, u_int32_t tag)
{
	thread_affinity_policy_data_t policy;

	bzero(&policy, sizeof (policy));
	policy.affinity_tag = tag;
	return (thread_policy_set(tp, THREAD_AFFINITY_POLICY,
	    (thread_policy_t)&policy, THREAD_AFFINITY_POLICY_COUNT));
}

void
dlil_init(void)
{
	thread_t thread = THREAD_NULL;

	/*
	 * The following fields must be 64-bit aligned for atomic operations.
	 */
	IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_opackets);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_oerrors);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_collisions);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_obytes);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_imcasts);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_omcasts);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_alignerrs);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_dt_bytes);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_fpackets);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_fbytes);

	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_opackets);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_oerrors);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_collisions);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_obytes);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_imcasts);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_omcasts);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_alignerrs);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_dt_bytes);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_fpackets);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_fbytes);

	/*
	 * These IF_HWASSIST_ flags must be equal to their IFNET_* counterparts.
	 */
	_CASSERT(IF_HWASSIST_CSUM_IP == IFNET_CSUM_IP);
	_CASSERT(IF_HWASSIST_CSUM_TCP == IFNET_CSUM_TCP);
	_CASSERT(IF_HWASSIST_CSUM_UDP == IFNET_CSUM_UDP);
	_CASSERT(IF_HWASSIST_CSUM_IP_FRAGS == IFNET_CSUM_FRAGMENT);
	_CASSERT(IF_HWASSIST_CSUM_FRAGMENT == IFNET_IP_FRAGMENT);
	_CASSERT(IF_HWASSIST_CSUM_TCPIPV6 == IFNET_CSUM_TCPIPV6);
	_CASSERT(IF_HWASSIST_CSUM_UDPIPV6 == IFNET_CSUM_UDPIPV6);
	_CASSERT(IF_HWASSIST_CSUM_FRAGMENT_IPV6 == IFNET_IPV6_FRAGMENT);
	_CASSERT(IF_HWASSIST_CSUM_PARTIAL == IFNET_CSUM_PARTIAL);
	_CASSERT(IF_HWASSIST_CSUM_ZERO_INVERT == IFNET_CSUM_ZERO_INVERT);
	_CASSERT(IF_HWASSIST_VLAN_TAGGING == IFNET_VLAN_TAGGING);
	_CASSERT(IF_HWASSIST_VLAN_MTU == IFNET_VLAN_MTU);
	_CASSERT(IF_HWASSIST_TSO_V4 == IFNET_TSO_IPV4);
	_CASSERT(IF_HWASSIST_TSO_V6 == IFNET_TSO_IPV6);

	/*
	 * ... as well as the mbuf checksum flags counterparts.
	 */
	_CASSERT(CSUM_IP == IF_HWASSIST_CSUM_IP);
	_CASSERT(CSUM_TCP == IF_HWASSIST_CSUM_TCP);
	_CASSERT(CSUM_UDP == IF_HWASSIST_CSUM_UDP);
	_CASSERT(CSUM_IP_FRAGS == IF_HWASSIST_CSUM_IP_FRAGS);
	_CASSERT(CSUM_FRAGMENT == IF_HWASSIST_CSUM_FRAGMENT);
	_CASSERT(CSUM_TCPIPV6 == IF_HWASSIST_CSUM_TCPIPV6);
	_CASSERT(CSUM_UDPIPV6 == IF_HWASSIST_CSUM_UDPIPV6);
	_CASSERT(CSUM_FRAGMENT_IPV6 == IF_HWASSIST_CSUM_FRAGMENT_IPV6);
	_CASSERT(CSUM_PARTIAL == IF_HWASSIST_CSUM_PARTIAL);
	_CASSERT(CSUM_ZERO_INVERT == IF_HWASSIST_CSUM_ZERO_INVERT);
	_CASSERT(CSUM_VLAN_TAG_VALID == IF_HWASSIST_VLAN_TAGGING);

	/*
	 * Make sure we have at least IF_LLREACH_MAXLEN in the llreach info.
	 */
	_CASSERT(IF_LLREACH_MAXLEN <= IF_LLREACHINFO_ADDRLEN);
	_CASSERT(IFNET_LLREACHINFO_ADDRLEN == IF_LLREACHINFO_ADDRLEN);

	_CASSERT(IFRLOGF_DLIL == IFNET_LOGF_DLIL);
	_CASSERT(IFRLOGF_FAMILY == IFNET_LOGF_FAMILY);
	_CASSERT(IFRLOGF_DRIVER == IFNET_LOGF_DRIVER);
	_CASSERT(IFRLOGF_FIRMWARE == IFNET_LOGF_FIRMWARE);

	_CASSERT(IFRLOGCAT_CONNECTIVITY == IFNET_LOGCAT_CONNECTIVITY);
	_CASSERT(IFRLOGCAT_QUALITY == IFNET_LOGCAT_QUALITY);
	_CASSERT(IFRLOGCAT_PERFORMANCE == IFNET_LOGCAT_PERFORMANCE);

	_CASSERT(IFRTYPE_FAMILY_ANY == IFNET_FAMILY_ANY);
	_CASSERT(IFRTYPE_FAMILY_LOOPBACK == IFNET_FAMILY_LOOPBACK);
	_CASSERT(IFRTYPE_FAMILY_ETHERNET == IFNET_FAMILY_ETHERNET);
	_CASSERT(IFRTYPE_FAMILY_SLIP == IFNET_FAMILY_SLIP);
	_CASSERT(IFRTYPE_FAMILY_TUN == IFNET_FAMILY_TUN);
	_CASSERT(IFRTYPE_FAMILY_VLAN == IFNET_FAMILY_VLAN);
	_CASSERT(IFRTYPE_FAMILY_PPP == IFNET_FAMILY_PPP);
	_CASSERT(IFRTYPE_FAMILY_PVC == IFNET_FAMILY_PVC);
	_CASSERT(IFRTYPE_FAMILY_DISC == IFNET_FAMILY_DISC);
	_CASSERT(IFRTYPE_FAMILY_MDECAP == IFNET_FAMILY_MDECAP);
	_CASSERT(IFRTYPE_FAMILY_GIF == IFNET_FAMILY_GIF);
	_CASSERT(IFRTYPE_FAMILY_FAITH == IFNET_FAMILY_FAITH);
	_CASSERT(IFRTYPE_FAMILY_STF == IFNET_FAMILY_STF);
	_CASSERT(IFRTYPE_FAMILY_FIREWIRE == IFNET_FAMILY_FIREWIRE);
	_CASSERT(IFRTYPE_FAMILY_BOND == IFNET_FAMILY_BOND);
	_CASSERT(IFRTYPE_FAMILY_CELLULAR == IFNET_FAMILY_CELLULAR);

	_CASSERT(IFRTYPE_SUBFAMILY_ANY == IFNET_SUBFAMILY_ANY);
	_CASSERT(IFRTYPE_SUBFAMILY_USB == IFNET_SUBFAMILY_USB);
	_CASSERT(IFRTYPE_SUBFAMILY_BLUETOOTH == IFNET_SUBFAMILY_BLUETOOTH);
	_CASSERT(IFRTYPE_SUBFAMILY_WIFI == IFNET_SUBFAMILY_WIFI);
	_CASSERT(IFRTYPE_SUBFAMILY_THUNDERBOLT == IFNET_SUBFAMILY_THUNDERBOLT);
	_CASSERT(IFRTYPE_SUBFAMILY_RESERVED == IFNET_SUBFAMILY_RESERVED);
	_CASSERT(IFRTYPE_SUBFAMILY_INTCOPROC == IFNET_SUBFAMILY_INTCOPROC);

	_CASSERT(DLIL_MODIDLEN == IFNET_MODIDLEN);
	_CASSERT(DLIL_MODARGLEN == IFNET_MODARGLEN);

	PE_parse_boot_argn("net_affinity", &net_affinity,
	    sizeof (net_affinity));

	PE_parse_boot_argn("net_rxpoll", &net_rxpoll, sizeof (net_rxpoll));

	PE_parse_boot_argn("net_rtref", &net_rtref, sizeof (net_rtref));

	PE_parse_boot_argn("ifnet_debug", &ifnet_debug, sizeof (ifnet_debug));

	dlif_size = (ifnet_debug == 0) ? sizeof (struct dlil_ifnet) :
	    sizeof (struct dlil_ifnet_dbg);
	/* Enforce 64-bit alignment for dlil_ifnet structure */
	dlif_bufsize = dlif_size + sizeof (void *) + sizeof (u_int64_t);
	dlif_bufsize = P2ROUNDUP(dlif_bufsize, sizeof (u_int64_t));
	dlif_zone = zinit(dlif_bufsize, DLIF_ZONE_MAX * dlif_bufsize,
	    0, DLIF_ZONE_NAME);
	if (dlif_zone == NULL) {
		panic_plain("%s: failed allocating %s", __func__,
		    DLIF_ZONE_NAME);
		/* NOTREACHED */
	}
	zone_change(dlif_zone, Z_EXPAND, TRUE);
	zone_change(dlif_zone, Z_CALLERACCT, FALSE);

	dlif_filt_size = sizeof (struct ifnet_filter);
	dlif_filt_zone = zinit(dlif_filt_size,
	    DLIF_FILT_ZONE_MAX * dlif_filt_size, 0, DLIF_FILT_ZONE_NAME);
	if (dlif_filt_zone == NULL) {
		panic_plain("%s: failed allocating %s", __func__,
		    DLIF_FILT_ZONE_NAME);
		/* NOTREACHED */
	}
	zone_change(dlif_filt_zone, Z_EXPAND, TRUE);
	zone_change(dlif_filt_zone, Z_CALLERACCT, FALSE);

	dlif_phash_size = sizeof (struct proto_hash_entry) * PROTO_HASH_SLOTS;
	dlif_phash_zone = zinit(dlif_phash_size,
	    DLIF_PHASH_ZONE_MAX * dlif_phash_size, 0, DLIF_PHASH_ZONE_NAME);
	if (dlif_phash_zone == NULL) {
		panic_plain("%s: failed allocating %s", __func__,
		    DLIF_PHASH_ZONE_NAME);
		/* NOTREACHED */
	}
	zone_change(dlif_phash_zone, Z_EXPAND, TRUE);
	zone_change(dlif_phash_zone, Z_CALLERACCT, FALSE);

	dlif_proto_size = sizeof (struct if_proto);
	dlif_proto_zone = zinit(dlif_proto_size,
	    DLIF_PROTO_ZONE_MAX * dlif_proto_size, 0, DLIF_PROTO_ZONE_NAME);
	if (dlif_proto_zone == NULL) {
		panic_plain("%s: failed allocating %s", __func__,
		    DLIF_PROTO_ZONE_NAME);
		/* NOTREACHED */
	}
	zone_change(dlif_proto_zone, Z_EXPAND, TRUE);
	zone_change(dlif_proto_zone, Z_CALLERACCT, FALSE);

	dlif_tcpstat_size = sizeof (struct tcpstat_local);
	/* Enforce 64-bit alignment for tcpstat_local structure */
	dlif_tcpstat_bufsize =
	    dlif_tcpstat_size + sizeof (void *) + sizeof (u_int64_t);
	dlif_tcpstat_bufsize =
	    P2ROUNDUP(dlif_tcpstat_bufsize, sizeof (u_int64_t));
	dlif_tcpstat_zone = zinit(dlif_tcpstat_bufsize,
	    DLIF_TCPSTAT_ZONE_MAX * dlif_tcpstat_bufsize, 0,
	    DLIF_TCPSTAT_ZONE_NAME);
	if (dlif_tcpstat_zone == NULL) {
		panic_plain("%s: failed allocating %s", __func__,
		    DLIF_TCPSTAT_ZONE_NAME);
		/* NOTREACHED */
	}
	zone_change(dlif_tcpstat_zone, Z_EXPAND, TRUE);
	zone_change(dlif_tcpstat_zone, Z_CALLERACCT, FALSE);

	dlif_udpstat_size = sizeof (struct udpstat_local);
	/* Enforce 64-bit alignment for udpstat_local structure */
	dlif_udpstat_bufsize =
	    dlif_udpstat_size + sizeof (void *) + sizeof (u_int64_t);
	dlif_udpstat_bufsize =
	    P2ROUNDUP(dlif_udpstat_bufsize, sizeof (u_int64_t));
	dlif_udpstat_zone = zinit(dlif_udpstat_bufsize,
	    DLIF_UDPSTAT_ZONE_MAX * dlif_udpstat_bufsize, 0,
	    DLIF_UDPSTAT_ZONE_NAME);
	if (dlif_udpstat_zone == NULL) {
		panic_plain("%s: failed allocating %s", __func__,
		    DLIF_UDPSTAT_ZONE_NAME);
		/* NOTREACHED */
	}
	zone_change(dlif_udpstat_zone, Z_EXPAND, TRUE);
	zone_change(dlif_udpstat_zone, Z_CALLERACCT, FALSE);
1619 ifnet_llreach_init();
1620 eventhandler_lists_ctxt_init(&ifnet_evhdlr_ctxt
);
1622 TAILQ_INIT(&dlil_ifnet_head
);
1623 TAILQ_INIT(&ifnet_head
);
1624 TAILQ_INIT(&ifnet_detaching_head
);
1625 TAILQ_INIT(&ifnet_ordered_head
);
1627 /* Setup the lock groups we will use */
1628 dlil_grp_attributes
= lck_grp_attr_alloc_init();
1630 dlil_lock_group
= lck_grp_alloc_init("DLIL internal locks",
1631 dlil_grp_attributes
);
1632 ifnet_lock_group
= lck_grp_alloc_init("ifnet locks",
1633 dlil_grp_attributes
);
1634 ifnet_head_lock_group
= lck_grp_alloc_init("ifnet head lock",
1635 dlil_grp_attributes
);
1636 ifnet_rcv_lock_group
= lck_grp_alloc_init("ifnet rcv locks",
1637 dlil_grp_attributes
);
1638 ifnet_snd_lock_group
= lck_grp_alloc_init("ifnet snd locks",
1639 dlil_grp_attributes
);
1641 /* Setup the lock attributes we will use */
1642 dlil_lck_attributes
= lck_attr_alloc_init();
1644 ifnet_lock_attr
= lck_attr_alloc_init();
1646 lck_rw_init(&ifnet_head_lock
, ifnet_head_lock_group
,
1647 dlil_lck_attributes
);
1648 lck_mtx_init(&dlil_ifnet_lock
, dlil_lock_group
, dlil_lck_attributes
);
1650 /* Setup interface flow control related items */
1651 lck_mtx_init(&ifnet_fc_lock
, dlil_lock_group
, dlil_lck_attributes
);
1653 ifnet_fc_zone_size
= sizeof (struct ifnet_fc_entry
);
1654 ifnet_fc_zone
= zinit(ifnet_fc_zone_size
,
1655 IFNET_FC_ZONE_MAX
* ifnet_fc_zone_size
, 0, IFNET_FC_ZONE_NAME
);
1656 if (ifnet_fc_zone
== NULL
) {
1657 panic_plain("%s: failed allocating %s", __func__
,
1658 IFNET_FC_ZONE_NAME
);
1661 zone_change(ifnet_fc_zone
, Z_EXPAND
, TRUE
);
1662 zone_change(ifnet_fc_zone
, Z_CALLERACCT
, FALSE
);
1664 /* Initialize interface address subsystem */
1668 /* Initialize the packet filter */
1672 /* Initialize queue algorithms */
1675 /* Initialize packet schedulers */
1678 /* Initialize flow advisory subsystem */
1681 /* Initialize the pktap virtual interface */
1684 /* Initialize the service class to dscp map */
1687 #if DEBUG || DEVELOPMENT
1688 /* Run self-tests */
1689 dlil_verify_sum16();
1690 #endif /* DEBUG || DEVELOPMENT */
1692 /* Initialize link layer table */
1693 lltable_glbl_init();
1696 * Create and start up the main DLIL input thread and the interface
1697 * detacher threads once everything is initialized.
1699 dlil_create_input_thread(NULL
, dlil_main_input_thread
);
1701 if (kernel_thread_start(ifnet_detacher_thread_func
,
1702 NULL
, &thread
) != KERN_SUCCESS
) {
1703 panic_plain("%s: couldn't create detacher thread", __func__
);
1706 thread_deallocate(thread
);
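
/*
 * Added commentary (not original source): the pattern above creates each
 * fixed-size allocation pool with zinit(elem_size, max_bytes, alloc_size,
 * name) and then tunes it with zone_change().  Z_EXPAND lets the zone grow
 * beyond its initial sizing target, while clearing Z_CALLERACCT keeps these
 * kernel-internal allocations from being accounted against the calling task.
 */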
static void
if_flt_monitor_busy(struct ifnet *ifp)
{
    LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);

    ++ifp->if_flt_busy;
    VERIFY(ifp->if_flt_busy != 0);
}

static void
if_flt_monitor_unbusy(struct ifnet *ifp)
{
    if_flt_monitor_leave(ifp);
}

static void
if_flt_monitor_enter(struct ifnet *ifp)
{
    LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);

    while (ifp->if_flt_busy) {
        ++ifp->if_flt_waiters;
        (void) msleep(&ifp->if_flt_head, &ifp->if_flt_lock,
            (PZERO - 1), "if_flt_monitor", NULL);
    }
    if_flt_monitor_busy(ifp);
}

static void
if_flt_monitor_leave(struct ifnet *ifp)
{
    LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);

    VERIFY(ifp->if_flt_busy != 0);
    --ifp->if_flt_busy;

    if (ifp->if_flt_busy == 0 && ifp->if_flt_waiters > 0) {
        ifp->if_flt_waiters = 0;
        wakeup(&ifp->if_flt_head);
    }
}
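
/*
 * Illustrative sketch (added, not original source): if_flt_busy and
 * if_flt_waiters implement a simple monitor over the filter list.
 * Traversals mark the list busy so they may safely drop if_flt_lock
 * mid-walk; mutators wait for the list to go idle before changing it:
 *
 *    lck_mtx_lock(&ifp->if_flt_lock);
 *    if_flt_monitor_enter(ifp);    // sleeps until no traversal is busy
 *    ... insert or remove entries on ifp->if_flt_head ...
 *    if_flt_monitor_leave(ifp);    // wakes any blocked waiters
 *    lck_mtx_unlock(&ifp->if_flt_lock);
 */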
__private_extern__ int
dlil_attach_filter(struct ifnet *ifp, const struct iff_filter *if_filter,
    interface_filter_t *filter_ref, u_int32_t flags)
{
    int retval = 0;
    struct ifnet_filter *filter = NULL;

    ifnet_head_lock_shared();
    /* Check that the interface is in the global list */
    if (!ifnet_lookup(ifp)) {
        retval = ENXIO;
        goto done;
    }

    filter = zalloc(dlif_filt_zone);
    if (filter == NULL) {
        retval = ENOMEM;
        goto done;
    }
    bzero(filter, dlif_filt_size);

    /* refcnt held above during lookup */
    filter->filt_flags = flags;
    filter->filt_ifp = ifp;
    filter->filt_cookie = if_filter->iff_cookie;
    filter->filt_name = if_filter->iff_name;
    filter->filt_protocol = if_filter->iff_protocol;
    /*
     * Do not install filter callbacks for internal coproc interface
     */
    if (!IFNET_IS_INTCOPROC(ifp)) {
        filter->filt_input = if_filter->iff_input;
        filter->filt_output = if_filter->iff_output;
        filter->filt_event = if_filter->iff_event;
        filter->filt_ioctl = if_filter->iff_ioctl;
    }
    filter->filt_detached = if_filter->iff_detached;

    lck_mtx_lock(&ifp->if_flt_lock);
    if_flt_monitor_enter(ifp);

    LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
    TAILQ_INSERT_TAIL(&ifp->if_flt_head, filter, filt_next);

    if_flt_monitor_leave(ifp);
    lck_mtx_unlock(&ifp->if_flt_lock);

    *filter_ref = filter;

    /*
     * Bump filter count and route_generation ID to let TCP
     * know it shouldn't do TSO on this connection
     */
    if ((filter->filt_flags & DLIL_IFF_TSO) == 0) {
        OSAddAtomic(1, &dlil_filter_disable_tso_count);
        routegenid_update();
    }
    OSIncrementAtomic64(&net_api_stats.nas_iflt_attach_count);
    INC_ATOMIC_INT64_LIM(net_api_stats.nas_iflt_attach_total);
    if ((filter->filt_flags & DLIL_IFF_INTERNAL)) {
        INC_ATOMIC_INT64_LIM(net_api_stats.nas_iflt_attach_os_total);
    }
    if (dlil_verbose) {
        printf("%s: %s filter attached\n", if_name(ifp),
            if_filter->iff_name);
    }
done:
    ifnet_head_done();
    if (retval != 0 && ifp != NULL) {
        DLIL_PRINTF("%s: failed to attach %s (err=%d)\n",
            if_name(ifp), if_filter->iff_name, retval);
    }
    if (retval != 0 && filter != NULL)
        zfree(dlif_filt_zone, filter);

    return (retval);
}
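
/*
 * Caller sketch (hedged; not part of this file): third-party code normally
 * reaches dlil_attach_filter() through the iflt_attach() KPI.  The callback
 * and context names below are hypothetical:
 *
 *    static const struct iff_filter my_filt = {
 *        .iff_cookie   = &my_ctx,              // hypothetical context
 *        .iff_name     = "com.example.filter",
 *        .iff_protocol = 0,                    // 0 matches every protocol
 *        .iff_input    = my_input_cb,          // may be NULL if unused
 *        .iff_detached = my_detached_cb,
 *    };
 *    interface_filter_t ref;
 *    errno_t err = iflt_attach(ifp, &my_filt, &ref);
 */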
static int
dlil_detach_filter_internal(interface_filter_t filter, int detached)
{
    int retval = 0;

    if (detached == 0) {
        ifnet_t ifp = NULL;

        ifnet_head_lock_shared();
        TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
            interface_filter_t entry = NULL;

            lck_mtx_lock(&ifp->if_flt_lock);
            TAILQ_FOREACH(entry, &ifp->if_flt_head, filt_next) {
                if (entry != filter || entry->filt_skip)
                    continue;
                /*
                 * We've found a match; since it's possible
                 * that the thread gets blocked in the monitor,
                 * we do the lock dance.  Interface should
                 * not be detached since we still have a use
                 * count held during filter attach.
                 */
                entry->filt_skip = 1;    /* skip input/output */
                lck_mtx_unlock(&ifp->if_flt_lock);
                ifnet_head_done();

                lck_mtx_lock(&ifp->if_flt_lock);
                if_flt_monitor_enter(ifp);
                LCK_MTX_ASSERT(&ifp->if_flt_lock,
                    LCK_MTX_ASSERT_OWNED);

                /* Remove the filter from the list */
                TAILQ_REMOVE(&ifp->if_flt_head, filter,
                    filt_next);

                if_flt_monitor_leave(ifp);
                lck_mtx_unlock(&ifp->if_flt_lock);
                if (dlil_verbose) {
                    printf("%s: %s filter detached\n",
                        if_name(ifp), filter->filt_name);
                }
                goto destroy;
            }
            lck_mtx_unlock(&ifp->if_flt_lock);
        }
        ifnet_head_done();

        /* filter parameter is not a valid filter ref */
        retval = EINVAL;
        goto done;
    }

    if (dlil_verbose)
        printf("%s filter detached\n", filter->filt_name);

destroy:

    /* Call the detached function if there is one */
    if (filter->filt_detached)
        filter->filt_detached(filter->filt_cookie, filter->filt_ifp);

    /*
     * Decrease filter count and route_generation ID to let TCP
     * know it should reevaluate doing TSO or not
     */
    if ((filter->filt_flags & DLIL_IFF_TSO) == 0) {
        OSAddAtomic(-1, &dlil_filter_disable_tso_count);
        routegenid_update();
    }

    VERIFY(OSDecrementAtomic64(&net_api_stats.nas_iflt_attach_count) > 0);

    /* Free the filter */
    zfree(dlif_filt_zone, filter);

done:
    if (retval != 0 && filter != NULL) {
        DLIL_PRINTF("failed to detach %s filter (err=%d)\n",
            filter->filt_name, retval);
    }

    return (retval);
}
__private_extern__ void
dlil_detach_filter(interface_filter_t filter)
{
    if (filter == NULL)
        return;
    dlil_detach_filter_internal(filter, 0);
}
/*
 * Main input thread:
 *
 *   a) handles all inbound packets for lo0
 *   b) handles all inbound packets for interfaces with no dedicated
 *      input thread (e.g. anything but Ethernet/PDP or those that support
 *      opportunistic polling)
 *   c) protocol registrations
 *   d) packet injections
 */
__attribute__((noreturn))
static void
dlil_main_input_thread_func(void *v, wait_result_t w)
{
#pragma unused(w)
    struct dlil_main_threading_info *inpm = v;
    struct dlil_threading_info *inp = v;

    VERIFY(inp == dlil_main_input_thread);
    VERIFY(inp->ifp == NULL);
    VERIFY(inp->mode == IFNET_MODEL_INPUT_POLL_OFF);

    while (1) {
        struct mbuf *m = NULL, *m_loop = NULL;
        u_int32_t m_cnt, m_cnt_loop;
        boolean_t proto_req;

        lck_mtx_lock_spin(&inp->input_lck);

        /* Wait until there is work to be done */
        while (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) {
            inp->input_waiting &= ~DLIL_INPUT_RUNNING;
            (void) msleep(&inp->input_waiting, &inp->input_lck,
                (PZERO - 1) | PSPIN, inp->input_name, NULL);
        }

        inp->input_waiting |= DLIL_INPUT_RUNNING;
        inp->input_waiting &= ~DLIL_INPUT_WAITING;

        /* Main input thread cannot be terminated */
        VERIFY(!(inp->input_waiting & DLIL_INPUT_TERMINATE));

        proto_req = (inp->input_waiting &
            (DLIL_PROTO_WAITING | DLIL_PROTO_REGISTER));

        /* Packets for non-dedicated interfaces other than lo0 */
        m_cnt = qlen(&inp->rcvq_pkts);
        m = _getq_all(&inp->rcvq_pkts, NULL, NULL, NULL);

        /* Packets exclusive to lo0 */
        m_cnt_loop = qlen(&inpm->lo_rcvq_pkts);
        m_loop = _getq_all(&inpm->lo_rcvq_pkts, NULL, NULL, NULL);

        inp->wtot = 0;

        lck_mtx_unlock(&inp->input_lck);

        /*
         * NOTE warning %%% attention !!!!
         * We should think about putting some thread starvation
         * safeguards if we deal with long chains of packets.
         */
        if (m_loop != NULL)
            dlil_input_packet_list_extended(lo_ifp, m_loop,
                m_cnt_loop, inp->mode);

        if (m != NULL)
            dlil_input_packet_list_extended(NULL, m,
                m_cnt, inp->mode);

        if (proto_req)
            proto_input_run();
    }

    /* NOTREACHED */
    VERIFY(0);    /* we should never get here */
}
/*
 * Input thread for interfaces with legacy input model.
 */
static void
dlil_input_thread_func(void *v, wait_result_t w)
{
#pragma unused(w)
    char thread_name[MAXTHREADNAMESIZE];
    struct dlil_threading_info *inp = v;
    struct ifnet *ifp = inp->ifp;

    /* Construct the name for this thread, and then apply it. */
    bzero(thread_name, sizeof(thread_name));
    snprintf(thread_name, sizeof(thread_name), "dlil_input_%s", ifp->if_xname);
    thread_set_thread_name(inp->input_thr, thread_name);

    VERIFY(inp != dlil_main_input_thread);
    VERIFY(ifp != NULL);
    VERIFY(!(ifp->if_eflags & IFEF_RXPOLL) || !net_rxpoll);
    VERIFY(inp->mode == IFNET_MODEL_INPUT_POLL_OFF);

    while (1) {
        struct mbuf *m = NULL;
        u_int32_t m_cnt;

        lck_mtx_lock_spin(&inp->input_lck);

        /* Wait until there is work to be done */
        while (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) {
            inp->input_waiting &= ~DLIL_INPUT_RUNNING;
            (void) msleep(&inp->input_waiting, &inp->input_lck,
                (PZERO - 1) | PSPIN, inp->input_name, NULL);
        }

        inp->input_waiting |= DLIL_INPUT_RUNNING;
        inp->input_waiting &= ~DLIL_INPUT_WAITING;

        /*
         * Protocol registration and injection must always use
         * the main input thread; in theory the latter can utilize
         * the corresponding input thread where the packet arrived
         * on, but that requires our knowing the interface in advance
         * (and the benefits might not be worth the trouble.)
         */
        VERIFY(!(inp->input_waiting &
            (DLIL_PROTO_WAITING|DLIL_PROTO_REGISTER)));

        /* Packets for this interface */
        m_cnt = qlen(&inp->rcvq_pkts);
        m = _getq_all(&inp->rcvq_pkts, NULL, NULL, NULL);

        if (inp->input_waiting & DLIL_INPUT_TERMINATE) {
            lck_mtx_unlock(&inp->input_lck);

            /* Free up pending packets */
            if (m != NULL)
                mbuf_freem_list(m);

            dlil_terminate_input_thread(inp);
            /* NOTREACHED */
            return;
        }

        inp->wtot = 0;

        dlil_input_stats_sync(ifp, inp);

        lck_mtx_unlock(&inp->input_lck);

        /*
         * NOTE warning %%% attention !!!!
         * We should think about putting some thread starvation
         * safeguards if we deal with long chains of packets.
         */
        if (m != NULL)
            dlil_input_packet_list_extended(NULL, m,
                m_cnt, inp->mode);
    }

    /* NOTREACHED */
    VERIFY(0);    /* we should never get here */
}
/*
 * Input thread for interfaces with opportunistic polling input model.
 */
static void
dlil_rxpoll_input_thread_func(void *v, wait_result_t w)
{
#pragma unused(w)
    struct dlil_threading_info *inp = v;
    struct ifnet *ifp = inp->ifp;
    struct timespec ts;

    VERIFY(inp != dlil_main_input_thread);
    VERIFY(ifp != NULL && (ifp->if_eflags & IFEF_RXPOLL));

    while (1) {
        struct mbuf *m = NULL;
        u_int32_t m_cnt, m_size, poll_req = 0;
        ifnet_model_t mode;
        struct timespec now, delta;
        u_int64_t ival;

        lck_mtx_lock_spin(&inp->input_lck);

        if ((ival = inp->rxpoll_ival) < IF_RXPOLL_INTERVALTIME_MIN)
            ival = IF_RXPOLL_INTERVALTIME_MIN;

        /* Link parameters changed? */
        if (ifp->if_poll_update != 0) {
            ifp->if_poll_update = 0;
            (void) dlil_rxpoll_set_params(ifp, NULL, TRUE);
        }

        /* Current operating mode */
        mode = inp->mode;

        /* Wait until there is work to be done */
        while (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) {
            inp->input_waiting &= ~DLIL_INPUT_RUNNING;
            (void) msleep(&inp->input_waiting, &inp->input_lck,
                (PZERO - 1) | PSPIN, inp->input_name, NULL);
        }

        inp->input_waiting |= DLIL_INPUT_RUNNING;
        inp->input_waiting &= ~DLIL_INPUT_WAITING;

        /*
         * Protocol registration and injection must always use
         * the main input thread; in theory the latter can utilize
         * the corresponding input thread where the packet arrived
         * on, but that requires our knowing the interface in advance
         * (and the benefits might not be worth the trouble.)
         */
        VERIFY(!(inp->input_waiting &
            (DLIL_PROTO_WAITING|DLIL_PROTO_REGISTER)));

        if (inp->input_waiting & DLIL_INPUT_TERMINATE) {
            /* Free up pending packets */
            lck_mtx_convert_spin(&inp->input_lck);
            _flushq(&inp->rcvq_pkts);
            if (inp->input_mit_tcall != NULL) {
                if (thread_call_isactive(inp->input_mit_tcall))
                    thread_call_cancel(inp->input_mit_tcall);
            }
            lck_mtx_unlock(&inp->input_lck);

            dlil_terminate_input_thread(inp);
            /* NOTREACHED */
            return;
        }

        /* Total count of all packets */
        m_cnt = qlen(&inp->rcvq_pkts);

        /* Total bytes of all packets */
        m_size = qsize(&inp->rcvq_pkts);

        /* Packets for this interface */
        m = _getq_all(&inp->rcvq_pkts, NULL, NULL, NULL);
        VERIFY(m != NULL || m_cnt == 0);

        nanouptime(&now);
        if (!net_timerisset(&inp->sample_lasttime))
            *(&inp->sample_lasttime) = *(&now);

        net_timersub(&now, &inp->sample_lasttime, &delta);
        if (if_rxpoll && net_timerisset(&inp->sample_holdtime)) {
            u_int32_t ptot, btot;

            /* Accumulate statistics for current sampling */
            PKTCNTR_ADD(&inp->sstats, m_cnt, m_size);

            if (net_timercmp(&delta, &inp->sample_holdtime, <))
                goto skip;

            *(&inp->sample_lasttime) = *(&now);

            /* Calculate min/max of inbound bytes */
            btot = (u_int32_t)inp->sstats.bytes;
            if (inp->rxpoll_bmin == 0 || inp->rxpoll_bmin > btot)
                inp->rxpoll_bmin = btot;
            if (btot > inp->rxpoll_bmax)
                inp->rxpoll_bmax = btot;

            /* Calculate EWMA of inbound bytes */
            DLIL_EWMA(inp->rxpoll_bavg, btot, if_rxpoll_decay);

            /* Calculate min/max of inbound packets */
            ptot = (u_int32_t)inp->sstats.packets;
            if (inp->rxpoll_pmin == 0 || inp->rxpoll_pmin > ptot)
                inp->rxpoll_pmin = ptot;
            if (ptot > inp->rxpoll_pmax)
                inp->rxpoll_pmax = ptot;

            /* Calculate EWMA of inbound packets */
            DLIL_EWMA(inp->rxpoll_pavg, ptot, if_rxpoll_decay);

            /* Reset sampling statistics */
            PKTCNTR_CLEAR(&inp->sstats);

            /* Calculate EWMA of wakeup requests */
            DLIL_EWMA(inp->rxpoll_wavg, inp->wtot, if_rxpoll_decay);
            inp->wtot = 0;

            if (dlil_verbose) {
                if (!net_timerisset(&inp->dbg_lasttime))
                    *(&inp->dbg_lasttime) = *(&now);
                net_timersub(&now, &inp->dbg_lasttime, &delta);
                if (net_timercmp(&delta, &dlil_dbgrate, >=)) {
                    *(&inp->dbg_lasttime) = *(&now);
                    printf("%s: [%s] pkts avg %d max %d "
                        "limits [%d/%d], wreq avg %d "
                        "limits [%d/%d], bytes avg %d "
                        "limits [%d/%d]\n", if_name(ifp),
                        (inp->mode ==
                        IFNET_MODEL_INPUT_POLL_ON) ?
                        "ON" : "OFF", inp->rxpoll_pavg,
                        inp->rxpoll_pmax, inp->rxpoll_plowat,
                        inp->rxpoll_phiwat, inp->rxpoll_wavg,
                        inp->rxpoll_wlowat, inp->rxpoll_whiwat,
                        inp->rxpoll_bavg, inp->rxpoll_blowat,
                        inp->rxpoll_bhiwat);
                }
            }

            /* Perform mode transition, if necessary */
            if (!net_timerisset(&inp->mode_lasttime))
                *(&inp->mode_lasttime) = *(&now);

            net_timersub(&now, &inp->mode_lasttime, &delta);
            if (net_timercmp(&delta, &inp->mode_holdtime, <))
                goto skip;

            if (inp->rxpoll_pavg <= inp->rxpoll_plowat &&
                inp->rxpoll_bavg <= inp->rxpoll_blowat &&
                inp->mode != IFNET_MODEL_INPUT_POLL_OFF) {
                mode = IFNET_MODEL_INPUT_POLL_OFF;
            } else if (inp->rxpoll_pavg >= inp->rxpoll_phiwat &&
                (inp->rxpoll_bavg >= inp->rxpoll_bhiwat ||
                inp->rxpoll_wavg >= inp->rxpoll_whiwat) &&
                inp->mode != IFNET_MODEL_INPUT_POLL_ON) {
                mode = IFNET_MODEL_INPUT_POLL_ON;
            }

            if (mode != inp->mode) {
                inp->mode = mode;
                *(&inp->mode_lasttime) = *(&now);
                poll_req++;
            }
        }
skip:
        dlil_input_stats_sync(ifp, inp);

        lck_mtx_unlock(&inp->input_lck);

        /*
         * If there's a mode change and interface is still attached,
         * perform a downcall to the driver for the new mode.  Also
         * hold an IO refcnt on the interface to prevent it from
         * being detached (will be released below.)
         */
        if (poll_req != 0 && ifnet_is_attached(ifp, 1)) {
            struct ifnet_model_params p = { mode, { 0 } };
            errno_t err;

            if (dlil_verbose) {
                printf("%s: polling is now %s, "
                    "pkts avg %d max %d limits [%d/%d], "
                    "wreq avg %d limits [%d/%d], "
                    "bytes avg %d limits [%d/%d]\n",
                    if_name(ifp),
                    (mode == IFNET_MODEL_INPUT_POLL_ON) ?
                    "ON" : "OFF", inp->rxpoll_pavg,
                    inp->rxpoll_pmax, inp->rxpoll_plowat,
                    inp->rxpoll_phiwat, inp->rxpoll_wavg,
                    inp->rxpoll_wlowat, inp->rxpoll_whiwat,
                    inp->rxpoll_bavg, inp->rxpoll_blowat,
                    inp->rxpoll_bhiwat);
            }

            if ((err = ((*ifp->if_input_ctl)(ifp,
                IFNET_CTL_SET_INPUT_MODEL, sizeof (p), &p))) != 0) {
                printf("%s: error setting polling mode "
                    "to %s (%d)\n", if_name(ifp),
                    (mode == IFNET_MODEL_INPUT_POLL_ON) ?
                    "ON" : "OFF", err);
            }

            switch (mode) {
            case IFNET_MODEL_INPUT_POLL_OFF:
                ifnet_set_poll_cycle(ifp, NULL);
                inp->rxpoll_offreq++;
                if (err != 0)
                    inp->rxpoll_offerr++;
                break;

            case IFNET_MODEL_INPUT_POLL_ON:
                net_nsectimer(&ival, &ts);
                ifnet_set_poll_cycle(ifp, &ts);
                ifnet_poll(ifp);
                inp->rxpoll_onreq++;
                if (err != 0)
                    inp->rxpoll_onerr++;
                break;

            default:
                VERIFY(0);
                /* NOTREACHED */
            }

            /* Release the IO refcnt */
            ifnet_decr_iorefcnt(ifp);
        }

        /*
         * NOTE warning %%% attention !!!!
         * We should think about putting some thread starvation
         * safeguards if we deal with long chains of packets.
         */
        if (m != NULL)
            dlil_input_packet_list_extended(NULL, m, m_cnt, mode);
    }

    /* NOTREACHED */
    VERIFY(0);    /* we should never get here */
}
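
/*
 * Added note: DLIL_EWMA (defined earlier in this file) maintains the
 * rxpoll_{p,b,w}avg figures as exponentially weighted moving averages,
 * roughly avg <- avg + (sample - avg) / 2^if_rxpoll_decay, so the mode
 * transitions above compare smoothed rates rather than instantaneous
 * bursts.
 */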
/*
 * Must be called on an attached ifnet (caller is expected to check.)
 * Caller may pass NULL for poll parameters to indicate "auto-tuning."
 */
errno_t
dlil_rxpoll_set_params(struct ifnet *ifp, struct ifnet_poll_params *p,
    boolean_t locked)
{
    struct dlil_threading_info *inp;
    u_int64_t sample_holdtime, inbw;

    VERIFY(ifp != NULL);
    if (!(ifp->if_eflags & IFEF_RXPOLL) || (inp = ifp->if_inp) == NULL)
        return (ENXIO);

    if (p != NULL) {
        if ((p->packets_lowat == 0 && p->packets_hiwat != 0) ||
            (p->packets_lowat != 0 && p->packets_hiwat == 0))
            return (EINVAL);
        if (p->packets_lowat != 0 &&    /* hiwat must be non-zero */
            p->packets_lowat >= p->packets_hiwat)
            return (EINVAL);
        if ((p->bytes_lowat == 0 && p->bytes_hiwat != 0) ||
            (p->bytes_lowat != 0 && p->bytes_hiwat == 0))
            return (EINVAL);
        if (p->bytes_lowat != 0 &&      /* hiwat must be non-zero */
            p->bytes_lowat >= p->bytes_hiwat)
            return (EINVAL);
        if (p->interval_time != 0 &&
            p->interval_time < IF_RXPOLL_INTERVALTIME_MIN)
            p->interval_time = IF_RXPOLL_INTERVALTIME_MIN;
    }

    if (!locked)
        lck_mtx_lock(&inp->input_lck);

    LCK_MTX_ASSERT(&inp->input_lck, LCK_MTX_ASSERT_OWNED);

    /*
     * Normally, we'd reset the parameters to the auto-tuned values
     * if the input thread detects a change in link rate.  If the
     * driver provides its own parameters right after a link rate
     * changes, but before the input thread gets to run, we want to
     * make sure to keep the driver's values.  Clearing if_poll_update
     * will achieve that.
     */
    if (p != NULL && !locked && ifp->if_poll_update != 0)
        ifp->if_poll_update = 0;

    if ((inbw = ifnet_input_linkrate(ifp)) == 0 && p == NULL) {
        sample_holdtime = 0;    /* polling is disabled */
        inp->rxpoll_wlowat = inp->rxpoll_plowat =
            inp->rxpoll_blowat = 0;
        inp->rxpoll_whiwat = inp->rxpoll_phiwat =
            inp->rxpoll_bhiwat = (u_int32_t)-1;
        inp->rxpoll_plim = 0;
        inp->rxpoll_ival = IF_RXPOLL_INTERVALTIME_MIN;
    } else {
        u_int32_t plowat, phiwat, blowat, bhiwat, plim;
        u_int64_t ival;
        unsigned int n, i;

        for (n = 0, i = 0; rxpoll_tbl[i].speed != 0; i++) {
            if (inbw < rxpoll_tbl[i].speed)
                break;
            n = i;
        }

        /* auto-tune if caller didn't specify a value */
        plowat = ((p == NULL || p->packets_lowat == 0) ?
            rxpoll_tbl[n].plowat : p->packets_lowat);
        phiwat = ((p == NULL || p->packets_hiwat == 0) ?
            rxpoll_tbl[n].phiwat : p->packets_hiwat);
        blowat = ((p == NULL || p->bytes_lowat == 0) ?
            rxpoll_tbl[n].blowat : p->bytes_lowat);
        bhiwat = ((p == NULL || p->bytes_hiwat == 0) ?
            rxpoll_tbl[n].bhiwat : p->bytes_hiwat);
        plim = ((p == NULL || p->packets_limit == 0) ?
            if_rxpoll_max : p->packets_limit);
        ival = ((p == NULL || p->interval_time == 0) ?
            if_rxpoll_interval_time : p->interval_time);

        VERIFY(plowat != 0 && phiwat != 0);
        VERIFY(blowat != 0 && bhiwat != 0);
        VERIFY(ival >= IF_RXPOLL_INTERVALTIME_MIN);

        sample_holdtime = if_rxpoll_sample_holdtime;
        inp->rxpoll_wlowat = if_rxpoll_wlowat;
        inp->rxpoll_whiwat = if_rxpoll_whiwat;
        inp->rxpoll_plowat = plowat;
        inp->rxpoll_phiwat = phiwat;
        inp->rxpoll_blowat = blowat;
        inp->rxpoll_bhiwat = bhiwat;
        inp->rxpoll_plim = plim;
        inp->rxpoll_ival = ival;
    }

    net_nsectimer(&if_rxpoll_mode_holdtime, &inp->mode_holdtime);
    net_nsectimer(&sample_holdtime, &inp->sample_holdtime);

    if (dlil_verbose) {
        printf("%s: speed %llu bps, sample per %llu nsec, "
            "poll interval %llu nsec, pkts per poll %u, "
            "pkt limits [%u/%u], wreq limits [%u/%u], "
            "bytes limits [%u/%u]\n", if_name(ifp),
            inbw, sample_holdtime, inp->rxpoll_ival, inp->rxpoll_plim,
            inp->rxpoll_plowat, inp->rxpoll_phiwat, inp->rxpoll_wlowat,
            inp->rxpoll_whiwat, inp->rxpoll_blowat, inp->rxpoll_bhiwat);
    }

    if (!locked)
        lck_mtx_unlock(&inp->input_lck);

    return (0);
}
/*
 * Must be called on an attached ifnet (caller is expected to check.)
 */
errno_t
dlil_rxpoll_get_params(struct ifnet *ifp, struct ifnet_poll_params *p)
{
    struct dlil_threading_info *inp;

    VERIFY(ifp != NULL && p != NULL);
    if (!(ifp->if_eflags & IFEF_RXPOLL) || (inp = ifp->if_inp) == NULL)
        return (ENXIO);

    bzero(p, sizeof (*p));

    lck_mtx_lock(&inp->input_lck);
    p->packets_limit = inp->rxpoll_plim;
    p->packets_lowat = inp->rxpoll_plowat;
    p->packets_hiwat = inp->rxpoll_phiwat;
    p->bytes_lowat = inp->rxpoll_blowat;
    p->bytes_hiwat = inp->rxpoll_bhiwat;
    p->interval_time = inp->rxpoll_ival;
    lck_mtx_unlock(&inp->input_lck);

    return (0);
}
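
/*
 * Driver-side sketch (hedged): a polling-capable driver can override the
 * auto-tuned thresholds through the ifnet_set_poll_params() KPI, which
 * funnels into dlil_rxpoll_set_params() above; zeroed fields keep their
 * auto-tuned values.  The numbers below are examples only:
 *
 *    struct ifnet_poll_params pp = {
 *        .packets_limit = 64,          // max pkts per poll, example value
 *        .interval_time = 1000000,     // 1 msec, in nanoseconds
 *    };
 *    errno_t err = ifnet_set_poll_params(ifp, &pp);
 */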
errno_t
ifnet_input(struct ifnet *ifp, struct mbuf *m_head,
    const struct ifnet_stat_increment_param *s)
{
    return (ifnet_input_common(ifp, m_head, NULL, s, FALSE, FALSE));
}

errno_t
ifnet_input_extended(struct ifnet *ifp, struct mbuf *m_head,
    struct mbuf *m_tail, const struct ifnet_stat_increment_param *s)
{
    return (ifnet_input_common(ifp, m_head, m_tail, s, TRUE, FALSE));
}
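
/*
 * Driver RX sketch (hedged): a typical driver hands a chain of packets to
 * the stack with the extended variant, supplying accurate packet counts
 * since ifnet_input_common() asserts on them.  The chain bookkeeping names
 * are hypothetical:
 *
 *    struct ifnet_stat_increment_param s = {
 *        .packets_in = chain_cnt,      // hypothetical driver counters
 *        .bytes_in   = chain_len,
 *    };
 *    (void) ifnet_input_extended(ifp, m_head, m_tail, &s);
 */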
static errno_t
ifnet_input_common(struct ifnet *ifp, struct mbuf *m_head, struct mbuf *m_tail,
    const struct ifnet_stat_increment_param *s, boolean_t ext, boolean_t poll)
{
    dlil_input_func input_func;
    struct ifnet_stat_increment_param _s;
    u_int32_t m_cnt = 0, m_size = 0;
    struct mbuf *last;
    errno_t err = 0;

    if ((m_head == NULL && !poll) || (s == NULL && ext)) {
        if (m_head != NULL)
            mbuf_freem_list(m_head);
        return (EINVAL);
    }

    VERIFY(m_head != NULL || (s == NULL && m_tail == NULL && !ext && poll));
    VERIFY(m_tail == NULL || ext);
    VERIFY(s != NULL || !ext);

    /*
     * Drop the packet(s) if the parameters are invalid, or if the
     * interface is no longer attached; else hold an IO refcnt to
     * prevent it from being detached (will be released below.)
     */
    if (ifp == NULL || (ifp != lo_ifp && !ifnet_is_attached(ifp, 1))) {
        if (m_head != NULL)
            mbuf_freem_list(m_head);
        return (EINVAL);
    }

    input_func = ifp->if_input_dlil;
    VERIFY(input_func != NULL);

    if (m_tail == NULL) {
        last = m_head;
        while (m_head != NULL) {
#if IFNET_INPUT_SANITY_CHK
            if (dlil_input_sanity_check != 0)
                DLIL_INPUT_CHECK(last, ifp);
#endif /* IFNET_INPUT_SANITY_CHK */
            m_cnt++;
            m_size += m_length(last);
            if (mbuf_nextpkt(last) == NULL)
                break;
            last = mbuf_nextpkt(last);
        }
        m_tail = last;
    } else {
#if IFNET_INPUT_SANITY_CHK
        if (dlil_input_sanity_check != 0) {
            last = m_head;
            while (1) {
                DLIL_INPUT_CHECK(last, ifp);
                m_cnt++;
                m_size += m_length(last);
                if (mbuf_nextpkt(last) == NULL)
                    break;
                last = mbuf_nextpkt(last);
            }
        } else {
            m_cnt = s->packets_in;
            m_size = s->bytes_in;
            last = m_tail;
        }
#else
        m_cnt = s->packets_in;
        m_size = s->bytes_in;
        last = m_tail;
#endif /* IFNET_INPUT_SANITY_CHK */
    }

    if (last != m_tail) {
        panic_plain("%s: invalid input packet chain for %s, "
            "tail mbuf %p instead of %p\n", __func__, if_name(ifp),
            m_tail, last);
        /* NOTREACHED */
    }

    /*
     * Assert packet count only for the extended variant, for backwards
     * compatibility, since this came directly from the device driver.
     * Relax this assertion for input bytes, as the driver may have
     * included the link-layer headers in the computation; hence
     * m_size is just an approximation.
     */
    if (ext && s->packets_in != m_cnt) {
        panic_plain("%s: input packet count mismatch for %s, "
            "%d instead of %d\n", __func__, if_name(ifp),
            s->packets_in, m_cnt);
        /* NOTREACHED */
    }

    bzero(&_s, sizeof (_s));
    if (s != NULL)
        _s = *s;
    _s.packets_in = m_cnt;
    _s.bytes_in = m_size;
    s = &_s;

    err = (*input_func)(ifp, m_head, m_tail, s, poll, current_thread());

    if (ifp != lo_ifp) {
        /* Release the IO refcnt */
        ifnet_decr_iorefcnt(ifp);
    }

    return (err);
}
static errno_t
dlil_output_handler(struct ifnet *ifp, struct mbuf *m)
{
    return (ifp->if_output(ifp, m));
}

static errno_t
dlil_input_handler(struct ifnet *ifp, struct mbuf *m_head,
    struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
    boolean_t poll, struct thread *tp)
{
    struct dlil_threading_info *inp;
    u_int32_t m_cnt = s->packets_in;
    u_int32_t m_size = s->bytes_in;

    if ((inp = ifp->if_inp) == NULL)
        inp = dlil_main_input_thread;

    /*
     * If there is a matching DLIL input thread associated with an
     * affinity set, associate this thread with the same set.  We
     * will only do this once.
     */
    lck_mtx_lock_spin(&inp->input_lck);
    if (inp != dlil_main_input_thread && inp->net_affinity && tp != NULL &&
        ((!poll && inp->wloop_thr == THREAD_NULL) ||
        (poll && inp->poll_thr == THREAD_NULL))) {
        u_int32_t tag = inp->tag;

        if (poll) {
            VERIFY(inp->poll_thr == THREAD_NULL);
            inp->poll_thr = tp;
        } else {
            VERIFY(inp->wloop_thr == THREAD_NULL);
            inp->wloop_thr = tp;
        }
        lck_mtx_unlock(&inp->input_lck);

        /* Associate the current thread with the new affinity tag */
        (void) dlil_affinity_set(tp, tag);

        /*
         * Take a reference on the current thread; during detach,
         * we will need to refer to it in order to tear down its
         * affinity.
         */
        thread_reference(tp);
        lck_mtx_lock_spin(&inp->input_lck);
    }

    VERIFY(m_head != NULL || (m_tail == NULL && m_cnt == 0));

    /*
     * Because of loopbacked multicast we cannot stuff the ifp in
     * the rcvif of the packet header: loopback (lo0) packets use a
     * dedicated list so that we can later associate them with lo_ifp
     * on their way up the stack.  Packets for other interfaces without
     * dedicated input threads go to the regular list.
     */
    if (m_head != NULL) {
        if (inp == dlil_main_input_thread && ifp == lo_ifp) {
            struct dlil_main_threading_info *inpm =
                (struct dlil_main_threading_info *)inp;
            _addq_multi(&inpm->lo_rcvq_pkts, m_head, m_tail,
                m_cnt, m_size);
        } else {
            _addq_multi(&inp->rcvq_pkts, m_head, m_tail,
                m_cnt, m_size);
        }
    }

#if IFNET_INPUT_SANITY_CHK
    if (dlil_input_sanity_check != 0) {
        u_int32_t count;
        struct mbuf *m0;

        for (m0 = m_head, count = 0; m0; m0 = mbuf_nextpkt(m0))
            count++;

        if (count != m_cnt) {
            panic_plain("%s: invalid packet count %d "
                "(expected %d)\n", if_name(ifp),
                count, m_cnt);
            /* NOTREACHED */
        }

        inp->input_mbuf_cnt += m_cnt;
    }
#endif /* IFNET_INPUT_SANITY_CHK */

    dlil_input_stats_add(s, inp, poll);
    /*
     * If we're using the main input thread, synchronize the
     * stats now since we have the interface context.  All
     * other cases involving dedicated input threads will
     * have their stats synchronized there.
     */
    if (inp == dlil_main_input_thread)
        dlil_input_stats_sync(ifp, inp);

    if (qlen(&inp->rcvq_pkts) >= dlil_rcv_mit_pkts_min &&
        qlen(&inp->rcvq_pkts) < dlil_rcv_mit_pkts_max &&
        (ifp->if_family == IFNET_FAMILY_ETHERNET ||
        ifp->if_type == IFT_CELLULAR)
        ) {
        if (!thread_call_isactive(inp->input_mit_tcall)) {
            uint64_t deadline;
            clock_interval_to_deadline(dlil_rcv_mit_interval,
                1, &deadline);
            (void) thread_call_enter_delayed(
                inp->input_mit_tcall, deadline);
        }
    } else {
        inp->input_waiting |= DLIL_INPUT_WAITING;
        if (!(inp->input_waiting & DLIL_INPUT_RUNNING)) {
            inp->wtot++;
            wakeup_one((caddr_t)&inp->input_waiting);
        }
    }
    lck_mtx_unlock(&inp->input_lck);

    return (0);
}
static void
ifnet_start_common(struct ifnet *ifp, int resetfc)
{
    if (!(ifp->if_eflags & IFEF_TXSTART))
        return;
    /*
     * If the starter thread is inactive, signal it to do work,
     * unless the interface is being flow controlled from below,
     * e.g. a virtual interface being flow controlled by a real
     * network interface beneath it.
     */
    lck_mtx_lock_spin(&ifp->if_start_lock);
    if (resetfc) {
        ifp->if_start_flags &= ~IFSF_FLOW_CONTROLLED;
    } else if (ifp->if_start_flags & IFSF_FLOW_CONTROLLED) {
        lck_mtx_unlock(&ifp->if_start_lock);
        return;
    }
    ifp->if_start_req++;
    if (!ifp->if_start_active && ifp->if_start_thread != THREAD_NULL &&
        (resetfc || !(ifp->if_eflags & IFEF_ENQUEUE_MULTI) ||
        IFCQ_LEN(&ifp->if_snd) >= ifp->if_start_delay_qlen ||
        ifp->if_start_delayed == 0)) {
        (void) thread_wakeup_thread((caddr_t)&ifp->if_start_thread,
            ifp->if_start_thread);
    }
    lck_mtx_unlock(&ifp->if_start_lock);
}

void
ifnet_start(struct ifnet *ifp)
{
    ifnet_start_common(ifp, 0);
}
static void
ifnet_start_thread_fn(void *v, wait_result_t w)
{
#pragma unused(w)
    struct ifnet *ifp = v;
    char ifname[IFNAMSIZ + 1];
    char thread_name[MAXTHREADNAMESIZE];
    struct timespec *ts = NULL;
    struct ifclassq *ifq = &ifp->if_snd;
    struct timespec delay_start_ts;

    /* Construct the name for this thread, and then apply it. */
    bzero(thread_name, sizeof(thread_name));
    (void) snprintf(thread_name, sizeof (thread_name),
        "ifnet_start_%s", ifp->if_xname);
    thread_set_thread_name(ifp->if_start_thread, thread_name);

    /*
     * Treat the dedicated starter thread for lo0 as equivalent to
     * the driver workloop thread; if net_affinity is enabled for
     * the main input thread, associate this starter thread to it
     * by binding them with the same affinity tag.  This is done
     * only once (as we only have one lo_ifp which never goes away.)
     */
    if (ifp == lo_ifp) {
        struct dlil_threading_info *inp = dlil_main_input_thread;
        struct thread *tp = current_thread();

        lck_mtx_lock(&inp->input_lck);
        if (inp->net_affinity) {
            u_int32_t tag = inp->tag;

            VERIFY(inp->wloop_thr == THREAD_NULL);
            VERIFY(inp->poll_thr == THREAD_NULL);
            inp->wloop_thr = tp;
            lck_mtx_unlock(&inp->input_lck);

            /* Associate this thread with the affinity tag */
            (void) dlil_affinity_set(tp, tag);
        } else {
            lck_mtx_unlock(&inp->input_lck);
        }
    }

    (void) snprintf(ifname, sizeof (ifname), "%s_starter", if_name(ifp));

    lck_mtx_lock_spin(&ifp->if_start_lock);

    for (;;) {
        if (ifp->if_start_thread != NULL) {
            (void) msleep(&ifp->if_start_thread,
                &ifp->if_start_lock,
                (PZERO - 1) | PSPIN, ifname, ts);
        }
        /* interface is detached? */
        if (ifp->if_start_thread == THREAD_NULL) {
            ifnet_set_start_cycle(ifp, NULL);
            lck_mtx_unlock(&ifp->if_start_lock);
            ifnet_purge(ifp);

            if (dlil_verbose) {
                printf("%s: starter thread terminated\n",
                    if_name(ifp));
            }

            /* for the extra refcnt from kernel_thread_start() */
            thread_deallocate(current_thread());
            /* this is the end */
            thread_terminate(current_thread());
            /* NOTREACHED */
            return;
        }

        ifp->if_start_active = 1;

        for (;;) {
            u_int32_t req = ifp->if_start_req;
            if (!IFCQ_IS_EMPTY(ifq) &&
                (ifp->if_eflags & IFEF_ENQUEUE_MULTI) &&
                ifp->if_start_delayed == 0 &&
                IFCQ_LEN(ifq) < ifp->if_start_delay_qlen &&
                (ifp->if_eflags & IFEF_DELAY_START)) {
                ifp->if_start_delayed = 1;
                ifnet_start_delayed++;
                break;
            } else {
                ifp->if_start_delayed = 0;
            }
            lck_mtx_unlock(&ifp->if_start_lock);

            /*
             * If no longer attached, don't call start because ifp
             * is being destroyed; else hold an IO refcnt to
             * prevent the interface from being detached (will be
             * released below.)
             */
            if (!ifnet_is_attached(ifp, 1)) {
                lck_mtx_lock_spin(&ifp->if_start_lock);
                break;
            }

            /* invoke the driver's start routine */
            ((*ifp->if_start)(ifp));

            /*
             * Release the io ref count taken by ifnet_is_attached.
             */
            ifnet_decr_iorefcnt(ifp);

            lck_mtx_lock_spin(&ifp->if_start_lock);

            /* if there's no pending request, we're done */
            if (req == ifp->if_start_req)
                break;
        }

        ifp->if_start_req = 0;
        ifp->if_start_active = 0;

        /*
         * Wakeup N ns from now if rate-controlled by TBR, and if
         * there are still packets in the send queue which haven't
         * been dequeued so far; else sleep indefinitely (ts = NULL)
         * until ifnet_start() is called again.
         */
        ts = ((IFCQ_TBR_IS_ENABLED(ifq) && !IFCQ_IS_EMPTY(ifq)) ?
            &ifp->if_start_cycle : NULL);

        if (ts == NULL && ifp->if_start_delayed == 1) {
            delay_start_ts.tv_sec = 0;
            delay_start_ts.tv_nsec = ifp->if_start_delay_timeout;
            ts = &delay_start_ts;
        }

        if (ts != NULL && ts->tv_sec == 0 && ts->tv_nsec == 0)
            ts = NULL;
    }

    /* NOTREACHED */
}

void
ifnet_set_start_cycle(struct ifnet *ifp, struct timespec *ts)
{
    if (ts == NULL)
        bzero(&ifp->if_start_cycle, sizeof (ifp->if_start_cycle));
    else
        *(&ifp->if_start_cycle) = *ts;

    if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose)
        printf("%s: restart interval set to %lu nsec\n",
            if_name(ifp), ts->tv_nsec);
}
static void
ifnet_poll(struct ifnet *ifp)
{
    /*
     * If the poller thread is inactive, signal it to do work.
     */
    lck_mtx_lock_spin(&ifp->if_poll_lock);
    ifp->if_poll_req++;
    if (!ifp->if_poll_active && ifp->if_poll_thread != THREAD_NULL) {
        wakeup_one((caddr_t)&ifp->if_poll_thread);
    }
    lck_mtx_unlock(&ifp->if_poll_lock);
}

static void
ifnet_poll_thread_fn(void *v, wait_result_t w)
{
#pragma unused(w)
    struct dlil_threading_info *inp;
    struct ifnet *ifp = v;
    char ifname[IFNAMSIZ + 1];
    struct timespec *ts = NULL;
    struct ifnet_stat_increment_param s;

    snprintf(ifname, sizeof (ifname), "%s_poller",
        if_name(ifp));
    bzero(&s, sizeof (s));

    lck_mtx_lock_spin(&ifp->if_poll_lock);

    inp = ifp->if_inp;
    VERIFY(inp != NULL);

    for (;;) {
        if (ifp->if_poll_thread != THREAD_NULL) {
            (void) msleep(&ifp->if_poll_thread, &ifp->if_poll_lock,
                (PZERO - 1) | PSPIN, ifname, ts);
        }

        /* interface is detached (maybe while asleep)? */
        if (ifp->if_poll_thread == THREAD_NULL) {
            ifnet_set_poll_cycle(ifp, NULL);
            lck_mtx_unlock(&ifp->if_poll_lock);

            if (dlil_verbose) {
                printf("%s: poller thread terminated\n",
                    if_name(ifp));
            }

            /* for the extra refcnt from kernel_thread_start() */
            thread_deallocate(current_thread());
            /* this is the end */
            thread_terminate(current_thread());
            /* NOTREACHED */
            return;
        }

        ifp->if_poll_active = 1;
        for (;;) {
            struct mbuf *m_head, *m_tail;
            u_int32_t m_lim, m_cnt, m_totlen;
            u_int16_t req = ifp->if_poll_req;

            lck_mtx_unlock(&ifp->if_poll_lock);

            /*
             * If no longer attached, there's nothing to do;
             * else hold an IO refcnt to prevent the interface
             * from being detached (will be released below.)
             */
            if (!ifnet_is_attached(ifp, 1)) {
                lck_mtx_lock_spin(&ifp->if_poll_lock);
                break;
            }

            m_lim = (inp->rxpoll_plim != 0) ? inp->rxpoll_plim :
                MAX((qlimit(&inp->rcvq_pkts)),
                (inp->rxpoll_phiwat << 2));

            if (dlil_verbose > 1) {
                printf("%s: polling up to %d pkts, "
                    "pkts avg %d max %d, wreq avg %d, "
                    "bytes avg %d\n",
                    if_name(ifp), m_lim,
                    inp->rxpoll_pavg, inp->rxpoll_pmax,
                    inp->rxpoll_wavg, inp->rxpoll_bavg);
            }

            /* invoke the driver's input poll routine */
            ((*ifp->if_input_poll)(ifp, 0, m_lim, &m_head, &m_tail,
                &m_cnt, &m_totlen));

            if (m_head != NULL) {
                VERIFY(m_tail != NULL && m_cnt > 0);

                if (dlil_verbose > 1) {
                    printf("%s: polled %d pkts, "
                        "pkts avg %d max %d, wreq avg %d, "
                        "bytes avg %d\n",
                        if_name(ifp), m_cnt,
                        inp->rxpoll_pavg, inp->rxpoll_pmax,
                        inp->rxpoll_wavg, inp->rxpoll_bavg);
                }

                /* stats are required for extended variant */
                s.packets_in = m_cnt;
                s.bytes_in = m_totlen;

                (void) ifnet_input_common(ifp, m_head, m_tail,
                    &s, TRUE, TRUE);
            } else {
                if (dlil_verbose > 1) {
                    printf("%s: no packets, "
                        "pkts avg %d max %d, wreq avg %d, "
                        "bytes avg %d\n",
                        if_name(ifp), inp->rxpoll_pavg,
                        inp->rxpoll_pmax, inp->rxpoll_wavg,
                        inp->rxpoll_bavg);
                }

                (void) ifnet_input_common(ifp, NULL, NULL,
                    NULL, FALSE, TRUE);
            }

            /* Release the io ref count */
            ifnet_decr_iorefcnt(ifp);

            lck_mtx_lock_spin(&ifp->if_poll_lock);

            /* if there's no pending request, we're done */
            if (req == ifp->if_poll_req)
                break;
        }
        ifp->if_poll_req = 0;
        ifp->if_poll_active = 0;

        /*
         * Wakeup N ns from now, else sleep indefinitely (ts = NULL)
         * until ifnet_poll() is called again.
         */
        ts = &ifp->if_poll_cycle;
        if (ts->tv_sec == 0 && ts->tv_nsec == 0)
            ts = NULL;
    }

    /* NOTREACHED */
}

void
ifnet_set_poll_cycle(struct ifnet *ifp, struct timespec *ts)
{
    if (ts == NULL)
        bzero(&ifp->if_poll_cycle, sizeof (ifp->if_poll_cycle));
    else
        *(&ifp->if_poll_cycle) = *ts;

    if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose)
        printf("%s: poll interval set to %lu nsec\n",
            if_name(ifp), ts->tv_nsec);
}
void
ifnet_purge(struct ifnet *ifp)
{
    if (ifp != NULL && (ifp->if_eflags & IFEF_TXSTART))
        if_qflush(ifp, 0);
}

void
ifnet_update_sndq(struct ifclassq *ifq, cqev_t ev)
{
    IFCQ_LOCK_ASSERT_HELD(ifq);

    if (!(IFCQ_IS_READY(ifq)))
        return;

    if (IFCQ_TBR_IS_ENABLED(ifq)) {
        struct tb_profile tb = { ifq->ifcq_tbr.tbr_rate_raw,
            ifq->ifcq_tbr.tbr_percent, 0 };
        (void) ifclassq_tbr_set(ifq, &tb, FALSE);
    }

    ifclassq_update(ifq, ev);
}

void
ifnet_update_rcv(struct ifnet *ifp, cqev_t ev)
{
    switch (ev) {
    case CLASSQ_EV_LINK_BANDWIDTH:
        if (net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL))
            ifp->if_poll_update++;
        break;

    default:
        break;
    }
}

errno_t
ifnet_set_output_sched_model(struct ifnet *ifp, u_int32_t model)
{
    struct ifclassq *ifq;
    u_int32_t omodel;
    errno_t err;

    if (ifp == NULL || model >= IFNET_SCHED_MODEL_MAX)
        return (EINVAL);
    else if (!(ifp->if_eflags & IFEF_TXSTART))
        return (ENXIO);

    ifq = &ifp->if_snd;
    IFCQ_LOCK(ifq);
    omodel = ifp->if_output_sched_model;
    ifp->if_output_sched_model = model;
    if ((err = ifclassq_pktsched_setup(ifq)) != 0)
        ifp->if_output_sched_model = omodel;
    IFCQ_UNLOCK(ifq);

    return (err);
}

errno_t
ifnet_set_sndq_maxlen(struct ifnet *ifp, u_int32_t maxqlen)
{
    if (ifp == NULL)
        return (EINVAL);
    else if (!(ifp->if_eflags & IFEF_TXSTART))
        return (ENXIO);

    ifclassq_set_maxlen(&ifp->if_snd, maxqlen);

    return (0);
}

errno_t
ifnet_get_sndq_maxlen(struct ifnet *ifp, u_int32_t *maxqlen)
{
    if (ifp == NULL || maxqlen == NULL)
        return (EINVAL);
    else if (!(ifp->if_eflags & IFEF_TXSTART))
        return (ENXIO);

    *maxqlen = ifclassq_get_maxlen(&ifp->if_snd);

    return (0);
}

errno_t
ifnet_get_sndq_len(struct ifnet *ifp, u_int32_t *pkts)
{
    errno_t err;

    if (ifp == NULL || pkts == NULL)
        err = EINVAL;
    else if (!(ifp->if_eflags & IFEF_TXSTART))
        err = ENXIO;
    else
        err = ifclassq_get_len(&ifp->if_snd, MBUF_SC_UNSPEC,
            pkts, NULL);

    return (err);
}

errno_t
ifnet_get_service_class_sndq_len(struct ifnet *ifp, mbuf_svc_class_t sc,
    u_int32_t *pkts, u_int32_t *bytes)
{
    errno_t err;

    if (ifp == NULL || !MBUF_VALID_SC(sc) ||
        (pkts == NULL && bytes == NULL))
        err = EINVAL;
    else if (!(ifp->if_eflags & IFEF_TXSTART))
        err = ENXIO;
    else
        err = ifclassq_get_len(&ifp->if_snd, sc, pkts, bytes);

    return (err);
}

errno_t
ifnet_set_rcvq_maxlen(struct ifnet *ifp, u_int32_t maxqlen)
{
    struct dlil_threading_info *inp;

    if (ifp == NULL)
        return (EINVAL);
    else if (!(ifp->if_eflags & IFEF_RXPOLL) || ifp->if_inp == NULL)
        return (ENXIO);

    if (maxqlen == 0)
        maxqlen = if_rcvq_maxlen;
    else if (maxqlen < IF_RCVQ_MINLEN)
        maxqlen = IF_RCVQ_MINLEN;

    inp = ifp->if_inp;
    lck_mtx_lock(&inp->input_lck);
    qlimit(&inp->rcvq_pkts) = maxqlen;
    lck_mtx_unlock(&inp->input_lck);

    return (0);
}

errno_t
ifnet_get_rcvq_maxlen(struct ifnet *ifp, u_int32_t *maxqlen)
{
    struct dlil_threading_info *inp;

    if (ifp == NULL || maxqlen == NULL)
        return (EINVAL);
    else if (!(ifp->if_eflags & IFEF_RXPOLL) || ifp->if_inp == NULL)
        return (ENXIO);

    inp = ifp->if_inp;
    lck_mtx_lock(&inp->input_lck);
    *maxqlen = qlimit(&inp->rcvq_pkts);
    lck_mtx_unlock(&inp->input_lck);

    return (0);
}
void
ifnet_enqueue_multi_setup(struct ifnet *ifp, uint16_t delay_qlen,
    uint16_t delay_timeout)
{
    if (delay_qlen > 0 && delay_timeout > 0) {
        ifp->if_eflags |= IFEF_ENQUEUE_MULTI;
        ifp->if_start_delay_qlen = min(100, delay_qlen);
        ifp->if_start_delay_timeout = min(20000, delay_timeout);
        /* convert timeout to nanoseconds */
        ifp->if_start_delay_timeout *= 1000;
        kprintf("%s: forced IFEF_ENQUEUE_MULTI qlen %u timeout %u\n",
            ifp->if_xname, (uint32_t)delay_qlen,
            (uint32_t)delay_timeout);
    } else {
        ifp->if_eflags &= ~IFEF_ENQUEUE_MULTI;
    }
}

static inline errno_t
ifnet_enqueue_common(struct ifnet *ifp, void *p, classq_pkt_type_t ptype,
    boolean_t flush, boolean_t *pdrop)
{
    volatile uint64_t *fg_ts = NULL;
    volatile uint64_t *rt_ts = NULL;
    struct mbuf *m = p;
    struct timespec now;
    u_int64_t now_nsec = 0;
    int error = 0;

    ASSERT(ifp->if_eflags & IFEF_TXSTART);

    /*
     * If packet already carries a timestamp, either from dlil_output()
     * or from flowswitch, use it here.  Otherwise, record timestamp.
     * PKTF_TS_VALID is always cleared prior to entering classq, i.e.
     * the timestamp value is used internally there.
     */
    ASSERT(m->m_flags & M_PKTHDR);
    ASSERT(m->m_nextpkt == NULL);

    if (!(m->m_pkthdr.pkt_flags & PKTF_TS_VALID) ||
        m->m_pkthdr.pkt_timestamp == 0) {
        nanouptime(&now);
        net_timernsec(&now, &now_nsec);
        m->m_pkthdr.pkt_timestamp = now_nsec;
    }
    m->m_pkthdr.pkt_flags &= ~PKTF_TS_VALID;
    /*
     * If the packet service class is not background,
     * update the timestamp to indicate recent activity
     * on a foreground socket.
     */
    if ((m->m_pkthdr.pkt_flags & PKTF_FLOW_ID) &&
        m->m_pkthdr.pkt_flowsrc == FLOWSRC_INPCB) {
        if (!(m->m_pkthdr.pkt_flags & PKTF_SO_BACKGROUND)) {
            ifp->if_fg_sendts = _net_uptime;
            if (fg_ts != NULL)
                *fg_ts = _net_uptime;
        }
        if (m->m_pkthdr.pkt_flags & PKTF_SO_REALTIME) {
            ifp->if_rt_sendts = _net_uptime;
            if (rt_ts != NULL)
                *rt_ts = _net_uptime;
        }
    }

    if (ifp->if_eflags & IFEF_ENQUEUE_MULTI) {
        if (now_nsec == 0) {
            nanouptime(&now);
            net_timernsec(&now, &now_nsec);
        }
        /*
         * If the driver chose to delay start callback for
         * coalescing multiple packets, then use the following
         * heuristics to make sure that start callback will
         * be delayed only when bulk data transfer is detected.
         * 1. number of packets enqueued in (delay_win * 2) is
         * greater than or equal to the delay qlen.
         * 2. If delay_start is enabled it will stay enabled for
         * another 10 idle windows.  This is to take into account
         * variable RTT and burst traffic.
         * 3. If the time elapsed since last enqueue is more
         * than 200ms we disable delaying start callback.  This is
         * to take idle time into account.
         */
        u_int64_t dwin = (ifp->if_start_delay_timeout << 1);
        if (ifp->if_start_delay_swin > 0) {
            if ((ifp->if_start_delay_swin + dwin) > now_nsec) {
                ifp->if_start_delay_cnt++;
            } else if ((now_nsec - ifp->if_start_delay_swin)
                >= (200 * 1000 * 1000)) {
                ifp->if_start_delay_swin = now_nsec;
                ifp->if_start_delay_cnt = 1;
                ifp->if_start_delay_idle = 0;
                if (ifp->if_eflags & IFEF_DELAY_START) {
                    ifp->if_eflags &=
                        ~(IFEF_DELAY_START);
                    ifnet_delay_start_disabled++;
                }
            } else {
                if (ifp->if_start_delay_cnt >=
                    ifp->if_start_delay_qlen) {
                    ifp->if_eflags |= IFEF_DELAY_START;
                    ifp->if_start_delay_idle = 0;
                } else {
                    if (ifp->if_start_delay_idle >= 10) {
                        ifp->if_eflags &= ~(IFEF_DELAY_START);
                        ifnet_delay_start_disabled++;
                    } else {
                        ifp->if_start_delay_idle++;
                    }
                }
                ifp->if_start_delay_swin = now_nsec;
                ifp->if_start_delay_cnt = 1;
            }
        } else {
            ifp->if_start_delay_swin = now_nsec;
            ifp->if_start_delay_cnt = 1;
            ifp->if_start_delay_idle = 0;
            ifp->if_eflags &= ~(IFEF_DELAY_START);
        }
    } else {
        ifp->if_eflags &= ~(IFEF_DELAY_START);
    }

    /* enqueue the packet (caller consumes object) */
    error = ifclassq_enqueue(&ifp->if_snd, m, QP_MBUF, pdrop);
    m = NULL;

    /*
     * Tell the driver to start dequeueing; do this even when the queue
     * for the packet is suspended (EQSUSPENDED), as the driver could still
     * be dequeueing from other unsuspended queues.
     */
    if (!(ifp->if_eflags & IFEF_ENQUEUE_MULTI) &&
        ((error == 0 && flush) || error == EQFULL || error == EQSUSPENDED))
        ifnet_start(ifp);

    return (error);
}

errno_t
ifnet_enqueue(struct ifnet *ifp, struct mbuf *m)
{
    boolean_t pdrop;
    return (ifnet_enqueue_mbuf(ifp, m, TRUE, &pdrop));
}

errno_t
ifnet_enqueue_mbuf(struct ifnet *ifp, struct mbuf *m, boolean_t flush,
    boolean_t *pdrop)
{
    if (ifp == NULL || m == NULL || !(m->m_flags & M_PKTHDR) ||
        m->m_nextpkt != NULL) {
        if (m != NULL) {
            m_freem_list(m);
            *pdrop = TRUE;
        }
        return (EINVAL);
    } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
        !IF_FULLY_ATTACHED(ifp)) {
        /* flag tested without lock for performance */
        m_freem(m);
        *pdrop = TRUE;
        return (ENXIO);
    } else if (!(ifp->if_flags & IFF_UP)) {
        m_freem(m);
        *pdrop = TRUE;
        return (ENETDOWN);
    }

    return (ifnet_enqueue_common(ifp, m, QP_MBUF, flush, pdrop));
}
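
/*
 * TX sketch (hedged): code sitting above a TXSTART interface hands one
 * packet per call to the classq; once accepted the mbuf is consumed and
 * the starter thread is kicked as needed, with *pdrop reporting whether
 * the queue dropped it:
 *
 *    boolean_t pdrop;
 *    errno_t err = ifnet_enqueue_mbuf(ifp, m, TRUE, &pdrop);
 *    // the mbuf is consumed on success and on the drop paths above
 */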
errno_t
ifnet_dequeue(struct ifnet *ifp, struct mbuf **mp)
{
    errno_t rc;
    classq_pkt_type_t ptype;

    if (ifp == NULL || mp == NULL)
        return (EINVAL);
    else if (!(ifp->if_eflags & IFEF_TXSTART) ||
        ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX)
        return (ENXIO);
    if (!ifnet_is_attached(ifp, 1))
        return (ENXIO);

    rc = ifclassq_dequeue(&ifp->if_snd, 1, CLASSQ_DEQUEUE_MAX_BYTE_LIMIT,
        (void **)mp, NULL, NULL, NULL, &ptype);
    VERIFY((*mp == NULL) || (ptype == QP_MBUF));
    ifnet_decr_iorefcnt(ifp);

    return (rc);
}

errno_t
ifnet_dequeue_service_class(struct ifnet *ifp, mbuf_svc_class_t sc,
    struct mbuf **mp)
{
    errno_t rc;
    classq_pkt_type_t ptype;

    if (ifp == NULL || mp == NULL || !MBUF_VALID_SC(sc))
        return (EINVAL);
    else if (!(ifp->if_eflags & IFEF_TXSTART) ||
        ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX)
        return (ENXIO);
    if (!ifnet_is_attached(ifp, 1))
        return (ENXIO);

    rc = ifclassq_dequeue_sc(&ifp->if_snd, sc, 1,
        CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, (void **)mp, NULL, NULL,
        &ptype);
    VERIFY((*mp == NULL) || (ptype == QP_MBUF));
    ifnet_decr_iorefcnt(ifp);

    return (rc);
}

errno_t
ifnet_dequeue_multi(struct ifnet *ifp, u_int32_t pkt_limit,
    struct mbuf **head, struct mbuf **tail, u_int32_t *cnt, u_int32_t *len)
{
    errno_t rc;
    classq_pkt_type_t ptype;

    if (ifp == NULL || head == NULL || pkt_limit < 1)
        return (EINVAL);
    else if (!(ifp->if_eflags & IFEF_TXSTART) ||
        ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX)
        return (ENXIO);
    if (!ifnet_is_attached(ifp, 1))
        return (ENXIO);

    rc = ifclassq_dequeue(&ifp->if_snd, pkt_limit,
        CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, (void **)head, (void **)tail, cnt,
        len, &ptype);
    VERIFY((*head == NULL) || (ptype == QP_MBUF));
    ifnet_decr_iorefcnt(ifp);

    return (rc);
}

errno_t
ifnet_dequeue_multi_bytes(struct ifnet *ifp, u_int32_t byte_limit,
    struct mbuf **head, struct mbuf **tail, u_int32_t *cnt, u_int32_t *len)
{
    errno_t rc;
    classq_pkt_type_t ptype;

    if (ifp == NULL || head == NULL || byte_limit < 1)
        return (EINVAL);
    else if (!(ifp->if_eflags & IFEF_TXSTART) ||
        ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX)
        return (ENXIO);
    if (!ifnet_is_attached(ifp, 1))
        return (ENXIO);

    rc = ifclassq_dequeue(&ifp->if_snd, CLASSQ_DEQUEUE_MAX_PKT_LIMIT,
        byte_limit, (void **)head, (void **)tail, cnt, len, &ptype);
    VERIFY((*head == NULL) || (ptype == QP_MBUF));
    ifnet_decr_iorefcnt(ifp);

    return (rc);
}

errno_t
ifnet_dequeue_service_class_multi(struct ifnet *ifp, mbuf_svc_class_t sc,
    u_int32_t pkt_limit, struct mbuf **head, struct mbuf **tail, u_int32_t *cnt,
    u_int32_t *len)
{
    errno_t rc;
    classq_pkt_type_t ptype;

    if (ifp == NULL || head == NULL || pkt_limit < 1 ||
        !MBUF_VALID_SC(sc))
        return (EINVAL);
    else if (!(ifp->if_eflags & IFEF_TXSTART) ||
        ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX)
        return (ENXIO);
    if (!ifnet_is_attached(ifp, 1))
        return (ENXIO);

    rc = ifclassq_dequeue_sc(&ifp->if_snd, sc, pkt_limit,
        CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, (void **)head,
        (void **)tail, cnt, len, &ptype);
    VERIFY((*head == NULL) || (ptype == QP_MBUF));
    ifnet_decr_iorefcnt(ifp);

    return (rc);
}
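
/*
 * Starter-thread sketch (hedged): a driver's if_start routine, invoked
 * from ifnet_start_thread_fn() above, typically drains its send queue
 * with one of these dequeue variants and pushes the result to hardware.
 * The driver function names here are hypothetical:
 *
 *    static void my_if_start(struct ifnet *ifp)    // hypothetical
 *    {
 *        struct mbuf *m;
 *        while (ifnet_dequeue(ifp, &m) == 0)
 *            my_hw_transmit(ifp, m);               // hypothetical
 *    }
 */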
#if !CONFIG_EMBEDDED
errno_t
ifnet_framer_stub(struct ifnet *ifp, struct mbuf **m,
    const struct sockaddr *dest, const char *dest_linkaddr,
    const char *frame_type, u_int32_t *pre, u_int32_t *post)
{
    if (pre != NULL)
        *pre = 0;
    if (post != NULL)
        *post = 0;

    return (ifp->if_framer_legacy(ifp, m, dest, dest_linkaddr, frame_type));
}
#endif /* !CONFIG_EMBEDDED */
static int
dlil_interface_filters_input(struct ifnet *ifp, struct mbuf **m_p,
    char **frame_header_p, protocol_family_t protocol_family)
{
    struct ifnet_filter *filter;

    /*
     * Pass the inbound packet to the interface filters
     */
    lck_mtx_lock_spin(&ifp->if_flt_lock);
    /* prevent filter list from changing in case we drop the lock */
    if_flt_monitor_busy(ifp);
    TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
        int result;

        if (!filter->filt_skip && filter->filt_input != NULL &&
            (filter->filt_protocol == 0 ||
            filter->filt_protocol == protocol_family)) {
            lck_mtx_unlock(&ifp->if_flt_lock);

            result = (*filter->filt_input)(filter->filt_cookie,
                ifp, protocol_family, m_p, frame_header_p);

            lck_mtx_lock_spin(&ifp->if_flt_lock);
            if (result != 0) {
                /* we're done with the filter list */
                if_flt_monitor_unbusy(ifp);
                lck_mtx_unlock(&ifp->if_flt_lock);
                return (result);
            }
        }
    }

    /* we're done with the filter list */
    if_flt_monitor_unbusy(ifp);
    lck_mtx_unlock(&ifp->if_flt_lock);

    /*
     * Strip away M_PROTO1 bit prior to sending packet up the stack as
     * it is meant to be local to a subsystem -- if_bridge for M_PROTO1
     */
    if (*m_p != NULL)
        (*m_p)->m_flags &= ~M_PROTO1;

    return (0);
}

static int
dlil_interface_filters_output(struct ifnet *ifp, struct mbuf **m_p,
    protocol_family_t protocol_family)
{
    struct ifnet_filter *filter;

    /*
     * Pass the outbound packet to the interface filters
     */
    lck_mtx_lock_spin(&ifp->if_flt_lock);
    /* prevent filter list from changing in case we drop the lock */
    if_flt_monitor_busy(ifp);
    TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
        int result;

        if (!filter->filt_skip && filter->filt_output != NULL &&
            (filter->filt_protocol == 0 ||
            filter->filt_protocol == protocol_family)) {
            lck_mtx_unlock(&ifp->if_flt_lock);

            result = filter->filt_output(filter->filt_cookie, ifp,
                protocol_family, m_p);

            lck_mtx_lock_spin(&ifp->if_flt_lock);
            if (result != 0) {
                /* we're done with the filter list */
                if_flt_monitor_unbusy(ifp);
                lck_mtx_unlock(&ifp->if_flt_lock);
                return (result);
            }
        }
    }

    /* we're done with the filter list */
    if_flt_monitor_unbusy(ifp);
    lck_mtx_unlock(&ifp->if_flt_lock);

    return (0);
}
static void
dlil_ifproto_input(struct if_proto * ifproto, mbuf_t m)
{
    int error;

    if (ifproto->proto_kpi == kProtoKPI_v1) {
        /* Version 1 protocols get one packet at a time */
        while (m != NULL) {
            char *  frame_header;
            mbuf_t  next_packet;

            next_packet = m->m_nextpkt;
            m->m_nextpkt = NULL;
            frame_header = m->m_pkthdr.pkt_hdr;
            m->m_pkthdr.pkt_hdr = NULL;
            error = (*ifproto->kpi.v1.input)(ifproto->ifp,
                ifproto->protocol_family, m, frame_header);
            if (error != 0 && error != EJUSTRETURN)
                m_freem(m);
            m = next_packet;
        }
    } else if (ifproto->proto_kpi == kProtoKPI_v2) {
        /* Version 2 protocols support packet lists */
        error = (*ifproto->kpi.v2.input)(ifproto->ifp,
            ifproto->protocol_family, m);
        if (error != 0 && error != EJUSTRETURN)
            m_freem_list(m);
    }
}
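
/*
 * Added note: the two KPI generations differ only in chaining.  A v1
 * protocol input callback receives one unlinked mbuf plus its frame
 * header, while a v2 callback receives the whole m_nextpkt-linked list
 * and must walk it itself, which is why the v1 path above unlinks each
 * packet before the downcall.
 */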
static void
dlil_input_stats_add(const struct ifnet_stat_increment_param *s,
    struct dlil_threading_info *inp, boolean_t poll)
{
    struct ifnet_stat_increment_param *d = &inp->stats;

    if (s->packets_in != 0)
        d->packets_in += s->packets_in;
    if (s->bytes_in != 0)
        d->bytes_in += s->bytes_in;
    if (s->errors_in != 0)
        d->errors_in += s->errors_in;

    if (s->packets_out != 0)
        d->packets_out += s->packets_out;
    if (s->bytes_out != 0)
        d->bytes_out += s->bytes_out;
    if (s->errors_out != 0)
        d->errors_out += s->errors_out;

    if (s->collisions != 0)
        d->collisions += s->collisions;
    if (s->dropped != 0)
        d->dropped += s->dropped;

    if (poll)
        PKTCNTR_ADD(&inp->tstats, s->packets_in, s->bytes_in);
}

static void
dlil_input_stats_sync(struct ifnet *ifp, struct dlil_threading_info *inp)
{
    struct ifnet_stat_increment_param *s = &inp->stats;

    /*
     * Use of atomic operations is unavoidable here because
     * these stats may also be incremented elsewhere via KPIs.
     */
    if (s->packets_in != 0) {
        atomic_add_64(&ifp->if_data.ifi_ipackets, s->packets_in);
        s->packets_in = 0;
    }
    if (s->bytes_in != 0) {
        atomic_add_64(&ifp->if_data.ifi_ibytes, s->bytes_in);
        s->bytes_in = 0;
    }
    if (s->errors_in != 0) {
        atomic_add_64(&ifp->if_data.ifi_ierrors, s->errors_in);
        s->errors_in = 0;
    }

    if (s->packets_out != 0) {
        atomic_add_64(&ifp->if_data.ifi_opackets, s->packets_out);
        s->packets_out = 0;
    }
    if (s->bytes_out != 0) {
        atomic_add_64(&ifp->if_data.ifi_obytes, s->bytes_out);
        s->bytes_out = 0;
    }
    if (s->errors_out != 0) {
        atomic_add_64(&ifp->if_data.ifi_oerrors, s->errors_out);
        s->errors_out = 0;
    }

    if (s->collisions != 0) {
        atomic_add_64(&ifp->if_data.ifi_collisions, s->collisions);
        s->collisions = 0;
    }
    if (s->dropped != 0) {
        atomic_add_64(&ifp->if_data.ifi_iqdrops, s->dropped);
        s->dropped = 0;
    }

    if (ifp->if_data_threshold != 0) {
        lck_mtx_convert_spin(&inp->input_lck);
        ifnet_notify_data_threshold(ifp);
    }

    /*
     * No need for atomic operations as they are modified here
     * only from within the DLIL input thread context.
     */
    if (inp->tstats.packets != 0) {
        inp->pstats.ifi_poll_packets += inp->tstats.packets;
        inp->tstats.packets = 0;
    }
    if (inp->tstats.bytes != 0) {
        inp->pstats.ifi_poll_bytes += inp->tstats.bytes;
        inp->tstats.bytes = 0;
    }
}
3759 __private_extern__
void
3760 dlil_input_packet_list(struct ifnet
*ifp
, struct mbuf
*m
)
3762 return (dlil_input_packet_list_common(ifp
, m
, 0,
3763 IFNET_MODEL_INPUT_POLL_OFF
, FALSE
));
3766 __private_extern__
void
3767 dlil_input_packet_list_extended(struct ifnet
*ifp
, struct mbuf
*m
,
3768 u_int32_t cnt
, ifnet_model_t mode
)
3770 return (dlil_input_packet_list_common(ifp
, m
, cnt
, mode
, TRUE
));
3774 dlil_input_packet_list_common(struct ifnet
*ifp_param
, struct mbuf
*m
,
3775 u_int32_t cnt
, ifnet_model_t mode
, boolean_t ext
)
3778 protocol_family_t protocol_family
;
3780 ifnet_t ifp
= ifp_param
;
3781 char * frame_header
;
3782 struct if_proto
* last_ifproto
= NULL
;
3783 mbuf_t pkt_first
= NULL
;
3784 mbuf_t
* pkt_next
= NULL
;
3785 u_int32_t poll_thresh
= 0, poll_ival
= 0;
3787 KERNEL_DEBUG(DBG_FNC_DLIL_INPUT
| DBG_FUNC_START
, 0, 0, 0, 0, 0);
3789 if (ext
&& mode
== IFNET_MODEL_INPUT_POLL_ON
&& cnt
> 1 &&
3790 (poll_ival
= if_rxpoll_interval_pkts
) > 0)
3794 struct if_proto
*ifproto
= NULL
;
3796 uint32_t pktf_mask
; /* pkt flags to preserve */
3798 if (ifp_param
== NULL
)
3799 ifp
= m
            ->m_pkthdr.rcvif;

        if ((ifp->if_eflags & IFEF_RXPOLL) && poll_thresh != 0 &&
            poll_ival > 0 && (--poll_thresh % poll_ival) == 0)
            ifnet_poll(ifp);

        /* Check if this mbuf looks valid */
        MBUF_INPUT_CHECK(m, ifp);

        next_packet = m->m_nextpkt;
        m->m_nextpkt = NULL;
        frame_header = m->m_pkthdr.pkt_hdr;
        m->m_pkthdr.pkt_hdr = NULL;

        /*
         * Get an IO reference count if the interface is not
         * loopback (lo0) and it is attached; lo0 never goes
         * away, so optimize for that.
         */
        if (ifp != lo_ifp) {
            if (!ifnet_is_attached(ifp, 1)) {
                m_freem(m);
                goto next;
            }
            iorefcnt = 1;
            /*
             * Preserve the time stamp if it was set.
             */
            pktf_mask = PKTF_TS_VALID;
        } else {
            /*
             * If this arrived on lo0, preserve interface addr
             * info to allow for connectivity between loopback
             * and local interface addresses.
             */
            pktf_mask = (PKTF_LOOP|PKTF_IFAINFO);
        }

        /* make sure packet comes in clean */
        m_classifier_init(m, pktf_mask);

        ifp_inc_traffic_class_in(ifp, m);

        /* find which protocol family this packet is for */
        ifnet_lock_shared(ifp);
        error = (*ifp->if_demux)(ifp, m, frame_header,
            &protocol_family);
        ifnet_lock_done(ifp);
        if (error != 0) {
            if (error == EJUSTRETURN)
                goto next;
            protocol_family = 0;
        }

        if (hwcksum_dbg != 0 && !(ifp->if_flags & IFF_LOOPBACK) &&
            !(m->m_pkthdr.pkt_flags & PKTF_LOOP))
            dlil_input_cksum_dbg(ifp, m, frame_header,
                protocol_family);

        /*
         * For partial checksum offload, we expect the driver to
         * set the start offset indicating the start of the span
         * that is covered by the hardware-computed checksum;
         * adjust this start offset accordingly because the data
         * pointer has been advanced beyond the link-layer header.
         *
         * Don't adjust if the interface is a bridge member, as
         * the adjustment will occur from the context of the
         * bridge interface during input.
         */
        if (ifp->if_bridge == NULL && (m->m_pkthdr.csum_flags &
            (CSUM_DATA_VALID | CSUM_PARTIAL)) ==
            (CSUM_DATA_VALID | CSUM_PARTIAL)) {
            int adj;

            if (frame_header == NULL ||
                frame_header < (char *)mbuf_datastart(m) ||
                frame_header > (char *)m->m_data ||
                (adj = (m->m_data - frame_header)) >
                m->m_pkthdr.csum_rx_start) {
                m->m_pkthdr.csum_data = 0;
                m->m_pkthdr.csum_flags &= ~CSUM_DATA_VALID;
                hwcksum_in_invalidated++;
            } else {
                m->m_pkthdr.csum_rx_start -= adj;
            }
        }

        pktap_input(ifp, protocol_family, m, frame_header);

        if (m->m_flags & (M_BCAST|M_MCAST))
            atomic_add_64(&ifp->if_imcasts, 1);

        /* run interface filters, exclude VLAN packets PR-3586856 */
        if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) == 0) {
            error = dlil_interface_filters_input(ifp, &m,
                &frame_header, protocol_family);
            if (error != 0) {
                if (error != EJUSTRETURN)
                    m_freem(m);
                goto next;
            }
        }
        if (error != 0 || ((m->m_flags & M_PROMISC) != 0)) {
            m_freem(m);
            goto next;
        }

        /* Lookup the protocol attachment to this interface */
        if (protocol_family == 0) {
            ifproto = NULL;
        } else if (last_ifproto != NULL && last_ifproto->ifp == ifp &&
            (last_ifproto->protocol_family == protocol_family)) {
            VERIFY(ifproto == NULL);
            ifproto = last_ifproto;
            if_proto_ref(last_ifproto);
        } else {
            VERIFY(ifproto == NULL);
            ifnet_lock_shared(ifp);
            /* callee holds a proto refcnt upon success */
            ifproto = find_attached_proto(ifp, protocol_family);
            ifnet_lock_done(ifp);
        }
        if (ifproto == NULL) {
            /* no protocol for this packet, discard */
            m_freem(m);
            goto next;
        }
        if (ifproto != last_ifproto) {
            if (last_ifproto != NULL) {
                /* pass up the list for the previous protocol */
                dlil_ifproto_input(last_ifproto, pkt_first);
                pkt_first = NULL;
                if_proto_free(last_ifproto);
            }
            last_ifproto = ifproto;
            if_proto_ref(ifproto);
        }
        /* extend the list */
        m->m_pkthdr.pkt_hdr = frame_header;
        if (pkt_first == NULL) {
            pkt_first = m;
        } else {
            *pkt_next = m;
        }
        pkt_next = &m->m_nextpkt;

next:
        if (next_packet == NULL && last_ifproto != NULL) {
            /* pass up the last list of packets */
            dlil_ifproto_input(last_ifproto, pkt_first);
            if_proto_free(last_ifproto);
            last_ifproto = NULL;
        }
        if (ifproto != NULL) {
            if_proto_free(ifproto);
            ifproto = NULL;
        }

        m = next_packet;

        /* update the driver's multicast filter, if needed */
        if (ifp->if_updatemcasts > 0 && if_mcasts_update(ifp) == 0)
            ifp->if_updatemcasts = 0;
        if (iorefcnt == 1)
            ifnet_decr_iorefcnt(ifp);
    }

    KERNEL_DEBUG(DBG_FNC_DLIL_INPUT | DBG_FUNC_END, 0, 0, 0, 0, 0);

    return (0);
}
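/*
 * Note: if_mcasts_update() below restores link-layer multicast
 * memberships that were suspended (e.g. across an interface recycle)
 * by re-issuing SIOCADDMULTI to the driver; EAFNOSUPPORT from the
 * driver is treated as success.
 */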
static int
if_mcasts_update(struct ifnet *ifp)
{
    int err;

    err = ifnet_ioctl(ifp, 0, SIOCADDMULTI, NULL);
    if (err == EAFNOSUPPORT)
        err = 0;
    printf("%s: %s %d suspended link-layer multicast membership(s) "
        "(err=%d)\n", if_name(ifp),
        (err == 0 ? "successfully restored" : "failed to restore"),
        ifp->if_updatemcasts, err);

    /* just return success */
    return (0);
}
/* If ifp is set, we will increment the generation for the interface */
int
dlil_post_complete_msg(struct ifnet *ifp, struct kev_msg *event)
{
    if (ifp != NULL) {
        ifnet_increment_generation(ifp);
    }

#if NECP
    necp_update_all_clients();
#endif /* NECP */

    return (kev_post_msg(event));
}
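/*
 * Note: dlil_event_internal() below fans an event out in a fixed
 * order: first to the attached interface filters, then to each
 * attached protocol's event callback, and finally to the interface's
 * own if_event handler, before the kernel event message is posted.
 */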
#define TMP_IF_PROTO_ARR_SIZE 10
static int
dlil_event_internal(struct ifnet *ifp, struct kev_msg *event, bool update_generation)
{
    struct ifnet_filter *filter = NULL;
    struct if_proto *proto = NULL;
    int if_proto_count = 0;
    struct if_proto **tmp_ifproto_arr = NULL;
    struct if_proto *tmp_ifproto_stack_arr[TMP_IF_PROTO_ARR_SIZE] = {NULL};
    int tmp_ifproto_arr_idx = 0;
    bool tmp_malloc = false;

    /*
     * Pass the event to the interface filters
     */
    lck_mtx_lock_spin(&ifp->if_flt_lock);
    /* prevent filter list from changing in case we drop the lock */
    if_flt_monitor_busy(ifp);
    TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
        if (filter->filt_event != NULL) {
            lck_mtx_unlock(&ifp->if_flt_lock);

            filter->filt_event(filter->filt_cookie, ifp,
                filter->filt_protocol, event);

            lck_mtx_lock_spin(&ifp->if_flt_lock);
        }
    }
    /* we're done with the filter list */
    if_flt_monitor_unbusy(ifp);
    lck_mtx_unlock(&ifp->if_flt_lock);

    /* Get an io ref count if the interface is attached */
    if (!ifnet_is_attached(ifp, 1))
        goto done;

    /*
     * An embedded tmp_list_entry in if_proto may still get
     * over-written by another thread after giving up ifnet lock,
     * therefore we are avoiding embedded pointers here.
     */
    ifnet_lock_shared(ifp);
    if_proto_count = dlil_ifp_proto_count(ifp);
    if (if_proto_count) {
        int i;
        VERIFY(ifp->if_proto_hash != NULL);
        if (if_proto_count <= TMP_IF_PROTO_ARR_SIZE) {
            tmp_ifproto_arr = tmp_ifproto_stack_arr;
        } else {
            MALLOC(tmp_ifproto_arr, struct if_proto **,
                sizeof (*tmp_ifproto_arr) * if_proto_count,
                M_TEMP, M_WAITOK);
            if (tmp_ifproto_arr == NULL) {
                ifnet_lock_done(ifp);
                goto cleanup;
            }
            tmp_malloc = true;
        }

        for (i = 0; i < PROTO_HASH_SLOTS; i++) {
            SLIST_FOREACH(proto, &ifp->if_proto_hash[i],
                next_hash) {
                if_proto_ref(proto);
                tmp_ifproto_arr[tmp_ifproto_arr_idx] = proto;
                tmp_ifproto_arr_idx++;
            }
        }
        VERIFY(if_proto_count == tmp_ifproto_arr_idx);
    }
    ifnet_lock_done(ifp);

    for (tmp_ifproto_arr_idx = 0; tmp_ifproto_arr_idx < if_proto_count;
        tmp_ifproto_arr_idx++) {
        proto = tmp_ifproto_arr[tmp_ifproto_arr_idx];
        VERIFY(proto != NULL);
        proto_media_event eventp =
            (proto->proto_kpi == kProtoKPI_v1 ?
            proto->kpi.v1.event :
            proto->kpi.v2.event);

        if (eventp != NULL) {
            eventp(ifp, proto->protocol_family,
                event);
        }
        if_proto_free(proto);
    }

cleanup:
    if (tmp_malloc) {
        FREE(tmp_ifproto_arr, M_TEMP);
    }

    /* Pass the event to the interface */
    if (ifp->if_event != NULL)
        ifp->if_event(ifp, event);

    /* Release the io ref count */
    ifnet_decr_iorefcnt(ifp);
done:
    return (dlil_post_complete_msg(update_generation ? ifp : NULL, event));
}
errno_t
ifnet_event(ifnet_t ifp, struct kern_event_msg *event)
{
    struct kev_msg kev_msg;
    int result = 0;

    if (ifp == NULL || event == NULL)
        return (EINVAL);

    bzero(&kev_msg, sizeof (kev_msg));
    kev_msg.vendor_code = event->vendor_code;
    kev_msg.kev_class = event->kev_class;
    kev_msg.kev_subclass = event->kev_subclass;
    kev_msg.event_code = event->event_code;
    kev_msg.dv[0].data_ptr = &event->event_data[0];
    kev_msg.dv[0].data_length = event->total_size - KEV_MSG_HEADER_SIZE;
    kev_msg.dv[1].data_length = 0;

    result = dlil_event_internal(ifp, &kev_msg, TRUE);

    return (result);
}
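/*
 * dlil_get_socket_type() below peeks at the IP/IPv6 header of an
 * outbound packet to map it to SOCK_STREAM (TCP), SOCK_DGRAM (UDP)
 * or SOCK_RAW, for use by the MAC framework transmit check.
 */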
#if CONFIG_MACF_NET
#include <netinet/ip6.h>
#include <netinet/ip.h>

static int
dlil_get_socket_type(struct mbuf **mp, int family, int raw)
{
    struct mbuf *m;
    struct ip *ip;
    struct ip6_hdr *ip6;
    int type = SOCK_RAW;

    if (!raw) {
        switch (family) {
        case PF_INET:
            m = m_pullup(*mp, sizeof(struct ip));
            if (m == NULL)
                break;
            *mp = m;
            ip = mtod(m, struct ip *);
            if (ip->ip_p == IPPROTO_TCP)
                type = SOCK_STREAM;
            else if (ip->ip_p == IPPROTO_UDP)
                type = SOCK_DGRAM;
            break;
        case PF_INET6:
            m = m_pullup(*mp, sizeof(struct ip6_hdr));
            if (m == NULL)
                break;
            *mp = m;
            ip6 = mtod(m, struct ip6_hdr *);
            if (ip6->ip6_nxt == IPPROTO_TCP)
                type = SOCK_STREAM;
            else if (ip6->ip6_nxt == IPPROTO_UDP)
                type = SOCK_DGRAM;
            break;
        }
    }

    return (type);
}
#endif /* CONFIG_MACF_NET */
void
dlil_count_chain_len(mbuf_t m, struct chain_len_stats *cls)
{
    mbuf_t n = m;
    int chainlen = 0;

    while (n != NULL) {
        chainlen++;
        n = n->m_next;
    }
    switch (chainlen) {
    case 0:
        break;
    case 1:
        atomic_add_64(&cls->cls_one, 1);
        break;
    case 2:
        atomic_add_64(&cls->cls_two, 1);
        break;
    case 3:
        atomic_add_64(&cls->cls_three, 1);
        break;
    case 4:
        atomic_add_64(&cls->cls_four, 1);
        break;
    case 5:
    default:
        atomic_add_64(&cls->cls_five_or_more, 1);
        break;
    }
}
/*
 * Caller should have a lock on the protocol domain if the protocol
 * doesn't support finer grained locking. In most cases, the lock
 * will be held from the socket layer and won't be released until
 * we return back to the socket layer.
 *
 * This does mean that we must take a protocol lock before we take
 * an interface lock if we're going to take both. This makes sense
 * because a protocol is likely to interact with an ifp while it
 * is under the protocol lock.
 *
 * An advisory code will be returned if adv is not null. This
 * can be used to provide feedback about interface queues to the
 * application.
 */
errno_t
dlil_output(ifnet_t ifp, protocol_family_t proto_family, mbuf_t packetlist,
    void *route, const struct sockaddr *dest, int raw, struct flowadv *adv)
{
    char *frame_type = NULL;
    char *dst_linkaddr = NULL;
    int retval = 0;
    char frame_type_buffer[MAX_FRAME_TYPE_SIZE * 4];
    char dst_linkaddr_buffer[MAX_LINKADDR * 4];
    struct if_proto *proto = NULL;
    mbuf_t m;
    mbuf_t send_head = NULL;
    mbuf_t *send_tail = &send_head;
    int iorefcnt = 0;
    u_int32_t pre = 0, post = 0;
    u_int32_t fpkts = 0, fbytes = 0;
    int32_t flen = 0;
    struct timespec now;
    u_int64_t now_nsec;

    KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_START, 0, 0, 0, 0, 0);

    /*
     * Get an io refcnt if the interface is attached to prevent ifnet_detach
     * from happening while this operation is in progress
     */
    if (!ifnet_is_attached(ifp, 1)) {
        retval = ENXIO;
        goto cleanup;
    }
    iorefcnt = 1;

    VERIFY(ifp->if_output_dlil != NULL);

    /* update the driver's multicast filter, if needed */
    if (ifp->if_updatemcasts > 0 && if_mcasts_update(ifp) == 0)
        ifp->if_updatemcasts = 0;

    frame_type = frame_type_buffer;
    dst_linkaddr = dst_linkaddr_buffer;

    if (raw == 0) {
        ifnet_lock_shared(ifp);
        /* callee holds a proto refcnt upon success */
        proto = find_attached_proto(ifp, proto_family);
        if (proto == NULL) {
            ifnet_lock_done(ifp);
            retval = ENXIO;
            goto cleanup;
        }
        ifnet_lock_done(ifp);
    }

preout_again:
    if (packetlist == NULL)
        goto cleanup;

    m = packetlist;
    packetlist = packetlist->m_nextpkt;
    m->m_nextpkt = NULL;

    if (raw == 0) {
        proto_media_preout preoutp = (proto->proto_kpi == kProtoKPI_v1 ?
            proto->kpi.v1.pre_output : proto->kpi.v2.pre_output);
        retval = 0;
        if (preoutp != NULL) {
            retval = preoutp(ifp, proto_family, &m, dest, route,
                frame_type, dst_linkaddr);

            if (retval != 0) {
                if (retval == EJUSTRETURN)
                    goto preout_again;
                m_freem(m);
                goto cleanup;
            }
        }
    }

#if CONFIG_MACF_NET
    retval = mac_ifnet_check_transmit(ifp, m, proto_family,
        dlil_get_socket_type(&m, proto_family, raw));
    if (retval != 0) {
        m_freem(m);
        goto cleanup;
    }
#endif /* CONFIG_MACF_NET */

    do {
#if CONFIG_DTRACE
        if (!raw && proto_family == PF_INET) {
            struct ip *ip = mtod(m, struct ip *);
            DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL,
                struct ip *, ip, struct ifnet *, ifp,
                struct ip *, ip, struct ip6_hdr *, NULL);

        } else if (!raw && proto_family == PF_INET6) {
            struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
            DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL,
                struct ip6_hdr *, ip6, struct ifnet *, ifp,
                struct ip *, NULL, struct ip6_hdr *, ip6);
        }
#endif /* CONFIG_DTRACE */

        if (raw == 0 && ifp->if_framer != NULL) {
            int rcvif_set = 0;

            /*
             * If this is a broadcast packet that needs to be
             * looped back into the system, set the inbound ifp
             * to that of the outbound ifp. This will allow
             * us to determine that it is a legitimate packet
             * for the system. Only set the ifp if it's not
             * already set, just to be safe.
             */
            if ((m->m_flags & (M_BCAST | M_LOOP)) &&
                m->m_pkthdr.rcvif == NULL) {
                m->m_pkthdr.rcvif = ifp;
                rcvif_set = 1;
            }

            retval = ifp->if_framer(ifp, &m, dest, dst_linkaddr,
                frame_type, &pre, &post);
            if (retval != 0) {
                if (retval != EJUSTRETURN)
                    m_freem(m);
                goto next;
            }

            /*
             * For partial checksum offload, adjust the start
             * and stuff offsets based on the prepended header.
             */
            if ((m->m_pkthdr.csum_flags &
                (CSUM_DATA_VALID | CSUM_PARTIAL)) ==
                (CSUM_DATA_VALID | CSUM_PARTIAL)) {
                m->m_pkthdr.csum_tx_stuff += pre;
                m->m_pkthdr.csum_tx_start += pre;
            }

            if (hwcksum_dbg != 0 && !(ifp->if_flags & IFF_LOOPBACK))
                dlil_output_cksum_dbg(ifp, m, pre,
                    proto_family);

            /*
             * Clear the ifp if it was set above, and to be
             * safe, only if it is still the same as the
             * outbound ifp we have in context. If it was
             * looped back, then a copy of it was sent to the
             * loopback interface with the rcvif set, and we
             * are clearing the one that will go down to the
             * layer below.
             */
            if (rcvif_set && m->m_pkthdr.rcvif == ifp)
                m->m_pkthdr.rcvif = NULL;
        }

        /*
         * Let interface filters (if any) do their thing ...
         */
        /* Do not pass VLAN tagged packets to filters PR-3586856 */
        if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) == 0) {
            retval = dlil_interface_filters_output(ifp,
                &m, proto_family);
            if (retval != 0) {
                if (retval != EJUSTRETURN)
                    m_freem(m);
                goto next;
            }
        }
        /*
         * Strip away M_PROTO1 bit prior to sending packet
         * to the driver as this field may be used by the driver
         */
        m->m_flags &= ~M_PROTO1;

        /*
         * If the underlying interface is not capable of handling a
         * packet whose data portion spans across physically disjoint
         * pages, we need to "normalize" the packet so that we pass
         * down a chain of mbufs where each mbuf points to a span that
         * resides in the system page boundary. If the packet does
         * not cross page(s), the following is a no-op.
         */
        if (!(ifp->if_hwassist & IFNET_MULTIPAGES)) {
            if ((m = m_normalize(m)) == NULL)
                goto next;
        }

        /*
         * If this is a TSO packet, make sure the interface still
         * advertise TSO capability.
         */
        if (TSO_IPV4_NOTOK(ifp, m) || TSO_IPV6_NOTOK(ifp, m)) {
            retval = EMSGSIZE;
            m_freem(m);
            goto cleanup;
        }

        ifp_inc_traffic_class_out(ifp, m);
        pktap_output(ifp, proto_family, m, pre, post);

        /*
         * Count the number of elements in the mbuf chain
         */
        if (tx_chain_len_count) {
            dlil_count_chain_len(m, &tx_chain_len_stats);
        }

        /*
         * Record timestamp; ifnet_enqueue() will use this info
         * rather than redoing the work. An optimization could
         * involve doing this just once at the top, if there are
         * no interface filters attached, but that's probably
         * not a big deal.
         */
        nanouptime(&now);
        net_timernsec(&now, &now_nsec);
        (void) mbuf_set_timestamp(m, now_nsec, TRUE);

        /*
         * Discard partial sum information if this packet originated
         * from another interface; the packet would already have the
         * final checksum and we shouldn't recompute it.
         */
        if ((m->m_pkthdr.pkt_flags & PKTF_FORWARDED) &&
            (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID|CSUM_PARTIAL)) ==
            (CSUM_DATA_VALID|CSUM_PARTIAL)) {
            m->m_pkthdr.csum_flags &= ~CSUM_TX_FLAGS;
            m->m_pkthdr.csum_data = 0;
        }

        /*
         * Finally, call the driver.
         */
        if (ifp->if_eflags & (IFEF_SENDLIST | IFEF_ENQUEUE_MULTI)) {
            if (m->m_pkthdr.pkt_flags & PKTF_FORWARDED) {
                flen += (m_pktlen(m) - (pre + post));
                m->m_pkthdr.pkt_flags &= ~PKTF_FORWARDED;
            }
            *send_tail = m;
            send_tail = &m->m_nextpkt;
        } else {
            if (m->m_pkthdr.pkt_flags & PKTF_FORWARDED) {
                flen = (m_pktlen(m) - (pre + post));
                m->m_pkthdr.pkt_flags &= ~PKTF_FORWARDED;
            } else {
                flen = 0;
            }
            KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START,
                0, 0, 0, 0, 0);
            retval = (*ifp->if_output_dlil)(ifp, m);
            if (retval == EQFULL || retval == EQSUSPENDED) {
                if (adv != NULL && adv->code == FADV_SUCCESS) {
                    adv->code = (retval == EQFULL ?
                        FADV_FLOW_CONTROLLED :
                        FADV_SUSPENDED);
                }
                retval = 0;
            }
            if (retval == 0 && flen > 0) {
                fbytes += flen;
                fpkts++;
            }
            if (retval != 0 && dlil_verbose) {
                printf("%s: output error on %s retval = %d\n",
                    __func__, if_name(ifp),
                    retval);
            }
            KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END,
                0, 0, 0, 0, 0);
        }
        KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0, 0, 0, 0, 0);

next:
        m = packetlist;
        if (m != NULL) {
            packetlist = packetlist->m_nextpkt;
            m->m_nextpkt = NULL;
        }
    } while (m != NULL);

    if (send_head != NULL) {
        KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START,
            0, 0, 0, 0, 0);
        if (ifp->if_eflags & IFEF_SENDLIST) {
            retval = (*ifp->if_output_dlil)(ifp, send_head);
            if (retval == EQFULL || retval == EQSUSPENDED) {
                if (adv != NULL) {
                    adv->code = (retval == EQFULL ?
                        FADV_FLOW_CONTROLLED :
                        FADV_SUSPENDED);
                }
                retval = 0;
            }
            if (retval == 0 && flen > 0) {
                fbytes += flen;
                fpkts++;
            }
            if (retval != 0 && dlil_verbose) {
                printf("%s: output error on %s retval = %d\n",
                    __func__, if_name(ifp), retval);
            }
        } else {
            struct mbuf *send_m;
            int enq_cnt = 0;

            VERIFY(ifp->if_eflags & IFEF_ENQUEUE_MULTI);
            while (send_head != NULL) {
                send_m = send_head;
                send_head = send_m->m_nextpkt;
                send_m->m_nextpkt = NULL;
                retval = (*ifp->if_output_dlil)(ifp, send_m);
                if (retval == EQFULL || retval == EQSUSPENDED) {
                    if (adv != NULL) {
                        adv->code = (retval == EQFULL ?
                            FADV_FLOW_CONTROLLED :
                            FADV_SUSPENDED);
                    }
                    retval = 0;
                }
                if (retval == 0) {
                    enq_cnt++;
                    if (flen > 0)
                        fpkts++;
                }
                if (retval != 0 && dlil_verbose) {
                    printf("%s: output error on %s "
                        "retval = %d\n",
                        __func__, if_name(ifp), retval);
                }
            }
            if (enq_cnt > 0) {
                fbytes += flen;
                ifnet_start(ifp);
            }
        }
        KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
    }

    KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_END, 0, 0, 0, 0, 0);

cleanup:
    if (fbytes > 0)
        ifp->if_fbytes += fbytes;
    if (fpkts > 0)
        ifp->if_fpackets += fpkts;
    if (proto != NULL)
        if_proto_free(proto);
    if (packetlist) /* if any packets are left, clean up */
        mbuf_freem_list(packetlist);
    if (retval == EJUSTRETURN)
        retval = 0;
    if (iorefcnt == 1)
        ifnet_decr_iorefcnt(ifp);

    return (retval);
}
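/*
 * ifnet_ioctl() offers the ioctl to the interface filters first, then
 * to the protocol attachment (if proto_fam is non-zero), and lastly to
 * the interface itself; the first non-EOPNOTSUPP result wins, with
 * ENOTSUP normalized to EOPNOTSUPP along the way.
 */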
errno_t
ifnet_ioctl(ifnet_t ifp, protocol_family_t proto_fam, u_long ioctl_code,
    void *ioctl_arg)
{
    struct ifnet_filter *filter;
    int retval = EOPNOTSUPP;
    int result = 0;

    if (ifp == NULL || ioctl_code == 0)
        return (EINVAL);

    /* Get an io ref count if the interface is attached */
    if (!ifnet_is_attached(ifp, 1))
        return (EOPNOTSUPP);

    /*
     * Run the interface filters first.
     * We want to run all filters before calling the protocol,
     * interface family, or interface.
     */
    lck_mtx_lock_spin(&ifp->if_flt_lock);
    /* prevent filter list from changing in case we drop the lock */
    if_flt_monitor_busy(ifp);
    TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
        if (filter->filt_ioctl != NULL && (filter->filt_protocol == 0 ||
            filter->filt_protocol == proto_fam)) {
            lck_mtx_unlock(&ifp->if_flt_lock);

            result = filter->filt_ioctl(filter->filt_cookie, ifp,
                proto_fam, ioctl_code, ioctl_arg);

            lck_mtx_lock_spin(&ifp->if_flt_lock);

            /* Only update retval if no one has handled the ioctl */
            if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
                if (result == ENOTSUP)
                    result = EOPNOTSUPP;
                retval = result;
                if (retval != 0 && retval != EOPNOTSUPP) {
                    /* we're done with the filter list */
                    if_flt_monitor_unbusy(ifp);
                    lck_mtx_unlock(&ifp->if_flt_lock);
                    goto cleanup;
                }
            }
        }
    }
    /* we're done with the filter list */
    if_flt_monitor_unbusy(ifp);
    lck_mtx_unlock(&ifp->if_flt_lock);

    /* Allow the protocol to handle the ioctl */
    if (proto_fam != 0) {
        struct if_proto *proto;

        /* callee holds a proto refcnt upon success */
        ifnet_lock_shared(ifp);
        proto = find_attached_proto(ifp, proto_fam);
        ifnet_lock_done(ifp);
        if (proto != NULL) {
            proto_media_ioctl ioctlp =
                (proto->proto_kpi == kProtoKPI_v1 ?
                proto->kpi.v1.ioctl : proto->kpi.v2.ioctl);
            result = EOPNOTSUPP;
            if (ioctlp != NULL)
                result = ioctlp(ifp, proto_fam, ioctl_code,
                    ioctl_arg);
            if_proto_free(proto);

            /* Only update retval if no one has handled the ioctl */
            if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
                if (result == ENOTSUP)
                    result = EOPNOTSUPP;
                retval = result;
                if (retval && retval != EOPNOTSUPP)
                    goto cleanup;
            }
        }
    }

    /* retval is either 0 or EOPNOTSUPP */

    /*
     * Let the interface handle this ioctl.
     * If it returns EOPNOTSUPP, ignore that, we may have
     * already handled this in the protocol or family.
     */
    if (ifp->if_ioctl)
        result = (*ifp->if_ioctl)(ifp, ioctl_code, ioctl_arg);

    /* Only update retval if no one has handled the ioctl */
    if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
        if (result == ENOTSUP)
            result = EOPNOTSUPP;
        retval = result;
        if (retval && retval != EOPNOTSUPP) {
            goto cleanup;
        }
    }

    if (retval == EJUSTRETURN)
        retval = 0;
cleanup:
    ifnet_decr_iorefcnt(ifp);

    return (retval);
}
__private_extern__ errno_t
dlil_set_bpf_tap(ifnet_t ifp, bpf_tap_mode mode, bpf_packet_func callback)
{
    errno_t error = 0;

    if (ifp->if_set_bpf_tap) {
        /* Get an io reference on the interface if it is attached */
        if (!ifnet_is_attached(ifp, 1))
            return (ENXIO);
        error = ifp->if_set_bpf_tap(ifp, mode, callback);
        ifnet_decr_iorefcnt(ifp);
    }
    return (error);
}
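/*
 * dlil_resolve_multi() resolves a multicast protocol address into a
 * link-layer address: the attached protocol's resolve_multi callback
 * is asked first, then the interface's if_check_multi verifies the
 * result (or, when the protocol offered no translation, the protocol
 * address itself).
 */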
errno_t
dlil_resolve_multi(struct ifnet *ifp, const struct sockaddr *proto_addr,
    struct sockaddr *ll_addr, size_t ll_len)
{
    errno_t result = EOPNOTSUPP;
    struct if_proto *proto;
    const struct sockaddr *verify;
    proto_media_resolve_multi resolvep;

    if (!ifnet_is_attached(ifp, 1))
        return (result);

    bzero(ll_addr, ll_len);

    /* Call the protocol first; callee holds a proto refcnt upon success */
    ifnet_lock_shared(ifp);
    proto = find_attached_proto(ifp, proto_addr->sa_family);
    ifnet_lock_done(ifp);
    if (proto != NULL) {
        resolvep = (proto->proto_kpi == kProtoKPI_v1 ?
            proto->kpi.v1.resolve_multi : proto->kpi.v2.resolve_multi);
        if (resolvep != NULL)
            result = resolvep(ifp, proto_addr,
                (struct sockaddr_dl *)(void *)ll_addr, ll_len);
        if_proto_free(proto);
    }

    /* Let the interface verify the multicast address */
    if ((result == EOPNOTSUPP || result == 0) && ifp->if_check_multi) {
        if (result == 0)
            verify = ll_addr;
        else
            verify = proto_addr;
        result = ifp->if_check_multi(ifp, verify);
    }

    ifnet_decr_iorefcnt(ifp);
    return (result);
}
__private_extern__ errno_t
dlil_send_arp_internal(ifnet_t ifp, u_short arpop,
    const struct sockaddr_dl *sender_hw, const struct sockaddr *sender_proto,
    const struct sockaddr_dl *target_hw, const struct sockaddr *target_proto)
{
    struct if_proto *proto;
    errno_t result = 0;

    /* callee holds a proto refcnt upon success */
    ifnet_lock_shared(ifp);
    proto = find_attached_proto(ifp, target_proto->sa_family);
    ifnet_lock_done(ifp);
    if (proto == NULL) {
        result = ENOTSUP;
    } else {
        proto_media_send_arp arpp;
        arpp = (proto->proto_kpi == kProtoKPI_v1 ?
            proto->kpi.v1.send_arp : proto->kpi.v2.send_arp);
        if (arpp == NULL) {
            result = ENOTSUP;
        } else {
            switch (arpop) {
            case ARPOP_REQUEST:
                arpstat.txrequests++;
                if (target_hw != NULL)
                    arpstat.txurequests++;
                break;
            case ARPOP_REPLY:
                arpstat.txreplies++;
                break;
            }
            result = arpp(ifp, arpop, sender_hw, sender_proto,
                target_hw, target_proto);
        }
        if_proto_free(proto);
    }

    return (result);
}
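/*
 * Thread marks: uu_network_marks is a per-uthread bitmask. The
 * push/pop routines below encode the set of newly-set (or newly-
 * cleared) bits as an offset from net_thread_marks_base, so a pop
 * can undo exactly what the matching push changed and nothing more.
 */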
struct net_thread_marks { };
static const struct net_thread_marks net_thread_marks_base = { };

__private_extern__ const net_thread_marks_t net_thread_marks_none =
    &net_thread_marks_base;

__private_extern__ net_thread_marks_t
net_thread_marks_push(u_int32_t push)
{
    static const char *const base = (const void*)&net_thread_marks_base;
    u_int32_t pop = 0;

    if (push != 0) {
        struct uthread *uth = get_bsdthread_info(current_thread());

        pop = push & ~uth->uu_network_marks;
        if (pop != 0)
            uth->uu_network_marks |= pop;
    }

    return ((net_thread_marks_t)&base[pop]);
}

__private_extern__ net_thread_marks_t
net_thread_unmarks_push(u_int32_t unpush)
{
    static const char *const base = (const void*)&net_thread_marks_base;
    u_int32_t unpop = 0;

    if (unpush != 0) {
        struct uthread *uth = get_bsdthread_info(current_thread());

        unpop = unpush & uth->uu_network_marks;
        if (unpop != 0)
            uth->uu_network_marks &= ~unpop;
    }

    return ((net_thread_marks_t)&base[unpop]);
}

__private_extern__ void
net_thread_marks_pop(net_thread_marks_t popx)
{
    static const char *const base = (const void*)&net_thread_marks_base;
    const ptrdiff_t pop = (const char *)popx - (const char *)base;

    if (pop != 0) {
        static const ptrdiff_t ones = (ptrdiff_t)(u_int32_t)~0U;
        struct uthread *uth = get_bsdthread_info(current_thread());

        VERIFY((pop & ones) == pop);
        VERIFY((ptrdiff_t)(uth->uu_network_marks & pop) == pop);
        uth->uu_network_marks &= ~pop;
    }
}

__private_extern__ void
net_thread_unmarks_pop(net_thread_marks_t unpopx)
{
    static const char *const base = (const void*)&net_thread_marks_base;
    ptrdiff_t unpop = (const char *)unpopx - (const char *)base;

    if (unpop != 0) {
        static const ptrdiff_t ones = (ptrdiff_t)(u_int32_t)~0U;
        struct uthread *uth = get_bsdthread_info(current_thread());

        VERIFY((unpop & ones) == unpop);
        VERIFY((ptrdiff_t)(uth->uu_network_marks & unpop) == 0);
        uth->uu_network_marks |= unpop;
    }
}

__private_extern__ u_int32_t
net_thread_is_marked(u_int32_t check)
{
    if (check != 0) {
        struct uthread *uth = get_bsdthread_info(current_thread());
        return (uth->uu_network_marks & check);
    } else {
        return (0);
    }
}

__private_extern__ u_int32_t
net_thread_is_unmarked(u_int32_t check)
{
    if (check != 0) {
        struct uthread *uth = get_bsdthread_info(current_thread());
        return (~uth->uu_network_marks & check);
    } else {
        return (0);
    }
}
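/*
 * An ARP announcement carries the same IPv4 address as both sender
 * and target; such requests must stay on their own interface rather
 * than being broadcast on every IPv4LL-capable interface, which is
 * what _is_announcement() below checks for.
 */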
static __inline__ int
_is_announcement(const struct sockaddr_in * sender_sin,
    const struct sockaddr_in * target_sin)
{
    if (sender_sin == NULL) {
        return (FALSE);
    }
    return (sender_sin->sin_addr.s_addr == target_sin->sin_addr.s_addr);
}
__private_extern__ errno_t
dlil_send_arp(ifnet_t ifp, u_short arpop, const struct sockaddr_dl *sender_hw,
    const struct sockaddr *sender_proto, const struct sockaddr_dl *target_hw,
    const struct sockaddr *target_proto0, u_int32_t rtflags)
{
    errno_t result = 0;
    const struct sockaddr_in * sender_sin;
    const struct sockaddr_in * target_sin;
    struct sockaddr_inarp target_proto_sinarp;
    struct sockaddr *target_proto = (void *)(uintptr_t)target_proto0;

    if (target_proto == NULL || (sender_proto != NULL &&
        sender_proto->sa_family != target_proto->sa_family))
        return (EINVAL);

    /*
     * If the target is a (default) router, provide that
     * information to the send_arp callback routine.
     */
    if (rtflags & RTF_ROUTER) {
        bcopy(target_proto, &target_proto_sinarp,
            sizeof (struct sockaddr_in));
        target_proto_sinarp.sin_other |= SIN_ROUTER;
        target_proto = (struct sockaddr *)&target_proto_sinarp;
    }

    /*
     * If this is an ARP request and the target IP is IPv4LL,
     * send the request on all interfaces. The exception is
     * an announcement, which must only appear on the specific
     * interface.
     */
    sender_sin = (struct sockaddr_in *)(void *)(uintptr_t)sender_proto;
    target_sin = (struct sockaddr_in *)(void *)(uintptr_t)target_proto;
    if (target_proto->sa_family == AF_INET &&
        IN_LINKLOCAL(ntohl(target_sin->sin_addr.s_addr)) &&
        ipv4_ll_arp_aware != 0 && arpop == ARPOP_REQUEST &&
        !_is_announcement(target_sin, sender_sin)) {
        ifnet_t *ifp_list;
        u_int32_t count;
        u_int32_t ifp_on;

        result = ENOTSUP;

        if (ifnet_list_get(IFNET_FAMILY_ANY, &ifp_list, &count) == 0) {
            for (ifp_on = 0; ifp_on < count; ifp_on++) {
                errno_t new_result;
                ifaddr_t source_hw = NULL;
                ifaddr_t source_ip = NULL;
                struct sockaddr_in source_ip_copy;
                struct ifnet *cur_ifp = ifp_list[ifp_on];

                /*
                 * Only arp on interfaces marked for IPv4LL
                 * ARPing. This may mean that we don't ARP on
                 * the interface the subnet route points to.
                 */
                if (!(cur_ifp->if_eflags & IFEF_ARPLL))
                    continue;

                /* Find the source IP address */
                ifnet_lock_shared(cur_ifp);
                source_hw = cur_ifp->if_lladdr;
                TAILQ_FOREACH(source_ip, &cur_ifp->if_addrhead,
                    ifa_link) {
                    IFA_LOCK(source_ip);
                    if (source_ip->ifa_addr != NULL &&
                        source_ip->ifa_addr->sa_family ==
                        AF_INET) {
                        /* Copy the source IP address */
                        source_ip_copy =
                            *(struct sockaddr_in *)
                            (void *)source_ip->ifa_addr;
                        IFA_UNLOCK(source_ip);
                        break;
                    }
                    IFA_UNLOCK(source_ip);
                }

                /* No IP Source, don't arp */
                if (source_ip == NULL) {
                    ifnet_lock_done(cur_ifp);
                    continue;
                }

                IFA_ADDREF(source_hw);
                ifnet_lock_done(cur_ifp);

                /* Send the ARP */
                new_result = dlil_send_arp_internal(cur_ifp,
                    arpop, (struct sockaddr_dl *)(void *)
                    source_hw->ifa_addr,
                    (struct sockaddr *)&source_ip_copy, NULL,
                    target_proto);

                IFA_REMREF(source_hw);
                if (result == ENOTSUP) {
                    result = new_result;
                }
            }
            ifnet_list_free(ifp_list);
        }
    } else {
        result = dlil_send_arp_internal(ifp, arpop, sender_hw,
            sender_proto, target_hw, target_proto);
    }

    return (result);
}
/*
 * Caller must hold ifnet head lock.
 */
static int
ifnet_lookup(struct ifnet *ifp)
{
    struct ifnet *_ifp;

    LCK_RW_ASSERT(&ifnet_head_lock, LCK_RW_ASSERT_HELD);
    TAILQ_FOREACH(_ifp, &ifnet_head, if_link) {
        if (_ifp == ifp)
            break;
    }
    return (_ifp != NULL);
}
/*
 * Caller has to pass a non-zero refio argument to get a
 * IO reference count. This will prevent ifnet_detach from
 * being called when there are outstanding io reference counts.
 */
int
ifnet_is_attached(struct ifnet *ifp, int refio)
{
    int ret;

    lck_mtx_lock_spin(&ifp->if_ref_lock);
    if ((ret = IF_FULLY_ATTACHED(ifp))) {
        if (refio > 0)
            ifp->if_refio++;
    }
    lck_mtx_unlock(&ifp->if_ref_lock);

    return (ret);
}
/*
 * Caller must ensure the interface is attached; the assumption is that
 * there is at least an outstanding IO reference count held already.
 * Most callers would call ifnet_is_attached() instead.
 */
void
ifnet_incr_iorefcnt(struct ifnet *ifp)
{
    lck_mtx_lock_spin(&ifp->if_ref_lock);
    VERIFY(IF_FULLY_ATTACHED(ifp));
    VERIFY(ifp->if_refio > 0);
    ifp->if_refio++;
    lck_mtx_unlock(&ifp->if_ref_lock);
}
void
ifnet_decr_iorefcnt(struct ifnet *ifp)
{
    lck_mtx_lock_spin(&ifp->if_ref_lock);
    VERIFY(ifp->if_refio > 0);
    VERIFY(ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING));
    ifp->if_refio--;

    /*
     * if there are no more outstanding io references, wakeup the
     * ifnet_detach thread if detaching flag is set.
     */
    if (ifp->if_refio == 0 && (ifp->if_refflags & IFRF_DETACHING))
        wakeup(&(ifp->if_refio));

    lck_mtx_unlock(&ifp->if_ref_lock);
}
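/*
 * Debug-only refcount tracing: dlil_if_trace() records the stack
 * trace of each dlil_ifnet reference hold/release into a small
 * circular history buffer attached to the debug variant of the
 * structure.
 */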
static void
dlil_if_trace(struct dlil_ifnet *dl_if, int refhold)
{
    struct dlil_ifnet_dbg *dl_if_dbg = (struct dlil_ifnet_dbg *)dl_if;
    ctrace_t *tr;
    u_int32_t idx;
    u_int16_t *cnt;

    if (!(dl_if->dl_if_flags & DLIF_DEBUG)) {
        panic("%s: dl_if %p has no debug structure", __func__, dl_if);
        /* NOTREACHED */
    }

    if (refhold) {
        cnt = &dl_if_dbg->dldbg_if_refhold_cnt;
        tr = dl_if_dbg->dldbg_if_refhold;
    } else {
        cnt = &dl_if_dbg->dldbg_if_refrele_cnt;
        tr = dl_if_dbg->dldbg_if_refrele;
    }

    idx = atomic_add_16_ov(cnt, 1) % IF_REF_TRACE_HIST_SIZE;
    ctrace_record(&tr[idx]);
}
errno_t
dlil_if_ref(struct ifnet *ifp)
{
    struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;

    if (dl_if == NULL)
        return (EINVAL);

    lck_mtx_lock_spin(&dl_if->dl_if_lock);
    ++dl_if->dl_if_refcnt;
    if (dl_if->dl_if_refcnt == 0) {
        panic("%s: wraparound refcnt for ifp=%p", __func__, ifp);
        /* NOTREACHED */
    }
    if (dl_if->dl_if_trace != NULL)
        (*dl_if->dl_if_trace)(dl_if, TRUE);
    lck_mtx_unlock(&dl_if->dl_if_lock);

    return (0);
}
errno_t
dlil_if_free(struct ifnet *ifp)
{
    struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
    bool need_release = FALSE;

    if (dl_if == NULL)
        return (EINVAL);

    lck_mtx_lock_spin(&dl_if->dl_if_lock);
    switch (dl_if->dl_if_refcnt) {
    case 0:
        panic("%s: negative refcnt for ifp=%p", __func__, ifp);
        /* NOTREACHED */
        break;
    case 1:
        if ((ifp->if_refflags & IFRF_EMBRYONIC) != 0) {
            need_release = TRUE;
        }
        break;
    default:
        break;
    }
    --dl_if->dl_if_refcnt;
    if (dl_if->dl_if_trace != NULL)
        (*dl_if->dl_if_trace)(dl_if, FALSE);
    lck_mtx_unlock(&dl_if->dl_if_lock);
    if (need_release) {
        dlil_if_release(ifp);
    }
    return (0);
}
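/*
 * Common protocol attach path: reject duplicates, let the family
 * module refine the demux descriptors, insert the protocol at the
 * tail of its hash chain, and post KEV_DL_PROTO_ATTACHED with the
 * updated protocol count.
 */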
static errno_t
dlil_attach_protocol_internal(struct if_proto *proto,
    const struct ifnet_demux_desc *demux_list, u_int32_t demux_count,
    uint32_t * proto_count)
{
    struct kev_dl_proto_data ev_pr_data;
    struct ifnet *ifp = proto->ifp;
    int retval = 0;
    u_int32_t hash_value = proto_hash_value(proto->protocol_family);
    struct if_proto *prev_proto;
    struct if_proto *_proto;

    /* callee holds a proto refcnt upon success */
    ifnet_lock_exclusive(ifp);
    _proto = find_attached_proto(ifp, proto->protocol_family);
    if (_proto != NULL) {
        ifnet_lock_done(ifp);
        if_proto_free(_proto);
        return (EEXIST);
    }

    /*
     * Call family module add_proto routine so it can refine the
     * demux descriptors as it wishes.
     */
    retval = ifp->if_add_proto(ifp, proto->protocol_family, demux_list,
        demux_count);
    if (retval) {
        ifnet_lock_done(ifp);
        return (retval);
    }

    /*
     * Insert the protocol in the hash
     */
    prev_proto = SLIST_FIRST(&ifp->if_proto_hash[hash_value]);
    while (prev_proto != NULL && SLIST_NEXT(prev_proto, next_hash) != NULL)
        prev_proto = SLIST_NEXT(prev_proto, next_hash);
    if (prev_proto)
        SLIST_INSERT_AFTER(prev_proto, proto, next_hash);
    else
        SLIST_INSERT_HEAD(&ifp->if_proto_hash[hash_value],
            proto, next_hash);

    /* hold a proto refcnt for attach */
    if_proto_ref(proto);

    /*
     * The reserved field carries the number of protocol still attached
     * (subject to change)
     */
    ev_pr_data.proto_family = proto->protocol_family;
    ev_pr_data.proto_remaining_count = dlil_ifp_proto_count(ifp);
    ifnet_lock_done(ifp);

    dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_ATTACHED,
        (struct net_event_data *)&ev_pr_data,
        sizeof (struct kev_dl_proto_data));
    if (proto_count != NULL) {
        *proto_count = ev_pr_data.proto_remaining_count;
    }
    return (retval);
}
errno_t
ifnet_attach_protocol(ifnet_t ifp, protocol_family_t protocol,
    const struct ifnet_attach_proto_param *proto_details)
{
    int retval = 0;
    struct if_proto *ifproto = NULL;
    uint32_t proto_count = 0;

    ifnet_head_lock_shared();
    if (ifp == NULL || protocol == 0 || proto_details == NULL) {
        retval = EINVAL;
        goto end;
    }
    /* Check that the interface is in the global list */
    if (!ifnet_lookup(ifp)) {
        retval = ENXIO;
        goto end;
    }

    ifproto = zalloc(dlif_proto_zone);
    if (ifproto == NULL) {
        retval = ENOMEM;
        goto end;
    }
    bzero(ifproto, dlif_proto_size);

    /* refcnt held above during lookup */
    ifproto->ifp = ifp;
    ifproto->protocol_family = protocol;
    ifproto->proto_kpi = kProtoKPI_v1;
    ifproto->kpi.v1.input = proto_details->input;
    ifproto->kpi.v1.pre_output = proto_details->pre_output;
    ifproto->kpi.v1.event = proto_details->event;
    ifproto->kpi.v1.ioctl = proto_details->ioctl;
    ifproto->kpi.v1.detached = proto_details->detached;
    ifproto->kpi.v1.resolve_multi = proto_details->resolve;
    ifproto->kpi.v1.send_arp = proto_details->send_arp;

    retval = dlil_attach_protocol_internal(ifproto,
        proto_details->demux_list, proto_details->demux_count,
        &proto_count);

end:
    if (retval != 0 && retval != EEXIST && ifp != NULL) {
        DLIL_PRINTF("%s: failed to attach v1 protocol %d (err=%d)\n",
            if_name(ifp), protocol, retval);
    } else {
        if (dlil_verbose) {
            printf("%s: attached v1 protocol %d (count = %d)\n",
                ifp != NULL ? if_name(ifp) : "N/A",
                protocol, proto_count);
        }
    }
    ifnet_head_done();
    if (retval == 0) {
        /*
         * A protocol has been attached, mark the interface up.
         * This used to be done by configd.KernelEventMonitor, but
         * that is inherently prone to races (rdar://problem/30810208).
         */
        (void) ifnet_set_flags(ifp, IFF_UP, IFF_UP);
        (void) ifnet_ioctl(ifp, 0, SIOCSIFFLAGS, NULL);
        dlil_post_sifflags_msg(ifp);
    } else if (ifproto != NULL) {
        zfree(dlif_proto_zone, ifproto);
    }
    return (retval);
}
errno_t
ifnet_attach_protocol_v2(ifnet_t ifp, protocol_family_t protocol,
    const struct ifnet_attach_proto_param_v2 *proto_details)
{
    int retval = 0;
    struct if_proto *ifproto = NULL;
    uint32_t proto_count = 0;

    ifnet_head_lock_shared();
    if (ifp == NULL || protocol == 0 || proto_details == NULL) {
        retval = EINVAL;
        goto end;
    }
    /* Check that the interface is in the global list */
    if (!ifnet_lookup(ifp)) {
        retval = ENXIO;
        goto end;
    }

    ifproto = zalloc(dlif_proto_zone);
    if (ifproto == NULL) {
        retval = ENOMEM;
        goto end;
    }
    bzero(ifproto, sizeof(*ifproto));

    /* refcnt held above during lookup */
    ifproto->ifp = ifp;
    ifproto->protocol_family = protocol;
    ifproto->proto_kpi = kProtoKPI_v2;
    ifproto->kpi.v2.input = proto_details->input;
    ifproto->kpi.v2.pre_output = proto_details->pre_output;
    ifproto->kpi.v2.event = proto_details->event;
    ifproto->kpi.v2.ioctl = proto_details->ioctl;
    ifproto->kpi.v2.detached = proto_details->detached;
    ifproto->kpi.v2.resolve_multi = proto_details->resolve;
    ifproto->kpi.v2.send_arp = proto_details->send_arp;

    retval = dlil_attach_protocol_internal(ifproto,
        proto_details->demux_list, proto_details->demux_count,
        &proto_count);

end:
    if (retval != 0 && retval != EEXIST && ifp != NULL) {
        DLIL_PRINTF("%s: failed to attach v2 protocol %d (err=%d)\n",
            if_name(ifp), protocol, retval);
    } else {
        if (dlil_verbose) {
            printf("%s: attached v2 protocol %d (count = %d)\n",
                ifp != NULL ? if_name(ifp) : "N/A",
                protocol, proto_count);
        }
    }
    ifnet_head_done();
    if (retval == 0) {
        /*
         * A protocol has been attached, mark the interface up.
         * This used to be done by configd.KernelEventMonitor, but
         * that is inherently prone to races (rdar://problem/30810208).
         */
        (void) ifnet_set_flags(ifp, IFF_UP, IFF_UP);
        (void) ifnet_ioctl(ifp, 0, SIOCSIFFLAGS, NULL);
        dlil_post_sifflags_msg(ifp);
    } else if (ifproto != NULL) {
        zfree(dlif_proto_zone, ifproto);
    }
    return (retval);
}
errno_t
ifnet_detach_protocol(ifnet_t ifp, protocol_family_t proto_family)
{
    struct if_proto *proto = NULL;
    int retval = 0;

    if (ifp == NULL || proto_family == 0) {
        retval = EINVAL;
        goto end;
    }

    ifnet_lock_exclusive(ifp);
    /* callee holds a proto refcnt upon success */
    proto = find_attached_proto(ifp, proto_family);
    if (proto == NULL) {
        retval = ENXIO;
        ifnet_lock_done(ifp);
        goto end;
    }

    /* call family module del_proto */
    if (ifp->if_del_proto)
        ifp->if_del_proto(ifp, proto->protocol_family);

    SLIST_REMOVE(&ifp->if_proto_hash[proto_hash_value(proto_family)],
        proto, if_proto, next_hash);

    if (proto->proto_kpi == kProtoKPI_v1) {
        proto->kpi.v1.input = ifproto_media_input_v1;
        proto->kpi.v1.pre_output = ifproto_media_preout;
        proto->kpi.v1.event = ifproto_media_event;
        proto->kpi.v1.ioctl = ifproto_media_ioctl;
        proto->kpi.v1.resolve_multi = ifproto_media_resolve_multi;
        proto->kpi.v1.send_arp = ifproto_media_send_arp;
    } else {
        proto->kpi.v2.input = ifproto_media_input_v2;
        proto->kpi.v2.pre_output = ifproto_media_preout;
        proto->kpi.v2.event = ifproto_media_event;
        proto->kpi.v2.ioctl = ifproto_media_ioctl;
        proto->kpi.v2.resolve_multi = ifproto_media_resolve_multi;
        proto->kpi.v2.send_arp = ifproto_media_send_arp;
    }
    proto->detached = 1;
    ifnet_lock_done(ifp);

    if (dlil_verbose) {
        printf("%s: detached %s protocol %d\n", if_name(ifp),
            (proto->proto_kpi == kProtoKPI_v1) ?
            "v1" : "v2", proto_family);
    }

    /* release proto refcnt held during protocol attach */
    if_proto_free(proto);

    /*
     * Release proto refcnt held during lookup; the rest of
     * protocol detach steps will happen when the last proto
     * reference is released.
     */
    if_proto_free(proto);

end:
    return (retval);
}
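/*
 * Placeholder callbacks installed by ifnet_detach_protocol() above;
 * any caller still holding a reference to a detached protocol gets a
 * harmless error (or a no-op for events) instead of a call through a
 * stale function pointer.
 */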
static errno_t
ifproto_media_input_v1(struct ifnet *ifp, protocol_family_t protocol,
    struct mbuf *packet, char *header)
{
#pragma unused(ifp, protocol, packet, header)
    return (ENXIO);
}

static errno_t
ifproto_media_input_v2(struct ifnet *ifp, protocol_family_t protocol,
    struct mbuf *packet)
{
#pragma unused(ifp, protocol, packet)
    return (ENXIO);
}

static errno_t
ifproto_media_preout(struct ifnet *ifp, protocol_family_t protocol,
    mbuf_t *packet, const struct sockaddr *dest, void *route, char *frame_type,
    char *link_layer_dest)
{
#pragma unused(ifp, protocol, packet, dest, route, frame_type, link_layer_dest)
    return (ENXIO);
}

static void
ifproto_media_event(struct ifnet *ifp, protocol_family_t protocol,
    const struct kev_msg *event)
{
#pragma unused(ifp, protocol, event)
}

static errno_t
ifproto_media_ioctl(struct ifnet *ifp, protocol_family_t protocol,
    unsigned long command, void *argument)
{
#pragma unused(ifp, protocol, command, argument)
    return (ENXIO);
}

static errno_t
ifproto_media_resolve_multi(ifnet_t ifp, const struct sockaddr *proto_addr,
    struct sockaddr_dl *out_ll, size_t ll_len)
{
#pragma unused(ifp, proto_addr, out_ll, ll_len)
    return (ENXIO);
}

static errno_t
ifproto_media_send_arp(struct ifnet *ifp, u_short arpop,
    const struct sockaddr_dl *sender_hw, const struct sockaddr *sender_proto,
    const struct sockaddr_dl *target_hw, const struct sockaddr *target_proto)
{
#pragma unused(ifp, arpop, sender_hw, sender_proto, target_hw, target_proto)
    return (ENXIO);
}
extern int if_next_index(void);
extern int tcp_ecn_outbound;

errno_t
ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr)
{
    struct ifnet *tmp_if;
    struct ifaddr *ifa;
    struct if_data_internal if_data_saved;
    struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
    struct dlil_threading_info *dl_inp;
    u_int32_t sflags = 0;
    int err;

    if (ifp == NULL)
        return (EINVAL);

    /*
     * Serialize ifnet attach using dlil_ifnet_lock, in order to
     * prevent the interface from being configured while it is
     * embryonic, as ifnet_head_lock is dropped and reacquired
     * below prior to marking the ifnet with IFRF_ATTACHED.
     */
    dlil_if_lock();
    ifnet_head_lock_exclusive();
    /* Verify we aren't already on the list */
    TAILQ_FOREACH(tmp_if, &ifnet_head, if_link) {
        if (tmp_if == ifp) {
            ifnet_head_done();
            dlil_if_unlock();
            return (EEXIST);
        }
    }

    lck_mtx_lock_spin(&ifp->if_ref_lock);
    if (!(ifp->if_refflags & IFRF_EMBRYONIC)) {
        panic_plain("%s: flags mismatch (embryonic not set) ifp=%p",
            __func__, ifp);
        /* NOTREACHED */
    }
    lck_mtx_unlock(&ifp->if_ref_lock);

    ifnet_lock_exclusive(ifp);

    /* Sanity check */
    VERIFY(ifp->if_detaching_link.tqe_next == NULL);
    VERIFY(ifp->if_detaching_link.tqe_prev == NULL);

    if (ll_addr != NULL) {
        if (ifp->if_addrlen == 0) {
            ifp->if_addrlen = ll_addr->sdl_alen;
        } else if (ll_addr->sdl_alen != ifp->if_addrlen) {
            ifnet_lock_done(ifp);
            ifnet_head_done();
            dlil_if_unlock();
            return (EINVAL);
        }
    }

    /*
     * Allow interfaces without protocol families to attach
     * only if they have the necessary fields filled out.
     */
    if (ifp->if_add_proto == NULL || ifp->if_del_proto == NULL) {
        DLIL_PRINTF("%s: Attempt to attach interface without "
            "family module - %d\n", __func__, ifp->if_family);
        ifnet_lock_done(ifp);
        ifnet_head_done();
        dlil_if_unlock();
        return (ENODEV);
    }

    /* Allocate protocol hash table */
    VERIFY(ifp->if_proto_hash == NULL);
    ifp->if_proto_hash = zalloc(dlif_phash_zone);
    if (ifp->if_proto_hash == NULL) {
        ifnet_lock_done(ifp);
        ifnet_head_done();
        dlil_if_unlock();
        return (ENOBUFS);
    }
    bzero(ifp->if_proto_hash, dlif_phash_size);

    lck_mtx_lock_spin(&ifp->if_flt_lock);
    VERIFY(TAILQ_EMPTY(&ifp->if_flt_head));
    TAILQ_INIT(&ifp->if_flt_head);
    VERIFY(ifp->if_flt_busy == 0);
    VERIFY(ifp->if_flt_waiters == 0);
    lck_mtx_unlock(&ifp->if_flt_lock);

    if (!(dl_if->dl_if_flags & DLIF_REUSE)) {
        VERIFY(LIST_EMPTY(&ifp->if_multiaddrs));
        LIST_INIT(&ifp->if_multiaddrs);
    }

    VERIFY(ifp->if_allhostsinm == NULL);
    VERIFY(TAILQ_EMPTY(&ifp->if_addrhead));
    TAILQ_INIT(&ifp->if_addrhead);

    if (ifp->if_index == 0) {
        int idx = if_next_index();

        if (idx == -1) {
            ifp->if_index = 0;
            ifnet_lock_done(ifp);
            ifnet_head_done();
            dlil_if_unlock();
            return (ENOBUFS);
        }
        ifp->if_index = idx;
    }
    /* There should not be anything occupying this slot */
    VERIFY(ifindex2ifnet[ifp->if_index] == NULL);

    /* allocate (if needed) and initialize a link address */
    ifa = dlil_alloc_lladdr(ifp, ll_addr);
    if (ifa == NULL) {
        ifnet_lock_done(ifp);
        ifnet_head_done();
        dlil_if_unlock();
        return (ENOBUFS);
    }

    VERIFY(ifnet_addrs[ifp->if_index - 1] == NULL);
    ifnet_addrs[ifp->if_index - 1] = ifa;

    /* make this address the first on the list */
    IFA_LOCK(ifa);
    /* hold a reference for ifnet_addrs[] */
    IFA_ADDREF_LOCKED(ifa);
    /* if_attach_link_ifa() holds a reference for ifa_link */
    if_attach_link_ifa(ifp, ifa);
    IFA_UNLOCK(ifa);

#if CONFIG_MACF_NET
    mac_ifnet_label_associate(ifp);
#endif /* CONFIG_MACF_NET */

    TAILQ_INSERT_TAIL(&ifnet_head, ifp, if_link);
    ifindex2ifnet[ifp->if_index] = ifp;

    /* Hold a reference to the underlying dlil_ifnet */
    ifnet_reference(ifp);

    /* Clear stats (save and restore other fields that we care) */
    if_data_saved = ifp->if_data;
    bzero(&ifp->if_data, sizeof (ifp->if_data));
    ifp->if_data.ifi_type = if_data_saved.ifi_type;
    ifp->if_data.ifi_typelen = if_data_saved.ifi_typelen;
    ifp->if_data.ifi_physical = if_data_saved.ifi_physical;
    ifp->if_data.ifi_addrlen = if_data_saved.ifi_addrlen;
    ifp->if_data.ifi_hdrlen = if_data_saved.ifi_hdrlen;
    ifp->if_data.ifi_mtu = if_data_saved.ifi_mtu;
    ifp->if_data.ifi_baudrate = if_data_saved.ifi_baudrate;
    ifp->if_data.ifi_hwassist = if_data_saved.ifi_hwassist;
    ifp->if_data.ifi_tso_v4_mtu = if_data_saved.ifi_tso_v4_mtu;
    ifp->if_data.ifi_tso_v6_mtu = if_data_saved.ifi_tso_v6_mtu;
    ifnet_touch_lastchange(ifp);

    VERIFY(ifp->if_output_sched_model == IFNET_SCHED_MODEL_NORMAL ||
        ifp->if_output_sched_model == IFNET_SCHED_MODEL_DRIVER_MANAGED ||
        ifp->if_output_sched_model == IFNET_SCHED_MODEL_FQ_CODEL);

    /* By default, use SFB and enable flow advisory */
    sflags = PKTSCHEDF_QALG_SFB;
    if (if_flowadv)
        sflags |= PKTSCHEDF_QALG_FLOWCTL;

    if (if_delaybased_queue)
        sflags |= PKTSCHEDF_QALG_DELAYBASED;

    if (ifp->if_output_sched_model ==
        IFNET_SCHED_MODEL_DRIVER_MANAGED)
        sflags |= PKTSCHEDF_QALG_DRIVER_MANAGED;

    /* Initialize transmit queue(s) */
    err = ifclassq_setup(ifp, sflags, (dl_if->dl_if_flags & DLIF_REUSE));
    if (err != 0) {
        panic_plain("%s: ifp=%p couldn't initialize transmit queue; "
            "err=%d", __func__, ifp, err);
        /* NOTREACHED */
    }

    /* Sanity checks on the input thread storage */
    dl_inp = &dl_if->dl_if_inpstorage;
    bzero(&dl_inp->stats, sizeof (dl_inp->stats));
    VERIFY(dl_inp->input_waiting == 0);
    VERIFY(dl_inp->wtot == 0);
    VERIFY(dl_inp->ifp == NULL);
    VERIFY(qhead(&dl_inp->rcvq_pkts) == NULL && qempty(&dl_inp->rcvq_pkts));
    VERIFY(qlimit(&dl_inp->rcvq_pkts) == 0);
    VERIFY(!dl_inp->net_affinity);
    VERIFY(ifp->if_inp == NULL);
    VERIFY(dl_inp->input_thr == THREAD_NULL);
    VERIFY(dl_inp->wloop_thr == THREAD_NULL);
    VERIFY(dl_inp->poll_thr == THREAD_NULL);
    VERIFY(dl_inp->tag == 0);
    VERIFY(dl_inp->mode == IFNET_MODEL_INPUT_POLL_OFF);
    bzero(&dl_inp->tstats, sizeof (dl_inp->tstats));
    bzero(&dl_inp->pstats, sizeof (dl_inp->pstats));
    bzero(&dl_inp->sstats, sizeof (dl_inp->sstats));
#if IFNET_INPUT_SANITY_CHK
    VERIFY(dl_inp->input_mbuf_cnt == 0);
#endif /* IFNET_INPUT_SANITY_CHK */

    /*
     * A specific DLIL input thread is created per Ethernet/cellular
     * interface or for an interface which supports opportunistic
     * input polling. Pseudo interfaces or other types of interfaces
     * use the main input thread instead.
     */
    if ((net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) ||
        ifp->if_type == IFT_ETHER || ifp->if_type == IFT_CELLULAR) {
        ifp->if_inp = dl_inp;
        err = dlil_create_input_thread(ifp, ifp->if_inp);
        if (err != 0) {
            panic_plain("%s: ifp=%p couldn't get an input thread; "
                "err=%d", __func__, ifp, err);
            /* NOTREACHED */
        }
    }

    if (ifp->if_inp != NULL && ifp->if_inp->input_mit_tcall == NULL) {
        ifp->if_inp->input_mit_tcall =
            thread_call_allocate_with_priority(dlil_mit_tcall_fn,
            ifp, THREAD_CALL_PRIORITY_KERNEL);
    }

    /*
     * If the driver supports the new transmit model, calculate flow hash
     * and create a workloop starter thread to invoke the if_start callback
     * where the packets may be dequeued and transmitted.
     */
    if (ifp->if_eflags & IFEF_TXSTART) {
        ifp->if_flowhash = ifnet_calc_flowhash(ifp);
        VERIFY(ifp->if_flowhash != 0);
        VERIFY(ifp->if_start_thread == THREAD_NULL);

        ifnet_set_start_cycle(ifp, NULL);
        ifp->if_start_active = 0;
        ifp->if_start_req = 0;
        ifp->if_start_flags = 0;
        VERIFY(ifp->if_start != NULL);
        if ((err = kernel_thread_start(ifnet_start_thread_fn,
            ifp, &ifp->if_start_thread)) != KERN_SUCCESS) {
            panic_plain("%s: "
                "ifp=%p couldn't get a start thread; "
                "err=%d", __func__, ifp, err);
            /* NOTREACHED */
        }
        ml_thread_policy(ifp->if_start_thread, MACHINE_GROUP,
            (MACHINE_NETWORK_GROUP|MACHINE_NETWORK_WORKLOOP));
    } else {
        ifp->if_flowhash = 0;
    }

    /*
     * If the driver supports the new receive model, create a poller
     * thread to invoke if_input_poll callback where the packets may
     * be dequeued from the driver and processed for reception.
     */
    if (ifp->if_eflags & IFEF_RXPOLL) {
        VERIFY(ifp->if_input_poll != NULL);
        VERIFY(ifp->if_input_ctl != NULL);
        VERIFY(ifp->if_poll_thread == THREAD_NULL);

        ifnet_set_poll_cycle(ifp, NULL);
        ifp->if_poll_update = 0;
        ifp->if_poll_active = 0;
        ifp->if_poll_req = 0;
        if ((err = kernel_thread_start(ifnet_poll_thread_fn, ifp,
            &ifp->if_poll_thread)) != KERN_SUCCESS) {
            panic_plain("%s: ifp=%p couldn't get a poll thread; "
                "err=%d", __func__, ifp, err);
            /* NOTREACHED */
        }
        ml_thread_policy(ifp->if_poll_thread, MACHINE_GROUP,
            (MACHINE_NETWORK_GROUP|MACHINE_NETWORK_WORKLOOP));
    }

    VERIFY(ifp->if_desc.ifd_maxlen == IF_DESCSIZE);
    VERIFY(ifp->if_desc.ifd_len == 0);
    VERIFY(ifp->if_desc.ifd_desc != NULL);

    /* Record attach PC stacktrace */
    ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_attach);

    ifp->if_updatemcasts = 0;
    if (!LIST_EMPTY(&ifp->if_multiaddrs)) {
        struct ifmultiaddr *ifma;
        LIST_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
            IFMA_LOCK(ifma);
            if (ifma->ifma_addr->sa_family == AF_LINK ||
                ifma->ifma_addr->sa_family == AF_UNSPEC)
                ifp->if_updatemcasts++;
            IFMA_UNLOCK(ifma);
        }

        printf("%s: attached with %d suspended link-layer multicast "
            "membership(s)\n", if_name(ifp),
            ifp->if_updatemcasts);
    }

    /* Clear logging parameters */
    bzero(&ifp->if_log, sizeof (ifp->if_log));

    /* Clear foreground/realtime activity timestamps */
    ifp->if_fg_sendts = 0;
    ifp->if_rt_sendts = 0;

    VERIFY(ifp->if_delegated.ifp == NULL);
    VERIFY(ifp->if_delegated.type == 0);
    VERIFY(ifp->if_delegated.family == 0);
    VERIFY(ifp->if_delegated.subfamily == 0);
    VERIFY(ifp->if_delegated.expensive == 0);

    VERIFY(ifp->if_agentids == NULL);
    VERIFY(ifp->if_agentcount == 0);

    /* Reset interface state */
    bzero(&ifp->if_interface_state, sizeof(ifp->if_interface_state));
    ifp->if_interface_state.valid_bitmask |=
        IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
    ifp->if_interface_state.interface_availability =
        IF_INTERFACE_STATE_INTERFACE_AVAILABLE;

    /* Initialize Link Quality Metric (loopback [lo0] is always good) */
    if (ifp == lo_ifp) {
        ifp->if_interface_state.lqm_state = IFNET_LQM_THRESH_GOOD;
        ifp->if_interface_state.valid_bitmask |=
            IF_INTERFACE_STATE_LQM_STATE_VALID;
    } else {
        ifp->if_interface_state.lqm_state = IFNET_LQM_THRESH_UNKNOWN;
    }

    /*
     * Enable ECN capability on this interface depending on the
     * value of ECN global setting
     */
    if (tcp_ecn_outbound == 2 && !IFNET_IS_CELLULAR(ifp)) {
        ifp->if_eflags |= IFEF_ECN_ENABLE;
        ifp->if_eflags &= ~IFEF_ECN_DISABLE;
    }

    /*
     * Built-in Cyclops always on policy for WiFi infra
     */
    if (IFNET_IS_WIFI_INFRA(ifp) && net_qos_policy_wifi_enabled != 0) {
        errno_t error;

        error = if_set_qosmarking_mode(ifp,
            IFRTYPE_QOSMARKING_FASTLANE);
        if (error != 0) {
            printf("%s if_set_qosmarking_mode(%s) error %d\n",
                __func__, ifp->if_xname, error);
        } else {
            ifp->if_eflags |= IFEF_QOSMARKING_ENABLED;
#if (DEVELOPMENT || DEBUG)
            printf("%s fastlane enabled on %s\n",
                __func__, ifp->if_xname);
#endif /* (DEVELOPMENT || DEBUG) */
        }
    }

    ifnet_lock_done(ifp);
    ifnet_head_done();

    lck_mtx_lock(&ifp->if_cached_route_lock);
    /* Enable forwarding cached route */
    ifp->if_fwd_cacheok = 1;
    /* Clean up any existing cached routes */
    ROUTE_RELEASE(&ifp->if_fwd_route);
    bzero(&ifp->if_fwd_route, sizeof (ifp->if_fwd_route));
    ROUTE_RELEASE(&ifp->if_src_route);
    bzero(&ifp->if_src_route, sizeof (ifp->if_src_route));
    ROUTE_RELEASE(&ifp->if_src_route6);
    bzero(&ifp->if_src_route6, sizeof (ifp->if_src_route6));
    lck_mtx_unlock(&ifp->if_cached_route_lock);

    ifnet_llreach_ifattach(ifp, (dl_if->dl_if_flags & DLIF_REUSE));

    /*
     * Allocate and attach IGMPv3/MLDv2 interface specific variables
     * and trees; do this before the ifnet is marked as attached.
     * The ifnet keeps the reference to the info structures even after
     * the ifnet is detached, since the network-layer records still
     * refer to the info structures even after that. This also
     * makes it possible for them to still function after the ifnet
     * is recycled or reattached.
     */
#if INET
    if (IGMP_IFINFO(ifp) == NULL) {
        IGMP_IFINFO(ifp) = igmp_domifattach(ifp, M_WAITOK);
        VERIFY(IGMP_IFINFO(ifp) != NULL);
    } else {
        VERIFY(IGMP_IFINFO(ifp)->igi_ifp == ifp);
        igmp_domifreattach(IGMP_IFINFO(ifp));
    }
#endif /* INET */
#if INET6
    if (MLD_IFINFO(ifp) == NULL) {
        MLD_IFINFO(ifp) = mld_domifattach(ifp, M_WAITOK);
        VERIFY(MLD_IFINFO(ifp) != NULL);
    } else {
        VERIFY(MLD_IFINFO(ifp)->mli_ifp == ifp);
        mld_domifreattach(MLD_IFINFO(ifp));
    }
#endif /* INET6 */

    VERIFY(ifp->if_data_threshold == 0);
    VERIFY(ifp->if_dt_tcall != NULL);

    /*
     * Finally, mark this ifnet as attached.
     */
    lck_mtx_lock(rnh_lock);
    ifnet_lock_exclusive(ifp);
    lck_mtx_lock_spin(&ifp->if_ref_lock);
    ifp->if_refflags = IFRF_ATTACHED;   /* clears embryonic */
    lck_mtx_unlock(&ifp->if_ref_lock);
    if (net_rtref) {
        /* boot-args override; enable idle notification */
        (void) ifnet_set_idle_flags_locked(ifp, IFRF_IDLE_NOTIFY,
            IFRF_IDLE_NOTIFY);
    } else {
        /* apply previous request(s) to set the idle flags, if any */
        (void) ifnet_set_idle_flags_locked(ifp, ifp->if_idle_new_flags,
            ifp->if_idle_new_flags_mask);
    }
    ifnet_lock_done(ifp);
    lck_mtx_unlock(rnh_lock);
    dlil_if_unlock();

#if PF
    /*
     * Attach packet filter to this interface, if enabled.
     */
    pf_ifnet_hook(ifp, 1);
#endif /* PF */

    dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_ATTACHED, NULL, 0);

    if (dlil_verbose) {
        printf("%s: attached%s\n", if_name(ifp),
            (dl_if->dl_if_flags & DLIF_REUSE) ? " (recycled)" : "");
    }

    return (0);
}
/*
 * Prepare the storage for the first/permanent link address, which must
 * have the same lifetime as the ifnet itself. Although the link address
 * gets removed from if_addrhead and ifnet_addrs[] at detach time, its
 * location in memory must never change as it may still be referred to
 * by some parts of the system afterwards (unfortunate implementation
 * artifacts inherited from BSD.)
 *
 * Caller must hold ifnet lock as writer.
 */
static struct ifaddr *
dlil_alloc_lladdr(struct ifnet *ifp, const struct sockaddr_dl *ll_addr)
{
    struct ifaddr *ifa, *oifa;
    struct sockaddr_dl *asdl, *msdl;
    char workbuf[IFNAMSIZ*2];
    int namelen, masklen, socksize;
    struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;

    ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_EXCLUSIVE);
    VERIFY(ll_addr == NULL || ll_addr->sdl_alen == ifp->if_addrlen);

    namelen = snprintf(workbuf, sizeof (workbuf), "%s",
        if_name(ifp));
    masklen = offsetof(struct sockaddr_dl, sdl_data[0])
        + ((namelen > 0) ? namelen : 0);
    socksize = masklen + ifp->if_addrlen;
#define ROUNDUP(a) (1 + (((a) - 1) | (sizeof (u_int32_t) - 1)))
    if ((u_int32_t)socksize < sizeof (struct sockaddr_dl))
        socksize = sizeof(struct sockaddr_dl);
    socksize = ROUNDUP(socksize);
#undef ROUNDUP

    ifa = ifp->if_lladdr;
    if (socksize > DLIL_SDLMAXLEN ||
        (ifa != NULL && ifa != &dl_if->dl_if_lladdr.ifa)) {
        /*
         * Rare, but in the event that the link address requires
         * more storage space than DLIL_SDLMAXLEN, allocate the
         * largest possible storages for address and mask, such
         * that we can reuse the same space when if_addrlen grows.
         * This same space will be used when if_addrlen shrinks.
         */
        if (ifa == NULL || ifa == &dl_if->dl_if_lladdr.ifa) {
            int ifasize = sizeof (*ifa) + 2 * SOCK_MAXADDRLEN;
            ifa = _MALLOC(ifasize, M_IFADDR, M_WAITOK | M_ZERO);
            if (ifa == NULL)
                return (NULL);
            ifa_lock_init(ifa);
            /* Don't set IFD_ALLOC, as this is permanent */
            ifa->ifa_debug = IFD_LINK;
        }
        IFA_LOCK(ifa);
        /* address and mask sockaddr_dl locations */
        asdl = (struct sockaddr_dl *)(ifa + 1);
        bzero(asdl, SOCK_MAXADDRLEN);
        msdl = (struct sockaddr_dl *)(void *)
            ((char *)asdl + SOCK_MAXADDRLEN);
        bzero(msdl, SOCK_MAXADDRLEN);
    } else {
        VERIFY(ifa == NULL || ifa == &dl_if->dl_if_lladdr.ifa);
        /*
         * Use the storage areas for address and mask within the
         * dlil_ifnet structure. This is the most common case.
         */
        if (ifa == NULL) {
            ifa = &dl_if->dl_if_lladdr.ifa;
            ifa_lock_init(ifa);
            /* Don't set IFD_ALLOC, as this is permanent */
            ifa->ifa_debug = IFD_LINK;
        }
        IFA_LOCK(ifa);
        /* address and mask sockaddr_dl locations */
        asdl = (struct sockaddr_dl *)(void *)&dl_if->dl_if_lladdr.asdl;
        bzero(asdl, sizeof (dl_if->dl_if_lladdr.asdl));
        msdl = (struct sockaddr_dl *)(void *)&dl_if->dl_if_lladdr.msdl;
        bzero(msdl, sizeof (dl_if->dl_if_lladdr.msdl));
    }

    /* hold a permanent reference for the ifnet itself */
    IFA_ADDREF_LOCKED(ifa);
    oifa = ifp->if_lladdr;
    ifp->if_lladdr = ifa;

    VERIFY(ifa->ifa_debug == IFD_LINK);
    ifa->ifa_ifp = ifp;
    ifa->ifa_rtrequest = link_rtrequest;
    ifa->ifa_addr = (struct sockaddr *)asdl;
    asdl->sdl_len = socksize;
    asdl->sdl_family = AF_LINK;
    if (namelen > 0) {
        bcopy(workbuf, asdl->sdl_data, min(namelen,
            sizeof (asdl->sdl_data)));
        asdl->sdl_nlen = namelen;
    } else {
        asdl->sdl_nlen = 0;
    }
    asdl->sdl_index = ifp->if_index;
    asdl->sdl_type = ifp->if_type;
    if (ll_addr != NULL) {
        asdl->sdl_alen = ll_addr->sdl_alen;
        bcopy(CONST_LLADDR(ll_addr), LLADDR(asdl), asdl->sdl_alen);
    } else {
        asdl->sdl_alen = 0;
    }
    ifa->ifa_netmask = (struct sockaddr *)msdl;
    msdl->sdl_len = masklen;
    while (namelen > 0)
        msdl->sdl_data[--namelen] = 0xff;
    IFA_UNLOCK(ifa);

    if (oifa != NULL)
        IFA_REMREF(oifa);

    return (ifa);
}
void
if_purgeaddrs(struct ifnet *ifp)
{
#if INET
    in_purgeaddrs(ifp);
#endif /* INET */
#if INET6
    in6_purgeaddrs(ifp);
#endif /* INET6 */
}
errno_t
ifnet_detach(ifnet_t ifp)
{
    struct ifnet *delegated_ifp;
    struct nd_ifinfo *ndi = NULL;

    if (ifp == NULL)
        return (EINVAL);

    ndi = ND_IFINFO(ifp);
    if (NULL != ndi)
        ndi->cga_initialized = FALSE;

    lck_mtx_lock(rnh_lock);
    ifnet_head_lock_exclusive();
    ifnet_lock_exclusive(ifp);

    /*
     * Check to see if this interface has previously triggered
     * aggressive protocol draining; if so, decrement the global
     * refcnt and clear PR_AGGDRAIN on the route domain if
     * there are no more of such an interface around.
     */
    (void) ifnet_set_idle_flags_locked(ifp, 0, ~0);

    lck_mtx_lock_spin(&ifp->if_ref_lock);
    if (!(ifp->if_refflags & IFRF_ATTACHED)) {
        lck_mtx_unlock(&ifp->if_ref_lock);
        ifnet_lock_done(ifp);
        ifnet_head_done();
        lck_mtx_unlock(rnh_lock);
        return (EINVAL);
    } else if (ifp->if_refflags & IFRF_DETACHING) {
        /* Interface has already been detached */
        lck_mtx_unlock(&ifp->if_ref_lock);
        ifnet_lock_done(ifp);
        ifnet_head_done();
        lck_mtx_unlock(rnh_lock);
        return (ENXIO);
    }
    VERIFY(!(ifp->if_refflags & IFRF_EMBRYONIC));
    /* Indicate this interface is being detached */
    ifp->if_refflags &= ~IFRF_ATTACHED;
    ifp->if_refflags |= IFRF_DETACHING;
    lck_mtx_unlock(&ifp->if_ref_lock);

    if (dlil_verbose)
        printf("%s: detaching\n", if_name(ifp));

    /* Reset ECN enable/disable flags */
    ifp->if_eflags &= ~IFEF_ECN_DISABLE;
    ifp->if_eflags &= ~IFEF_ECN_ENABLE;

    /*
     * Remove ifnet from the ifnet_head, ifindex2ifnet[]; it will
     * no longer be visible during lookups from this point.
     */
    VERIFY(ifindex2ifnet[ifp->if_index] == ifp);
    TAILQ_REMOVE(&ifnet_head, ifp, if_link);
    ifp->if_link.tqe_next = NULL;
    ifp->if_link.tqe_prev = NULL;
    if (ifp->if_ordered_link.tqe_next != NULL ||
        ifp->if_ordered_link.tqe_prev != NULL) {
        ifnet_remove_from_ordered_list(ifp);
    }
    ifindex2ifnet[ifp->if_index] = NULL;

    /* 18717626 - reset IFEF_IPV4_ROUTER and IFEF_IPV6_ROUTER */
    ifp->if_eflags &= ~(IFEF_IPV4_ROUTER | IFEF_IPV6_ROUTER);

    /* Record detach PC stacktrace */
    ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_detach);

    /* Clear logging parameters */
    bzero(&ifp->if_log, sizeof (ifp->if_log));

    /* Clear delegated interface info (reference released below) */
    delegated_ifp = ifp->if_delegated.ifp;
    bzero(&ifp->if_delegated, sizeof (ifp->if_delegated));

    /* Reset interface state */
    bzero(&ifp->if_interface_state, sizeof(ifp->if_interface_state));

    ifnet_lock_done(ifp);
    ifnet_head_done();
    lck_mtx_unlock(rnh_lock);

    /* Release reference held on the delegated interface */
    if (delegated_ifp != NULL)
        ifnet_release(delegated_ifp);

    /* Reset Link Quality Metric (unless loopback [lo0]) */
    if (ifp != lo_ifp)
        if_lqm_update(ifp, IFNET_LQM_THRESH_OFF, 0);

    /* Reset TCP local statistics */
    if (ifp->if_tcp_stat != NULL)
        bzero(ifp->if_tcp_stat, sizeof(*ifp->if_tcp_stat));

    /* Reset UDP local statistics */
    if (ifp->if_udp_stat != NULL)
        bzero(ifp->if_udp_stat, sizeof(*ifp->if_udp_stat));

    /* Reset ifnet IPv4 stats */
    if (ifp->if_ipv4_stat != NULL)
        bzero(ifp->if_ipv4_stat, sizeof(*ifp->if_ipv4_stat));

    /* Reset ifnet IPv6 stats */
    if (ifp->if_ipv6_stat != NULL)
        bzero(ifp->if_ipv6_stat, sizeof(*ifp->if_ipv6_stat));

    /* Release memory held for interface link status report */
    if (ifp->if_link_status != NULL) {
        FREE(ifp->if_link_status, M_TEMP);
        ifp->if_link_status = NULL;
    }

    /* Clear agent IDs */
    if (ifp->if_agentids != NULL) {
        FREE(ifp->if_agentids, M_NETAGENT);
        ifp->if_agentids = NULL;
    }
    ifp->if_agentcount = 0;

    /* Let BPF know we're detaching */
    bpfdetach(ifp);

    /* Mark the interface as DOWN */
    if_down(ifp);

    /* Disable forwarding cached route */
    lck_mtx_lock(&ifp->if_cached_route_lock);
    ifp->if_fwd_cacheok = 0;
    lck_mtx_unlock(&ifp->if_cached_route_lock);

    /* Disable data threshold and wait for any pending event posting */
    ifp->if_data_threshold = 0;
    VERIFY(ifp->if_dt_tcall != NULL);
    (void) thread_call_cancel_wait(ifp->if_dt_tcall);

    /*
     * Drain any deferred IGMPv3/MLDv2 query responses, but keep the
     * references to the info structures and leave them attached to
     * this ifnet.
     */
#if INET
    igmp_domifdetach(ifp);
#endif /* INET */
#if INET6
    mld_domifdetach(ifp);
#endif /* INET6 */

    dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHING, NULL, 0);

    /* Let worker thread take care of the rest, to avoid reentrancy */
    dlil_if_lock();
    ifnet_detaching_enqueue(ifp);
    dlil_if_unlock();

    return (0);
}
static void
ifnet_detaching_enqueue(struct ifnet *ifp)
{
	dlil_if_lock_assert();

	++ifnet_detaching_cnt;
	VERIFY(ifnet_detaching_cnt != 0);
	TAILQ_INSERT_TAIL(&ifnet_detaching_head, ifp, if_detaching_link);
	wakeup((caddr_t)&ifnet_delayed_run);
}
static struct ifnet *
ifnet_detaching_dequeue(void)
{
	struct ifnet *ifp;

	dlil_if_lock_assert();

	ifp = TAILQ_FIRST(&ifnet_detaching_head);
	VERIFY(ifnet_detaching_cnt != 0 || ifp == NULL);
	if (ifp != NULL) {
		VERIFY(ifnet_detaching_cnt != 0);
		--ifnet_detaching_cnt;
		TAILQ_REMOVE(&ifnet_detaching_head, ifp, if_detaching_link);
		ifp->if_detaching_link.tqe_next = NULL;
		ifp->if_detaching_link.tqe_prev = NULL;
	}
	return (ifp);
}
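/*
 * Example (illustrative sketch, not project code): the enqueue/dequeue
 * pair above is the classic producer/consumer idiom built from a TAILQ
 * plus wakeup()/msleep().  With a hypothetical `work_head' list, a
 * hypothetical `work_lock' mutex and entry type `struct work', the same
 * shape looks like:
 *
 *	// producer
 *	lck_mtx_lock(&work_lock);
 *	TAILQ_INSERT_TAIL(&work_head, w, w_link);
 *	wakeup((caddr_t)&work_head);
 *	lck_mtx_unlock(&work_lock);
 *
 *	// consumer
 *	lck_mtx_lock(&work_lock);
 *	while (TAILQ_EMPTY(&work_head))
 *		(void) msleep(&work_head, &work_lock, (PZERO - 1),
 *		    "work_wait", NULL);
 *	w = TAILQ_FIRST(&work_head);
 *	TAILQ_REMOVE(&work_head, w, w_link);
 *	lck_mtx_unlock(&work_lock);
 */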
static void
ifnet_detacher_thread_cont(int err)
{
#pragma unused(err)
	struct ifnet *ifp;

	for (;;) {
		dlil_if_lock_assert();
		while (ifnet_detaching_cnt == 0) {
			(void) msleep0(&ifnet_delayed_run, &dlil_ifnet_lock,
			    (PZERO - 1), "ifnet_detacher_cont", 0,
			    ifnet_detacher_thread_cont);
			/* NOTREACHED */
		}

		VERIFY(TAILQ_FIRST(&ifnet_detaching_head) != NULL);

		/* Take care of detaching ifnet */
		ifp = ifnet_detaching_dequeue();
		if (ifp != NULL) {
			dlil_if_unlock();
			ifnet_detach_final(ifp);
			dlil_if_lock();
		}
	}
}
static void
ifnet_detacher_thread_func(void *v, wait_result_t w)
{
#pragma unused(v, w)
	dlil_if_lock();
	(void) msleep0(&ifnet_delayed_run, &dlil_ifnet_lock,
	    (PZERO - 1), "ifnet_detacher", 0, ifnet_detacher_thread_cont);
	/*
	 * msleep0() shouldn't have returned as PCATCH was not set;
	 * therefore assert in this case.
	 */
	dlil_if_unlock();
	VERIFY(0);
	/* NOTREACHED */
}
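/*
 * Note on the continuation pattern above (illustrative sketch): when
 * msleep0() is given a non-NULL continuation, it does not return to the
 * caller; on wakeup the thread's kernel stack is discarded and execution
 * restarts at the continuation function, so all state must live in
 * globals or be re-derived there.  Shape, with hypothetical names:
 *
 *	static void
 *	worker_cont(int err)
 *	{
 *		for (;;) {
 *			// consume pending work from shared state ...
 *			(void) msleep0(&chan, &lock, (PZERO - 1),
 *			    "worker", 0, worker_cont);	// never returns
 *		}
 *	}
 */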
static void
ifnet_detach_final(struct ifnet *ifp)
{
	struct ifnet_filter *filter, *filter_next;
	struct ifnet_filter_head fhead;
	struct dlil_threading_info *inp;
	struct ifaddr *ifa;
	ifnet_detached_func if_free;
	int i;

	lck_mtx_lock(&ifp->if_ref_lock);
	if (!(ifp->if_refflags & IFRF_DETACHING)) {
		panic("%s: flags mismatch (detaching not set) ifp=%p",
		    __func__, ifp);
		/* NOTREACHED */
	}

	/*
	 * Wait until the existing IO references get released
	 * before we proceed with ifnet_detach. This is not a
	 * common case, so block without using a continuation.
	 */
	while (ifp->if_refio > 0) {
		printf("%s: Waiting for IO references on %s interface "
		    "to be released\n", __func__, if_name(ifp));
		(void) msleep(&(ifp->if_refio), &ifp->if_ref_lock,
		    (PZERO - 1), "ifnet_ioref_wait", NULL);
	}
	lck_mtx_unlock(&ifp->if_ref_lock);

	/* Drain and destroy send queue */
	ifclassq_teardown(ifp);

	/* Detach interface filters */
	lck_mtx_lock(&ifp->if_flt_lock);
	if_flt_monitor_enter(ifp);

	LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
	fhead = ifp->if_flt_head;
	TAILQ_INIT(&ifp->if_flt_head);

	for (filter = TAILQ_FIRST(&fhead); filter; filter = filter_next) {
		filter_next = TAILQ_NEXT(filter, filt_next);
		lck_mtx_unlock(&ifp->if_flt_lock);

		dlil_detach_filter_internal(filter, 1);
		lck_mtx_lock(&ifp->if_flt_lock);
	}
	if_flt_monitor_leave(ifp);
	lck_mtx_unlock(&ifp->if_flt_lock);

	/* Tell upper layers to drop their network addresses */
	if_purgeaddrs(ifp);

	ifnet_lock_exclusive(ifp);

	/* Unplumb all protocols */
	for (i = 0; i < PROTO_HASH_SLOTS; i++) {
		struct if_proto *proto;

		proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
		while (proto != NULL) {
			protocol_family_t family = proto->protocol_family;
			ifnet_lock_done(ifp);
			proto_unplumb(family, ifp);
			ifnet_lock_exclusive(ifp);
			proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
		}
		/* There should not be any protocols left */
		VERIFY(SLIST_EMPTY(&ifp->if_proto_hash[i]));
	}
	zfree(dlif_phash_zone, ifp->if_proto_hash);
	ifp->if_proto_hash = NULL;

	/* Detach (permanent) link address from if_addrhead */
	ifa = TAILQ_FIRST(&ifp->if_addrhead);
	VERIFY(ifnet_addrs[ifp->if_index - 1] == ifa);
	if_detach_link_ifa(ifp, ifa);

	/* Remove (permanent) link address from ifnet_addrs[] */
	ifnet_addrs[ifp->if_index - 1] = NULL;

	/* This interface should not be on {ifnet_head,detaching} */
	VERIFY(ifp->if_link.tqe_next == NULL);
	VERIFY(ifp->if_link.tqe_prev == NULL);
	VERIFY(ifp->if_detaching_link.tqe_next == NULL);
	VERIFY(ifp->if_detaching_link.tqe_prev == NULL);
	VERIFY(ifp->if_ordered_link.tqe_next == NULL);
	VERIFY(ifp->if_ordered_link.tqe_prev == NULL);

	/* The slot should have been emptied */
	VERIFY(ifindex2ifnet[ifp->if_index] == NULL);

	/* There should not be any addresses left */
	VERIFY(TAILQ_EMPTY(&ifp->if_addrhead));

	/*
	 * Signal the starter thread to terminate itself.
	 */
	if (ifp->if_start_thread != THREAD_NULL) {
		lck_mtx_lock_spin(&ifp->if_start_lock);
		ifp->if_start_flags = 0;
		ifp->if_start_thread = THREAD_NULL;
		wakeup_one((caddr_t)&ifp->if_start_thread);
		lck_mtx_unlock(&ifp->if_start_lock);
	}

	/*
	 * Signal the poller thread to terminate itself.
	 */
	if (ifp->if_poll_thread != THREAD_NULL) {
		lck_mtx_lock_spin(&ifp->if_poll_lock);
		ifp->if_poll_thread = THREAD_NULL;
		wakeup_one((caddr_t)&ifp->if_poll_thread);
		lck_mtx_unlock(&ifp->if_poll_lock);
	}

	/*
	 * If thread affinity was set for the workloop thread, we will need
	 * to tear down the affinity and release the extra reference count
	 * taken at attach time. Does not apply to lo0 or other interfaces
	 * without dedicated input threads.
	 */
	if ((inp = ifp->if_inp) != NULL) {
		VERIFY(inp != dlil_main_input_thread);

		if (inp->net_affinity) {
			struct thread *tp, *wtp, *ptp;

			lck_mtx_lock_spin(&inp->input_lck);
			wtp = inp->wloop_thr;
			inp->wloop_thr = THREAD_NULL;
			ptp = inp->poll_thr;
			inp->poll_thr = THREAD_NULL;
			tp = inp->input_thr;	/* don't nullify now */
			inp->net_affinity = FALSE;
			lck_mtx_unlock(&inp->input_lck);

			/* Tear down poll thread affinity */
			if (ptp != NULL) {
				VERIFY(ifp->if_eflags & IFEF_RXPOLL);
				(void) dlil_affinity_set(ptp,
				    THREAD_AFFINITY_TAG_NULL);
				thread_deallocate(ptp);
			}

			/* Tear down workloop thread affinity */
			if (wtp != NULL) {
				(void) dlil_affinity_set(wtp,
				    THREAD_AFFINITY_TAG_NULL);
				thread_deallocate(wtp);
			}

			/* Tear down DLIL input thread affinity */
			(void) dlil_affinity_set(tp, THREAD_AFFINITY_TAG_NULL);
			thread_deallocate(tp);
		}

		/* disassociate ifp DLIL input thread */
		ifp->if_inp = NULL;

		/* tell the input thread to terminate */
		lck_mtx_lock_spin(&inp->input_lck);
		inp->input_waiting |= DLIL_INPUT_TERMINATE;
		if (!(inp->input_waiting & DLIL_INPUT_RUNNING)) {
			wakeup_one((caddr_t)&inp->input_waiting);
		}
		lck_mtx_unlock(&inp->input_lck);

		/* wait for the input thread to terminate */
		lck_mtx_lock_spin(&inp->input_lck);
		while ((inp->input_waiting & DLIL_INPUT_TERMINATE_COMPLETE)
		    == 0) {
			(void) msleep(&inp->input_waiting, &inp->input_lck,
			    (PZERO - 1) | PSPIN, inp->input_name, NULL);
		}
		lck_mtx_unlock(&inp->input_lck);

		/* clean-up input thread state */
		dlil_clean_threading_info(inp);
	}

	/* The driver might unload, so point these to ourselves */
	if_free = ifp->if_free;
	ifp->if_output_dlil = ifp_if_output;
	ifp->if_output = ifp_if_output;
	ifp->if_pre_enqueue = ifp_if_output;
	ifp->if_start = ifp_if_start;
	ifp->if_output_ctl = ifp_if_ctl;
	ifp->if_input_dlil = ifp_if_input;
	ifp->if_input_poll = ifp_if_input_poll;
	ifp->if_input_ctl = ifp_if_ctl;
	ifp->if_ioctl = ifp_if_ioctl;
	ifp->if_set_bpf_tap = ifp_if_set_bpf_tap;
	ifp->if_free = ifp_if_free;
	ifp->if_demux = ifp_if_demux;
	ifp->if_event = ifp_if_event;
	ifp->if_framer_legacy = ifp_if_framer;
	ifp->if_framer = ifp_if_framer_extended;
	ifp->if_add_proto = ifp_if_add_proto;
	ifp->if_del_proto = ifp_if_del_proto;
	ifp->if_check_multi = ifp_if_check_multi;

	/* wipe out interface description */
	VERIFY(ifp->if_desc.ifd_maxlen == IF_DESCSIZE);
	ifp->if_desc.ifd_len = 0;
	VERIFY(ifp->if_desc.ifd_desc != NULL);
	bzero(ifp->if_desc.ifd_desc, IF_DESCSIZE);

	/* there shouldn't be any delegation by now */
	VERIFY(ifp->if_delegated.ifp == NULL);
	VERIFY(ifp->if_delegated.type == 0);
	VERIFY(ifp->if_delegated.family == 0);
	VERIFY(ifp->if_delegated.subfamily == 0);
	VERIFY(ifp->if_delegated.expensive == 0);

	/* QoS marking gets cleared */
	ifp->if_eflags &= ~IFEF_QOSMARKING_ENABLED;
	if_set_qosmarking_mode(ifp, IFRTYPE_QOSMARKING_MODE_NONE);

	ifnet_lock_done(ifp);

	/*
	 * Detach this interface from packet filter, if enabled.
	 */
	pf_ifnet_hook(ifp, 0);

	/* Filter list should be empty */
	lck_mtx_lock_spin(&ifp->if_flt_lock);
	VERIFY(TAILQ_EMPTY(&ifp->if_flt_head));
	VERIFY(ifp->if_flt_busy == 0);
	VERIFY(ifp->if_flt_waiters == 0);
	lck_mtx_unlock(&ifp->if_flt_lock);

	/* Last chance to drain send queue */
	if_qflush(ifp, 0);

	/* Last chance to cleanup any cached route */
	lck_mtx_lock(&ifp->if_cached_route_lock);
	VERIFY(!ifp->if_fwd_cacheok);
	ROUTE_RELEASE(&ifp->if_fwd_route);
	bzero(&ifp->if_fwd_route, sizeof (ifp->if_fwd_route));
	ROUTE_RELEASE(&ifp->if_src_route);
	bzero(&ifp->if_src_route, sizeof (ifp->if_src_route));
	ROUTE_RELEASE(&ifp->if_src_route6);
	bzero(&ifp->if_src_route6, sizeof (ifp->if_src_route6));
	lck_mtx_unlock(&ifp->if_cached_route_lock);

	VERIFY(ifp->if_data_threshold == 0);
	VERIFY(ifp->if_dt_tcall != NULL);
	VERIFY(!thread_call_isactive(ifp->if_dt_tcall));

	ifnet_llreach_ifdetach(ifp);

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHED, NULL, 0);

	/*
	 * Finally, mark this ifnet as detached.
	 */
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	if (!(ifp->if_refflags & IFRF_DETACHING)) {
		panic("%s: flags mismatch (detaching not set) ifp=%p",
		    __func__, ifp);
		/* NOTREACHED */
	}
	ifp->if_refflags &= ~IFRF_DETACHING;
	lck_mtx_unlock(&ifp->if_ref_lock);
	if (if_free != NULL)
		if_free(ifp);

	printf("%s: detached\n", if_name(ifp));

	/* Release reference held during ifnet attach */
	ifnet_release(ifp);
}
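/*
 * Note (illustrative sketch, not project code): ifnet_detach_final()
 * swaps every driver-supplied callback for a DLIL-owned stub before the
 * driver text can unload, so a straggling caller always lands on
 * resident code.  The idiom in miniature, with hypothetical names:
 *
 *	static int op_stub(void *arg) { (void)arg; return (EOPNOTSUPP); }
 *	...
 *	saved_op = obj->op;	// keep the old pointer for one final call
 *	obj->op = op_stub;	// all future callers hit the stub
 */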
static errno_t
ifp_if_output(struct ifnet *ifp, struct mbuf *m)
{
#pragma unused(ifp)
	m_freem_list(m);
	return (0);
}

static void
ifp_if_start(struct ifnet *ifp)
{
	ifnet_purge(ifp);
}

static errno_t
ifp_if_input(struct ifnet *ifp, struct mbuf *m_head,
    struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
    boolean_t poll, struct thread *tp)
{
#pragma unused(ifp, m_tail, s, poll, tp)
	m_freem_list(m_head);
	return (ENXIO);
}

static void
ifp_if_input_poll(struct ifnet *ifp, u_int32_t flags, u_int32_t max_cnt,
    struct mbuf **m_head, struct mbuf **m_tail, u_int32_t *cnt, u_int32_t *len)
{
#pragma unused(ifp, flags, max_cnt)
	if (m_head != NULL)
		*m_head = NULL;
	if (m_tail != NULL)
		*m_tail = NULL;
	if (cnt != NULL)
		*cnt = 0;
	if (len != NULL)
		*len = 0;
}

static errno_t
ifp_if_ctl(struct ifnet *ifp, ifnet_ctl_cmd_t cmd, u_int32_t arglen, void *arg)
{
#pragma unused(ifp, cmd, arglen, arg)
	return (EOPNOTSUPP);
}

static errno_t
ifp_if_demux(struct ifnet *ifp, struct mbuf *m, char *fh, protocol_family_t *pf)
{
#pragma unused(ifp, fh, pf)
	m_freem(m);
	return (EJUSTRETURN);
}

static errno_t
ifp_if_add_proto(struct ifnet *ifp, protocol_family_t pf,
    const struct ifnet_demux_desc *da, u_int32_t dc)
{
#pragma unused(ifp, pf, da, dc)
	return (EINVAL);
}

static errno_t
ifp_if_del_proto(struct ifnet *ifp, protocol_family_t pf)
{
#pragma unused(ifp, pf)
	return (EINVAL);
}

static errno_t
ifp_if_check_multi(struct ifnet *ifp, const struct sockaddr *sa)
{
#pragma unused(ifp, sa)
	return (EOPNOTSUPP);
}

#if CONFIG_EMBEDDED
static errno_t
ifp_if_framer(struct ifnet *ifp, struct mbuf **m,
    const struct sockaddr *sa, const char *ll, const char *t,
    u_int32_t *pre, u_int32_t *post)
#else
static errno_t
ifp_if_framer(struct ifnet *ifp, struct mbuf **m,
    const struct sockaddr *sa, const char *ll, const char *t)
#endif /* !CONFIG_EMBEDDED */
{
#pragma unused(ifp, m, sa, ll, t)
#if CONFIG_EMBEDDED
	return (ifp_if_framer_extended(ifp, m, sa, ll, t, pre, post));
#else
	return (ifp_if_framer_extended(ifp, m, sa, ll, t, NULL, NULL));
#endif /* !CONFIG_EMBEDDED */
}

static errno_t
ifp_if_framer_extended(struct ifnet *ifp, struct mbuf **m,
    const struct sockaddr *sa, const char *ll, const char *t,
    u_int32_t *pre, u_int32_t *post)
{
#pragma unused(ifp, sa, ll, t)
	m_freem(*m);
	*m = NULL;

	if (pre != NULL)
		*pre = 0;
	if (post != NULL)
		*post = 0;

	return (EJUSTRETURN);
}

static errno_t
ifp_if_ioctl(struct ifnet *ifp, unsigned long cmd, void *arg)
{
#pragma unused(ifp, cmd, arg)
	return (EOPNOTSUPP);
}

static errno_t
ifp_if_set_bpf_tap(struct ifnet *ifp, bpf_tap_mode tm, bpf_packet_func f)
{
#pragma unused(ifp, tm, f)
	/* XXX not sure what to do here */
	return (0);
}

static void
ifp_if_free(struct ifnet *ifp)
{
#pragma unused(ifp)
}

static void
ifp_if_event(struct ifnet *ifp, const struct kev_msg *e)
{
#pragma unused(ifp, e)
}
int dlil_if_acquire(u_int32_t family, const void *uniqueid,
    size_t uniqueid_len, struct ifnet **ifp)
{
	struct ifnet *ifp1 = NULL;
	struct dlil_ifnet *dlifp1 = NULL;
	void *buf, *base, **pbuf;
	int ret = 0;

	dlil_if_lock();
	TAILQ_FOREACH(dlifp1, &dlil_ifnet_head, dl_if_link) {
		ifp1 = (struct ifnet *)dlifp1;

		if (ifp1->if_family != family)
			continue;

		lck_mtx_lock(&dlifp1->dl_if_lock);
		/* same uniqueid and same len or no unique id specified */
		if ((uniqueid_len == dlifp1->dl_if_uniqueid_len) &&
		    bcmp(uniqueid, dlifp1->dl_if_uniqueid, uniqueid_len) == 0) {
			/* check for matching interface in use */
			if (dlifp1->dl_if_flags & DLIF_INUSE) {
				if (uniqueid_len) {
					ret = EBUSY;
					lck_mtx_unlock(&dlifp1->dl_if_lock);
					goto end;
				}
			} else {
				dlifp1->dl_if_flags |= (DLIF_INUSE|DLIF_REUSE);
				lck_mtx_unlock(&dlifp1->dl_if_lock);
				*ifp = ifp1;
				goto end;
			}
		}
		lck_mtx_unlock(&dlifp1->dl_if_lock);
	}

	/* no interface found, allocate a new one */
	buf = zalloc(dlif_zone);
	if (buf == NULL) {
		ret = ENOMEM;
		goto end;
	}
	bzero(buf, dlif_bufsize);

	/* Get the 64-bit aligned base address for this object */
	base = (void *)P2ROUNDUP((intptr_t)buf + sizeof (u_int64_t),
	    sizeof (u_int64_t));
	VERIFY(((intptr_t)base + dlif_size) <= ((intptr_t)buf + dlif_bufsize));

	/*
	 * Wind back a pointer size from the aligned base and
	 * save the original address so we can free it later.
	 */
	pbuf = (void **)((intptr_t)base - sizeof (void *));
	*pbuf = buf;
	dlifp1 = base;

	if (uniqueid_len) {
		MALLOC(dlifp1->dl_if_uniqueid, void *, uniqueid_len,
		    M_NKE, M_WAITOK);
		if (dlifp1->dl_if_uniqueid == NULL) {
			zfree(dlif_zone, buf);
			ret = ENOMEM;
			goto end;
		}
		bcopy(uniqueid, dlifp1->dl_if_uniqueid, uniqueid_len);
		dlifp1->dl_if_uniqueid_len = uniqueid_len;
	}

	ifp1 = (struct ifnet *)dlifp1;
	dlifp1->dl_if_flags = DLIF_INUSE;
	if (ifnet_debug) {
		dlifp1->dl_if_flags |= DLIF_DEBUG;
		dlifp1->dl_if_trace = dlil_if_trace;
	}
	ifp1->if_name = dlifp1->dl_if_namestorage;
	ifp1->if_xname = dlifp1->dl_if_xnamestorage;

	/* initialize interface description */
	ifp1->if_desc.ifd_maxlen = IF_DESCSIZE;
	ifp1->if_desc.ifd_len = 0;
	ifp1->if_desc.ifd_desc = dlifp1->dl_if_descstorage;

	mac_ifnet_label_init(ifp1);

	if ((ret = dlil_alloc_local_stats(ifp1)) != 0) {
		DLIL_PRINTF("%s: failed to allocate if local stats, "
		    "error: %d\n", __func__, ret);
		/* This probably shouldn't be fatal */
		ret = 0;
	}

	lck_mtx_init(&dlifp1->dl_if_lock, ifnet_lock_group, ifnet_lock_attr);
	lck_rw_init(&ifp1->if_lock, ifnet_lock_group, ifnet_lock_attr);
	lck_mtx_init(&ifp1->if_ref_lock, ifnet_lock_group, ifnet_lock_attr);
	lck_mtx_init(&ifp1->if_flt_lock, ifnet_lock_group, ifnet_lock_attr);
	lck_mtx_init(&ifp1->if_addrconfig_lock, ifnet_lock_group,
	    ifnet_lock_attr);
	lck_rw_init(&ifp1->if_llreach_lock, ifnet_lock_group, ifnet_lock_attr);
	lck_rw_init(&ifp1->if_inetdata_lock, ifnet_lock_group,
	    ifnet_lock_attr);
	ifp1->if_inetdata = NULL;
	lck_rw_init(&ifp1->if_inet6data_lock, ifnet_lock_group,
	    ifnet_lock_attr);
	ifp1->if_inet6data = NULL;
	lck_rw_init(&ifp1->if_link_status_lock, ifnet_lock_group,
	    ifnet_lock_attr);
	ifp1->if_link_status = NULL;

	/* for send data paths */
	lck_mtx_init(&ifp1->if_start_lock, ifnet_snd_lock_group,
	    ifnet_lock_attr);
	lck_mtx_init(&ifp1->if_cached_route_lock, ifnet_snd_lock_group,
	    ifnet_lock_attr);
	lck_mtx_init(&ifp1->if_snd.ifcq_lock, ifnet_snd_lock_group,
	    ifnet_lock_attr);

	/* for receive data paths */
	lck_mtx_init(&ifp1->if_poll_lock, ifnet_rcv_lock_group,
	    ifnet_lock_attr);

	/* thread call allocation is done with sleeping zalloc */
	ifp1->if_dt_tcall = thread_call_allocate_with_options(dlil_dt_tcall_fn,
	    ifp1, THREAD_CALL_PRIORITY_KERNEL, THREAD_CALL_OPTIONS_ONCE);
	if (ifp1->if_dt_tcall == NULL) {
		panic_plain("%s: couldn't create if_dt_tcall", __func__);
		/* NOTREACHED */
	}

	TAILQ_INSERT_TAIL(&dlil_ifnet_head, dlifp1, dl_if_link);

	*ifp = ifp1;

end:
	dlil_if_unlock();

	VERIFY(dlifp1 == NULL || (IS_P2ALIGNED(dlifp1, sizeof (u_int64_t)) &&
	    IS_P2ALIGNED(&ifp1->if_data, sizeof (u_int64_t))));

	return (ret);
}
__private_extern__ void
dlil_if_release(ifnet_t ifp)
{
	struct dlil_ifnet *dlifp = (struct dlil_ifnet *)ifp;

	VERIFY(OSDecrementAtomic64(&net_api_stats.nas_ifnet_alloc_count) > 0);
	if (!(ifp->if_xflags & IFXF_ALLOC_KPI)) {
		VERIFY(OSDecrementAtomic64(
		    &net_api_stats.nas_ifnet_alloc_os_count) > 0);
	}

	ifnet_lock_exclusive(ifp);
	lck_mtx_lock(&dlifp->dl_if_lock);
	dlifp->dl_if_flags &= ~DLIF_INUSE;
	strlcpy(dlifp->dl_if_namestorage, ifp->if_name, IFNAMSIZ);
	ifp->if_name = dlifp->dl_if_namestorage;
	/* Reset external name (name + unit) */
	ifp->if_xname = dlifp->dl_if_xnamestorage;
	snprintf(__DECONST(char *, ifp->if_xname), IFXNAMSIZ,
	    "%s?", ifp->if_name);
	lck_mtx_unlock(&dlifp->dl_if_lock);

	/*
	 * We can either recycle the MAC label here or in dlil_if_acquire().
	 * It seems logical to do it here but this means that anything that
	 * still has a handle on ifp will now see it as unlabeled.
	 * Since the interface is "dead" that may be OK. Revisit later.
	 */
	mac_ifnet_label_recycle(ifp);

	ifnet_lock_done(ifp);
}
__private_extern__ void
dlil_if_lock(void)
{
	lck_mtx_lock(&dlil_ifnet_lock);
}

__private_extern__ void
dlil_if_unlock(void)
{
	lck_mtx_unlock(&dlil_ifnet_lock);
}

__private_extern__ void
dlil_if_lock_assert(void)
{
	LCK_MTX_ASSERT(&dlil_ifnet_lock, LCK_MTX_ASSERT_OWNED);
}

__private_extern__ void
dlil_proto_unplumb_all(struct ifnet *ifp)
{
	/*
	 * if_proto_hash[0-2] are for PF_INET, PF_INET6 and PF_VLAN, where
	 * each bucket contains exactly one entry; PF_VLAN does not need an
	 * explicit unplumb.
	 *
	 * if_proto_hash[3] is for other protocols; we expect anything
	 * in this bucket to respond to the DETACHING event (which would
	 * have happened by now) and do the unplumb then.
	 */
	(void) proto_unplumb(PF_INET, ifp);
	(void) proto_unplumb(PF_INET6, ifp);
}
static void
ifp_src_route_copyout(struct ifnet *ifp, struct route *dst)
{
	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
	lck_mtx_convert_spin(&ifp->if_cached_route_lock);

	route_copyout(dst, &ifp->if_src_route, sizeof (*dst));

	lck_mtx_unlock(&ifp->if_cached_route_lock);
}

static void
ifp_src_route_copyin(struct ifnet *ifp, struct route *src)
{
	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
	lck_mtx_convert_spin(&ifp->if_cached_route_lock);

	if (ifp->if_fwd_cacheok) {
		route_copyin(src, &ifp->if_src_route, sizeof (*src));
	} else {
		ROUTE_RELEASE(src);
	}
	lck_mtx_unlock(&ifp->if_cached_route_lock);
}

static void
ifp_src_route6_copyout(struct ifnet *ifp, struct route_in6 *dst)
{
	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
	lck_mtx_convert_spin(&ifp->if_cached_route_lock);

	route_copyout((struct route *)dst, (struct route *)&ifp->if_src_route6,
	    sizeof (*dst));

	lck_mtx_unlock(&ifp->if_cached_route_lock);
}

static void
ifp_src_route6_copyin(struct ifnet *ifp, struct route_in6 *src)
{
	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
	lck_mtx_convert_spin(&ifp->if_cached_route_lock);

	if (ifp->if_fwd_cacheok) {
		route_copyin((struct route *)src,
		    (struct route *)&ifp->if_src_route6, sizeof (*src));
	} else {
		ROUTE_RELEASE(src);
	}
	lck_mtx_unlock(&ifp->if_cached_route_lock);
}
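/*
 * Locking note (sketch): lck_mtx_lock_spin() takes the mutex in spin
 * mode, which is cheap for short hold times, and lck_mtx_convert_spin()
 * upgrades the held lock to a full mutex once the critical section may
 * block or run long -- as route_copyout()/route_copyin() can.  Pattern:
 *
 *	lck_mtx_lock_spin(&lock);	// fast acquisition
 *	lck_mtx_convert_spin(&lock);	// now safe for longer work
 *	...
 *	lck_mtx_unlock(&lock);
 */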
struct rtentry *
ifnet_cached_rtlookup_inet(struct ifnet *ifp, struct in_addr src_ip)
{
	struct route src_rt;
	struct sockaddr_in *dst;

	dst = (struct sockaddr_in *)(void *)(&src_rt.ro_dst);

	ifp_src_route_copyout(ifp, &src_rt);

	if (ROUTE_UNUSABLE(&src_rt) || src_ip.s_addr != dst->sin_addr.s_addr) {
		ROUTE_RELEASE(&src_rt);
		if (dst->sin_family != AF_INET) {
			bzero(&src_rt.ro_dst, sizeof (src_rt.ro_dst));
			dst->sin_len = sizeof (src_rt.ro_dst);
			dst->sin_family = AF_INET;
		}
		dst->sin_addr = src_ip;

		VERIFY(src_rt.ro_rt == NULL);
		src_rt.ro_rt = rtalloc1_scoped((struct sockaddr *)dst,
		    0, 0, ifp->if_index);

		if (src_rt.ro_rt != NULL) {
			/* retain a ref, copyin consumes one */
			struct rtentry *rte = src_rt.ro_rt;
			RT_ADDREF(rte);
			ifp_src_route_copyin(ifp, &src_rt);
			src_rt.ro_rt = rte;
		}
	}

	return (src_rt.ro_rt);
}

struct rtentry *
ifnet_cached_rtlookup_inet6(struct ifnet *ifp, struct in6_addr *src_ip6)
{
	struct route_in6 src_rt;

	ifp_src_route6_copyout(ifp, &src_rt);

	if (ROUTE_UNUSABLE(&src_rt) ||
	    !IN6_ARE_ADDR_EQUAL(src_ip6, &src_rt.ro_dst.sin6_addr)) {
		ROUTE_RELEASE(&src_rt);
		if (src_rt.ro_dst.sin6_family != AF_INET6) {
			bzero(&src_rt.ro_dst, sizeof (src_rt.ro_dst));
			src_rt.ro_dst.sin6_len = sizeof (src_rt.ro_dst);
			src_rt.ro_dst.sin6_family = AF_INET6;
		}
		src_rt.ro_dst.sin6_scope_id = in6_addr2scopeid(ifp, src_ip6);
		bcopy(src_ip6, &src_rt.ro_dst.sin6_addr,
		    sizeof (src_rt.ro_dst.sin6_addr));

		if (src_rt.ro_rt == NULL) {
			src_rt.ro_rt = rtalloc1_scoped(
			    (struct sockaddr *)&src_rt.ro_dst, 0, 0,
			    ifp->if_index);

			if (src_rt.ro_rt != NULL) {
				/* retain a ref, copyin consumes one */
				struct rtentry *rte = src_rt.ro_rt;
				RT_ADDREF(rte);
				ifp_src_route6_copyin(ifp, &src_rt);
				src_rt.ro_rt = rte;
			}
		}
	}

	return (src_rt.ro_rt);
}
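/*
 * Usage sketch (hypothetical caller, illustrative only): a transmit
 * path that needs a scoped route for a source address can use the
 * cached lookup, and must release the reference it is handed:
 *
 *	struct rtentry *rt;
 *
 *	rt = ifnet_cached_rtlookup_inet(ifp, ip->ip_src);
 *	if (rt != NULL) {
 *		... use rt ...
 *		rtfree(rt);		// drop the retained reference
 *	}
 */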
void
if_lqm_update(struct ifnet *ifp, int lqm, int locked)
{
	struct kev_dl_link_quality_metric_data ev_lqm_data;

	VERIFY(lqm >= IFNET_LQM_MIN && lqm <= IFNET_LQM_MAX);

	/* Normalize to edge */
	if (lqm >= 0 && lqm <= IFNET_LQM_THRESH_ABORT) {
		lqm = IFNET_LQM_THRESH_ABORT;
		atomic_bitset_32(&tcbinfo.ipi_flags,
		    INPCBINFO_HANDLE_LQM_ABORT);
		inpcb_timer_sched(&tcbinfo, INPCB_TIMER_FAST);
	} else if (lqm > IFNET_LQM_THRESH_ABORT &&
	    lqm <= IFNET_LQM_THRESH_MINIMALLY_VIABLE) {
		lqm = IFNET_LQM_THRESH_MINIMALLY_VIABLE;
	} else if (lqm > IFNET_LQM_THRESH_MINIMALLY_VIABLE &&
	    lqm <= IFNET_LQM_THRESH_POOR) {
		lqm = IFNET_LQM_THRESH_POOR;
	} else if (lqm > IFNET_LQM_THRESH_POOR &&
	    lqm <= IFNET_LQM_THRESH_GOOD) {
		lqm = IFNET_LQM_THRESH_GOOD;
	}

	/*
	 * Take the lock if needed
	 */
	if (!locked)
		ifnet_lock_exclusive(ifp);

	if (lqm == ifp->if_interface_state.lqm_state &&
	    (ifp->if_interface_state.valid_bitmask &
	    IF_INTERFACE_STATE_LQM_STATE_VALID)) {
		/*
		 * Release the lock if it was not held by the caller
		 */
		if (!locked)
			ifnet_lock_done(ifp);
		return;		/* nothing to update */
	}
	ifp->if_interface_state.valid_bitmask |=
	    IF_INTERFACE_STATE_LQM_STATE_VALID;
	ifp->if_interface_state.lqm_state = lqm;

	/*
	 * Don't want to hold the lock when issuing kernel events
	 */
	ifnet_lock_done(ifp);

	bzero(&ev_lqm_data, sizeof (ev_lqm_data));
	ev_lqm_data.link_quality_metric = lqm;

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_LINK_QUALITY_METRIC_CHANGED,
	    (struct net_event_data *)&ev_lqm_data, sizeof (ev_lqm_data));

	/*
	 * Reacquire the lock for the caller
	 */
	if (locked)
		ifnet_lock_exclusive(ifp);
}
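/*
 * The "normalize to edge" block above snaps a raw metric onto the
 * nearest named threshold so that only discrete state changes get
 * reported.  A roughly equivalent table-driven sketch (illustrative
 * only; omits the ABORT side effects and the negative-value case):
 *
 *	static const int edges[] = {
 *		IFNET_LQM_THRESH_ABORT,
 *		IFNET_LQM_THRESH_MINIMALLY_VIABLE,
 *		IFNET_LQM_THRESH_POOR,
 *		IFNET_LQM_THRESH_GOOD,
 *	};
 *	for (j = 0; j < 4; j++) {
 *		if (lqm <= edges[j]) {
 *			lqm = edges[j];
 *			break;
 *		}
 *	}
 */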
static void
if_rrc_state_update(struct ifnet *ifp, unsigned int rrc_state)
{
	struct kev_dl_rrc_state kev;

	if (rrc_state == ifp->if_interface_state.rrc_state &&
	    (ifp->if_interface_state.valid_bitmask &
	    IF_INTERFACE_STATE_RRC_STATE_VALID))
		return;

	ifp->if_interface_state.valid_bitmask |=
	    IF_INTERFACE_STATE_RRC_STATE_VALID;
	ifp->if_interface_state.rrc_state = rrc_state;

	/*
	 * Don't want to hold the lock when issuing kernel events
	 */
	ifnet_lock_done(ifp);

	bzero(&kev, sizeof(struct kev_dl_rrc_state));
	kev.rrc_state = rrc_state;

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_RRC_STATE_CHANGED,
	    (struct net_event_data *)&kev, sizeof(struct kev_dl_rrc_state));

	ifnet_lock_exclusive(ifp);
}
void
if_state_update(struct ifnet *ifp,
    struct if_interface_state *if_interface_state)
{
	u_short if_index_available = 0;

	ifnet_lock_exclusive(ifp);

	if ((ifp->if_type != IFT_CELLULAR) &&
	    (if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_RRC_STATE_VALID)) {
		ifnet_lock_done(ifp);
		return;
	}
	if ((if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_LQM_STATE_VALID) &&
	    (if_interface_state->lqm_state < IFNET_LQM_MIN ||
	    if_interface_state->lqm_state > IFNET_LQM_MAX)) {
		ifnet_lock_done(ifp);
		return;
	}
	if ((if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_RRC_STATE_VALID) &&
	    if_interface_state->rrc_state !=
	    IF_INTERFACE_STATE_RRC_STATE_IDLE &&
	    if_interface_state->rrc_state !=
	    IF_INTERFACE_STATE_RRC_STATE_CONNECTED) {
		ifnet_lock_done(ifp);
		return;
	}

	if (if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_LQM_STATE_VALID) {
		if_lqm_update(ifp, if_interface_state->lqm_state, 1);
	}
	if (if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_RRC_STATE_VALID) {
		if_rrc_state_update(ifp, if_interface_state->rrc_state);
	}
	if (if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID) {
		ifp->if_interface_state.valid_bitmask |=
		    IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
		ifp->if_interface_state.interface_availability =
		    if_interface_state->interface_availability;

		if (ifp->if_interface_state.interface_availability ==
		    IF_INTERFACE_STATE_INTERFACE_AVAILABLE) {
			if_index_available = ifp->if_index;
		}
	}
	ifnet_lock_done(ifp);

	/*
	 * Check if the TCP connections going on this interface should be
	 * forced to send probe packets instead of waiting for TCP timers
	 * to fire. This will be done when there is an explicit
	 * notification that the interface became available.
	 */
	if (if_index_available > 0)
		tcp_interface_send_probe(if_index_available);
}
void
if_get_state(struct ifnet *ifp,
    struct if_interface_state *if_interface_state)
{
	ifnet_lock_shared(ifp);

	if_interface_state->valid_bitmask = 0;

	if (ifp->if_interface_state.valid_bitmask &
	    IF_INTERFACE_STATE_RRC_STATE_VALID) {
		if_interface_state->valid_bitmask |=
		    IF_INTERFACE_STATE_RRC_STATE_VALID;
		if_interface_state->rrc_state =
		    ifp->if_interface_state.rrc_state;
	}
	if (ifp->if_interface_state.valid_bitmask &
	    IF_INTERFACE_STATE_LQM_STATE_VALID) {
		if_interface_state->valid_bitmask |=
		    IF_INTERFACE_STATE_LQM_STATE_VALID;
		if_interface_state->lqm_state =
		    ifp->if_interface_state.lqm_state;
	}
	if (ifp->if_interface_state.valid_bitmask &
	    IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID) {
		if_interface_state->valid_bitmask |=
		    IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
		if_interface_state->interface_availability =
		    ifp->if_interface_state.interface_availability;
	}

	ifnet_lock_done(ifp);
}
errno_t
if_probe_connectivity(struct ifnet *ifp, u_int32_t conn_probe)
{
	ifnet_lock_exclusive(ifp);
	if (conn_probe > 1) {
		ifnet_lock_done(ifp);
		return (EINVAL);
	}
	if (conn_probe == 0)
		ifp->if_eflags &= ~IFEF_PROBE_CONNECTIVITY;
	else
		ifp->if_eflags |= IFEF_PROBE_CONNECTIVITY;
	ifnet_lock_done(ifp);

	necp_update_all_clients();

	tcp_probe_connectivity(ifp, conn_probe);
	return (0);
}
int
uuid_get_ethernet(u_int8_t *node)
{
	struct ifnet *ifp;
	struct sockaddr_dl *sdl;

	ifnet_head_lock_shared();
	TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
		ifnet_lock_shared(ifp);
		IFA_LOCK_SPIN(ifp->if_lladdr);
		sdl = (struct sockaddr_dl *)(void *)ifp->if_lladdr->ifa_addr;
		if (sdl->sdl_type == IFT_ETHER) {
			memcpy(node, LLADDR(sdl), ETHER_ADDR_LEN);
			IFA_UNLOCK(ifp->if_lladdr);
			ifnet_lock_done(ifp);
			ifnet_head_done();
			return (0);
		}
		IFA_UNLOCK(ifp->if_lladdr);
		ifnet_lock_done(ifp);
	}
	ifnet_head_done();

	return (-1);
}
static int
sysctl_rxpoll SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	uint32_t i;
	int err;

	i = if_rxpoll;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (net_rxpoll == 0)
		return (ENXIO);

	if_rxpoll = i;
	return (err);
}

static int
sysctl_rxpoll_mode_holdtime SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	uint64_t q;
	int err;

	q = if_rxpoll_mode_holdtime;

	err = sysctl_handle_quad(oidp, &q, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (q < IF_RXPOLL_MODE_HOLDTIME_MIN)
		q = IF_RXPOLL_MODE_HOLDTIME_MIN;

	if_rxpoll_mode_holdtime = q;

	return (err);
}

static int
sysctl_rxpoll_sample_holdtime SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	uint64_t q;
	int err;

	q = if_rxpoll_sample_holdtime;

	err = sysctl_handle_quad(oidp, &q, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (q < IF_RXPOLL_SAMPLETIME_MIN)
		q = IF_RXPOLL_SAMPLETIME_MIN;

	if_rxpoll_sample_holdtime = q;

	return (err);
}

static int
sysctl_rxpoll_interval_time SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	uint64_t q;
	int err;

	q = if_rxpoll_interval_time;

	err = sysctl_handle_quad(oidp, &q, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (q < IF_RXPOLL_INTERVALTIME_MIN)
		q = IF_RXPOLL_INTERVALTIME_MIN;

	if_rxpoll_interval_time = q;

	return (err);
}

static int
sysctl_rxpoll_wlowat SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	uint32_t i;
	int err;

	i = if_rxpoll_wlowat;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (i == 0 || i >= if_rxpoll_whiwat)
		return (EINVAL);

	if_rxpoll_wlowat = i;
	return (err);
}

static int
sysctl_rxpoll_whiwat SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	uint32_t i;
	int err;

	i = if_rxpoll_whiwat;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (i <= if_rxpoll_wlowat)
		return (EINVAL);

	if_rxpoll_whiwat = i;
	return (err);
}

static int
sysctl_sndq_maxlen SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	int i, err;

	i = if_sndq_maxlen;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (i < IF_SNDQ_MINLEN)
		i = IF_SNDQ_MINLEN;

	if_sndq_maxlen = i;
	return (err);
}

static int
sysctl_rcvq_maxlen SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	int i, err;

	i = if_rcvq_maxlen;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (i < IF_RCVQ_MINLEN)
		i = IF_RCVQ_MINLEN;

	if_rcvq_maxlen = i;
	return (err);
}
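/*
 * All of the handlers above share one read-validate-clamp-store shape
 * (sketch with a hypothetical `tunable' and `TUNABLE_MIN'):
 *
 *	int v = tunable;
 *	err = sysctl_handle_int(oidp, &v, 0, req);
 *	if (err != 0 || req->newptr == USER_ADDR_NULL)
 *		return (err);		// read-only access or error
 *	if (v < TUNABLE_MIN)
 *		v = TUNABLE_MIN;	// clamp rather than reject
 *	tunable = v;
 *	return (err);
 */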
void
dlil_node_present(struct ifnet *ifp, struct sockaddr *sa,
    int32_t rssi, int lqm, int npm, u_int8_t srvinfo[48])
{
	struct kev_dl_node_presence kev;
	struct sockaddr_dl *sdl;
	struct sockaddr_in6 *sin6;

	VERIFY(ifp != NULL);
	VERIFY(sa != NULL);
	VERIFY(sa->sa_family == AF_LINK || sa->sa_family == AF_INET6);

	bzero(&kev, sizeof (kev));
	sin6 = &kev.sin6_node_address;
	sdl = &kev.sdl_node_address;
	nd6_alt_node_addr_decompose(ifp, sa, sdl, sin6);
	kev.rssi = rssi;
	kev.link_quality_metric = lqm;
	kev.node_proximity_metric = npm;
	bcopy(srvinfo, kev.node_service_info, sizeof (kev.node_service_info));

	nd6_alt_node_present(ifp, sin6, sdl, rssi, lqm, npm);
	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_PRESENCE,
	    &kev.link_data, sizeof (kev));
}

void
dlil_node_absent(struct ifnet *ifp, struct sockaddr *sa)
{
	struct kev_dl_node_absence kev;
	struct sockaddr_in6 *sin6;
	struct sockaddr_dl *sdl;

	VERIFY(ifp != NULL);
	VERIFY(sa != NULL);
	VERIFY(sa->sa_family == AF_LINK || sa->sa_family == AF_INET6);

	bzero(&kev, sizeof (kev));
	sin6 = &kev.sin6_node_address;
	sdl = &kev.sdl_node_address;
	nd6_alt_node_addr_decompose(ifp, sa, sdl, sin6);

	nd6_alt_node_absent(ifp, sin6);
	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_ABSENCE,
	    &kev.link_data, sizeof (kev));
}
static const void *
dlil_ifaddr_bytes(const struct sockaddr_dl *sdl, size_t *sizep,
    kauth_cred_t *credp)
{
	const u_int8_t *bytes;
	size_t size;

	bytes = CONST_LLADDR(sdl);
	size = sdl->sdl_alen;

	if (dlil_lladdr_ckreq) {
		switch (sdl->sdl_type) {
		case IFT_ETHER:
		case IFT_IEEE1394:
			break;
		default:
			credp = NULL;
			break;
		};

		if (credp && mac_system_check_info(*credp, "net.link.addr")) {
			static const u_int8_t unspec[FIREWIRE_EUI64_LEN] = {
				[0] = 2
			};

			bytes = unspec;
		}
	}
#pragma unused(credp)

	if (sizep != NULL) *sizep = size;
	return (bytes);
}
void
dlil_report_issues(struct ifnet *ifp, u_int8_t modid[DLIL_MODIDLEN],
    u_int8_t info[DLIL_MODARGLEN])
{
	struct kev_dl_issues kev;
	struct timeval tv;

	VERIFY(ifp != NULL);
	VERIFY(modid != NULL);
	_CASSERT(sizeof (kev.modid) == DLIL_MODIDLEN);
	_CASSERT(sizeof (kev.info) == DLIL_MODARGLEN);

	bzero(&kev, sizeof (kev));

	microtime(&tv);
	kev.timestamp = tv.tv_sec;
	bcopy(modid, &kev.modid, DLIL_MODIDLEN);
	if (info != NULL)
		bcopy(info, &kev.info, DLIL_MODARGLEN);

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_ISSUES,
	    &kev.link_data, sizeof (kev));
}
int
ifnet_getset_opportunistic(ifnet_t ifp, u_long cmd, struct ifreq *ifr,
    struct proc *p)
{
	u_int32_t level = IFNET_THROTTLE_OFF;
	errno_t result = 0;

	VERIFY(cmd == SIOCSIFOPPORTUNISTIC || cmd == SIOCGIFOPPORTUNISTIC);

	if (cmd == SIOCSIFOPPORTUNISTIC) {
		/*
		 * XXX: Use priv_check_cred() instead of root check?
		 */
		if ((result = proc_suser(p)) != 0)
			return (result);

		if (ifr->ifr_opportunistic.ifo_flags ==
		    IFRIFOF_BLOCK_OPPORTUNISTIC)
			level = IFNET_THROTTLE_OPPORTUNISTIC;
		else if (ifr->ifr_opportunistic.ifo_flags == 0)
			level = IFNET_THROTTLE_OFF;
		else
			result = EINVAL;

		if (result == 0)
			result = ifnet_set_throttle(ifp, level);
	} else if ((result = ifnet_get_throttle(ifp, &level)) == 0) {
		ifr->ifr_opportunistic.ifo_flags = 0;
		if (level == IFNET_THROTTLE_OPPORTUNISTIC) {
			ifr->ifr_opportunistic.ifo_flags |=
			    IFRIFOF_BLOCK_OPPORTUNISTIC;
		}
	}

	/*
	 * Return the count of current opportunistic connections
	 * over the interface.
	 */
	if (result == 0) {
		uint32_t flags = 0;
		flags |= (cmd == SIOCSIFOPPORTUNISTIC) ?
		    INPCB_OPPORTUNISTIC_SETCMD : 0;
		flags |= (level == IFNET_THROTTLE_OPPORTUNISTIC) ?
		    INPCB_OPPORTUNISTIC_THROTTLEON : 0;
		ifr->ifr_opportunistic.ifo_inuse =
		    udp_count_opportunistic(ifp->if_index, flags) +
		    tcp_count_opportunistic(ifp->if_index, flags);
	}

	if (result == EALREADY)
		result = 0;

	return (result);
}
errno_t
ifnet_get_throttle(struct ifnet *ifp, u_int32_t *level)
{
	struct ifclassq *ifq;
	int err = 0;

	if (!(ifp->if_eflags & IFEF_TXSTART))
		return (ENXIO);

	*level = IFNET_THROTTLE_OFF;

	ifq = &ifp->if_snd;
	IFCQ_LOCK(ifq);
	/* Throttling works only for IFCQ, not ALTQ instances */
	if (IFCQ_IS_ENABLED(ifq))
		IFCQ_GET_THROTTLE(ifq, *level, err);
	IFCQ_UNLOCK(ifq);

	return (err);
}

errno_t
ifnet_set_throttle(struct ifnet *ifp, u_int32_t level)
{
	struct ifclassq *ifq;
	int err = 0;

	if (!(ifp->if_eflags & IFEF_TXSTART))
		return (ENXIO);

	ifq = &ifp->if_snd;

	switch (level) {
	case IFNET_THROTTLE_OFF:
	case IFNET_THROTTLE_OPPORTUNISTIC:
		break;
	default:
		return (EINVAL);
	}

	IFCQ_LOCK(ifq);
	if (IFCQ_IS_ENABLED(ifq))
		IFCQ_SET_THROTTLE(ifq, level, err);
	IFCQ_UNLOCK(ifq);

	if (err == 0) {
		printf("%s: throttling level set to %d\n", if_name(ifp),
		    level);
		if (level == IFNET_THROTTLE_OFF)
			ifnet_start(ifp);
	}

	return (err);
}
int
ifnet_getset_log(ifnet_t ifp, u_long cmd, struct ifreq *ifr,
    struct proc *p)
{
#pragma unused(p)
	int result = 0;
	uint32_t flags;
	int level, category, subcategory;

	VERIFY(cmd == SIOCSIFLOG || cmd == SIOCGIFLOG);

	if (cmd == SIOCSIFLOG) {
		if ((result = priv_check_cred(kauth_cred_get(),
		    PRIV_NET_INTERFACE_CONTROL, 0)) != 0)
			return (result);

		level = ifr->ifr_log.ifl_level;
		if (level < IFNET_LOG_MIN || level > IFNET_LOG_MAX)
			result = EINVAL;

		flags = ifr->ifr_log.ifl_flags;
		if ((flags &= IFNET_LOGF_MASK) == 0)
			result = EINVAL;

		category = ifr->ifr_log.ifl_category;
		subcategory = ifr->ifr_log.ifl_subcategory;

		if (result == 0)
			result = ifnet_set_log(ifp, level, flags,
			    category, subcategory);
	} else {
		result = ifnet_get_log(ifp, &level, &flags, &category,
		    &subcategory);
		if (result == 0) {
			ifr->ifr_log.ifl_level = level;
			ifr->ifr_log.ifl_flags = flags;
			ifr->ifr_log.ifl_category = category;
			ifr->ifr_log.ifl_subcategory = subcategory;
		}
	}

	return (result);
}
int
ifnet_set_log(struct ifnet *ifp, int32_t level, uint32_t flags,
    int32_t category, int32_t subcategory)
{
	int err = 0;

	VERIFY(level >= IFNET_LOG_MIN && level <= IFNET_LOG_MAX);
	VERIFY(flags & IFNET_LOGF_MASK);

	/*
	 * The logging level applies to all facilities; make sure to
	 * update them all with the most current level.
	 */
	flags |= ifp->if_log.flags;

	if (ifp->if_output_ctl != NULL) {
		struct ifnet_log_params l;

		bzero(&l, sizeof (l));
		l.level = level;
		l.flags = flags;
		l.flags &= ~IFNET_LOGF_DLIL;
		l.category = category;
		l.subcategory = subcategory;

		/* Send this request to lower layers */
		if (l.flags != 0) {
			err = ifp->if_output_ctl(ifp, IFNET_CTL_SET_LOG,
			    sizeof (l), &l);
		}
	} else if ((flags & ~IFNET_LOGF_DLIL) && ifp->if_output_ctl == NULL) {
		/*
		 * If targeted to the lower layers without an output
		 * control callback registered on the interface, just
		 * silently ignore facilities other than ours.
		 */
		flags &= IFNET_LOGF_DLIL;
		if (flags == 0 && (!(ifp->if_log.flags & IFNET_LOGF_DLIL)))
			level = 0;
	}

	if (err == 0) {
		if ((ifp->if_log.level = level) == IFNET_LOG_DEFAULT)
			ifp->if_log.flags = 0;
		else
			ifp->if_log.flags |= flags;

		log(LOG_INFO, "%s: logging level set to %d flags=%b "
		    "arg=%b, category=%d subcategory=%d\n", if_name(ifp),
		    ifp->if_log.level, ifp->if_log.flags,
		    IFNET_LOGF_BITS, flags, IFNET_LOGF_BITS,
		    category, subcategory);
	}

	return (err);
}
*ifp
, int32_t *level
, uint32_t *flags
,
7759 int32_t *category
, int32_t *subcategory
)
7762 *level
= ifp
->if_log
.level
;
7764 *flags
= ifp
->if_log
.flags
;
7765 if (category
!= NULL
)
7766 *category
= ifp
->if_log
.category
;
7767 if (subcategory
!= NULL
)
7768 *subcategory
= ifp
->if_log
.subcategory
;
int
ifnet_notify_address(struct ifnet *ifp, int af)
{
	struct ifnet_notify_address_params na;

	(void) pf_ifaddr_hook(ifp);

	if (ifp->if_output_ctl == NULL)
		return (EOPNOTSUPP);

	bzero(&na, sizeof (na));
	na.address_family = af;

	return (ifp->if_output_ctl(ifp, IFNET_CTL_NOTIFY_ADDRESS,
	    sizeof (na), &na));
}
errno_t
ifnet_flowid(struct ifnet *ifp, uint32_t *flowid)
{
	if (ifp == NULL || flowid == NULL) {
		return (EINVAL);
	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    !IF_FULLY_ATTACHED(ifp)) {
		return (ENXIO);
	}

	*flowid = ifp->if_flowhash;

	return (0);
}

errno_t
ifnet_disable_output(struct ifnet *ifp)
{
	int err;

	if (ifp == NULL) {
		return (EINVAL);
	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    !IF_FULLY_ATTACHED(ifp)) {
		return (ENXIO);
	}

	if ((err = ifnet_fc_add(ifp)) == 0) {
		lck_mtx_lock_spin(&ifp->if_start_lock);
		ifp->if_start_flags |= IFSF_FLOW_CONTROLLED;
		lck_mtx_unlock(&ifp->if_start_lock);
	}
	return (err);
}

errno_t
ifnet_enable_output(struct ifnet *ifp)
{
	if (ifp == NULL) {
		return (EINVAL);
	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    !IF_FULLY_ATTACHED(ifp)) {
		return (ENXIO);
	}

	ifnet_start_common(ifp, 1);
	return (0);
}
void
ifnet_flowadv(uint32_t flowhash)
{
	struct ifnet_fc_entry *ifce;
	struct ifnet *ifp;

	ifce = ifnet_fc_get(flowhash);
	if (ifce == NULL)
		return;

	VERIFY(ifce->ifce_ifp != NULL);
	ifp = ifce->ifce_ifp;

	/* flow hash gets recalculated per attach, so check */
	if (ifnet_is_attached(ifp, 1)) {
		if (ifp->if_flowhash == flowhash)
			(void) ifnet_enable_output(ifp);
		ifnet_decr_iorefcnt(ifp);
	}
	ifnet_fc_entry_free(ifce);
}

/*
 * Function to compare ifnet_fc_entries in ifnet flow control tree
 */
static inline int
ifce_cmp(const struct ifnet_fc_entry *fc1, const struct ifnet_fc_entry *fc2)
{
	return (fc1->ifce_flowhash - fc2->ifce_flowhash);
}
static int
ifnet_fc_add(struct ifnet *ifp)
{
	struct ifnet_fc_entry keyfc, *ifce;
	uint32_t flowhash;

	VERIFY(ifp != NULL && (ifp->if_eflags & IFEF_TXSTART));
	VERIFY(ifp->if_flowhash != 0);
	flowhash = ifp->if_flowhash;

	bzero(&keyfc, sizeof (keyfc));
	keyfc.ifce_flowhash = flowhash;

	lck_mtx_lock_spin(&ifnet_fc_lock);
	ifce = RB_FIND(ifnet_fc_tree, &ifnet_fc_tree, &keyfc);
	if (ifce != NULL && ifce->ifce_ifp == ifp) {
		/* Entry is already in ifnet_fc_tree, return */
		lck_mtx_unlock(&ifnet_fc_lock);
		return (0);
	}

	if (ifce != NULL) {
		/*
		 * There is a different fc entry with the same flow hash
		 * but different ifp pointer. There can be a collision
		 * on flow hash but the probability is low. Let's just
		 * avoid adding a second one when there is a collision.
		 */
		lck_mtx_unlock(&ifnet_fc_lock);
		return (EAGAIN);
	}

	/* become regular mutex */
	lck_mtx_convert_spin(&ifnet_fc_lock);

	ifce = zalloc_noblock(ifnet_fc_zone);
	if (ifce == NULL) {
		/* memory allocation failed */
		lck_mtx_unlock(&ifnet_fc_lock);
		return (ENOMEM);
	}
	bzero(ifce, ifnet_fc_zone_size);

	ifce->ifce_flowhash = flowhash;
	ifce->ifce_ifp = ifp;

	RB_INSERT(ifnet_fc_tree, &ifnet_fc_tree, ifce);
	lck_mtx_unlock(&ifnet_fc_lock);
	return (0);
}

static struct ifnet_fc_entry *
ifnet_fc_get(uint32_t flowhash)
{
	struct ifnet_fc_entry keyfc, *ifce;
	struct ifnet *ifp;

	bzero(&keyfc, sizeof (keyfc));
	keyfc.ifce_flowhash = flowhash;

	lck_mtx_lock_spin(&ifnet_fc_lock);
	ifce = RB_FIND(ifnet_fc_tree, &ifnet_fc_tree, &keyfc);
	if (ifce == NULL) {
		/* Entry is not present in ifnet_fc_tree, return */
		lck_mtx_unlock(&ifnet_fc_lock);
		return (NULL);
	}

	RB_REMOVE(ifnet_fc_tree, &ifnet_fc_tree, ifce);

	VERIFY(ifce->ifce_ifp != NULL);
	ifp = ifce->ifce_ifp;

	/* become regular mutex */
	lck_mtx_convert_spin(&ifnet_fc_lock);

	if (!ifnet_is_attached(ifp, 0)) {
		/*
		 * This ifp is not attached or in the process of being
		 * detached; just don't process it.
		 */
		ifnet_fc_entry_free(ifce);
		ifce = NULL;
	}
	lck_mtx_unlock(&ifnet_fc_lock);

	return (ifce);
}

static void
ifnet_fc_entry_free(struct ifnet_fc_entry *ifce)
{
	zfree(ifnet_fc_zone, ifce);
}
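/*
 * Lookup sketch (illustrative): RB_FIND() against the flow-control tree
 * takes a stack-allocated key entry with only the compare field set, so
 * no allocation happens unless an insert is actually needed:
 *
 *	struct ifnet_fc_entry key;
 *
 *	bzero(&key, sizeof (key));
 *	key.ifce_flowhash = flowhash;
 *	ifce = RB_FIND(ifnet_fc_tree, &ifnet_fc_tree, &key);
 */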
static uint32_t
ifnet_calc_flowhash(struct ifnet *ifp)
{
	struct ifnet_flowhash_key fh __attribute__((aligned(8)));
	uint32_t flowhash = 0;

	if (ifnet_flowhash_seed == 0)
		ifnet_flowhash_seed = RandomULong();

	bzero(&fh, sizeof (fh));

	(void) snprintf(fh.ifk_name, sizeof (fh.ifk_name), "%s", ifp->if_name);
	fh.ifk_unit = ifp->if_unit;
	fh.ifk_flags = ifp->if_flags;
	fh.ifk_eflags = ifp->if_eflags;
	fh.ifk_capabilities = ifp->if_capabilities;
	fh.ifk_capenable = ifp->if_capenable;
	fh.ifk_output_sched_model = ifp->if_output_sched_model;
	fh.ifk_rand1 = RandomULong();
	fh.ifk_rand2 = RandomULong();

try_again:
	flowhash = net_flowhash(&fh, sizeof (fh), ifnet_flowhash_seed);
	if (flowhash == 0) {
		/* try to get a non-zero flowhash */
		ifnet_flowhash_seed = RandomULong();
		goto try_again;
	}

	return (flowhash);
}
int
ifnet_set_netsignature(struct ifnet *ifp, uint8_t family, uint8_t len,
    uint16_t flags, uint8_t *data)
{
#pragma unused(flags)
	int error = 0;

	if (ifp == NULL || data == NULL)
		return (EINVAL);

	switch (family) {
	case AF_INET:
		if_inetdata_lock_exclusive(ifp);
		if (IN_IFEXTRA(ifp) != NULL) {
			if (len == 0) {
				/* Allow clearing the signature */
				IN_IFEXTRA(ifp)->netsig_len = 0;
				bzero(IN_IFEXTRA(ifp)->netsig,
				    sizeof (IN_IFEXTRA(ifp)->netsig));
				if_inetdata_lock_done(ifp);
				break;
			} else if (len > sizeof (IN_IFEXTRA(ifp)->netsig)) {
				error = EINVAL;
				if_inetdata_lock_done(ifp);
				break;
			}
			IN_IFEXTRA(ifp)->netsig_len = len;
			bcopy(data, IN_IFEXTRA(ifp)->netsig, len);
		} else {
			error = ENOMEM;
		}
		if_inetdata_lock_done(ifp);
		break;

	case AF_INET6:
		if_inet6data_lock_exclusive(ifp);
		if (IN6_IFEXTRA(ifp) != NULL) {
			if (len == 0) {
				/* Allow clearing the signature */
				IN6_IFEXTRA(ifp)->netsig_len = 0;
				bzero(IN6_IFEXTRA(ifp)->netsig,
				    sizeof (IN6_IFEXTRA(ifp)->netsig));
				if_inet6data_lock_done(ifp);
				break;
			} else if (len > sizeof (IN6_IFEXTRA(ifp)->netsig)) {
				error = EINVAL;
				if_inet6data_lock_done(ifp);
				break;
			}
			IN6_IFEXTRA(ifp)->netsig_len = len;
			bcopy(data, IN6_IFEXTRA(ifp)->netsig, len);
		} else {
			error = ENOMEM;
		}
		if_inet6data_lock_done(ifp);
		break;

	default:
		error = EINVAL;
		break;
	}

	return (error);
}
int
ifnet_get_netsignature(struct ifnet *ifp, uint8_t family, uint8_t *len,
    uint16_t *flags, uint8_t *data)
{
	int error = 0;

	if (ifp == NULL || len == NULL || data == NULL)
		return (EINVAL);

	switch (family) {
	case AF_INET:
		if_inetdata_lock_shared(ifp);
		if (IN_IFEXTRA(ifp) != NULL) {
			if (*len == 0 || *len < IN_IFEXTRA(ifp)->netsig_len) {
				error = EINVAL;
				if_inetdata_lock_done(ifp);
				break;
			}
			if ((*len = IN_IFEXTRA(ifp)->netsig_len) > 0)
				bcopy(IN_IFEXTRA(ifp)->netsig, data, *len);
			else
				error = ENOENT;
		} else {
			error = ENOMEM;
		}
		if_inetdata_lock_done(ifp);
		break;

	case AF_INET6:
		if_inet6data_lock_shared(ifp);
		if (IN6_IFEXTRA(ifp) != NULL) {
			if (*len == 0 || *len < IN6_IFEXTRA(ifp)->netsig_len) {
				error = EINVAL;
				if_inet6data_lock_done(ifp);
				break;
			}
			if ((*len = IN6_IFEXTRA(ifp)->netsig_len) > 0)
				bcopy(IN6_IFEXTRA(ifp)->netsig, data, *len);
			else
				error = ENOENT;
		} else {
			error = ENOMEM;
		}
		if_inet6data_lock_done(ifp);
		break;

	default:
		error = EINVAL;
		break;
	}

	if (error == 0 && flags != NULL)
		*flags = 0;

	return (error);
}
int
ifnet_set_nat64prefix(struct ifnet *ifp, struct ipv6_prefix *prefixes)
{
	int i, error = 0, one_set = 0;

	if_inet6data_lock_exclusive(ifp);

	if (IN6_IFEXTRA(ifp) == NULL) {
		error = ENOMEM;
		goto out;
	}

	for (i = 0; i < NAT64_MAX_NUM_PREFIXES; i++) {
		uint32_t prefix_len =
		    prefixes[i].prefix_len;
		struct in6_addr *prefix =
		    &prefixes[i].ipv6_prefix;

		if (prefix_len == 0) {
			/* Allow clearing the prefix */
			IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len = 0;
			bzero(&IN6_IFEXTRA(ifp)->nat64_prefixes[i].ipv6_prefix,
			    sizeof(struct in6_addr));
			continue;
		} else if (prefix_len != NAT64_PREFIX_LEN_32 &&
		    prefix_len != NAT64_PREFIX_LEN_40 &&
		    prefix_len != NAT64_PREFIX_LEN_48 &&
		    prefix_len != NAT64_PREFIX_LEN_56 &&
		    prefix_len != NAT64_PREFIX_LEN_64 &&
		    prefix_len != NAT64_PREFIX_LEN_96) {
			error = EINVAL;
			goto out;
		}

		if (IN6_IS_SCOPE_EMBED(prefix)) {
			error = EINVAL;
			goto out;
		}

		IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len = prefix_len;
		bcopy(prefix, &IN6_IFEXTRA(ifp)->nat64_prefixes[i].ipv6_prefix,
		    sizeof(struct in6_addr));
		one_set = 1;
	}

out:
	if_inet6data_lock_done(ifp);

	if (error == 0 && one_set != 0)
		necp_update_all_clients();

	return (error);
}
int
ifnet_get_nat64prefix(struct ifnet *ifp, struct ipv6_prefix *prefixes)
{
	int i, found_one = 0, error = 0;

	if (ifp == NULL)
		return (EINVAL);

	if_inet6data_lock_shared(ifp);

	if (IN6_IFEXTRA(ifp) == NULL) {
		error = ENOMEM;
		goto out;
	}

	for (i = 0; i < NAT64_MAX_NUM_PREFIXES; i++) {
		if (IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len != 0)
			found_one = 1;
	}

	if (found_one == 0) {
		error = ENOENT;
		goto out;
	}

	if (prefixes)
		bcopy(IN6_IFEXTRA(ifp)->nat64_prefixes, prefixes,
		    sizeof(IN6_IFEXTRA(ifp)->nat64_prefixes));

out:
	if_inet6data_lock_done(ifp);

	return (error);
}
static void
dlil_output_cksum_dbg(struct ifnet *ifp, struct mbuf *m, uint32_t hoff,
    protocol_family_t pf)
{
#pragma unused(ifp)
	uint32_t did_sw;

	if (!(hwcksum_dbg_mode & HWCKSUM_DBG_FINALIZE_FORCED) ||
	    (m->m_pkthdr.csum_flags & (CSUM_TSO_IPV4|CSUM_TSO_IPV6)))
		return;

	switch (pf) {
	case PF_INET:
		did_sw = in_finalize_cksum(m, hoff, m->m_pkthdr.csum_flags);
		if (did_sw & CSUM_DELAY_IP)
			hwcksum_dbg_finalized_hdr++;
		if (did_sw & CSUM_DELAY_DATA)
			hwcksum_dbg_finalized_data++;
		break;
	case PF_INET6:
		/*
		 * Checksum offload should not have been enabled when
		 * extension headers exist; that also means that we
		 * cannot force-finalize packets with extension headers.
		 * Indicate to the callee that it should skip such cases
		 * by setting optlen to -1.
		 */
		did_sw = in6_finalize_cksum(m, hoff, -1, -1,
		    m->m_pkthdr.csum_flags);
		if (did_sw & CSUM_DELAY_IPV6_DATA)
			hwcksum_dbg_finalized_data++;
		break;
	default:
		return;
	}
}
static void
dlil_input_cksum_dbg(struct ifnet *ifp, struct mbuf *m, char *frame_header,
    protocol_family_t pf)
{
	uint16_t sum = 0;
	uint32_t hlen;

	if (frame_header == NULL ||
	    frame_header < (char *)mbuf_datastart(m) ||
	    frame_header > (char *)m->m_data) {
		printf("%s: frame header pointer 0x%llx out of range "
		    "[0x%llx,0x%llx] for mbuf 0x%llx\n", if_name(ifp),
		    (uint64_t)VM_KERNEL_ADDRPERM(frame_header),
		    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_datastart(m)),
		    (uint64_t)VM_KERNEL_ADDRPERM(m->m_data),
		    (uint64_t)VM_KERNEL_ADDRPERM(m));
		return;
	}
	hlen = (m->m_data - frame_header);

	switch (pf) {
	case PF_INET:
	case PF_INET6:
		break;
	default:
		return;
	}

	/*
	 * Force partial checksum offload; useful to simulate cases
	 * where the hardware does not support partial checksum offload,
	 * in order to validate correctness throughout the layers above.
	 */
	if (hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED) {
		uint32_t foff = hwcksum_dbg_partial_rxoff_forced;

		if (foff > (uint32_t)m->m_pkthdr.len)
			return;

		m->m_pkthdr.csum_flags &= ~CSUM_RX_FLAGS;

		/* Compute 16-bit 1's complement sum from forced offset */
		sum = m_sum16(m, foff, (m->m_pkthdr.len - foff));

		m->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PARTIAL);
		m->m_pkthdr.csum_rx_val = sum;
		m->m_pkthdr.csum_rx_start = (foff + hlen);

		hwcksum_dbg_partial_forced++;
		hwcksum_dbg_partial_forced_bytes += m->m_pkthdr.len;
	}

	/*
	 * Partial checksum offload verification (and adjustment);
	 * useful to validate and test cases where the hardware
	 * supports partial checksum offload.
	 */
	if ((m->m_pkthdr.csum_flags &
	    (CSUM_DATA_VALID | CSUM_PARTIAL | CSUM_PSEUDO_HDR)) ==
	    (CSUM_DATA_VALID | CSUM_PARTIAL)) {
		uint32_t rxoff;

		/* Start offset must begin after frame header */
		rxoff = m->m_pkthdr.csum_rx_start;
		if (hlen > rxoff) {
			hwcksum_dbg_bad_rxoff++;
			if (dlil_verbose) {
				printf("%s: partial cksum start offset %d "
				    "is less than frame header length %d for "
				    "mbuf 0x%llx\n", if_name(ifp), rxoff, hlen,
				    (uint64_t)VM_KERNEL_ADDRPERM(m));
			}
			return;
		}
		rxoff -= hlen;

		if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED)) {
			/*
			 * Compute the expected 16-bit 1's complement sum;
			 * skip this if we've already computed it above
			 * when partial checksum offload is forced.
			 */
			sum = m_sum16(m, rxoff, (m->m_pkthdr.len - rxoff));

			/* Hardware or driver is buggy */
			if (sum != m->m_pkthdr.csum_rx_val) {
				hwcksum_dbg_bad_cksum++;
				if (dlil_verbose) {
					printf("%s: bad partial cksum value "
					    "0x%x (expected 0x%x) for mbuf "
					    "0x%llx [rx_start %d]\n",
					    if_name(ifp),
					    m->m_pkthdr.csum_rx_val, sum,
					    (uint64_t)VM_KERNEL_ADDRPERM(m),
					    m->m_pkthdr.csum_rx_start);
				}
				return;
			}
		}
		hwcksum_dbg_verified++;

		/*
		 * This code allows us to emulate various hardware that
		 * performs the 16-bit 1's complement sum beginning at
		 * various start offset values.
		 */
		if (hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_RXOFF_ADJ) {
			uint32_t aoff = hwcksum_dbg_partial_rxoff_adj;

			if (aoff == rxoff || aoff > (uint32_t)m->m_pkthdr.len)
				return;

			sum = m_adj_sum16(m, rxoff, aoff,
			    m_pktlen(m) - aoff, sum);

			m->m_pkthdr.csum_rx_val = sum;
			m->m_pkthdr.csum_rx_start = (aoff + hlen);

			hwcksum_dbg_adjusted++;
		}
	}
}
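/*
 * For reference (illustrative only, not the optimized m_sum16()/b_sum16()
 * used above): a plain 16-bit 1's complement sum over a contiguous buffer
 * folds the carries back into the low 16 bits:
 *
 *	static uint16_t
 *	sum16_ref(const uint8_t *p, int len)
 *	{
 *		uint32_t sum = 0;
 *
 *		while (len > 1) {
 *			sum += (p[0] << 8) | p[1];	// 16-bit words
 *			p += 2; len -= 2;
 *		}
 *		if (len > 0)
 *			sum += (p[0] << 8);		// odd trailing byte
 *		while (sum > 0xffff)			// fold carries
 *			sum = (sum & 0xffff) + (sum >> 16);
 *		return ((uint16_t)sum);
 *	}
 */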
static int
sysctl_hwcksum_dbg_mode SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	u_int32_t i;
	int err;

	i = hwcksum_dbg_mode;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (hwcksum_dbg == 0)
		return (ENODEV);

	if ((i & ~HWCKSUM_DBG_MASK) != 0)
		return (EINVAL);

	hwcksum_dbg_mode = (i & HWCKSUM_DBG_MASK);

	return (err);
}

static int
sysctl_hwcksum_dbg_partial_rxoff_forced SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	u_int32_t i;
	int err;

	i = hwcksum_dbg_partial_rxoff_forced;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED))
		return (ENODEV);

	hwcksum_dbg_partial_rxoff_forced = i;

	return (err);
}

static int
sysctl_hwcksum_dbg_partial_rxoff_adj SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	u_int32_t i;
	int err;

	i = hwcksum_dbg_partial_rxoff_adj;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_RXOFF_ADJ))
		return (ENODEV);

	hwcksum_dbg_partial_rxoff_adj = i;

	return (err);
}
static int
sysctl_tx_chain_len_stats SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
	int err;

	if (req->oldptr == USER_ADDR_NULL) {
	}
	if (req->newptr != USER_ADDR_NULL) {
		return (EPERM);
	}
	err = SYSCTL_OUT(req, &tx_chain_len_stats,
	    sizeof(struct chain_len_stats));

	return (err);
}
#if DEBUG || DEVELOPMENT
/* Blob for sum16 verification */
static uint8_t sumdata[] = {
	0x1f, 0x8b, 0x08, 0x08, 0x4c, 0xe5, 0x9a, 0x4f, 0x00, 0x03,
	0x5f, 0x00, 0x5d, 0x91, 0x41, 0x4e, 0xc4, 0x30, 0x0c, 0x45,
	0xf7, 0x9c, 0xc2, 0x07, 0x18, 0xf5, 0x0e, 0xb0, 0xe2, 0x00,
	0x48, 0x88, 0xa5, 0xdb, 0xba, 0x49, 0x34, 0x69, 0xdc, 0x71,
	0x92, 0xa9, 0xc2, 0x8a, 0x6b, 0x70, 0x3d, 0x4e, 0x82, 0x93,
	0xb4, 0x08, 0xd8, 0xc5, 0xb1, 0xfd, 0xff, 0xb3, 0xfd, 0x4c,
	0x42, 0x5f, 0x1f, 0x9f, 0x11, 0x12, 0x43, 0xb2, 0x04, 0x93,
	0xe0, 0x7b, 0x01, 0x0e, 0x14, 0x07, 0x78, 0xd1, 0x78, 0x75,
	0x71, 0x71, 0xe9, 0x08, 0x84, 0x46, 0xf2, 0xc7, 0x3b, 0x09,
	0xe7, 0xd1, 0xd3, 0x8a, 0x57, 0x92, 0x33, 0xcd, 0x39, 0xcc,
	0xb0, 0x91, 0x89, 0xe0, 0x42, 0x53, 0x8b, 0xb7, 0x8c, 0x42,
	0x60, 0xd9, 0x9f, 0x7a, 0x55, 0x19, 0x76, 0xcb, 0x10, 0x49,
	0x35, 0xac, 0x0b, 0x5a, 0x3c, 0xbb, 0x65, 0x51, 0x8c, 0x90,
	0x7c, 0x69, 0x45, 0x45, 0x81, 0xb4, 0x2b, 0x70, 0x82, 0x85,
	0x55, 0x91, 0x17, 0x90, 0xdc, 0x14, 0x1e, 0x35, 0x52, 0xdd,
	0x02, 0x16, 0xef, 0xb5, 0x40, 0x89, 0xe2, 0x46, 0x53, 0xad,
	0x93, 0x6e, 0x98, 0x30, 0xe5, 0x08, 0xb7, 0xcc, 0x03, 0xbc,
	0x71, 0x86, 0x09, 0x43, 0x0d, 0x52, 0xf5, 0xa2, 0xf5, 0xa2,
	0x56, 0x11, 0x8d, 0xa8, 0xf5, 0xee, 0x92, 0x3d, 0xfe, 0x8c,
	0x67, 0x71, 0x8b, 0x0e, 0x2d, 0x70, 0x77, 0xbe, 0xbe, 0xea,
	0xbf, 0x9a, 0x8d, 0x9c, 0x53, 0x53, 0xe5, 0xe0, 0x4b, 0x87,
	0x85, 0xd2, 0x45, 0x95, 0x30, 0xc1, 0xcc, 0xe0, 0x74, 0x54,
	0x13, 0x58, 0xe8, 0xe8, 0x79, 0xa2, 0x09, 0x73, 0xa4, 0x0e,
	0x39, 0x59, 0x0c, 0xe6, 0x9c, 0xb2, 0x4f, 0x06, 0x5b, 0x8e,
	0xcd, 0x17, 0x6c, 0x5e, 0x95, 0x4d, 0x70, 0xa2, 0x0a, 0xbf,
	0xa3, 0xcc, 0x03, 0xbc, 0x5a, 0xe7, 0x75, 0x06, 0x5e, 0x75,
	0xef, 0x58, 0x8e, 0x15, 0xd1, 0x0a, 0x18, 0xff, 0xdd, 0xe6,
	0x02, 0x3b, 0xb5, 0xb4, 0xa1, 0xe0, 0x72, 0xfc, 0xe3, 0xab,
	0x07, 0xe0, 0x4d, 0x65, 0xea, 0x92, 0xeb, 0xf2, 0x7b, 0x17,
	0x05, 0xce, 0xc6, 0xf6, 0x2b, 0xbb, 0x70, 0x3d, 0x00, 0x95,
	0xe0, 0x07, 0x52, 0x3b, 0x58, 0xfc, 0x7c, 0x69, 0x4d, 0xe9,
	0xf7, 0xa9, 0x66, 0x1e, 0x1e, 0xbe, 0x01, 0x69, 0x98, 0xfe,
	0xc8, 0x28, 0x02, 0x00, 0x00
};

/* Precomputed 16-bit 1's complement sums for various spans of the above data */
static struct {
	boolean_t	init;
	uint16_t	len;
	uint16_t	sumr;	/* reference */
	uint16_t	sumrp;	/* reference, precomputed */
} sumtbl[] = {
	{ FALSE, 0, 0, 0x0000 },
	{ FALSE, 1, 0, 0x001f },
	{ FALSE, 2, 0, 0x8b1f },
	{ FALSE, 3, 0, 0x8b27 },
	{ FALSE, 7, 0, 0x790e },
	{ FALSE, 11, 0, 0xcb6d },
	{ FALSE, 20, 0, 0x20dd },
	{ FALSE, 27, 0, 0xbabd },
	{ FALSE, 32, 0, 0xf3e8 },
	{ FALSE, 37, 0, 0x197d },
	{ FALSE, 43, 0, 0x9eae },
	{ FALSE, 64, 0, 0x4678 },
	{ FALSE, 127, 0, 0x9399 },
	{ FALSE, 256, 0, 0xd147 },
	{ FALSE, 325, 0, 0x0358 }
};
#define SUMTBL_MAX ((int)sizeof (sumtbl) / (int)sizeof (sumtbl[0]))
static void
dlil_verify_sum16(void)
{
	struct mbuf *m;
	uint8_t *buf;
	int n;

	/* Make sure test data plus extra room for alignment fits in cluster */
	_CASSERT((sizeof (sumdata) + (sizeof (uint64_t) * 2)) <= MCLBYTES);

	kprintf("DLIL: running SUM16 self-tests ... ");

	m = m_getcl(M_WAITOK, MT_DATA, M_PKTHDR);
	MH_ALIGN(m, sizeof (uint32_t));		/* 32-bit starting alignment */
	buf = mtod(m, uint8_t *);		/* base address */

	for (n = 0; n < SUMTBL_MAX; n++) {
		uint16_t len = sumtbl[n].len;
		int i;

		/* Verify for all possible alignments */
		for (i = 0; i < (int)sizeof (uint64_t); i++) {
			uint16_t sum, sumr;
			uint8_t *c;

			/* Copy over test data to mbuf */
			VERIFY(len <= sizeof (sumdata));
			c = buf + i;
			bcopy(sumdata, c, len);

			/* Zero-offset test (align by data pointer) */
			m->m_data = (caddr_t)c;
			m->m_len = len;
			sum = m_sum16(m, 0, len);

			if (!sumtbl[n].init) {
				sumr = in_cksum_mbuf_ref(m, len, 0, 0);
				sumtbl[n].sumr = sumr;
				sumtbl[n].init = TRUE;
			} else {
				sumr = sumtbl[n].sumr;
			}

			/* Something is horribly broken; stop now */
			if (sumr != sumtbl[n].sumrp) {
				panic_plain("\n%s: broken in_cksum_mbuf_ref() "
				    "for len=%d align=%d sum=0x%04x "
				    "[expected=0x%04x]\n", __func__,
				    len, i, sumr, sumtbl[n].sumrp);
				/* NOTREACHED */
			} else if (sum != sumr) {
				panic_plain("\n%s: broken m_sum16() for len=%d "
				    "align=%d sum=0x%04x [expected=0x%04x]\n",
				    __func__, len, i, sum, sumr);
				/* NOTREACHED */
			}

			/* Alignment test by offset (fixed data pointer) */
			m->m_data = (caddr_t)buf;
			m->m_len = (i + len);
			sum = m_sum16(m, i, len);

			/* Something is horribly broken; stop now */
			if (sum != sumr) {
				panic_plain("\n%s: broken m_sum16() for len=%d "
				    "offset=%d sum=0x%04x [expected=0x%04x]\n",
				    __func__, len, i, sum, sumr);
				/* NOTREACHED */
			}

			/* Simple sum16 contiguous buffer test by alignment */
			sum = b_sum16(c, len);
			if (sum != sumr) {
				/* Something is horribly broken; stop now */
				panic_plain("\n%s: broken b_sum16() for len=%d "
				    "align=%d sum=0x%04x [expected=0x%04x]\n",
				    __func__, len, i, sum, sumr);
				/* NOTREACHED */
			}
		}
	}
	m_freem(m);

	kprintf("PASSED\n");
}
#endif /* DEBUG || DEVELOPMENT */
#define	CASE_STRINGIFY(x) case x: return #x

__private_extern__ const char *
dlil_kev_dl_code_str(u_int32_t event_code)
{
	switch (event_code) {
	CASE_STRINGIFY(KEV_DL_SIFFLAGS);
	CASE_STRINGIFY(KEV_DL_SIFMETRICS);
	CASE_STRINGIFY(KEV_DL_SIFMTU);
	CASE_STRINGIFY(KEV_DL_SIFPHYS);
	CASE_STRINGIFY(KEV_DL_SIFMEDIA);
	CASE_STRINGIFY(KEV_DL_SIFGENERIC);
	CASE_STRINGIFY(KEV_DL_ADDMULTI);
	CASE_STRINGIFY(KEV_DL_DELMULTI);
	CASE_STRINGIFY(KEV_DL_IF_ATTACHED);
	CASE_STRINGIFY(KEV_DL_IF_DETACHING);
	CASE_STRINGIFY(KEV_DL_IF_DETACHED);
	CASE_STRINGIFY(KEV_DL_LINK_OFF);
	CASE_STRINGIFY(KEV_DL_LINK_ON);
	CASE_STRINGIFY(KEV_DL_PROTO_ATTACHED);
	CASE_STRINGIFY(KEV_DL_PROTO_DETACHED);
	CASE_STRINGIFY(KEV_DL_LINK_ADDRESS_CHANGED);
	CASE_STRINGIFY(KEV_DL_WAKEFLAGS_CHANGED);
	CASE_STRINGIFY(KEV_DL_IF_IDLE_ROUTE_REFCNT);
	CASE_STRINGIFY(KEV_DL_IFCAP_CHANGED);
	CASE_STRINGIFY(KEV_DL_LINK_QUALITY_METRIC_CHANGED);
	CASE_STRINGIFY(KEV_DL_NODE_PRESENCE);
	CASE_STRINGIFY(KEV_DL_NODE_ABSENCE);
	CASE_STRINGIFY(KEV_DL_MASTER_ELECTED);
	CASE_STRINGIFY(KEV_DL_ISSUES);
	CASE_STRINGIFY(KEV_DL_IFDELEGATE_CHANGED);
	default:
		break;
	}
	return ("");
}
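
/*
 * Usage note (illustrative): each CASE_STRINGIFY(X) arm expands to
 * "case X: return "X";", so dlil_kev_dl_code_str(KEV_DL_LINK_ON) returns
 * the literal string "KEV_DL_LINK_ON", and any event code without an arm
 * falls through to the empty string.
 */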
/*
 * Mirror the arguments of ifnet_get_local_ports_extended():
 *  ifindex: name[0]
 *  protocol: name[1]
 *  flags: name[2]
 */
static int
sysctl_get_ports_used SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp)
	int *name = (int *)arg1;
	u_int namelen = arg2;
	int error = 0;
	int idx;
	protocol_family_t protocol;
	u_int32_t flags;
	ifnet_t ifp = NULL;
	u_int8_t *bitfield = NULL;

	if (req->newptr != USER_ADDR_NULL) {
		error = EPERM;
		goto done;
	}
	if (namelen != 3) {
		error = ENOENT;
		goto done;
	}
	if (req->oldptr == USER_ADDR_NULL) {
		req->oldidx = bitstr_size(65536);
		goto done;
	}
	if (req->oldlen < bitstr_size(65536)) {
		error = ENOMEM;
		goto done;
	}

	idx = name[0];
	protocol = name[1];
	flags = name[2];

	ifnet_head_lock_shared();
	if (!IF_INDEX_IN_RANGE(idx)) {
		ifnet_head_done();
		error = ENOENT;
		goto done;
	}
	ifp = ifindex2ifnet[idx];
	ifnet_head_done();

	bitfield = _MALLOC(bitstr_size(65536), M_TEMP, M_WAITOK | M_ZERO);
	if (bitfield == NULL) {
		error = ENOMEM;
		goto done;
	}
	error = ifnet_get_local_ports_extended(ifp, protocol, flags, bitfield);
	if (error != 0) {
		printf("%s: ifnet_get_local_ports_extended() error %d\n",
		    __func__, error);
		goto done;
	}
	error = SYSCTL_OUT(req, bitfield, bitstr_size(65536));
done:
	if (bitfield != NULL)
		_FREE(bitfield, M_TEMP);
	return (error);
}
static void
dlil_dt_tcall_fn(thread_call_param_t arg0, thread_call_param_t arg1)
{
#pragma unused(arg1)
	struct ifnet *ifp = arg0;

	if (ifnet_is_attached(ifp, 1)) {
		nstat_ifnet_threshold_reached(ifp->if_index);
		ifnet_decr_iorefcnt(ifp);
	}
}
void
ifnet_notify_data_threshold(struct ifnet *ifp)
{
	uint64_t bytes = (ifp->if_ibytes + ifp->if_obytes);
	uint64_t oldbytes = ifp->if_dt_bytes;

	ASSERT(ifp->if_dt_tcall != NULL);

	/*
	 * If we went over the threshold, notify NetworkStatistics.
	 * We rate-limit it based on the threshold interval value.
	 */
	if (threshold_notify && (bytes - oldbytes) > ifp->if_data_threshold &&
	    OSCompareAndSwap64(oldbytes, bytes, &ifp->if_dt_bytes) &&
	    !thread_call_isactive(ifp->if_dt_tcall)) {
		uint64_t tival = (threshold_interval * NSEC_PER_SEC);
		uint64_t now = mach_absolute_time(), deadline = now;
		uint64_t ival;

		if (tival != 0) {
			nanoseconds_to_absolutetime(tival, &ival);
			clock_deadline_for_periodic_event(ival, now,
			    &deadline);
			(void) thread_call_enter_delayed(ifp->if_dt_tcall,
			    deadline);
		} else {
			(void) thread_call_enter(ifp->if_dt_tcall);
		}
	}
}
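
/*
 * Design note (illustrative): the OSCompareAndSwap64() above makes the
 * notification edge-triggered without a lock. Several threads may observe
 * (bytes - oldbytes) > if_data_threshold concurrently, but only the one
 * whose CAS advances if_dt_bytes from oldbytes to bytes goes on to arm the
 * thread call; the losers see a refreshed if_dt_bytes and back off. The
 * same pattern in portable C11 (hypothetical names):
 *
 *	#include <stdatomic.h>
 *	#include <stdbool.h>
 *	#include <stdint.h>
 *
 *	static _Atomic uint64_t dt_bytes;
 *
 *	static bool
 *	should_notify(uint64_t bytes, uint64_t threshold)
 *	{
 *		uint64_t old = atomic_load(&dt_bytes);
 *		// true for at most one caller per threshold crossing
 *		return ((bytes - old) > threshold &&
 *		    atomic_compare_exchange_strong(&dt_bytes, &old, bytes));
 *	}
 */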
#if (DEVELOPMENT || DEBUG)
/*
 * The sysctl variable name contains the input parameters of
 * ifnet_get_keepalive_offload_frames():
 *  ifp (interface index): name[0]
 *  frames_array_count: name[1]
 *  frame_data_offset: name[2]
 * The return length gives used_frames_count
 */
static int
sysctl_get_kao_frames SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp)
	int *name = (int *)arg1;
	u_int namelen = arg2;
	int idx;
	ifnet_t ifp = NULL;
	u_int32_t frames_array_count;
	size_t frame_data_offset;
	u_int32_t used_frames_count = 0;
	struct ifnet_keepalive_offload_frame *frames_array = NULL;
	int error = 0;
	u_int32_t i;

	/*
	 * Only root may look at other processes' TCP frames
	 */
	error = proc_suser(current_proc());
	if (error != 0)
		goto done;
	/*
	 * Validate the input parameters
	 */
	if (req->newptr != USER_ADDR_NULL) {
		error = EPERM;
		goto done;
	}
	if (namelen != 3) {
		error = EINVAL;
		goto done;
	}
	if (req->oldptr == USER_ADDR_NULL) {
		error = EINVAL;
		goto done;
	}
	if (req->oldlen == 0) {
		error = EINVAL;
		goto done;
	}
	idx = name[0];
	frames_array_count = name[1];
	frame_data_offset = name[2];

	/* Make sure the passed buffer is large enough */
	if (frames_array_count * sizeof(struct ifnet_keepalive_offload_frame) >
	    req->oldlen) {
		error = ENOMEM;
		goto done;
	}

	ifnet_head_lock_shared();
	if (!IF_INDEX_IN_RANGE(idx)) {
		ifnet_head_done();
		error = ENOENT;
		goto done;
	}
	ifp = ifindex2ifnet[idx];
	ifnet_head_done();

	frames_array = _MALLOC(frames_array_count *
	    sizeof(struct ifnet_keepalive_offload_frame), M_TEMP, M_WAITOK);
	if (frames_array == NULL) {
		error = ENOMEM;
		goto done;
	}

	error = ifnet_get_keepalive_offload_frames(ifp, frames_array,
	    frames_array_count, frame_data_offset, &used_frames_count);
	if (error != 0) {
		printf("%s: ifnet_get_keepalive_offload_frames error %d\n",
		    __func__, error);
		goto done;
	}

	for (i = 0; i < used_frames_count; i++) {
		error = SYSCTL_OUT(req, frames_array + i,
		    sizeof(struct ifnet_keepalive_offload_frame));
		if (error != 0)
			goto done;
	}
done:
	if (frames_array != NULL)
		_FREE(frames_array, M_TEMP);
	return (error);
}
#endif /* DEVELOPMENT || DEBUG */
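
/*
 * Usage sketch for the handler above (user space, illustrative; MIB name
 * and values are assumptions): the caller appends ifindex,
 * frames_array_count and frame_data_offset as the three trailing name
 * words, sizes the buffer for that many frames, and recovers
 * used_frames_count from the returned length:
 *
 *	struct ifnet_keepalive_offload_frame frames[8];
 *	size_t len = sizeof (frames);
 *	// mib[]/miblen set up via sysctlnametomib() as for get_ports_used
 *	// above, with if_nametoindex("en0"), 8 and 0 appended
 *	if (sysctl(mib, miblen, frames, &len, NULL, 0) == 0) {
 *		size_t used = len / sizeof (frames[0]);
 *		// the first `used` entries hold keepalive frames
 *	}
 */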
void
ifnet_update_stats_per_flow(struct ifnet_stats_per_flow *ifs,
    struct ifnet *ifp)
{
	tcp_update_stats_per_flow(ifs, ifp);
}
static void
dlil_mit_tcall_fn(thread_call_param_t arg0, thread_call_param_t arg1)
{
#pragma unused(arg1)
	struct ifnet *ifp = (struct ifnet *)arg0;
	struct dlil_threading_info *inp = ifp->if_inp;

	ifnet_lock_shared(ifp);
	if (!IF_FULLY_ATTACHED(ifp) || inp == NULL) {
		ifnet_lock_done(ifp);
		return;
	}

	lck_mtx_lock_spin(&inp->input_lck);
	inp->input_waiting |= DLIL_INPUT_WAITING;
	if (!(inp->input_waiting & DLIL_INPUT_RUNNING) ||
	    !qempty(&inp->rcvq_pkts)) {
		wakeup_one((caddr_t)&inp->input_waiting);
	}
	lck_mtx_unlock(&inp->input_lck);
	ifnet_lock_done(ifp);
}
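
/*
 * Design note (illustrative): this mitigation callback kicks the DLIL
 * input thread from a thread call rather than on every packet. It
 * publishes DLIL_INPUT_WAITING under the input lock and issues a wakeup
 * only when the input thread is not already running or packets are
 * pending, suppressing redundant wakeups. The shape of the handshake in
 * portable pthreads terms (hypothetical names):
 *
 *	pthread_mutex_lock(&q->lock);
 *	q->flags |= WAITING;			// publish the kick
 *	if (!(q->flags & RUNNING) || !queue_empty(&q->pkts))
 *		pthread_cond_signal(&q->cv);	// rouse the consumer
 *	pthread_mutex_unlock(&q->lock);
 */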