/*
 * Copyright (c) 1999-2017 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
 * support for mandatory and extensible security protections. This notice
 * is included in support of clause 2.2 (b) of the Apple Public License,
 * Version 2.0.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/domain.h>
#include <sys/random.h>
#include <sys/socketvar.h>
#include <net/if_dl.h>
#include <net/if.h>
#include <net/route.h>
#include <net/if_var.h>
#include <net/dlil.h>
#include <net/if_arp.h>
#include <net/iptap.h>
#include <net/pktap.h>
#include <sys/kern_event.h>
#include <sys/kdebug.h>
#include <sys/mcache.h>
#include <sys/syslog.h>
#include <sys/protosw.h>

#include <kern/assert.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/sched_prim.h>
#include <kern/locks.h>
#include <kern/zalloc.h>

#include <net/kpi_protocol.h>
#include <net/if_types.h>
#include <net/if_llreach.h>
#include <net/kpi_interfacefilter.h>
#include <net/classq/classq.h>
#include <net/classq/classq_sfb.h>
#include <net/flowhash.h>
#include <net/ntstat.h>
#include <net/if_llatbl.h>
#include <net/net_api_stats.h>

#include <netinet/in_var.h>
#include <netinet/igmp_var.h>
#include <netinet/ip_var.h>
#include <netinet/tcp.h>
#include <netinet/tcp_var.h>
#include <netinet/udp.h>
#include <netinet/udp_var.h>
#include <netinet/if_ether.h>
#include <netinet/in_pcb.h>
#include <netinet/in_tclass.h>

#include <netinet6/in6_var.h>
#include <netinet6/nd6.h>
#include <netinet6/mld6_var.h>
#include <netinet6/scope6_var.h>

#include <libkern/OSAtomic.h>
#include <libkern/tree.h>

#include <dev/random/randomdev.h>
#include <machine/machine_routines.h>

#include <mach/thread_act.h>
#include <mach/sdt.h>

#if CONFIG_MACF
#include <sys/kauth.h>
#include <security/mac_framework.h>
#include <net/ethernet.h>
#include <net/firewire.h>
#endif

#if PF
#include <net/pfvar.h>
#endif /* PF */
#include <net/pktsched/pktsched.h>

#if NECP
#include <net/necp.h>
#endif /* NECP */
#define DBG_LAYER_BEG		DLILDBG_CODE(DBG_DLIL_STATIC, 0)
#define DBG_LAYER_END		DLILDBG_CODE(DBG_DLIL_STATIC, 2)
#define DBG_FNC_DLIL_INPUT	DLILDBG_CODE(DBG_DLIL_STATIC, (1 << 8))
#define DBG_FNC_DLIL_OUTPUT	DLILDBG_CODE(DBG_DLIL_STATIC, (2 << 8))
#define DBG_FNC_DLIL_IFOUT	DLILDBG_CODE(DBG_DLIL_STATIC, (3 << 8))

#define MAX_FRAME_TYPE_SIZE	4 /* LONGWORDS */
#define MAX_LINKADDR		4 /* LONGWORDS */
#define M_NKE			M_IFADDR

#if 1
#define DLIL_PRINTF	printf
#else
#define DLIL_PRINTF	kprintf
#endif
#define IF_DATA_REQUIRE_ALIGNED_64(f)	\
    _CASSERT(!(offsetof(struct if_data_internal, f) % sizeof (u_int64_t)))

#define IFNET_IF_DATA_REQUIRE_ALIGNED_64(f)	\
    _CASSERT(!(offsetof(struct ifnet, if_data.f) % sizeof (u_int64_t)))
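/*
 * These are compile-time checks: _CASSERT fails the build if a 64-bit
 * counter field is not naturally aligned on an 8-byte boundary, which
 * the atomic 64-bit add/subtract primitives used on these statistics
 * require for correctness on all supported architectures.
 */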
/*
 * List of if_proto structures in if_proto_hash[] is protected by
 * the ifnet lock.  The rest of the fields are initialized at protocol
 * attach time and never change, thus no lock required as long as
 * a reference to it is valid, via if_proto_ref().
 */
struct if_proto {
    SLIST_ENTRY(if_proto)	next_hash;
    u_int32_t			refcount;
    u_int32_t			detached;
    struct ifnet		*ifp;
    protocol_family_t		protocol_family;
    int				proto_kpi;
    union {
        struct {
            proto_media_input		input;
            proto_media_preout		pre_output;
            proto_media_event		event;
            proto_media_ioctl		ioctl;
            proto_media_detached	detached;
            proto_media_resolve_multi	resolve_multi;
            proto_media_send_arp	send_arp;
        } v1;
        struct {
            proto_media_input_v2	input;
            proto_media_preout		pre_output;
            proto_media_event		event;
            proto_media_ioctl		ioctl;
            proto_media_detached	detached;
            proto_media_resolve_multi	resolve_multi;
            proto_media_send_arp	send_arp;
        } v2;
    } kpi;
};
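/*
 * Only one arm of the kpi union is valid for a given attachment:
 * proto_kpi records whether v1 or v2 callbacks were supplied, and
 * if_proto_free() below consults it when invoking the detached hook.
 */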
SLIST_HEAD(proto_hash_entry, if_proto);

#define DLIL_SDLDATALEN \
    (DLIL_SDLMAXLEN - offsetof(struct sockaddr_dl, sdl_data[0]))
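/*
 * DLIL_SDLDATALEN is the number of bytes left over for sdl_data[] once
 * the fixed sockaddr_dl header fields are carved out of DLIL_SDLMAXLEN.
 */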
struct dlil_ifnet {
    struct ifnet	dl_if;			/* public ifnet */
    /*
     * DLIL private fields, protected by dl_if_lock
     */
    decl_lck_mtx_data(, dl_if_lock);
    TAILQ_ENTRY(dlil_ifnet) dl_if_link;	/* dlil_ifnet link */
    u_int32_t dl_if_flags;		/* flags (below) */
    u_int32_t dl_if_refcnt;		/* refcnt */
    void (*dl_if_trace)(struct dlil_ifnet *, int); /* ref trace callback */
    void	*dl_if_uniqueid;	/* unique interface id */
    size_t	dl_if_uniqueid_len;	/* length of the unique id */
    char	dl_if_namestorage[IFNAMSIZ]; /* interface name storage */
    char	dl_if_xnamestorage[IFXNAMSIZ]; /* external name storage */
    struct {
        struct ifaddr	ifa;		/* lladdr ifa */
        u_int8_t	asdl[DLIL_SDLMAXLEN]; /* addr storage */
        u_int8_t	msdl[DLIL_SDLMAXLEN]; /* mask storage */
    } dl_if_lladdr;
    u_int8_t dl_if_descstorage[IF_DESCSIZE]; /* desc storage */
    struct dlil_threading_info dl_if_inpstorage; /* input thread storage */
    ctrace_t	dl_if_attach;		/* attach PC stacktrace */
    ctrace_t	dl_if_detach;		/* detach PC stacktrace */
};
/* Values for dl_if_flags (private to DLIL) */
#define DLIF_INUSE	0x1	/* DLIL ifnet recycler, ifnet in use */
#define DLIF_REUSE	0x2	/* DLIL ifnet recycles, ifnet is not new */
#define DLIF_DEBUG	0x4	/* has debugging info */

#define IF_REF_TRACE_HIST_SIZE	8	/* size of ref trace history */

__private_extern__ unsigned int if_ref_trace_hist_size = IF_REF_TRACE_HIST_SIZE;

struct dlil_ifnet_dbg {
    struct dlil_ifnet	dldbg_dlif;		/* dlil_ifnet */
    u_int16_t		dldbg_if_refhold_cnt;	/* # ifnet references */
    u_int16_t		dldbg_if_refrele_cnt;	/* # ifnet releases */
    /*
     * Circular lists of ifnet_{reference,release} callers.
     */
    ctrace_t		dldbg_if_refhold[IF_REF_TRACE_HIST_SIZE];
    ctrace_t		dldbg_if_refrele[IF_REF_TRACE_HIST_SIZE];
};

#define DLIL_TO_IFP(s)	(&s->dl_if)
#define IFP_TO_DLIL(s)	((struct dlil_ifnet *)s)
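/*
 * The cast macros above rely on dl_if being the first member of
 * struct dlil_ifnet, so a struct ifnet pointer and its enclosing
 * dlil_ifnet pointer refer to the same address.
 */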
struct ifnet_filter {
    TAILQ_ENTRY(ifnet_filter) filt_next;
    u_int32_t		filt_skip;
    u_int32_t		filt_flags;
    ifnet_t		filt_ifp;
    const char		*filt_name;
    void		*filt_cookie;
    protocol_family_t	filt_protocol;
    iff_input_func	filt_input;
    iff_output_func	filt_output;
    iff_event_func	filt_event;
    iff_ioctl_func	filt_ioctl;
    iff_detached_func	filt_detached;
};

struct proto_input_entry;
static TAILQ_HEAD(, dlil_ifnet) dlil_ifnet_head;
static lck_grp_t *dlil_lock_group;
lck_grp_t *ifnet_lock_group;
static lck_grp_t *ifnet_head_lock_group;
static lck_grp_t *ifnet_snd_lock_group;
static lck_grp_t *ifnet_rcv_lock_group;
lck_attr_t *ifnet_lock_attr;
decl_lck_rw_data(static, ifnet_head_lock);
decl_lck_mtx_data(static, dlil_ifnet_lock);
u_int32_t dlil_filter_disable_tso_count = 0;
#if DEBUG
static unsigned int ifnet_debug = 1;	/* debugging (enabled) */
#else
static unsigned int ifnet_debug;	/* debugging (disabled) */
#endif /* !DEBUG */

static unsigned int dlif_size;		/* size of dlil_ifnet to allocate */
static unsigned int dlif_bufsize;	/* size of dlif_size + headroom */
static struct zone *dlif_zone;		/* zone for dlil_ifnet */

#define DLIF_ZONE_MAX		64		/* maximum elements in zone */
#define DLIF_ZONE_NAME		"ifnet"		/* zone name */

static unsigned int dlif_filt_size;	/* size of ifnet_filter */
static struct zone *dlif_filt_zone;	/* zone for ifnet_filter */

#define DLIF_FILT_ZONE_MAX	8		/* maximum elements in zone */
#define DLIF_FILT_ZONE_NAME	"ifnet_filter"	/* zone name */

static unsigned int dlif_phash_size;	/* size of ifnet proto hash table */
static struct zone *dlif_phash_zone;	/* zone for ifnet proto hash table */

#define DLIF_PHASH_ZONE_MAX	DLIF_ZONE_MAX	/* maximum elements in zone */
#define DLIF_PHASH_ZONE_NAME	"ifnet_proto_hash" /* zone name */

static unsigned int dlif_proto_size;	/* size of if_proto */
static struct zone *dlif_proto_zone;	/* zone for if_proto */

#define DLIF_PROTO_ZONE_MAX	(DLIF_ZONE_MAX*2)	/* maximum elements in zone */
#define DLIF_PROTO_ZONE_NAME	"ifnet_proto"		/* zone name */

static unsigned int dlif_tcpstat_size;	  /* size of tcpstat_local to allocate */
static unsigned int dlif_tcpstat_bufsize; /* size of dlif_tcpstat_size + headroom */
static struct zone *dlif_tcpstat_zone;	  /* zone for tcpstat_local */

#define DLIF_TCPSTAT_ZONE_MAX	1		/* maximum elements in zone */
#define DLIF_TCPSTAT_ZONE_NAME	"ifnet_tcpstat"	/* zone name */

static unsigned int dlif_udpstat_size;	  /* size of udpstat_local to allocate */
static unsigned int dlif_udpstat_bufsize; /* size of dlif_udpstat_size + headroom */
static struct zone *dlif_udpstat_zone;	  /* zone for udpstat_local */

#define DLIF_UDPSTAT_ZONE_MAX	1		/* maximum elements in zone */
#define DLIF_UDPSTAT_ZONE_NAME	"ifnet_udpstat"	/* zone name */
static u_int32_t net_rtref;

static struct dlil_main_threading_info dlil_main_input_thread_info;
__private_extern__ struct dlil_threading_info *dlil_main_input_thread =
    (struct dlil_threading_info *)&dlil_main_input_thread_info;
static int dlil_event_internal(struct ifnet *ifp, struct kev_msg *msg,
    bool update_generation);
static int dlil_detach_filter_internal(interface_filter_t filter,
    int detached);
static void dlil_if_trace(struct dlil_ifnet *, int);
static void if_proto_ref(struct if_proto *);
static void if_proto_free(struct if_proto *);
static struct if_proto *find_attached_proto(struct ifnet *, u_int32_t);
static int dlil_ifp_proto_count(struct ifnet *);
static void if_flt_monitor_busy(struct ifnet *);
static void if_flt_monitor_unbusy(struct ifnet *);
static void if_flt_monitor_enter(struct ifnet *);
static void if_flt_monitor_leave(struct ifnet *);
static int dlil_interface_filters_input(struct ifnet *, struct mbuf **,
    char **, protocol_family_t);
static int dlil_interface_filters_output(struct ifnet *, struct mbuf **,
    protocol_family_t);
static struct ifaddr *dlil_alloc_lladdr(struct ifnet *,
    const struct sockaddr_dl *);
static int ifnet_lookup(struct ifnet *);
static void if_purgeaddrs(struct ifnet *);

static errno_t ifproto_media_input_v1(struct ifnet *, protocol_family_t,
    struct mbuf *, char *);
static errno_t ifproto_media_input_v2(struct ifnet *, protocol_family_t,
    struct mbuf *);
static errno_t ifproto_media_preout(struct ifnet *, protocol_family_t,
    mbuf_t *, const struct sockaddr *, void *, char *, char *);
static void ifproto_media_event(struct ifnet *, protocol_family_t,
    const struct kev_msg *);
static errno_t ifproto_media_ioctl(struct ifnet *, protocol_family_t,
    unsigned long, void *);
static errno_t ifproto_media_resolve_multi(ifnet_t, const struct sockaddr *,
    struct sockaddr_dl *, size_t);
static errno_t ifproto_media_send_arp(struct ifnet *, u_short,
    const struct sockaddr_dl *, const struct sockaddr *,
    const struct sockaddr_dl *, const struct sockaddr *);

static errno_t ifp_if_input(struct ifnet *ifp, struct mbuf *m_head,
    struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
    boolean_t poll, struct thread *tp);
static void ifp_if_input_poll(struct ifnet *, u_int32_t, u_int32_t,
    struct mbuf **, struct mbuf **, u_int32_t *, u_int32_t *);
static errno_t ifp_if_ctl(struct ifnet *, ifnet_ctl_cmd_t, u_int32_t, void *);
static errno_t ifp_if_demux(struct ifnet *, struct mbuf *, char *,
    protocol_family_t *);
static errno_t ifp_if_add_proto(struct ifnet *, protocol_family_t,
    const struct ifnet_demux_desc *, u_int32_t);
static errno_t ifp_if_del_proto(struct ifnet *, protocol_family_t);
static errno_t ifp_if_check_multi(struct ifnet *, const struct sockaddr *);
#if CONFIG_EMBEDDED
static errno_t ifp_if_framer(struct ifnet *, struct mbuf **,
    const struct sockaddr *, const char *, const char *,
    u_int32_t *, u_int32_t *);
#else
static errno_t ifp_if_framer(struct ifnet *, struct mbuf **,
    const struct sockaddr *, const char *, const char *);
#endif /* CONFIG_EMBEDDED */
static errno_t ifp_if_framer_extended(struct ifnet *, struct mbuf **,
    const struct sockaddr *, const char *, const char *,
    u_int32_t *, u_int32_t *);
static errno_t ifp_if_set_bpf_tap(struct ifnet *, bpf_tap_mode, bpf_packet_func);
static void ifp_if_free(struct ifnet *);
static void ifp_if_event(struct ifnet *, const struct kev_msg *);
static __inline void ifp_inc_traffic_class_in(struct ifnet *, struct mbuf *);
static __inline void ifp_inc_traffic_class_out(struct ifnet *, struct mbuf *);

static void dlil_main_input_thread_func(void *, wait_result_t);
static void dlil_input_thread_func(void *, wait_result_t);
static void dlil_rxpoll_input_thread_func(void *, wait_result_t);
static int dlil_create_input_thread(ifnet_t, struct dlil_threading_info *);
static void dlil_terminate_input_thread(struct dlil_threading_info *);
static void dlil_input_stats_add(const struct ifnet_stat_increment_param *,
    struct dlil_threading_info *, boolean_t);
static void dlil_input_stats_sync(struct ifnet *, struct dlil_threading_info *);
static void dlil_input_packet_list_common(struct ifnet *, struct mbuf *,
    u_int32_t, ifnet_model_t, boolean_t);
static errno_t ifnet_input_common(struct ifnet *, struct mbuf *, struct mbuf *,
    const struct ifnet_stat_increment_param *, boolean_t, boolean_t);

#if DEBUG || DEVELOPMENT
static void dlil_verify_sum16(void);
#endif /* DEBUG || DEVELOPMENT */
static void dlil_output_cksum_dbg(struct ifnet *, struct mbuf *, uint32_t,
    protocol_family_t);
static void dlil_input_cksum_dbg(struct ifnet *, struct mbuf *, char *,
    protocol_family_t);

static void ifnet_detacher_thread_func(void *, wait_result_t);
static int ifnet_detacher_thread_cont(int);
static void ifnet_detach_final(struct ifnet *);
static void ifnet_detaching_enqueue(struct ifnet *);
static struct ifnet *ifnet_detaching_dequeue(void);

static void ifnet_start_thread_fn(void *, wait_result_t);
static void ifnet_poll_thread_fn(void *, wait_result_t);
static void ifnet_poll(struct ifnet *);
static errno_t ifnet_enqueue_common(struct ifnet *, void *,
    classq_pkt_type_t, boolean_t, boolean_t *);

static void ifp_src_route_copyout(struct ifnet *, struct route *);
static void ifp_src_route_copyin(struct ifnet *, struct route *);
#if INET6
static void ifp_src_route6_copyout(struct ifnet *, struct route_in6 *);
static void ifp_src_route6_copyin(struct ifnet *, struct route_in6 *);
#endif /* INET6 */

static int sysctl_rxpoll SYSCTL_HANDLER_ARGS;
static int sysctl_rxpoll_mode_holdtime SYSCTL_HANDLER_ARGS;
static int sysctl_rxpoll_sample_holdtime SYSCTL_HANDLER_ARGS;
static int sysctl_rxpoll_interval_time SYSCTL_HANDLER_ARGS;
static int sysctl_rxpoll_wlowat SYSCTL_HANDLER_ARGS;
static int sysctl_rxpoll_whiwat SYSCTL_HANDLER_ARGS;
static int sysctl_sndq_maxlen SYSCTL_HANDLER_ARGS;
static int sysctl_rcvq_maxlen SYSCTL_HANDLER_ARGS;
static int sysctl_hwcksum_dbg_mode SYSCTL_HANDLER_ARGS;
static int sysctl_hwcksum_dbg_partial_rxoff_forced SYSCTL_HANDLER_ARGS;
static int sysctl_hwcksum_dbg_partial_rxoff_adj SYSCTL_HANDLER_ARGS;
static int sysctl_get_ports_used SYSCTL_HANDLER_ARGS;

struct chain_len_stats tx_chain_len_stats;
static int sysctl_tx_chain_len_stats SYSCTL_HANDLER_ARGS;

#if TEST_INPUT_THREAD_TERMINATION
static int sysctl_input_thread_termination_spin SYSCTL_HANDLER_ARGS;
#endif /* TEST_INPUT_THREAD_TERMINATION */
/* The following are protected by dlil_ifnet_lock */
static TAILQ_HEAD(, ifnet) ifnet_detaching_head;
static u_int32_t ifnet_detaching_cnt;
static void *ifnet_delayed_run;	/* wait channel for detaching thread */

decl_lck_mtx_data(static, ifnet_fc_lock);

static uint32_t ifnet_flowhash_seed;

struct ifnet_flowhash_key {
    char	ifk_name[IFNAMSIZ];
    uint32_t	ifk_unit;
    uint32_t	ifk_flags;
    uint32_t	ifk_eflags;
    uint32_t	ifk_capabilities;
    uint32_t	ifk_capenable;
    uint32_t	ifk_output_sched_model;
    uint32_t	ifk_rand1;
    uint32_t	ifk_rand2;
};
/* Flow control entry per interface */
struct ifnet_fc_entry {
    RB_ENTRY(ifnet_fc_entry) ifce_entry;
    u_int32_t	ifce_flowhash;
    struct ifnet	*ifce_ifp;
};

static uint32_t ifnet_calc_flowhash(struct ifnet *);
static int ifce_cmp(const struct ifnet_fc_entry *,
    const struct ifnet_fc_entry *);
static int ifnet_fc_add(struct ifnet *);
static struct ifnet_fc_entry *ifnet_fc_get(u_int32_t);
static void ifnet_fc_entry_free(struct ifnet_fc_entry *);

/* protected by ifnet_fc_lock */
RB_HEAD(ifnet_fc_tree, ifnet_fc_entry) ifnet_fc_tree;
RB_PROTOTYPE(ifnet_fc_tree, ifnet_fc_entry, ifce_entry, ifce_cmp);
RB_GENERATE(ifnet_fc_tree, ifnet_fc_entry, ifce_entry, ifce_cmp);

static unsigned int ifnet_fc_zone_size;	/* sizeof ifnet_fc_entry */
static struct zone *ifnet_fc_zone;	/* ifnet_fc_entry zone */

#define IFNET_FC_ZONE_NAME	"ifnet_fc_zone"
#define IFNET_FC_ZONE_MAX	32
extern void bpfdetach(struct ifnet *);
extern void proto_input_run(void);

extern uint32_t udp_count_opportunistic(unsigned int ifindex,
    u_int32_t flags);
extern uint32_t tcp_count_opportunistic(unsigned int ifindex,
    u_int32_t flags);

__private_extern__ void link_rtrequest(int, struct rtentry *,
    struct sockaddr *);
#if CONFIG_MACF
#ifdef CONFIG_EMBEDDED
int dlil_lladdr_ckreq = 1;
#else
int dlil_lladdr_ckreq = 0;
#endif
#endif /* CONFIG_MACF */

#if DEBUG
int dlil_verbose = 1;
#else
int dlil_verbose = 0;
#endif /* DEBUG */
#if IFNET_INPUT_SANITY_CHK
/* sanity checking of input packet lists received */
static u_int32_t dlil_input_sanity_check = 0;
#endif /* IFNET_INPUT_SANITY_CHK */
/* rate limit debug messages */
struct timespec dlil_dbgrate = { 1, 0 };
SYSCTL_DECL(_net_link_generic_system);

#if CONFIG_MACF
SYSCTL_INT(_net_link_generic_system, OID_AUTO, dlil_lladdr_ckreq,
    CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_lladdr_ckreq, 0,
    "Require MACF system info check to expose link-layer address");
#endif /* CONFIG_MACF */

SYSCTL_INT(_net_link_generic_system, OID_AUTO, dlil_verbose,
    CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_verbose, 0, "Log DLIL error messages");
#define IF_SNDQ_MINLEN	32
u_int32_t if_sndq_maxlen = IFQ_MAXLEN;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, sndq_maxlen,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_sndq_maxlen, IFQ_MAXLEN,
    sysctl_sndq_maxlen, "I", "Default transmit queue max length");

#define IF_RCVQ_MINLEN	32
#define IF_RCVQ_MAXLEN	256
u_int32_t if_rcvq_maxlen = IF_RCVQ_MAXLEN;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rcvq_maxlen,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rcvq_maxlen, IFQ_MAXLEN,
    sysctl_rcvq_maxlen, "I", "Default receive queue max length");
#define IF_RXPOLL_DECAY	2	/* ilog2 of EWMA decay rate (4) */
static u_int32_t if_rxpoll_decay = IF_RXPOLL_DECAY;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_decay,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_decay, IF_RXPOLL_DECAY,
    "ilog2 of EWMA decay rate of avg inbound packets");

#define IF_RXPOLL_MODE_HOLDTIME_MIN	(10ULL * 1000 * 1000)	/* 10 ms */
#define IF_RXPOLL_MODE_HOLDTIME		(1000ULL * 1000 * 1000)	/* 1 sec */
static u_int64_t if_rxpoll_mode_holdtime = IF_RXPOLL_MODE_HOLDTIME;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_freeze_time,
    CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_mode_holdtime,
    IF_RXPOLL_MODE_HOLDTIME, sysctl_rxpoll_mode_holdtime,
    "Q", "input poll mode freeze time");

#define IF_RXPOLL_SAMPLETIME_MIN	(1ULL * 1000 * 1000)	/* 1 ms */
#define IF_RXPOLL_SAMPLETIME		(10ULL * 1000 * 1000)	/* 10 ms */
static u_int64_t if_rxpoll_sample_holdtime = IF_RXPOLL_SAMPLETIME;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_sample_time,
    CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_sample_holdtime,
    IF_RXPOLL_SAMPLETIME, sysctl_rxpoll_sample_holdtime,
    "Q", "input poll sampling time");

#define IF_RXPOLL_INTERVALTIME_MIN	(1ULL * 1000)		/* 1 us */
#define IF_RXPOLL_INTERVALTIME		(1ULL * 1000 * 1000)	/* 1 ms */
static u_int64_t if_rxpoll_interval_time = IF_RXPOLL_INTERVALTIME;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_interval_time,
    CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_interval_time,
    IF_RXPOLL_INTERVALTIME, sysctl_rxpoll_interval_time,
    "Q", "input poll interval (time)");

#define IF_RXPOLL_INTERVAL_PKTS	0	/* 0 (disabled) */
static u_int32_t if_rxpoll_interval_pkts = IF_RXPOLL_INTERVAL_PKTS;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_interval_pkts,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_interval_pkts,
    IF_RXPOLL_INTERVAL_PKTS, "input poll interval (packets)");
#define IF_RXPOLL_WLOWAT	10
static u_int32_t if_rxpoll_wlowat = IF_RXPOLL_WLOWAT;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_wakeups_lowat,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_wlowat,
    IF_RXPOLL_WLOWAT, sysctl_rxpoll_wlowat,
    "I", "input poll wakeup low watermark");

#define IF_RXPOLL_WHIWAT	100
static u_int32_t if_rxpoll_whiwat = IF_RXPOLL_WHIWAT;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_wakeups_hiwat,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_whiwat,
    IF_RXPOLL_WHIWAT, sysctl_rxpoll_whiwat,
    "I", "input poll wakeup high watermark");

static u_int32_t if_rxpoll_max = 0;	/* 0 (automatic) */
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_max,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_max, 0,
    "max packets per poll call");

static u_int32_t if_rxpoll = 1;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll, 0,
    sysctl_rxpoll, "I", "enable opportunistic input polling");
#if TEST_INPUT_THREAD_TERMINATION
static u_int32_t if_input_thread_termination_spin = 0;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, input_thread_termination_spin,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_input_thread_termination_spin, 0,
    sysctl_input_thread_termination_spin,
    "I", "input thread termination spin limit");
#endif /* TEST_INPUT_THREAD_TERMINATION */

static u_int32_t cur_dlil_input_threads = 0;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, dlil_input_threads,
    CTLFLAG_RD | CTLFLAG_LOCKED, &cur_dlil_input_threads, 0,
    "Current number of DLIL input threads");

#if IFNET_INPUT_SANITY_CHK
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, dlil_input_sanity_check,
    CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_input_sanity_check, 0,
    "Turn on sanity checking in DLIL input");
#endif /* IFNET_INPUT_SANITY_CHK */
static u_int32_t if_flowadv = 1;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, flow_advisory,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_flowadv, 1,
    "enable flow-advisory mechanism");

static u_int32_t if_delaybased_queue = 1;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, delaybased_queue,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_delaybased_queue, 1,
    "enable delay based dynamic queue sizing");

static uint64_t hwcksum_in_invalidated = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_in_invalidated, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_in_invalidated, "inbound packets with invalidated hardware cksum");

uint32_t hwcksum_dbg = 0;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_dbg,
    CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg, 0,
    "enable hardware cksum debugging");

u_int32_t ifnet_start_delayed = 0;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, start_delayed,
    CTLFLAG_RW | CTLFLAG_LOCKED, &ifnet_start_delayed, 0,
    "number of times start was delayed");

u_int32_t ifnet_delay_start_disabled = 0;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, start_delay_disabled,
    CTLFLAG_RW | CTLFLAG_LOCKED, &ifnet_delay_start_disabled, 0,
    "number of times delayed start was disabled");
#define HWCKSUM_DBG_PARTIAL_FORCED	0x1	/* forced partial checksum */
#define HWCKSUM_DBG_PARTIAL_RXOFF_ADJ	0x2	/* adjust start offset */
#define HWCKSUM_DBG_FINALIZE_FORCED	0x10	/* forced finalize */
#define HWCKSUM_DBG_MASK \
    (HWCKSUM_DBG_PARTIAL_FORCED | HWCKSUM_DBG_PARTIAL_RXOFF_ADJ |	\
    HWCKSUM_DBG_FINALIZE_FORCED)

static uint32_t hwcksum_dbg_mode = 0;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, hwcksum_dbg_mode,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg_mode,
    0, sysctl_hwcksum_dbg_mode, "I", "hardware cksum debugging mode");
static uint64_t hwcksum_dbg_partial_forced = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_partial_forced, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_partial_forced, "packets forced using partial cksum");

static uint64_t hwcksum_dbg_partial_forced_bytes = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_partial_forced_bytes, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_partial_forced_bytes, "bytes forced using partial cksum");

static uint32_t hwcksum_dbg_partial_rxoff_forced = 0;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_partial_rxoff_forced, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    &hwcksum_dbg_partial_rxoff_forced, 0,
    sysctl_hwcksum_dbg_partial_rxoff_forced, "I",
    "forced partial cksum rx offset");

static uint32_t hwcksum_dbg_partial_rxoff_adj = 0;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, hwcksum_dbg_partial_rxoff_adj,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg_partial_rxoff_adj,
    0, sysctl_hwcksum_dbg_partial_rxoff_adj, "I",
    "adjusted partial cksum rx offset");

static uint64_t hwcksum_dbg_verified = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_verified, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_verified, "packets verified for having good checksum");

static uint64_t hwcksum_dbg_bad_cksum = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_bad_cksum, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_bad_cksum, "packets with bad hardware calculated checksum");

static uint64_t hwcksum_dbg_bad_rxoff = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_bad_rxoff, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_bad_rxoff, "packets with invalid rxoff");

static uint64_t hwcksum_dbg_adjusted = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_adjusted, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_adjusted, "packets with rxoff adjusted");

static uint64_t hwcksum_dbg_finalized_hdr = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_finalized_hdr, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_finalized_hdr, "finalized headers");

static uint64_t hwcksum_dbg_finalized_data = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_finalized_data, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_finalized_data, "finalized payloads");
uint32_t hwcksum_tx = 1;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_tx,
    CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_tx, 0,
    "enable transmit hardware checksum offload");

uint32_t hwcksum_rx = 1;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_rx,
    CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_rx, 0,
    "enable receive hardware checksum offload");

SYSCTL_PROC(_net_link_generic_system, OID_AUTO, tx_chain_len_stats,
    CTLFLAG_RD | CTLFLAG_LOCKED, 0, 9,
    sysctl_tx_chain_len_stats, "S", "");

uint32_t tx_chain_len_count = 0;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, tx_chain_len_count,
    CTLFLAG_RW | CTLFLAG_LOCKED, &tx_chain_len_count, 0, "");

SYSCTL_NODE(_net_link_generic_system, OID_AUTO, get_ports_used,
    CTLFLAG_RD | CTLFLAG_LOCKED, sysctl_get_ports_used, "");

static uint32_t threshold_notify = 1;	/* enable/disable */
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, threshold_notify,
    CTLFLAG_RW | CTLFLAG_LOCKED, &threshold_notify, 0, "");

static uint32_t threshold_interval = 2;	/* in seconds */
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, threshold_interval,
    CTLFLAG_RW | CTLFLAG_LOCKED, &threshold_interval, 0, "");
#if (DEVELOPMENT || DEBUG)
static int sysctl_get_kao_frames SYSCTL_HANDLER_ARGS;
SYSCTL_NODE(_net_link_generic_system, OID_AUTO, get_kao_frames,
    CTLFLAG_RD | CTLFLAG_LOCKED, sysctl_get_kao_frames, "");
#endif /* DEVELOPMENT || DEBUG */

struct net_api_stats net_api_stats;
SYSCTL_STRUCT(_net, OID_AUTO, api_stats, CTLFLAG_RD|CTLFLAG_LOCKED,
    &net_api_stats, net_api_stats, "");
unsigned int net_rxpoll = 1;
unsigned int net_affinity = 1;
static kern_return_t dlil_affinity_set(struct thread *, u_int32_t);

extern u_int32_t inject_buckets;

static lck_grp_attr_t *dlil_grp_attributes = NULL;
static lck_attr_t *dlil_lck_attributes = NULL;

/* DLIL data threshold thread call */
static void dlil_dt_tcall_fn(thread_call_param_t, thread_call_param_t);

static void dlil_mit_tcall_fn(thread_call_param_t, thread_call_param_t);
uint32_t dlil_rcv_mit_pkts_min = 5;
uint32_t dlil_rcv_mit_pkts_max = 64;
uint32_t dlil_rcv_mit_interval = (500 * 1000);

#if (DEVELOPMENT || DEBUG)
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rcv_mit_pkts_min,
    CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_rcv_mit_pkts_min, 0, "");
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rcv_mit_pkts_max,
    CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_rcv_mit_pkts_max, 0, "");
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rcv_mit_interval,
    CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_rcv_mit_interval, 0, "");
#endif /* DEVELOPMENT || DEBUG */
#define DLIL_INPUT_CHECK(m, ifp) {					\
    struct ifnet *_rcvif = mbuf_pkthdr_rcvif(m);			\
    if (_rcvif == NULL || (ifp != lo_ifp && _rcvif != ifp) ||		\
        !(mbuf_flags(m) & MBUF_PKTHDR)) {				\
        panic_plain("%s: invalid mbuf %p\n", __func__, m);		\
        /* NOTREACHED */						\
    }									\
}

#define DLIL_EWMA(old, new, decay) do {					\
    u_int32_t _avg;							\
    if ((_avg = (old)) > 0)						\
        _avg = (((_avg << (decay)) - _avg) + (new)) >> (decay);		\
    else								\
        _avg = (new);							\
    (old) = _avg;							\
} while (0)
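/*
 * Worked example for DLIL_EWMA: with decay = 2 the new sample gets a
 * weight of 1/4.  For old = 40 and new = 8:
 *   ((40 << 2) - 40 + 8) >> 2 = (160 - 40 + 8) >> 2 = 128 >> 2 = 32,
 * i.e. (3 * 40 + 8) / 4, so the average decays smoothly toward the
 * new sample rather than jumping to it.
 */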
#define MBPS	(1ULL * 1000 * 1000)
#define GBPS	(MBPS * 1000)

struct rxpoll_time_tbl {
    u_int64_t	speed;		/* downlink speed */
    u_int32_t	plowat;		/* packets low watermark */
    u_int32_t	phiwat;		/* packets high watermark */
    u_int32_t	blowat;		/* bytes low watermark */
    u_int32_t	bhiwat;		/* bytes high watermark */
};

static struct rxpoll_time_tbl rxpoll_tbl[] = {
    { 10 * MBPS,	2,	8,	(1 * 1024),	(6 * 1024)  },
    { 100 * MBPS,	10,	40,	(4 * 1024),	(64 * 1024) },
    { 1 * GBPS,		10,	40,	(4 * 1024),	(64 * 1024) },
    { 10 * GBPS,	10,	40,	(4 * 1024),	(64 * 1024) },
    { 100 * GBPS,	10,	40,	(4 * 1024),	(64 * 1024) },
    { 0, 0, 0, 0, 0 }
};
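/*
 * The table is indexed by downlink speed: each row supplies the
 * outstanding packet/byte watermarks used when deciding whether an
 * interface that supports opportunistic polling should transition
 * between interrupt and polling mode; the all-zero row terminates
 * the scan.
 */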
static __inline__ u_int32_t
proto_hash_value(u_int32_t protocol_family)
{
    /*
     * dlil_proto_unplumb_all() depends on the mapping between
     * the hash bucket index and the protocol family defined
     * here; future changes must be applied there as well.
     */
    switch (protocol_family) {
    case PF_INET:
        return (0);
    case PF_INET6:
        return (1);
    case PF_VLAN:
        return (2);
    case PF_UNSPEC:
    default:
        return (3);
    }
}
/*
 * Caller must already be holding ifnet lock.
 */
static struct if_proto *
find_attached_proto(struct ifnet *ifp, u_int32_t protocol_family)
{
    struct if_proto *proto = NULL;
    u_int32_t i = proto_hash_value(protocol_family);

    ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);

    if (ifp->if_proto_hash != NULL)
        proto = SLIST_FIRST(&ifp->if_proto_hash[i]);

    while (proto != NULL && proto->protocol_family != protocol_family)
        proto = SLIST_NEXT(proto, next_hash);

    if (proto != NULL)
        if_proto_ref(proto);

    return (proto);
}
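/*
 * Note that a successful lookup returns the entry with its refcount
 * already bumped; callers are expected to drop that reference with
 * if_proto_free() once they are done with the protocol.
 */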
static void
if_proto_ref(struct if_proto *proto)
{
    atomic_add_32(&proto->refcount, 1);
}

extern void if_rtproto_del(struct ifnet *ifp, int protocol);
static void
if_proto_free(struct if_proto *proto)
{
    u_int32_t oldval;
    struct ifnet *ifp = proto->ifp;
    u_int32_t proto_family = proto->protocol_family;
    struct kev_dl_proto_data ev_pr_data;

    oldval = atomic_add_32_ov(&proto->refcount, -1);
    if (oldval > 1)
        return;

    /* No more reference on this, protocol must have been detached */
    VERIFY(proto->detached);

    if (proto->proto_kpi == kProtoKPI_v1) {
        if (proto->kpi.v1.detached)
            proto->kpi.v1.detached(ifp, proto->protocol_family);
    }
    if (proto->proto_kpi == kProtoKPI_v2) {
        if (proto->kpi.v2.detached)
            proto->kpi.v2.detached(ifp, proto->protocol_family);
    }

    /*
     * Cleanup routes that may still be in the routing table for that
     * interface/protocol pair.
     */
    if_rtproto_del(ifp, proto_family);

    /*
     * The reserved field carries the number of protocol still attached
     * (subject to change)
     */
    ifnet_lock_shared(ifp);
    ev_pr_data.proto_family = proto_family;
    ev_pr_data.proto_remaining_count = dlil_ifp_proto_count(ifp);
    ifnet_lock_done(ifp);

    dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_DETACHED,
        (struct net_event_data *)&ev_pr_data,
        sizeof (struct kev_dl_proto_data));

    zfree(dlif_proto_zone, proto);
}
__private_extern__ void
ifnet_lock_assert(struct ifnet *ifp, ifnet_lock_assert_t what)
{
#if !MACH_ASSERT
#pragma unused(ifp)
#endif
    unsigned int type = 0;

    switch (what) {
    case IFNET_LCK_ASSERT_EXCLUSIVE:
        type = LCK_RW_ASSERT_EXCLUSIVE;
        break;

    case IFNET_LCK_ASSERT_SHARED:
        type = LCK_RW_ASSERT_SHARED;
        break;

    case IFNET_LCK_ASSERT_OWNED:
        type = LCK_RW_ASSERT_HELD;
        break;

    case IFNET_LCK_ASSERT_NOTOWNED:
        /* nothing to do here for RW lock; bypass assert */
        return;

    default:
        panic("bad ifnet assert type: %d", what);
        /* NOTREACHED */
    }
    LCK_RW_ASSERT(&ifp->if_lock, type);
}
__private_extern__ void
ifnet_lock_shared(struct ifnet *ifp)
{
    lck_rw_lock_shared(&ifp->if_lock);
}

__private_extern__ void
ifnet_lock_exclusive(struct ifnet *ifp)
{
    lck_rw_lock_exclusive(&ifp->if_lock);
}

__private_extern__ void
ifnet_lock_done(struct ifnet *ifp)
{
    lck_rw_done(&ifp->if_lock);
}

#if INET
__private_extern__ void
if_inetdata_lock_shared(struct ifnet *ifp)
{
    lck_rw_lock_shared(&ifp->if_inetdata_lock);
}

__private_extern__ void
if_inetdata_lock_exclusive(struct ifnet *ifp)
{
    lck_rw_lock_exclusive(&ifp->if_inetdata_lock);
}

__private_extern__ void
if_inetdata_lock_done(struct ifnet *ifp)
{
    lck_rw_done(&ifp->if_inetdata_lock);
}
#endif /* INET */

#if INET6
__private_extern__ void
if_inet6data_lock_shared(struct ifnet *ifp)
{
    lck_rw_lock_shared(&ifp->if_inet6data_lock);
}

__private_extern__ void
if_inet6data_lock_exclusive(struct ifnet *ifp)
{
    lck_rw_lock_exclusive(&ifp->if_inet6data_lock);
}

__private_extern__ void
if_inet6data_lock_done(struct ifnet *ifp)
{
    lck_rw_done(&ifp->if_inet6data_lock);
}
#endif /* INET6 */

__private_extern__ void
ifnet_head_lock_shared(void)
{
    lck_rw_lock_shared(&ifnet_head_lock);
}

__private_extern__ void
ifnet_head_lock_exclusive(void)
{
    lck_rw_lock_exclusive(&ifnet_head_lock);
}

__private_extern__ void
ifnet_head_done(void)
{
    lck_rw_done(&ifnet_head_lock);
}

__private_extern__ void
ifnet_head_assert_exclusive(void)
{
    LCK_RW_ASSERT(&ifnet_head_lock, LCK_RW_ASSERT_EXCLUSIVE);
}
/*
 * Caller must already be holding ifnet lock.
 */
static int
dlil_ifp_proto_count(struct ifnet *ifp)
{
    int i, count = 0;

    ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);

    if (ifp->if_proto_hash == NULL)
        goto done;

    for (i = 0; i < PROTO_HASH_SLOTS; i++) {
        struct if_proto *proto;
        SLIST_FOREACH(proto, &ifp->if_proto_hash[i], next_hash) {
            count++;
        }
    }
done:
    return (count);
}
__private_extern__ void
dlil_post_msg(struct ifnet *ifp, u_int32_t event_subclass,
    u_int32_t event_code, struct net_event_data *event_data,
    u_int32_t event_data_len)
{
    struct net_event_data ev_data;
    struct kev_msg ev_msg;

    bzero(&ev_msg, sizeof (ev_msg));
    bzero(&ev_data, sizeof (ev_data));
    /*
     * a net event always starts with a net_event_data structure
     * but the caller can generate a simple net event or
     * provide a longer event structure to post
     */
    ev_msg.vendor_code = KEV_VENDOR_APPLE;
    ev_msg.kev_class = KEV_NETWORK_CLASS;
    ev_msg.kev_subclass = event_subclass;
    ev_msg.event_code = event_code;

    if (event_data == NULL) {
        event_data = &ev_data;
        event_data_len = sizeof (struct net_event_data);
    }

    strlcpy(&event_data->if_name[0], ifp->if_name, IFNAMSIZ);
    event_data->if_family = ifp->if_family;
    event_data->if_unit = (u_int32_t)ifp->if_unit;

    ev_msg.dv[0].data_length = event_data_len;
    ev_msg.dv[0].data_ptr = event_data;
    ev_msg.dv[1].data_length = 0;

    /* Don't update interface generation for quality and RRC state changes */
    bool update_generation = (event_subclass != KEV_DL_SUBCLASS ||
        (event_code != KEV_DL_LINK_QUALITY_METRIC_CHANGED &&
        event_code != KEV_DL_RRC_STATE_CHANGED));

    dlil_event_internal(ifp, &ev_msg, update_generation);
}
__private_extern__ int
dlil_alloc_local_stats(struct ifnet *ifp)
{
    int ret = EINVAL;
    void *buf, *base, **pbuf;

    if (ifp == NULL)
        goto end;

    if (ifp->if_tcp_stat == NULL && ifp->if_udp_stat == NULL) {
        /* allocate tcpstat_local structure */
        buf = zalloc(dlif_tcpstat_zone);
        if (buf == NULL) {
            ret = ENOMEM;
            goto end;
        }
        bzero(buf, dlif_tcpstat_bufsize);

        /* Get the 64-bit aligned base address for this object */
        base = (void *)P2ROUNDUP((intptr_t)buf + sizeof (u_int64_t),
            sizeof (u_int64_t));
        VERIFY(((intptr_t)base + dlif_tcpstat_size) <=
            ((intptr_t)buf + dlif_tcpstat_bufsize));

        /*
         * Wind back a pointer size from the aligned base and
         * save the original address so we can free it later.
         */
        pbuf = (void **)((intptr_t)base - sizeof (void *));
        *pbuf = buf;
        ifp->if_tcp_stat = base;

        /* allocate udpstat_local structure */
        buf = zalloc(dlif_udpstat_zone);
        if (buf == NULL) {
            ret = ENOMEM;
            goto end;
        }
        bzero(buf, dlif_udpstat_bufsize);

        /* Get the 64-bit aligned base address for this object */
        base = (void *)P2ROUNDUP((intptr_t)buf + sizeof (u_int64_t),
            sizeof (u_int64_t));
        VERIFY(((intptr_t)base + dlif_udpstat_size) <=
            ((intptr_t)buf + dlif_udpstat_bufsize));

        /*
         * Wind back a pointer size from the aligned base and
         * save the original address so we can free it later.
         */
        pbuf = (void **)((intptr_t)base - sizeof (void *));
        *pbuf = buf;
        ifp->if_udp_stat = base;

        VERIFY(IS_P2ALIGNED(ifp->if_tcp_stat, sizeof (u_int64_t)) &&
            IS_P2ALIGNED(ifp->if_udp_stat, sizeof (u_int64_t)));

        ret = 0;
    }

    if (ifp->if_ipv4_stat == NULL) {
        MALLOC(ifp->if_ipv4_stat, struct if_tcp_ecn_stat *,
            sizeof (struct if_tcp_ecn_stat), M_TEMP, M_WAITOK|M_ZERO);
        if (ifp->if_ipv4_stat == NULL) {
            ret = ENOMEM;
            goto end;
        }
    }

    if (ifp->if_ipv6_stat == NULL) {
        MALLOC(ifp->if_ipv6_stat, struct if_tcp_ecn_stat *,
            sizeof (struct if_tcp_ecn_stat), M_TEMP, M_WAITOK|M_ZERO);
        if (ifp->if_ipv6_stat == NULL) {
            ret = ENOMEM;
            goto end;
        }
    }
end:
    if (ret != 0) {
        if (ifp->if_tcp_stat != NULL) {
            pbuf = (void **)
                ((intptr_t)ifp->if_tcp_stat - sizeof (void *));
            zfree(dlif_tcpstat_zone, *pbuf);
            ifp->if_tcp_stat = NULL;
        }
        if (ifp->if_udp_stat != NULL) {
            pbuf = (void **)
                ((intptr_t)ifp->if_udp_stat - sizeof (void *));
            zfree(dlif_udpstat_zone, *pbuf);
            ifp->if_udp_stat = NULL;
        }
        if (ifp->if_ipv4_stat != NULL) {
            FREE(ifp->if_ipv4_stat, M_TEMP);
            ifp->if_ipv4_stat = NULL;
        }
        if (ifp->if_ipv6_stat != NULL) {
            FREE(ifp->if_ipv6_stat, M_TEMP);
            ifp->if_ipv6_stat = NULL;
        }
    }

    return (ret);
}
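/*
 * Buffer layout used by the aligned-allocation dance above and mirrored
 * in its error path: the zone element is oversized, the stats block is
 * placed at the first 64-bit boundary that leaves a pointer's worth of
 * headroom, and the original zalloc() address is stashed immediately
 * below the aligned base so the free path can recover it:
 *
 *   buf . . . [ *pbuf = buf ][ stats block, 64-bit aligned ] . . .
 *                            ^
 *                            ifp->if_tcp_stat / ifp->if_udp_stat
 */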
static int
dlil_create_input_thread(ifnet_t ifp, struct dlil_threading_info *inp)
{
    thread_continue_t func;
    u_int32_t limit;
    int error;

    /* NULL ifp indicates the main input thread, called at dlil_init time */
    if (ifp == NULL) {
        func = dlil_main_input_thread_func;
        VERIFY(inp == dlil_main_input_thread);
        (void) strlcat(inp->input_name,
            "main_input", DLIL_THREADNAME_LEN);
    } else if (net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) {
        func = dlil_rxpoll_input_thread_func;
        VERIFY(inp != dlil_main_input_thread);
        (void) snprintf(inp->input_name, DLIL_THREADNAME_LEN,
            "%s_input_poll", if_name(ifp));
    } else {
        func = dlil_input_thread_func;
        VERIFY(inp != dlil_main_input_thread);
        (void) snprintf(inp->input_name, DLIL_THREADNAME_LEN,
            "%s_input", if_name(ifp));
    }
    VERIFY(inp->input_thr == THREAD_NULL);

    inp->lck_grp = lck_grp_alloc_init(inp->input_name, dlil_grp_attributes);
    lck_mtx_init(&inp->input_lck, inp->lck_grp, dlil_lck_attributes);

    inp->mode = IFNET_MODEL_INPUT_POLL_OFF;
    inp->ifp = ifp;	/* NULL for main input thread */

    net_timerclear(&inp->mode_holdtime);
    net_timerclear(&inp->mode_lasttime);
    net_timerclear(&inp->sample_holdtime);
    net_timerclear(&inp->sample_lasttime);
    net_timerclear(&inp->dbg_lasttime);

    /*
     * For interfaces that support opportunistic polling, set the
     * low and high watermarks for outstanding inbound packets/bytes.
     * Also define freeze times for transitioning between modes
     * and updating the average.
     */
    if (ifp != NULL && net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) {
        limit = MAX(if_rcvq_maxlen, IF_RCVQ_MINLEN);
        (void) dlil_rxpoll_set_params(ifp, NULL, FALSE);
    } else {
        limit = (u_int32_t)-1;
    }

    _qinit(&inp->rcvq_pkts, Q_DROPTAIL, limit, QP_MBUF);
    if (inp == dlil_main_input_thread) {
        struct dlil_main_threading_info *inpm =
            (struct dlil_main_threading_info *)inp;
        _qinit(&inpm->lo_rcvq_pkts, Q_DROPTAIL, limit, QP_MBUF);
    }

    error = kernel_thread_start(func, inp, &inp->input_thr);
    if (error == KERN_SUCCESS) {
        ml_thread_policy(inp->input_thr, MACHINE_GROUP,
            (MACHINE_NETWORK_GROUP|MACHINE_NETWORK_NETISR));
        /*
         * We create an affinity set so that the matching workloop
         * thread or the starter thread (for loopback) can be
         * scheduled on the same processor set as the input thread.
         */
        if (net_affinity) {
            struct thread *tp = inp->input_thr;
            u_int32_t tag = 0;
            /*
             * Randomize to reduce the probability
             * of affinity tag namespace collision.
             */
            read_frandom(&tag, sizeof (tag));
            if (dlil_affinity_set(tp, tag) == KERN_SUCCESS) {
                thread_reference(tp);
                inp->tag = tag;
                inp->net_affinity = TRUE;
            }
        }
    } else if (inp == dlil_main_input_thread) {
        panic_plain("%s: couldn't create main input thread", __func__);
        /* NOTREACHED */
    } else {
        panic_plain("%s: couldn't create %s input thread", __func__,
            if_name(ifp));
        /* NOTREACHED */
    }
    OSAddAtomic(1, &cur_dlil_input_threads);

    return (error);
}
#if TEST_INPUT_THREAD_TERMINATION
static int
sysctl_input_thread_termination_spin SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
    uint32_t i;
    int err;

    i = if_input_thread_termination_spin;

    err = sysctl_handle_int(oidp, &i, 0, req);
    if (err != 0 || req->newptr == USER_ADDR_NULL)
        return (err);

    if (net_rxpoll == 0)
        return (ENXIO);

    if_input_thread_termination_spin = i;
    return (err);
}
#endif /* TEST_INPUT_THREAD_TERMINATION */
static void
dlil_clean_threading_info(struct dlil_threading_info *inp)
{
    lck_mtx_destroy(&inp->input_lck, inp->lck_grp);
    lck_grp_free(inp->lck_grp);

    inp->input_waiting = 0;
    inp->wtot = 0;
    bzero(inp->input_name, sizeof (inp->input_name));
    inp->ifp = NULL;
    VERIFY(qhead(&inp->rcvq_pkts) == NULL && qempty(&inp->rcvq_pkts));
    qlimit(&inp->rcvq_pkts) = 0;
    bzero(&inp->stats, sizeof (inp->stats));

    VERIFY(!inp->net_affinity);
    inp->input_thr = THREAD_NULL;
    VERIFY(inp->wloop_thr == THREAD_NULL);
    VERIFY(inp->poll_thr == THREAD_NULL);
    VERIFY(inp->tag == 0);

    inp->mode = IFNET_MODEL_INPUT_POLL_OFF;
    bzero(&inp->tstats, sizeof (inp->tstats));
    bzero(&inp->pstats, sizeof (inp->pstats));
    bzero(&inp->sstats, sizeof (inp->sstats));

    net_timerclear(&inp->mode_holdtime);
    net_timerclear(&inp->mode_lasttime);
    net_timerclear(&inp->sample_holdtime);
    net_timerclear(&inp->sample_lasttime);
    net_timerclear(&inp->dbg_lasttime);

#if IFNET_INPUT_SANITY_CHK
    inp->input_mbuf_cnt = 0;
#endif /* IFNET_INPUT_SANITY_CHK */
}
static void
dlil_terminate_input_thread(struct dlil_threading_info *inp)
{
    struct ifnet *ifp = inp->ifp;

    VERIFY(current_thread() == inp->input_thr);
    VERIFY(inp != dlil_main_input_thread);

    OSAddAtomic(-1, &cur_dlil_input_threads);

#if TEST_INPUT_THREAD_TERMINATION
    { /* do something useless that won't get optimized away */
        uint32_t v = 1;
        for (uint32_t i = 0;
            i < if_input_thread_termination_spin;
            i++) {
            v = (i + 1) * v;
        }
        printf("the value is %d\n", v);
    }
#endif /* TEST_INPUT_THREAD_TERMINATION */

    lck_mtx_lock_spin(&inp->input_lck);
    VERIFY((inp->input_waiting & DLIL_INPUT_TERMINATE) != 0);
    inp->input_waiting |= DLIL_INPUT_TERMINATE_COMPLETE;
    wakeup_one((caddr_t)&inp->input_waiting);
    lck_mtx_unlock(&inp->input_lck);

    /* for the extra refcnt from kernel_thread_start() */
    thread_deallocate(current_thread());

    if (dlil_verbose) {
        printf("%s: input thread terminated\n",
            if_name(ifp));
    }

    /* this is the end */
    thread_terminate(current_thread());
    /* NOTREACHED */
}
static kern_return_t
dlil_affinity_set(struct thread *tp, u_int32_t tag)
{
    thread_affinity_policy_data_t policy;

    bzero(&policy, sizeof (policy));
    policy.affinity_tag = tag;
    return (thread_policy_set(tp, THREAD_AFFINITY_POLICY,
        (thread_policy_t)&policy, THREAD_AFFINITY_POLICY_COUNT));
}
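/*
 * A randomized tag plus thread_policy_set() is how DLIL groups its
 * input thread with the matching workloop/starter thread: both ends
 * are given the same THREAD_AFFINITY_POLICY tag, so the scheduler
 * places them on the same processor set (see the comment in
 * dlil_create_input_thread() above).
 */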
void
dlil_init(void)
{
    thread_t thread = THREAD_NULL;

    /*
     * The following fields must be 64-bit aligned for atomic operations.
     */
    IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets);
    IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors);
    IF_DATA_REQUIRE_ALIGNED_64(ifi_opackets);
    IF_DATA_REQUIRE_ALIGNED_64(ifi_oerrors);
    IF_DATA_REQUIRE_ALIGNED_64(ifi_collisions);
    IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes);
    IF_DATA_REQUIRE_ALIGNED_64(ifi_obytes);
    IF_DATA_REQUIRE_ALIGNED_64(ifi_imcasts);
    IF_DATA_REQUIRE_ALIGNED_64(ifi_omcasts);
    IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops);
    IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto);
    IF_DATA_REQUIRE_ALIGNED_64(ifi_alignerrs);
    IF_DATA_REQUIRE_ALIGNED_64(ifi_dt_bytes);
    IF_DATA_REQUIRE_ALIGNED_64(ifi_fpackets);
    IF_DATA_REQUIRE_ALIGNED_64(ifi_fbytes);

    IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets);
    IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors);
    IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_opackets);
    IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_oerrors);
    IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_collisions);
    IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes);
    IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_obytes);
    IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_imcasts);
    IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_omcasts);
    IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops);
    IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto);
    IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_alignerrs);
    IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_dt_bytes);
    IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_fpackets);
    IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_fbytes);

    /*
     * These IF_HWASSIST_ flags must be equal to their IFNET_* counterparts.
     */
    _CASSERT(IF_HWASSIST_CSUM_IP == IFNET_CSUM_IP);
    _CASSERT(IF_HWASSIST_CSUM_TCP == IFNET_CSUM_TCP);
    _CASSERT(IF_HWASSIST_CSUM_UDP == IFNET_CSUM_UDP);
    _CASSERT(IF_HWASSIST_CSUM_IP_FRAGS == IFNET_CSUM_FRAGMENT);
    _CASSERT(IF_HWASSIST_CSUM_FRAGMENT == IFNET_IP_FRAGMENT);
    _CASSERT(IF_HWASSIST_CSUM_TCPIPV6 == IFNET_CSUM_TCPIPV6);
    _CASSERT(IF_HWASSIST_CSUM_UDPIPV6 == IFNET_CSUM_UDPIPV6);
    _CASSERT(IF_HWASSIST_CSUM_FRAGMENT_IPV6 == IFNET_IPV6_FRAGMENT);
    _CASSERT(IF_HWASSIST_CSUM_PARTIAL == IFNET_CSUM_PARTIAL);
    _CASSERT(IF_HWASSIST_CSUM_ZERO_INVERT == IFNET_CSUM_ZERO_INVERT);
    _CASSERT(IF_HWASSIST_VLAN_TAGGING == IFNET_VLAN_TAGGING);
    _CASSERT(IF_HWASSIST_VLAN_MTU == IFNET_VLAN_MTU);
    _CASSERT(IF_HWASSIST_TSO_V4 == IFNET_TSO_IPV4);
    _CASSERT(IF_HWASSIST_TSO_V6 == IFNET_TSO_IPV6);

    /*
     * ... as well as the mbuf checksum flags counterparts.
     */
    _CASSERT(CSUM_IP == IF_HWASSIST_CSUM_IP);
    _CASSERT(CSUM_TCP == IF_HWASSIST_CSUM_TCP);
    _CASSERT(CSUM_UDP == IF_HWASSIST_CSUM_UDP);
    _CASSERT(CSUM_IP_FRAGS == IF_HWASSIST_CSUM_IP_FRAGS);
    _CASSERT(CSUM_FRAGMENT == IF_HWASSIST_CSUM_FRAGMENT);
    _CASSERT(CSUM_TCPIPV6 == IF_HWASSIST_CSUM_TCPIPV6);
    _CASSERT(CSUM_UDPIPV6 == IF_HWASSIST_CSUM_UDPIPV6);
    _CASSERT(CSUM_FRAGMENT_IPV6 == IF_HWASSIST_CSUM_FRAGMENT_IPV6);
    _CASSERT(CSUM_PARTIAL == IF_HWASSIST_CSUM_PARTIAL);
    _CASSERT(CSUM_ZERO_INVERT == IF_HWASSIST_CSUM_ZERO_INVERT);
    _CASSERT(CSUM_VLAN_TAG_VALID == IF_HWASSIST_VLAN_TAGGING);

    /*
     * Make sure we have at least IF_LLREACH_MAXLEN in the llreach info.
     */
    _CASSERT(IF_LLREACH_MAXLEN <= IF_LLREACHINFO_ADDRLEN);
    _CASSERT(IFNET_LLREACHINFO_ADDRLEN == IF_LLREACHINFO_ADDRLEN);

    _CASSERT(IFRLOGF_DLIL == IFNET_LOGF_DLIL);
    _CASSERT(IFRLOGF_FAMILY == IFNET_LOGF_FAMILY);
    _CASSERT(IFRLOGF_DRIVER == IFNET_LOGF_DRIVER);
    _CASSERT(IFRLOGF_FIRMWARE == IFNET_LOGF_FIRMWARE);

    _CASSERT(IFRLOGCAT_CONNECTIVITY == IFNET_LOGCAT_CONNECTIVITY);
    _CASSERT(IFRLOGCAT_QUALITY == IFNET_LOGCAT_QUALITY);
    _CASSERT(IFRLOGCAT_PERFORMANCE == IFNET_LOGCAT_PERFORMANCE);

    _CASSERT(IFRTYPE_FAMILY_ANY == IFNET_FAMILY_ANY);
    _CASSERT(IFRTYPE_FAMILY_LOOPBACK == IFNET_FAMILY_LOOPBACK);
    _CASSERT(IFRTYPE_FAMILY_ETHERNET == IFNET_FAMILY_ETHERNET);
    _CASSERT(IFRTYPE_FAMILY_SLIP == IFNET_FAMILY_SLIP);
    _CASSERT(IFRTYPE_FAMILY_TUN == IFNET_FAMILY_TUN);
    _CASSERT(IFRTYPE_FAMILY_VLAN == IFNET_FAMILY_VLAN);
    _CASSERT(IFRTYPE_FAMILY_PPP == IFNET_FAMILY_PPP);
    _CASSERT(IFRTYPE_FAMILY_PVC == IFNET_FAMILY_PVC);
    _CASSERT(IFRTYPE_FAMILY_DISC == IFNET_FAMILY_DISC);
    _CASSERT(IFRTYPE_FAMILY_MDECAP == IFNET_FAMILY_MDECAP);
    _CASSERT(IFRTYPE_FAMILY_GIF == IFNET_FAMILY_GIF);
    _CASSERT(IFRTYPE_FAMILY_FAITH == IFNET_FAMILY_FAITH);
    _CASSERT(IFRTYPE_FAMILY_STF == IFNET_FAMILY_STF);
    _CASSERT(IFRTYPE_FAMILY_FIREWIRE == IFNET_FAMILY_FIREWIRE);
    _CASSERT(IFRTYPE_FAMILY_BOND == IFNET_FAMILY_BOND);
    _CASSERT(IFRTYPE_FAMILY_CELLULAR == IFNET_FAMILY_CELLULAR);

    _CASSERT(IFRTYPE_SUBFAMILY_ANY == IFNET_SUBFAMILY_ANY);
    _CASSERT(IFRTYPE_SUBFAMILY_USB == IFNET_SUBFAMILY_USB);
    _CASSERT(IFRTYPE_SUBFAMILY_BLUETOOTH == IFNET_SUBFAMILY_BLUETOOTH);
    _CASSERT(IFRTYPE_SUBFAMILY_WIFI == IFNET_SUBFAMILY_WIFI);
    _CASSERT(IFRTYPE_SUBFAMILY_THUNDERBOLT == IFNET_SUBFAMILY_THUNDERBOLT);
    _CASSERT(IFRTYPE_SUBFAMILY_RESERVED == IFNET_SUBFAMILY_RESERVED);
    _CASSERT(IFRTYPE_SUBFAMILY_INTCOPROC == IFNET_SUBFAMILY_INTCOPROC);

    _CASSERT(DLIL_MODIDLEN == IFNET_MODIDLEN);
    _CASSERT(DLIL_MODARGLEN == IFNET_MODARGLEN);

    PE_parse_boot_argn("net_affinity", &net_affinity,
        sizeof (net_affinity));

    PE_parse_boot_argn("net_rxpoll", &net_rxpoll, sizeof (net_rxpoll));

    PE_parse_boot_argn("net_rtref", &net_rtref, sizeof (net_rtref));

    PE_parse_boot_argn("ifnet_debug", &ifnet_debug, sizeof (ifnet_debug));
    dlif_size = (ifnet_debug == 0) ? sizeof (struct dlil_ifnet) :
        sizeof (struct dlil_ifnet_dbg);
    /* Enforce 64-bit alignment for dlil_ifnet structure */
    dlif_bufsize = dlif_size + sizeof (void *) + sizeof (u_int64_t);
    dlif_bufsize = P2ROUNDUP(dlif_bufsize, sizeof (u_int64_t));
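    /*
     * The extra sizeof (void *) + sizeof (u_int64_t) of headroom leaves
     * room to stash the original allocation pointer below a base that
     * has been rounded up to the next 64-bit boundary, regardless of
     * what address the zone allocator hands back (see the same pattern
     * in dlil_alloc_local_stats() above).
     */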
    dlif_zone = zinit(dlif_bufsize, DLIF_ZONE_MAX * dlif_bufsize,
        0, DLIF_ZONE_NAME);
    if (dlif_zone == NULL) {
        panic_plain("%s: failed allocating %s", __func__,
            DLIF_ZONE_NAME);
        /* NOTREACHED */
    }
    zone_change(dlif_zone, Z_EXPAND, TRUE);
    zone_change(dlif_zone, Z_CALLERACCT, FALSE);

    dlif_filt_size = sizeof (struct ifnet_filter);
    dlif_filt_zone = zinit(dlif_filt_size,
        DLIF_FILT_ZONE_MAX * dlif_filt_size, 0, DLIF_FILT_ZONE_NAME);
    if (dlif_filt_zone == NULL) {
        panic_plain("%s: failed allocating %s", __func__,
            DLIF_FILT_ZONE_NAME);
        /* NOTREACHED */
    }
    zone_change(dlif_filt_zone, Z_EXPAND, TRUE);
    zone_change(dlif_filt_zone, Z_CALLERACCT, FALSE);

    dlif_phash_size = sizeof (struct proto_hash_entry) * PROTO_HASH_SLOTS;
    dlif_phash_zone = zinit(dlif_phash_size,
        DLIF_PHASH_ZONE_MAX * dlif_phash_size, 0, DLIF_PHASH_ZONE_NAME);
    if (dlif_phash_zone == NULL) {
        panic_plain("%s: failed allocating %s", __func__,
            DLIF_PHASH_ZONE_NAME);
        /* NOTREACHED */
    }
    zone_change(dlif_phash_zone, Z_EXPAND, TRUE);
    zone_change(dlif_phash_zone, Z_CALLERACCT, FALSE);

    dlif_proto_size = sizeof (struct if_proto);
    dlif_proto_zone = zinit(dlif_proto_size,
        DLIF_PROTO_ZONE_MAX * dlif_proto_size, 0, DLIF_PROTO_ZONE_NAME);
    if (dlif_proto_zone == NULL) {
        panic_plain("%s: failed allocating %s", __func__,
            DLIF_PROTO_ZONE_NAME);
        /* NOTREACHED */
    }
    zone_change(dlif_proto_zone, Z_EXPAND, TRUE);
    zone_change(dlif_proto_zone, Z_CALLERACCT, FALSE);

    dlif_tcpstat_size = sizeof (struct tcpstat_local);
    /* Enforce 64-bit alignment for tcpstat_local structure */
    dlif_tcpstat_bufsize =
        dlif_tcpstat_size + sizeof (void *) + sizeof (u_int64_t);
    dlif_tcpstat_bufsize =
        P2ROUNDUP(dlif_tcpstat_bufsize, sizeof (u_int64_t));
    dlif_tcpstat_zone = zinit(dlif_tcpstat_bufsize,
        DLIF_TCPSTAT_ZONE_MAX * dlif_tcpstat_bufsize, 0,
        DLIF_TCPSTAT_ZONE_NAME);
    if (dlif_tcpstat_zone == NULL) {
        panic_plain("%s: failed allocating %s", __func__,
            DLIF_TCPSTAT_ZONE_NAME);
        /* NOTREACHED */
    }
    zone_change(dlif_tcpstat_zone, Z_EXPAND, TRUE);
    zone_change(dlif_tcpstat_zone, Z_CALLERACCT, FALSE);

    dlif_udpstat_size = sizeof (struct udpstat_local);
    /* Enforce 64-bit alignment for udpstat_local structure */
    dlif_udpstat_bufsize =
        dlif_udpstat_size + sizeof (void *) + sizeof (u_int64_t);
    dlif_udpstat_bufsize =
        P2ROUNDUP(dlif_udpstat_bufsize, sizeof (u_int64_t));
    dlif_udpstat_zone = zinit(dlif_udpstat_bufsize,
        DLIF_TCPSTAT_ZONE_MAX * dlif_udpstat_bufsize, 0,
        DLIF_UDPSTAT_ZONE_NAME);
    if (dlif_udpstat_zone == NULL) {
        panic_plain("%s: failed allocating %s", __func__,
            DLIF_UDPSTAT_ZONE_NAME);
        /* NOTREACHED */
    }
    zone_change(dlif_udpstat_zone, Z_EXPAND, TRUE);
    zone_change(dlif_udpstat_zone, Z_CALLERACCT, FALSE);
    ifnet_llreach_init();
    eventhandler_lists_ctxt_init(&ifnet_evhdlr_ctxt);

    TAILQ_INIT(&dlil_ifnet_head);
    TAILQ_INIT(&ifnet_head);
    TAILQ_INIT(&ifnet_detaching_head);
    TAILQ_INIT(&ifnet_ordered_head);

    /* Setup the lock groups we will use */
    dlil_grp_attributes = lck_grp_attr_alloc_init();

    dlil_lock_group = lck_grp_alloc_init("DLIL internal locks",
        dlil_grp_attributes);
    ifnet_lock_group = lck_grp_alloc_init("ifnet locks",
        dlil_grp_attributes);
    ifnet_head_lock_group = lck_grp_alloc_init("ifnet head lock",
        dlil_grp_attributes);
    ifnet_rcv_lock_group = lck_grp_alloc_init("ifnet rcv locks",
        dlil_grp_attributes);
    ifnet_snd_lock_group = lck_grp_alloc_init("ifnet snd locks",
        dlil_grp_attributes);

    /* Setup the lock attributes we will use */
    dlil_lck_attributes = lck_attr_alloc_init();

    ifnet_lock_attr = lck_attr_alloc_init();

    lck_rw_init(&ifnet_head_lock, ifnet_head_lock_group,
        dlil_lck_attributes);
    lck_mtx_init(&dlil_ifnet_lock, dlil_lock_group, dlil_lck_attributes);

    /* Setup interface flow control related items */
    lck_mtx_init(&ifnet_fc_lock, dlil_lock_group, dlil_lck_attributes);

    ifnet_fc_zone_size = sizeof (struct ifnet_fc_entry);
    ifnet_fc_zone = zinit(ifnet_fc_zone_size,
        IFNET_FC_ZONE_MAX * ifnet_fc_zone_size, 0, IFNET_FC_ZONE_NAME);
    if (ifnet_fc_zone == NULL) {
        panic_plain("%s: failed allocating %s", __func__,
            IFNET_FC_ZONE_NAME);
        /* NOTREACHED */
    }
    zone_change(ifnet_fc_zone, Z_EXPAND, TRUE);
    zone_change(ifnet_fc_zone, Z_CALLERACCT, FALSE);

    /* Initialize interface address subsystem */
    ifa_init();

#if PF
    /* Initialize the packet filter */
    pf_init();
#endif /* PF */

    /* Initialize queue algorithms */
    classq_init();

    /* Initialize packet schedulers */
    pktsched_init();

    /* Initialize flow advisory subsystem */
    flowadv_init();

    /* Initialize the pktap virtual interface */
    pktap_init();

    /* Initialize the service class to dscp map */
    net_qos_map_init();

#if DEBUG || DEVELOPMENT
    /* Run self-tests */
    dlil_verify_sum16();
#endif /* DEBUG || DEVELOPMENT */

    /* Initialize link layer table */
    lltable_glbl_init();

    /*
     * Create and start up the main DLIL input thread and the interface
     * detacher threads once everything is initialized.
     */
    dlil_create_input_thread(NULL, dlil_main_input_thread);

    if (kernel_thread_start(ifnet_detacher_thread_func,
        NULL, &thread) != KERN_SUCCESS) {
        panic_plain("%s: couldn't create detacher thread", __func__);
        /* NOTREACHED */
    }
    thread_deallocate(thread);
}
static void
if_flt_monitor_busy(struct ifnet *ifp)
{
	LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);

	++ifp->if_flt_busy;
	VERIFY(ifp->if_flt_busy != 0);
}

static void
if_flt_monitor_unbusy(struct ifnet *ifp)
{
	if_flt_monitor_leave(ifp);
}

static void
if_flt_monitor_enter(struct ifnet *ifp)
{
	LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);

	while (ifp->if_flt_busy) {
		++ifp->if_flt_waiters;
		(void) msleep(&ifp->if_flt_head, &ifp->if_flt_lock,
		    (PZERO - 1), "if_flt_monitor", NULL);
	}
	if_flt_monitor_busy(ifp);
}

static void
if_flt_monitor_leave(struct ifnet *ifp)
{
	LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);

	VERIFY(ifp->if_flt_busy != 0);
	--ifp->if_flt_busy;

	if (ifp->if_flt_busy == 0 && ifp->if_flt_waiters > 0) {
		ifp->if_flt_waiters = 0;
		wakeup(&ifp->if_flt_head);
	}
}
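
/*
 * Sketch of how the monitor helpers above are used (illustration only;
 * see dlil_interface_filters_input() below for a real caller).  Walkers
 * mark the filter list busy so a concurrent detach cannot tear entries
 * out from under them while if_flt_lock is temporarily dropped:
 *
 *	lck_mtx_lock_spin(&ifp->if_flt_lock);
 *	if_flt_monitor_busy(ifp);	// pin the list (shared)
 *	...traverse, possibly dropping/retaking if_flt_lock...
 *	if_flt_monitor_unbusy(ifp);	// unpin; wakes any waiter
 *	lck_mtx_unlock(&ifp->if_flt_lock);
 *
 * if_flt_monitor_enter(), by contrast, waits for the busy count to
 * drain to zero before claiming the list exclusively.
 */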
__private_extern__ int
dlil_attach_filter(struct ifnet *ifp, const struct iff_filter *if_filter,
    interface_filter_t *filter_ref, u_int32_t flags)
{
	int retval = 0;
	struct ifnet_filter *filter = NULL;

	ifnet_head_lock_shared();
	/* Check that the interface is in the global list */
	if (!ifnet_lookup(ifp)) {
		retval = ENXIO;
		goto done;
	}

	filter = zalloc(dlif_filt_zone);
	if (filter == NULL) {
		retval = ENOMEM;
		goto done;
	}
	bzero(filter, dlif_filt_size);

	/* refcnt held above during lookup */
	filter->filt_flags = flags;
	filter->filt_ifp = ifp;
	filter->filt_cookie = if_filter->iff_cookie;
	filter->filt_name = if_filter->iff_name;
	filter->filt_protocol = if_filter->iff_protocol;
	/*
	 * Do not install filter callbacks for internal coproc interface
	 */
	if (!IFNET_IS_INTCOPROC(ifp)) {
		filter->filt_input = if_filter->iff_input;
		filter->filt_output = if_filter->iff_output;
		filter->filt_event = if_filter->iff_event;
		filter->filt_ioctl = if_filter->iff_ioctl;
	}
	filter->filt_detached = if_filter->iff_detached;

	lck_mtx_lock(&ifp->if_flt_lock);
	if_flt_monitor_enter(ifp);

	LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
	TAILQ_INSERT_TAIL(&ifp->if_flt_head, filter, filt_next);

	if_flt_monitor_leave(ifp);
	lck_mtx_unlock(&ifp->if_flt_lock);

	*filter_ref = filter;

	/*
	 * Bump filter count and route_generation ID to let TCP
	 * know it shouldn't do TSO on this connection
	 */
	if ((filter->filt_flags & DLIL_IFF_TSO) == 0) {
		OSAddAtomic(1, &dlil_filter_disable_tso_count);
		routegenid_update();
	}
	OSIncrementAtomic64(&net_api_stats.nas_iflt_attach_count);
	INC_ATOMIC_INT64_LIM(net_api_stats.nas_iflt_attach_total);
	if ((filter->filt_flags & DLIL_IFF_INTERNAL)) {
		INC_ATOMIC_INT64_LIM(net_api_stats.nas_iflt_attach_os_total);
	}
	if (dlil_verbose) {
		printf("%s: %s filter attached\n", if_name(ifp),
		    if_filter->iff_name);
	}
done:
	ifnet_head_done();
	if (retval != 0 && ifp != NULL) {
		DLIL_PRINTF("%s: failed to attach %s (err=%d)\n",
		    if_name(ifp), if_filter->iff_name, retval);
	}
	if (retval != 0 && filter != NULL)
		zfree(dlif_filt_zone, filter);

	return (retval);
}
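
/*
 * Example (illustrative; the callback names are hypothetical): kexts
 * normally reach dlil_attach_filter() through the public iflt_attach()
 * wrapper from <net/kpi_interfacefilter.h> rather than calling it
 * directly:
 *
 *	struct iff_filter flt = {
 *		.iff_cookie   = my_cookie,
 *		.iff_name     = "com.example.filter",
 *		.iff_protocol = 0,		// match all protocols
 *		.iff_input    = my_input_fn,
 *		.iff_detached = my_detached_fn,
 *	};
 *	interface_filter_t ref;
 *	errno_t err = iflt_attach(ifp, &flt, &ref);
 */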
static int
dlil_detach_filter_internal(interface_filter_t filter, int detached)
{
	int retval = 0;

	if (detached == 0) {
		ifnet_t ifp = NULL;

		ifnet_head_lock_shared();
		TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
			interface_filter_t entry = NULL;

			lck_mtx_lock(&ifp->if_flt_lock);
			TAILQ_FOREACH(entry, &ifp->if_flt_head, filt_next) {
				if (entry != filter || entry->filt_skip)
					continue;
				/*
				 * We've found a match; since it's possible
				 * that the thread gets blocked in the monitor,
				 * we do the lock dance.  Interface should
				 * not be detached since we still have a use
				 * count held during filter attach.
				 */
				entry->filt_skip = 1;	/* skip input/output */
				lck_mtx_unlock(&ifp->if_flt_lock);
				ifnet_head_done();

				lck_mtx_lock(&ifp->if_flt_lock);
				if_flt_monitor_enter(ifp);
				LCK_MTX_ASSERT(&ifp->if_flt_lock,
				    LCK_MTX_ASSERT_OWNED);

				/* Remove the filter from the list */
				TAILQ_REMOVE(&ifp->if_flt_head, filter,
				    filt_next);

				if_flt_monitor_leave(ifp);
				lck_mtx_unlock(&ifp->if_flt_lock);
				if (dlil_verbose) {
					printf("%s: %s filter detached\n",
					    if_name(ifp), filter->filt_name);
				}
				goto destroy;
			}
			lck_mtx_unlock(&ifp->if_flt_lock);
		}
		ifnet_head_done();

		/* filter parameter is not a valid filter ref */
		retval = EINVAL;
		goto done;
	}

	if (dlil_verbose)
		printf("%s filter detached\n", filter->filt_name);

destroy:

	/* Call the detached function if there is one */
	if (filter->filt_detached)
		filter->filt_detached(filter->filt_cookie, filter->filt_ifp);

	/*
	 * Decrease filter count and route_generation ID to let TCP
	 * know it should reevaluate doing TSO or not
	 */
	if ((filter->filt_flags & DLIL_IFF_TSO) == 0) {
		OSAddAtomic(-1, &dlil_filter_disable_tso_count);
		routegenid_update();
	}

	VERIFY(OSDecrementAtomic64(&net_api_stats.nas_iflt_attach_count) > 0);

	/* Free the filter */
	zfree(dlif_filt_zone, filter);

done:
	if (retval != 0 && filter != NULL) {
		DLIL_PRINTF("failed to detach %s filter (err=%d)\n",
		    filter->filt_name, retval);
	}

	return (retval);
}
__private_extern__ void
dlil_detach_filter(interface_filter_t filter)
{
	if (filter == NULL)
		return;
	dlil_detach_filter_internal(filter, 0);
}
/*
 * Main input thread:
 *
 *   a) handles all inbound packets for lo0
 *   b) handles all inbound packets for interfaces with no dedicated
 *	input thread (e.g. anything but Ethernet/PDP or those that support
 *	opportunistic polling)
 *   c) protocol registrations
 *   d) packet injections
 */
__attribute__((noreturn))
static void
dlil_main_input_thread_func(void *v, wait_result_t w)
{
#pragma unused(w)
	struct dlil_main_threading_info *inpm = v;
	struct dlil_threading_info *inp = v;

	VERIFY(inp == dlil_main_input_thread);
	VERIFY(inp->ifp == NULL);
	VERIFY(inp->mode == IFNET_MODEL_INPUT_POLL_OFF);

	while (1) {
		struct mbuf *m = NULL, *m_loop = NULL;
		u_int32_t m_cnt, m_cnt_loop;
		boolean_t proto_req;

		lck_mtx_lock_spin(&inp->input_lck);

		/* Wait until there is work to be done */
		while (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) {
			inp->input_waiting &= ~DLIL_INPUT_RUNNING;
			(void) msleep(&inp->input_waiting, &inp->input_lck,
			    (PZERO - 1) | PSPIN, inp->input_name, NULL);
		}

		inp->input_waiting |= DLIL_INPUT_RUNNING;
		inp->input_waiting &= ~DLIL_INPUT_WAITING;

		/* Main input thread cannot be terminated */
		VERIFY(!(inp->input_waiting & DLIL_INPUT_TERMINATE));

		proto_req = (inp->input_waiting &
		    (DLIL_PROTO_WAITING | DLIL_PROTO_REGISTER));

		/* Packets for non-dedicated interfaces other than lo0 */
		m_cnt = qlen(&inp->rcvq_pkts);
		m = _getq_all(&inp->rcvq_pkts, NULL, NULL, NULL);

		/* Packets exclusive to lo0 */
		m_cnt_loop = qlen(&inpm->lo_rcvq_pkts);
		m_loop = _getq_all(&inpm->lo_rcvq_pkts, NULL, NULL, NULL);

		inp->wtot = 0;

		lck_mtx_unlock(&inp->input_lck);

		/*
		 * NOTE warning %%% attention !!!!
		 * We should think about putting some thread starvation
		 * safeguards if we deal with long chains of packets.
		 */
		if (m_loop != NULL)
			dlil_input_packet_list_extended(lo_ifp, m_loop,
			    m_cnt_loop, inp->mode);

		if (m != NULL)
			dlil_input_packet_list_extended(NULL, m,
			    m_cnt, inp->mode);

		if (proto_req)
			proto_input_run();
	}

	/* NOTREACHED */
	VERIFY(0);	/* we should never get here */
}
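
/*
 * A note on the wait loop above, which encodes the flag protocol shared
 * by all DLIL input threads: DLIL_INPUT_RUNNING is cleared before
 * sleeping and set again on wakeup, while producers in
 * dlil_input_handler() set DLIL_INPUT_WAITING and call wakeup_one()
 * only when RUNNING is clear, so an already-busy thread is never
 * signalled redundantly.
 */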
/*
 * Input thread for interfaces with legacy input model.
 */
static void
dlil_input_thread_func(void *v, wait_result_t w)
{
#pragma unused(w)
	char thread_name[MAXTHREADNAMESIZE];
	struct dlil_threading_info *inp = v;
	struct ifnet *ifp = inp->ifp;

	/* Construct the name for this thread, and then apply it. */
	bzero(thread_name, sizeof(thread_name));
	snprintf(thread_name, sizeof(thread_name), "dlil_input_%s", ifp->if_xname);
	thread_set_thread_name(inp->input_thr, thread_name);

	VERIFY(inp != dlil_main_input_thread);
	VERIFY(ifp != NULL);
	VERIFY(!(ifp->if_eflags & IFEF_RXPOLL) || !net_rxpoll);
	VERIFY(inp->mode == IFNET_MODEL_INPUT_POLL_OFF);

	while (1) {
		struct mbuf *m = NULL;
		u_int32_t m_cnt;

		lck_mtx_lock_spin(&inp->input_lck);

		/* Wait until there is work to be done */
		while (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) {
			inp->input_waiting &= ~DLIL_INPUT_RUNNING;
			(void) msleep(&inp->input_waiting, &inp->input_lck,
			    (PZERO - 1) | PSPIN, inp->input_name, NULL);
		}

		inp->input_waiting |= DLIL_INPUT_RUNNING;
		inp->input_waiting &= ~DLIL_INPUT_WAITING;

		/*
		 * Protocol registration and injection must always use
		 * the main input thread; in theory the latter could use
		 * the input thread corresponding to the interface the
		 * packet arrived on, but that requires knowing the
		 * interface in advance (and the benefits might not be
		 * worth the trouble.)
		 */
		VERIFY(!(inp->input_waiting &
		    (DLIL_PROTO_WAITING|DLIL_PROTO_REGISTER)));

		/* Packets for this interface */
		m_cnt = qlen(&inp->rcvq_pkts);
		m = _getq_all(&inp->rcvq_pkts, NULL, NULL, NULL);

		if (inp->input_waiting & DLIL_INPUT_TERMINATE) {
			lck_mtx_unlock(&inp->input_lck);

			/* Free up pending packets */
			if (m != NULL)
				mbuf_freem_list(m);

			dlil_terminate_input_thread(inp);
			/* NOTREACHED */
			return;
		}

		inp->wtot = 0;

		dlil_input_stats_sync(ifp, inp);

		lck_mtx_unlock(&inp->input_lck);

		/*
		 * NOTE warning %%% attention !!!!
		 * We should think about putting some thread starvation
		 * safeguards if we deal with long chains of packets.
		 */
		if (m != NULL)
			dlil_input_packet_list_extended(NULL, m,
			    m_cnt, inp->mode);
	}

	/* NOTREACHED */
	VERIFY(0);	/* we should never get here */
}
/*
 * Input thread for interfaces with opportunistic polling input model.
 */
static void
dlil_rxpoll_input_thread_func(void *v, wait_result_t w)
{
#pragma unused(w)
	struct dlil_threading_info *inp = v;
	struct ifnet *ifp = inp->ifp;
	struct timespec ts;

	VERIFY(inp != dlil_main_input_thread);
	VERIFY(ifp != NULL && (ifp->if_eflags & IFEF_RXPOLL));

	while (1) {
		struct mbuf *m = NULL;
		u_int32_t m_cnt, m_size, poll_req = 0;
		ifnet_model_t mode;
		struct timespec now, delta;
		u_int64_t ival;

		lck_mtx_lock_spin(&inp->input_lck);

		if ((ival = inp->rxpoll_ival) < IF_RXPOLL_INTERVALTIME_MIN)
			ival = IF_RXPOLL_INTERVALTIME_MIN;

		/* Link parameters changed? */
		if (ifp->if_poll_update != 0) {
			ifp->if_poll_update = 0;
			(void) dlil_rxpoll_set_params(ifp, NULL, TRUE);
		}

		/* Current operating mode */
		mode = inp->mode;

		/* Wait until there is work to be done */
		while (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) {
			inp->input_waiting &= ~DLIL_INPUT_RUNNING;
			(void) msleep(&inp->input_waiting, &inp->input_lck,
			    (PZERO - 1) | PSPIN, inp->input_name, NULL);
		}

		inp->input_waiting |= DLIL_INPUT_RUNNING;
		inp->input_waiting &= ~DLIL_INPUT_WAITING;

		/*
		 * Protocol registration and injection must always use
		 * the main input thread; in theory the latter could use
		 * the input thread corresponding to the interface the
		 * packet arrived on, but that requires knowing the
		 * interface in advance (and the benefits might not be
		 * worth the trouble.)
		 */
		VERIFY(!(inp->input_waiting &
		    (DLIL_PROTO_WAITING|DLIL_PROTO_REGISTER)));

		if (inp->input_waiting & DLIL_INPUT_TERMINATE) {
			/* Free up pending packets */
			lck_mtx_convert_spin(&inp->input_lck);
			_flushq(&inp->rcvq_pkts);
			if (inp->input_mit_tcall != NULL) {
				if (thread_call_isactive(inp->input_mit_tcall))
					thread_call_cancel(inp->input_mit_tcall);
			}
			lck_mtx_unlock(&inp->input_lck);

			dlil_terminate_input_thread(inp);
			/* NOTREACHED */
			return;
		}

		/* Total count of all packets */
		m_cnt = qlen(&inp->rcvq_pkts);

		/* Total bytes of all packets */
		m_size = qsize(&inp->rcvq_pkts);

		/* Packets for this interface */
		m = _getq_all(&inp->rcvq_pkts, NULL, NULL, NULL);
		VERIFY(m != NULL || m_cnt == 0);

		nanouptime(&now);
		if (!net_timerisset(&inp->sample_lasttime))
			*(&inp->sample_lasttime) = *(&now);

		net_timersub(&now, &inp->sample_lasttime, &delta);
		if (if_rxpoll && net_timerisset(&inp->sample_holdtime)) {
			u_int32_t ptot, btot;

			/* Accumulate statistics for current sampling */
			PKTCNTR_ADD(&inp->sstats, m_cnt, m_size);

			if (net_timercmp(&delta, &inp->sample_holdtime, <))
				goto skip;

			*(&inp->sample_lasttime) = *(&now);

			/* Calculate min/max of inbound bytes */
			btot = (u_int32_t)inp->sstats.bytes;
			if (inp->rxpoll_bmin == 0 || inp->rxpoll_bmin > btot)
				inp->rxpoll_bmin = btot;
			if (btot > inp->rxpoll_bmax)
				inp->rxpoll_bmax = btot;

			/* Calculate EWMA of inbound bytes */
			DLIL_EWMA(inp->rxpoll_bavg, btot, if_rxpoll_decay);

			/* Calculate min/max of inbound packets */
			ptot = (u_int32_t)inp->sstats.packets;
			if (inp->rxpoll_pmin == 0 || inp->rxpoll_pmin > ptot)
				inp->rxpoll_pmin = ptot;
			if (ptot > inp->rxpoll_pmax)
				inp->rxpoll_pmax = ptot;

			/* Calculate EWMA of inbound packets */
			DLIL_EWMA(inp->rxpoll_pavg, ptot, if_rxpoll_decay);

			/* Reset sampling statistics */
			PKTCNTR_CLEAR(&inp->sstats);

			/* Calculate EWMA of wakeup requests */
			DLIL_EWMA(inp->rxpoll_wavg, inp->wtot, if_rxpoll_decay);
			inp->wtot = 0;

			if (dlil_verbose) {
				if (!net_timerisset(&inp->dbg_lasttime))
					*(&inp->dbg_lasttime) = *(&now);
				net_timersub(&now, &inp->dbg_lasttime, &delta);
				if (net_timercmp(&delta, &dlil_dbgrate, >=)) {
					*(&inp->dbg_lasttime) = *(&now);
					printf("%s: [%s] pkts avg %d max %d "
					    "limits [%d/%d], wreq avg %d "
					    "limits [%d/%d], bytes avg %d "
					    "limits [%d/%d]\n", if_name(ifp),
					    (inp->mode ==
					    IFNET_MODEL_INPUT_POLL_ON) ?
					    "ON" : "OFF", inp->rxpoll_pavg,
					    inp->rxpoll_pmax,
					    inp->rxpoll_plowat,
					    inp->rxpoll_phiwat,
					    inp->rxpoll_wavg,
					    inp->rxpoll_wlowat,
					    inp->rxpoll_whiwat,
					    inp->rxpoll_bavg,
					    inp->rxpoll_blowat,
					    inp->rxpoll_bhiwat);
				}
			}

			/* Perform mode transition, if necessary */
			if (!net_timerisset(&inp->mode_lasttime))
				*(&inp->mode_lasttime) = *(&now);

			net_timersub(&now, &inp->mode_lasttime, &delta);
			if (net_timercmp(&delta, &inp->mode_holdtime, <))
				goto skip;

			if (inp->rxpoll_pavg <= inp->rxpoll_plowat &&
			    inp->rxpoll_bavg <= inp->rxpoll_blowat &&
			    inp->mode != IFNET_MODEL_INPUT_POLL_OFF) {
				mode = IFNET_MODEL_INPUT_POLL_OFF;
			} else if (inp->rxpoll_pavg >= inp->rxpoll_phiwat &&
			    (inp->rxpoll_bavg >= inp->rxpoll_bhiwat ||
			    inp->rxpoll_wavg >= inp->rxpoll_whiwat) &&
			    inp->mode != IFNET_MODEL_INPUT_POLL_ON) {
				mode = IFNET_MODEL_INPUT_POLL_ON;
			}

			if (mode != inp->mode) {
				inp->mode = mode;
				*(&inp->mode_lasttime) = *(&now);
				poll_req++;
			}
		}
skip:
		dlil_input_stats_sync(ifp, inp);

		lck_mtx_unlock(&inp->input_lck);

		/*
		 * If there's a mode change and interface is still attached,
		 * perform a downcall to the driver for the new mode.  Also
		 * hold an IO refcnt on the interface to prevent it from
		 * being detached (will be released below.)
		 */
		if (poll_req != 0 && ifnet_is_attached(ifp, 1)) {
			struct ifnet_model_params p = { mode, { 0 } };
			errno_t err;

			if (dlil_verbose) {
				printf("%s: polling is now %s, "
				    "pkts avg %d max %d limits [%d/%d], "
				    "wreq avg %d limits [%d/%d], "
				    "bytes avg %d limits [%d/%d]\n",
				    if_name(ifp),
				    (mode == IFNET_MODEL_INPUT_POLL_ON) ?
				    "ON" : "OFF", inp->rxpoll_pavg,
				    inp->rxpoll_pmax, inp->rxpoll_plowat,
				    inp->rxpoll_phiwat, inp->rxpoll_wavg,
				    inp->rxpoll_wlowat, inp->rxpoll_whiwat,
				    inp->rxpoll_bavg, inp->rxpoll_blowat,
				    inp->rxpoll_bhiwat);
			}

			if ((err = ((*ifp->if_input_ctl)(ifp,
			    IFNET_CTL_SET_INPUT_MODEL, sizeof (p), &p))) != 0) {
				printf("%s: error setting polling mode "
				    "to %s (%d)\n", if_name(ifp),
				    (mode == IFNET_MODEL_INPUT_POLL_ON) ?
				    "ON" : "OFF", err);
			}

			switch (mode) {
			case IFNET_MODEL_INPUT_POLL_OFF:
				ifnet_set_poll_cycle(ifp, NULL);
				inp->rxpoll_offreq++;
				if (err != 0)
					inp->rxpoll_offerr++;
				break;

			case IFNET_MODEL_INPUT_POLL_ON:
				net_nsectimer(&ival, &ts);
				ifnet_set_poll_cycle(ifp, &ts);
				ifnet_poll(ifp);
				inp->rxpoll_onreq++;
				if (err != 0)
					inp->rxpoll_onerr++;
				break;

			default:
				VERIFY(0);
				/* NOTREACHED */
			}

			/* Release the IO refcnt */
			ifnet_decr_iorefcnt(ifp);
		}

		/*
		 * NOTE warning %%% attention !!!!
		 * We should think about putting some thread starvation
		 * safeguards if we deal with long chains of packets.
		 */
		if (m != NULL)
			dlil_input_packet_list_extended(NULL, m, m_cnt, mode);
	}

	/* NOTREACHED */
	VERIFY(0);	/* we should never get here */
}
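
/*
 * A note on DLIL_EWMA used above: it maintains an exponentially
 * weighted moving average of the form
 * avg = (avg * (2^d - 1) + sample) / 2^d (this form is an assumption
 * about the macro's definition, stated here for illustration).  With
 * made-up numbers and decay d == 2, a prior packet average of 100 and
 * a fresh sample of 20 give (3 * 100 + 20) / 4 = 80, so the average
 * tracks sustained trends while damping single outlier samples.
 */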
/*
 * Must be called on an attached ifnet (caller is expected to check.)
 * Caller may pass NULL for poll parameters to indicate "auto-tuning."
 */
errno_t
dlil_rxpoll_set_params(struct ifnet *ifp, struct ifnet_poll_params *p,
    boolean_t locked)
{
	struct dlil_threading_info *inp;
	u_int64_t sample_holdtime, inbw;

	VERIFY(ifp != NULL);
	if (!(ifp->if_eflags & IFEF_RXPOLL) || (inp = ifp->if_inp) == NULL)
		return (ENXIO);

	if (p != NULL) {
		if ((p->packets_lowat == 0 && p->packets_hiwat != 0) ||
		    (p->packets_lowat != 0 && p->packets_hiwat == 0))
			return (EINVAL);
		if (p->packets_lowat != 0 &&	/* hiwat must be non-zero */
		    p->packets_lowat >= p->packets_hiwat)
			return (EINVAL);
		if ((p->bytes_lowat == 0 && p->bytes_hiwat != 0) ||
		    (p->bytes_lowat != 0 && p->bytes_hiwat == 0))
			return (EINVAL);
		if (p->bytes_lowat != 0 &&	/* hiwat must be non-zero */
		    p->bytes_lowat >= p->bytes_hiwat)
			return (EINVAL);
		if (p->interval_time != 0 &&
		    p->interval_time < IF_RXPOLL_INTERVALTIME_MIN)
			p->interval_time = IF_RXPOLL_INTERVALTIME_MIN;
	}

	if (!locked)
		lck_mtx_lock(&inp->input_lck);

	LCK_MTX_ASSERT(&inp->input_lck, LCK_MTX_ASSERT_OWNED);

	/*
	 * Normally, we'd reset the parameters to the auto-tuned values
	 * if the input thread detects a change in link rate.  If the
	 * driver provides its own parameters right after a link rate
	 * changes, but before the input thread gets to run, we want to
	 * make sure to keep the driver's values.  Clearing if_poll_update
	 * will achieve that.
	 */
	if (p != NULL && !locked && ifp->if_poll_update != 0)
		ifp->if_poll_update = 0;

	if ((inbw = ifnet_input_linkrate(ifp)) == 0 && p == NULL) {
		sample_holdtime = 0;	/* polling is disabled */
		inp->rxpoll_wlowat = inp->rxpoll_plowat =
		    inp->rxpoll_blowat = 0;
		inp->rxpoll_whiwat = inp->rxpoll_phiwat =
		    inp->rxpoll_bhiwat = (u_int32_t)-1;
		inp->rxpoll_plim = 0;
		inp->rxpoll_ival = IF_RXPOLL_INTERVALTIME_MIN;
	} else {
		u_int32_t plowat, phiwat, blowat, bhiwat, plim;
		u_int64_t ival;
		unsigned int n, i;

		for (n = 0, i = 0; rxpoll_tbl[i].speed != 0; i++) {
			if (inbw < rxpoll_tbl[i].speed)
				break;
			n = i;
		}

		/* auto-tune if caller didn't specify a value */
		plowat = ((p == NULL || p->packets_lowat == 0) ?
		    rxpoll_tbl[n].plowat : p->packets_lowat);
		phiwat = ((p == NULL || p->packets_hiwat == 0) ?
		    rxpoll_tbl[n].phiwat : p->packets_hiwat);
		blowat = ((p == NULL || p->bytes_lowat == 0) ?
		    rxpoll_tbl[n].blowat : p->bytes_lowat);
		bhiwat = ((p == NULL || p->bytes_hiwat == 0) ?
		    rxpoll_tbl[n].bhiwat : p->bytes_hiwat);
		plim = ((p == NULL || p->packets_limit == 0) ?
		    if_rxpoll_max : p->packets_limit);
		ival = ((p == NULL || p->interval_time == 0) ?
		    if_rxpoll_interval_time : p->interval_time);

		VERIFY(plowat != 0 && phiwat != 0);
		VERIFY(blowat != 0 && bhiwat != 0);
		VERIFY(ival >= IF_RXPOLL_INTERVALTIME_MIN);

		sample_holdtime = if_rxpoll_sample_holdtime;
		inp->rxpoll_wlowat = if_rxpoll_wlowat;
		inp->rxpoll_whiwat = if_rxpoll_whiwat;
		inp->rxpoll_plowat = plowat;
		inp->rxpoll_phiwat = phiwat;
		inp->rxpoll_blowat = blowat;
		inp->rxpoll_bhiwat = bhiwat;
		inp->rxpoll_plim = plim;
		inp->rxpoll_ival = ival;
	}

	net_nsectimer(&if_rxpoll_mode_holdtime, &inp->mode_holdtime);
	net_nsectimer(&sample_holdtime, &inp->sample_holdtime);

	if (dlil_verbose) {
		printf("%s: speed %llu bps, sample per %llu nsec, "
		    "poll interval %llu nsec, pkts per poll %u, "
		    "pkt limits [%u/%u], wreq limits [%u/%u], "
		    "bytes limits [%u/%u]\n", if_name(ifp),
		    inbw, sample_holdtime, inp->rxpoll_ival, inp->rxpoll_plim,
		    inp->rxpoll_plowat, inp->rxpoll_phiwat, inp->rxpoll_wlowat,
		    inp->rxpoll_whiwat, inp->rxpoll_blowat, inp->rxpoll_bhiwat);
	}

	if (!locked)
		lck_mtx_unlock(&inp->input_lck);

	return (0);
}
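
/*
 * Example (illustrative values): a driver that prefers a fixed polling
 * budget over auto-tuning can pass explicit parameters; any field left
 * at zero keeps its auto-tuned value, per the logic above:
 *
 *	struct ifnet_poll_params p;
 *	bzero(&p, sizeof (p));
 *	p.packets_limit = 64;			// max pkts per poll
 *	p.interval_time = 1 * 1000 * 1000;	// 1 msec, in nsec
 *	(void) dlil_rxpoll_set_params(ifp, &p, FALSE);
 */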
/*
 * Must be called on an attached ifnet (caller is expected to check.)
 */
errno_t
dlil_rxpoll_get_params(struct ifnet *ifp, struct ifnet_poll_params *p)
{
	struct dlil_threading_info *inp;

	VERIFY(ifp != NULL && p != NULL);
	if (!(ifp->if_eflags & IFEF_RXPOLL) || (inp = ifp->if_inp) == NULL)
		return (ENXIO);

	bzero(p, sizeof (*p));

	lck_mtx_lock(&inp->input_lck);
	p->packets_limit = inp->rxpoll_plim;
	p->packets_lowat = inp->rxpoll_plowat;
	p->packets_hiwat = inp->rxpoll_phiwat;
	p->bytes_lowat = inp->rxpoll_blowat;
	p->bytes_hiwat = inp->rxpoll_bhiwat;
	p->interval_time = inp->rxpoll_ival;
	lck_mtx_unlock(&inp->input_lck);

	return (0);
}
errno_t
ifnet_input(struct ifnet *ifp, struct mbuf *m_head,
    const struct ifnet_stat_increment_param *s)
{
	return (ifnet_input_common(ifp, m_head, NULL, s, FALSE, FALSE));
}

errno_t
ifnet_input_extended(struct ifnet *ifp, struct mbuf *m_head,
    struct mbuf *m_tail, const struct ifnet_stat_increment_param *s)
{
	return (ifnet_input_common(ifp, m_head, m_tail, s, TRUE, FALSE));
}

static errno_t
ifnet_input_common(struct ifnet *ifp, struct mbuf *m_head, struct mbuf *m_tail,
    const struct ifnet_stat_increment_param *s, boolean_t ext, boolean_t poll)
{
	dlil_input_func input_func;
	struct ifnet_stat_increment_param _s;
	u_int32_t m_cnt = 0, m_size = 0;
	struct mbuf *last;
	errno_t err = 0;

	if ((m_head == NULL && !poll) || (s == NULL && ext)) {
		if (m_head != NULL)
			mbuf_freem_list(m_head);
		return (EINVAL);
	}

	VERIFY(m_head != NULL || (s == NULL && m_tail == NULL && !ext && poll));
	VERIFY(m_tail == NULL || ext);
	VERIFY(s != NULL || !ext);

	/*
	 * Drop the packet(s) if the parameters are invalid, or if the
	 * interface is no longer attached; else hold an IO refcnt to
	 * prevent it from being detached (will be released below.)
	 */
	if (ifp == NULL || (ifp != lo_ifp && !ifnet_is_attached(ifp, 1))) {
		if (m_head != NULL)
			mbuf_freem_list(m_head);
		return (EINVAL);
	}

	input_func = ifp->if_input_dlil;
	VERIFY(input_func != NULL);

	if (m_tail == NULL) {
		last = m_head;
		while (m_head != NULL) {
#if IFNET_INPUT_SANITY_CHK
			if (dlil_input_sanity_check != 0)
				DLIL_INPUT_CHECK(last, ifp);
#endif /* IFNET_INPUT_SANITY_CHK */
			m_cnt++;
			m_size += m_length(last);
			if (mbuf_nextpkt(last) == NULL)
				break;
			last = mbuf_nextpkt(last);
		}
		m_tail = last;
	} else {
#if IFNET_INPUT_SANITY_CHK
		if (dlil_input_sanity_check != 0) {
			last = m_head;
			while (1) {
				DLIL_INPUT_CHECK(last, ifp);
				m_cnt++;
				m_size += m_length(last);
				if (mbuf_nextpkt(last) == NULL)
					break;
				last = mbuf_nextpkt(last);
			}
		} else {
			m_cnt = s->packets_in;
			m_size = s->bytes_in;
			last = m_tail;
		}
#else
		m_cnt = s->packets_in;
		m_size = s->bytes_in;
		last = m_tail;
#endif /* IFNET_INPUT_SANITY_CHK */
	}

	if (last != m_tail) {
		panic_plain("%s: invalid input packet chain for %s, "
		    "tail mbuf %p instead of %p\n", __func__, if_name(ifp),
		    m_tail, last);
	}

	/*
	 * Assert packet count only for the extended variant, for backwards
	 * compatibility, since this came directly from the device driver.
	 * Relax this assertion for input bytes, as the driver may have
	 * included the link-layer headers in the computation; hence
	 * m_size is just an approximation.
	 */
	if (ext && s->packets_in != m_cnt) {
		panic_plain("%s: input packet count mismatch for %s, "
		    "%d instead of %d\n", __func__, if_name(ifp),
		    s->packets_in, m_cnt);
	}

	bzero(&_s, sizeof (_s));
	if (ext) {
		_s = *s;
	}
	s = &_s;
	_s.packets_in = m_cnt;
	_s.bytes_in = m_size;

	err = (*input_func)(ifp, m_head, m_tail, s, poll, current_thread());

	if (ifp != lo_ifp) {
		/* Release the IO refcnt */
		ifnet_decr_iorefcnt(ifp);
	}

	return (err);
}
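
/*
 * Example (illustrative; the driver-side counters are hypothetical): a
 * driver handing up a chain of pkt_cnt packets with precomputed stats
 * would use the extended variant, which enters the common routine above
 * with ext == TRUE:
 *
 *	struct ifnet_stat_increment_param s;
 *	bzero(&s, sizeof (s));
 *	s.packets_in = pkt_cnt;
 *	s.bytes_in = pkt_bytes;
 *	(void) ifnet_input_extended(ifp, m_head, m_tail, &s);
 */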
errno_t
dlil_output_handler(struct ifnet *ifp, struct mbuf *m)
{
	return (ifp->if_output(ifp, m));
}
errno_t
dlil_input_handler(struct ifnet *ifp, struct mbuf *m_head,
    struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
    boolean_t poll, struct thread *tp)
{
	struct dlil_threading_info *inp;
	u_int32_t m_cnt = s->packets_in;
	u_int32_t m_size = s->bytes_in;

	if ((inp = ifp->if_inp) == NULL)
		inp = dlil_main_input_thread;

	/*
	 * If there is a matching DLIL input thread associated with an
	 * affinity set, associate this thread with the same set.  We
	 * will only do this once.
	 */
	lck_mtx_lock_spin(&inp->input_lck);
	if (inp != dlil_main_input_thread && inp->net_affinity && tp != NULL &&
	    ((!poll && inp->wloop_thr == THREAD_NULL) ||
	    (poll && inp->poll_thr == THREAD_NULL))) {
		u_int32_t tag = inp->tag;

		if (poll) {
			VERIFY(inp->poll_thr == THREAD_NULL);
			inp->poll_thr = tp;
		} else {
			VERIFY(inp->wloop_thr == THREAD_NULL);
			inp->wloop_thr = tp;
		}
		lck_mtx_unlock(&inp->input_lck);

		/* Associate the current thread with the new affinity tag */
		(void) dlil_affinity_set(tp, tag);

		/*
		 * Take a reference on the current thread; during detach,
		 * we will need to refer to it in order to tear down its
		 * affinity set.
		 */
		thread_reference(tp);
		lck_mtx_lock_spin(&inp->input_lck);
	}

	VERIFY(m_head != NULL || (m_tail == NULL && m_cnt == 0));

	/*
	 * Because of loopbacked multicast we cannot stuff the ifp in
	 * the rcvif of the packet header: loopback (lo0) packets use a
	 * dedicated list so that we can later associate them with lo_ifp
	 * on their way up the stack.  Packets for other interfaces without
	 * dedicated input threads go to the regular list.
	 */
	if (m_head != NULL) {
		if (inp == dlil_main_input_thread && ifp == lo_ifp) {
			struct dlil_main_threading_info *inpm =
			    (struct dlil_main_threading_info *)inp;
			_addq_multi(&inpm->lo_rcvq_pkts, m_head, m_tail,
			    m_cnt, m_size);
		} else {
			_addq_multi(&inp->rcvq_pkts, m_head, m_tail,
			    m_cnt, m_size);
		}
	}

#if IFNET_INPUT_SANITY_CHK
	if (dlil_input_sanity_check != 0) {
		u_int32_t count;
		struct mbuf *m0;

		for (m0 = m_head, count = 0; m0; m0 = mbuf_nextpkt(m0))
			count++;

		if (count != m_cnt) {
			panic_plain("%s: invalid packet count %d "
			    "(expected %d)\n", if_name(ifp),
			    count, m_cnt);
			/* NOTREACHED */
		}

		inp->input_mbuf_cnt += m_cnt;
	}
#endif /* IFNET_INPUT_SANITY_CHK */

	dlil_input_stats_add(s, inp, poll);
	/*
	 * If we're using the main input thread, synchronize the
	 * stats now since we have the interface context.  All
	 * other cases involving dedicated input threads will
	 * have their stats synchronized there.
	 */
	if (inp == dlil_main_input_thread)
		dlil_input_stats_sync(ifp, inp);

	if (qlen(&inp->rcvq_pkts) >= dlil_rcv_mit_pkts_min &&
	    qlen(&inp->rcvq_pkts) < dlil_rcv_mit_pkts_max &&
	    (ifp->if_family == IFNET_FAMILY_ETHERNET ||
	    ifp->if_type == IFT_CELLULAR)) {
		if (!thread_call_isactive(inp->input_mit_tcall)) {
			uint64_t deadline;
			clock_interval_to_deadline(dlil_rcv_mit_interval,
			    1, &deadline);
			(void) thread_call_enter_delayed(
			    inp->input_mit_tcall, deadline);
		}
	} else {
		inp->input_waiting |= DLIL_INPUT_WAITING;
		if (!(inp->input_waiting & DLIL_INPUT_RUNNING)) {
			inp->wtot++;
			wakeup_one((caddr_t)&inp->input_waiting);
		}
	}
	lck_mtx_unlock(&inp->input_lck);

	return (0);
}
static void
ifnet_start_common(struct ifnet *ifp, boolean_t resetfc)
{
	if (!(ifp->if_eflags & IFEF_TXSTART))
		return;
	/*
	 * If the starter thread is inactive, signal it to do work,
	 * unless the interface is being flow controlled from below,
	 * e.g. a virtual interface being flow controlled by a real
	 * network interface beneath it, or it's been disabled via
	 * a call to ifnet_disable_output().
	 */
	lck_mtx_lock_spin(&ifp->if_start_lock);
	if (resetfc) {
		ifp->if_start_flags &= ~IFSF_FLOW_CONTROLLED;
	} else if (ifp->if_start_flags & IFSF_FLOW_CONTROLLED) {
		lck_mtx_unlock(&ifp->if_start_lock);
		return;
	}
	ifp->if_start_req++;
	if (!ifp->if_start_active && ifp->if_start_thread != THREAD_NULL &&
	    (resetfc || !(ifp->if_eflags & IFEF_ENQUEUE_MULTI) ||
	    IFCQ_LEN(&ifp->if_snd) >= ifp->if_start_delay_qlen ||
	    ifp->if_start_delayed == 0)) {
		(void) thread_wakeup_thread((caddr_t)&ifp->if_start_thread,
		    ifp->if_start_thread);
	}
	lck_mtx_unlock(&ifp->if_start_lock);
}

void
ifnet_start(struct ifnet *ifp)
{
	ifnet_start_common(ifp, FALSE);
}
static void
ifnet_start_thread_fn(void *v, wait_result_t w)
{
#pragma unused(w)
	struct ifnet *ifp = v;
	char ifname[IFNAMSIZ + 1];
	char thread_name[MAXTHREADNAMESIZE];
	struct timespec *ts = NULL;
	struct ifclassq *ifq = &ifp->if_snd;
	struct timespec delay_start_ts;

	/* Construct the name for this thread, and then apply it. */
	bzero(thread_name, sizeof(thread_name));
	(void) snprintf(thread_name, sizeof (thread_name),
	    "ifnet_start_%s", ifp->if_xname);
	thread_set_thread_name(ifp->if_start_thread, thread_name);

	/*
	 * Treat the dedicated starter thread for lo0 as equivalent to
	 * the driver workloop thread; if net_affinity is enabled for
	 * the main input thread, associate this starter thread to it
	 * by binding them with the same affinity tag.  This is done
	 * only once (as we only have one lo_ifp which never goes away.)
	 */
	if (ifp == lo_ifp) {
		struct dlil_threading_info *inp = dlil_main_input_thread;
		struct thread *tp = current_thread();

		lck_mtx_lock(&inp->input_lck);
		if (inp->net_affinity) {
			u_int32_t tag = inp->tag;

			VERIFY(inp->wloop_thr == THREAD_NULL);
			VERIFY(inp->poll_thr == THREAD_NULL);
			inp->wloop_thr = tp;
			lck_mtx_unlock(&inp->input_lck);

			/* Associate this thread with the affinity tag */
			(void) dlil_affinity_set(tp, tag);
		} else {
			lck_mtx_unlock(&inp->input_lck);
		}
	}

	(void) snprintf(ifname, sizeof (ifname), "%s_starter", if_name(ifp));

	lck_mtx_lock_spin(&ifp->if_start_lock);

	for (;;) {
		if (ifp->if_start_thread != NULL) {
			(void) msleep(&ifp->if_start_thread,
			    &ifp->if_start_lock,
			    (PZERO - 1) | PSPIN, ifname, ts);
		}
		/* interface is detached? */
		if (ifp->if_start_thread == THREAD_NULL) {
			ifnet_set_start_cycle(ifp, NULL);
			lck_mtx_unlock(&ifp->if_start_lock);
			ifnet_purge(ifp);

			if (dlil_verbose) {
				printf("%s: starter thread terminated\n",
				    if_name(ifp));
			}

			/* for the extra refcnt from kernel_thread_start() */
			thread_deallocate(current_thread());
			/* this is the end */
			thread_terminate(current_thread());
			/* NOTREACHED */
			return;
		}

		ifp->if_start_active = 1;

		for (;;) {
			u_int32_t req = ifp->if_start_req;
			if (!IFCQ_IS_EMPTY(ifq) &&
			    (ifp->if_eflags & IFEF_ENQUEUE_MULTI) &&
			    ifp->if_start_delayed == 0 &&
			    IFCQ_LEN(ifq) < ifp->if_start_delay_qlen &&
			    (ifp->if_eflags & IFEF_DELAY_START)) {
				ifp->if_start_delayed = 1;
				ifnet_start_delayed++;
				break;
			} else {
				ifp->if_start_delayed = 0;
			}
			lck_mtx_unlock(&ifp->if_start_lock);

			/*
			 * If no longer attached, don't call start because ifp
			 * is being destroyed; else hold an IO refcnt to
			 * prevent the interface from being detached (will be
			 * released below.)
			 */
			if (!ifnet_is_attached(ifp, 1)) {
				lck_mtx_lock_spin(&ifp->if_start_lock);
				break;
			}

			/* invoke the driver's start routine */
			((*ifp->if_start)(ifp));

			/*
			 * Release the io ref count taken by ifnet_is_attached.
			 */
			ifnet_decr_iorefcnt(ifp);

			lck_mtx_lock_spin(&ifp->if_start_lock);

			/*
			 * If there's no pending request or if the
			 * interface has been disabled, we're done.
			 */
			if (req == ifp->if_start_req ||
			    (ifp->if_start_flags & IFSF_FLOW_CONTROLLED)) {
				break;
			}
		}

		ifp->if_start_req = 0;
		ifp->if_start_active = 0;

		/*
		 * Wakeup N ns from now if rate-controlled by TBR, and if
		 * there are still packets in the send queue which haven't
		 * been dequeued so far; else sleep indefinitely (ts = NULL)
		 * until ifnet_start() is called again.
		 */
		ts = ((IFCQ_TBR_IS_ENABLED(ifq) && !IFCQ_IS_EMPTY(ifq)) ?
		    &ifp->if_start_cycle : NULL);

		if (ts == NULL && ifp->if_start_delayed == 1) {
			delay_start_ts.tv_sec = 0;
			delay_start_ts.tv_nsec = ifp->if_start_delay_timeout;
			ts = &delay_start_ts;
		}

		if (ts != NULL && ts->tv_sec == 0 && ts->tv_nsec == 0)
			ts = NULL;
	}
}
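
/*
 * Note on the if_start_req counter used above: the starter thread
 * snapshots it before invoking the driver's start routine and loops
 * again if it has moved, so a request that arrives while if_start is
 * already running is never lost even though no wakeup is pending at
 * that moment.
 */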
void
ifnet_set_start_cycle(struct ifnet *ifp, struct timespec *ts)
{
	if (ts == NULL)
		bzero(&ifp->if_start_cycle, sizeof (ifp->if_start_cycle));
	else
		*(&ifp->if_start_cycle) = *ts;

	if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose)
		printf("%s: restart interval set to %lu nsec\n",
		    if_name(ifp), ts->tv_nsec);
}
static void
ifnet_poll(struct ifnet *ifp)
{
	/*
	 * If the poller thread is inactive, signal it to do work.
	 */
	lck_mtx_lock_spin(&ifp->if_poll_lock);
	ifp->if_poll_req++;
	if (!ifp->if_poll_active && ifp->if_poll_thread != THREAD_NULL) {
		wakeup_one((caddr_t)&ifp->if_poll_thread);
	}
	lck_mtx_unlock(&ifp->if_poll_lock);
}
static void
ifnet_poll_thread_fn(void *v, wait_result_t w)
{
#pragma unused(w)
	struct dlil_threading_info *inp;
	struct ifnet *ifp = v;
	char ifname[IFNAMSIZ + 1];
	struct timespec *ts = NULL;
	struct ifnet_stat_increment_param s;

	snprintf(ifname, sizeof (ifname), "%s_poller",
	    if_name(ifp));
	bzero(&s, sizeof (s));

	lck_mtx_lock_spin(&ifp->if_poll_lock);

	inp = ifp->if_inp;
	VERIFY(inp != NULL);

	for (;;) {
		if (ifp->if_poll_thread != THREAD_NULL) {
			(void) msleep(&ifp->if_poll_thread, &ifp->if_poll_lock,
			    (PZERO - 1) | PSPIN, ifname, ts);
		}

		/* interface is detached (maybe while asleep)? */
		if (ifp->if_poll_thread == THREAD_NULL) {
			ifnet_set_poll_cycle(ifp, NULL);
			lck_mtx_unlock(&ifp->if_poll_lock);

			if (dlil_verbose) {
				printf("%s: poller thread terminated\n",
				    if_name(ifp));
			}

			/* for the extra refcnt from kernel_thread_start() */
			thread_deallocate(current_thread());
			/* this is the end */
			thread_terminate(current_thread());
			/* NOTREACHED */
			return;
		}

		ifp->if_poll_active = 1;
		for (;;) {
			struct mbuf *m_head, *m_tail;
			u_int32_t m_lim, m_cnt, m_totlen;
			u_int16_t req = ifp->if_poll_req;

			lck_mtx_unlock(&ifp->if_poll_lock);

			/*
			 * If no longer attached, there's nothing to do;
			 * else hold an IO refcnt to prevent the interface
			 * from being detached (will be released below.)
			 */
			if (!ifnet_is_attached(ifp, 1)) {
				lck_mtx_lock_spin(&ifp->if_poll_lock);
				break;
			}

			m_lim = (inp->rxpoll_plim != 0) ? inp->rxpoll_plim :
			    MAX((qlimit(&inp->rcvq_pkts)),
			    (inp->rxpoll_phiwat << 2));

			if (dlil_verbose > 1) {
				printf("%s: polling up to %d pkts, "
				    "pkts avg %d max %d, wreq avg %d, "
				    "bytes avg %d\n",
				    if_name(ifp), m_lim,
				    inp->rxpoll_pavg, inp->rxpoll_pmax,
				    inp->rxpoll_wavg, inp->rxpoll_bavg);
			}

			/* invoke the driver's input poll routine */
			((*ifp->if_input_poll)(ifp, 0, m_lim, &m_head, &m_tail,
			    &m_cnt, &m_totlen));

			if (m_head != NULL) {
				VERIFY(m_tail != NULL && m_cnt > 0);

				if (dlil_verbose > 1) {
					printf("%s: polled %d pkts, "
					    "pkts avg %d max %d, wreq avg %d, "
					    "bytes avg %d\n",
					    if_name(ifp), m_cnt,
					    inp->rxpoll_pavg, inp->rxpoll_pmax,
					    inp->rxpoll_wavg, inp->rxpoll_bavg);
				}

				/* stats are required for extended variant */
				s.packets_in = m_cnt;
				s.bytes_in = m_totlen;

				(void) ifnet_input_common(ifp, m_head, m_tail,
				    &s, TRUE, TRUE);
			} else {
				if (dlil_verbose > 1) {
					printf("%s: no packets, "
					    "pkts avg %d max %d, wreq avg %d, "
					    "bytes avg %d\n",
					    if_name(ifp), inp->rxpoll_pavg,
					    inp->rxpoll_pmax, inp->rxpoll_wavg,
					    inp->rxpoll_bavg);
				}

				(void) ifnet_input_common(ifp, NULL, NULL,
				    NULL, FALSE, TRUE);
			}

			/* Release the io ref count */
			ifnet_decr_iorefcnt(ifp);

			lck_mtx_lock_spin(&ifp->if_poll_lock);

			/* if there's no pending request, we're done */
			if (req == ifp->if_poll_req)
				break;
		}
		ifp->if_poll_req = 0;
		ifp->if_poll_active = 0;

		/*
		 * Wakeup N ns from now, else sleep indefinitely (ts = NULL)
		 * until ifnet_poll() is called again.
		 */
		ts = &ifp->if_poll_cycle;
		if (ts->tv_sec == 0 && ts->tv_nsec == 0)
			ts = NULL;
	}
}
void
ifnet_set_poll_cycle(struct ifnet *ifp, struct timespec *ts)
{
	if (ts == NULL)
		bzero(&ifp->if_poll_cycle, sizeof (ifp->if_poll_cycle));
	else
		*(&ifp->if_poll_cycle) = *ts;

	if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose)
		printf("%s: poll interval set to %lu nsec\n",
		    if_name(ifp), ts->tv_nsec);
}
void
ifnet_purge(struct ifnet *ifp)
{
	if (ifp != NULL && (ifp->if_eflags & IFEF_TXSTART))
		if_qflush(ifp, 0);
}
void
ifnet_update_sndq(struct ifclassq *ifq, cqev_t ev)
{
	IFCQ_LOCK_ASSERT_HELD(ifq);

	if (!(IFCQ_IS_READY(ifq)))
		return;

	if (IFCQ_TBR_IS_ENABLED(ifq)) {
		struct tb_profile tb = { ifq->ifcq_tbr.tbr_rate_raw,
		    ifq->ifcq_tbr.tbr_percent, 0 };
		(void) ifclassq_tbr_set(ifq, &tb, FALSE);
	}

	ifclassq_update(ifq, ev);
}
void
ifnet_update_rcv(struct ifnet *ifp, cqev_t ev)
{
	switch (ev) {
	case CLASSQ_EV_LINK_BANDWIDTH:
		if (net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL))
			ifp->if_poll_update++;
		break;

	default:
		break;
	}
}
errno_t
ifnet_set_output_sched_model(struct ifnet *ifp, u_int32_t model)
{
	struct ifclassq *ifq;
	u_int32_t omodel;
	errno_t err;

	if (ifp == NULL || model >= IFNET_SCHED_MODEL_MAX)
		return (EINVAL);
	else if (!(ifp->if_eflags & IFEF_TXSTART))
		return (ENXIO);

	ifq = &ifp->if_snd;
	IFCQ_LOCK(ifq);
	omodel = ifp->if_output_sched_model;
	ifp->if_output_sched_model = model;
	if ((err = ifclassq_pktsched_setup(ifq)) != 0)
		ifp->if_output_sched_model = omodel;
	IFCQ_UNLOCK(ifq);

	return (err);
}
errno_t
ifnet_set_sndq_maxlen(struct ifnet *ifp, u_int32_t maxqlen)
{
	if (ifp == NULL)
		return (EINVAL);
	else if (!(ifp->if_eflags & IFEF_TXSTART))
		return (ENXIO);

	ifclassq_set_maxlen(&ifp->if_snd, maxqlen);

	return (0);
}
errno_t
ifnet_get_sndq_maxlen(struct ifnet *ifp, u_int32_t *maxqlen)
{
	if (ifp == NULL || maxqlen == NULL)
		return (EINVAL);
	else if (!(ifp->if_eflags & IFEF_TXSTART))
		return (ENXIO);

	*maxqlen = ifclassq_get_maxlen(&ifp->if_snd);

	return (0);
}
errno_t
ifnet_get_sndq_len(struct ifnet *ifp, u_int32_t *pkts)
{
	errno_t err;

	if (ifp == NULL || pkts == NULL)
		err = EINVAL;
	else if (!(ifp->if_eflags & IFEF_TXSTART))
		err = ENXIO;
	else
		err = ifclassq_get_len(&ifp->if_snd, MBUF_SC_UNSPEC,
		    pkts, NULL);

	return (err);
}
errno_t
ifnet_get_service_class_sndq_len(struct ifnet *ifp, mbuf_svc_class_t sc,
    u_int32_t *pkts, u_int32_t *bytes)
{
	errno_t err;

	if (ifp == NULL || !MBUF_VALID_SC(sc) ||
	    (pkts == NULL && bytes == NULL))
		err = EINVAL;
	else if (!(ifp->if_eflags & IFEF_TXSTART))
		err = ENXIO;
	else
		err = ifclassq_get_len(&ifp->if_snd, sc, pkts, bytes);

	return (err);
}
errno_t
ifnet_set_rcvq_maxlen(struct ifnet *ifp, u_int32_t maxqlen)
{
	struct dlil_threading_info *inp;

	if (ifp == NULL)
		return (EINVAL);
	else if (!(ifp->if_eflags & IFEF_RXPOLL) || ifp->if_inp == NULL)
		return (ENXIO);

	if (maxqlen == 0)
		maxqlen = if_rcvq_maxlen;
	else if (maxqlen < IF_RCVQ_MINLEN)
		maxqlen = IF_RCVQ_MINLEN;

	inp = ifp->if_inp;
	lck_mtx_lock(&inp->input_lck);
	qlimit(&inp->rcvq_pkts) = maxqlen;
	lck_mtx_unlock(&inp->input_lck);

	return (0);
}
errno_t
ifnet_get_rcvq_maxlen(struct ifnet *ifp, u_int32_t *maxqlen)
{
	struct dlil_threading_info *inp;

	if (ifp == NULL || maxqlen == NULL)
		return (EINVAL);
	else if (!(ifp->if_eflags & IFEF_RXPOLL) || ifp->if_inp == NULL)
		return (ENXIO);

	inp = ifp->if_inp;
	lck_mtx_lock(&inp->input_lck);
	*maxqlen = qlimit(&inp->rcvq_pkts);
	lck_mtx_unlock(&inp->input_lck);

	return (0);
}
void
ifnet_enqueue_multi_setup(struct ifnet *ifp, uint16_t delay_qlen,
    uint16_t delay_timeout)
{
	if (delay_qlen > 0 && delay_timeout > 0) {
		ifp->if_eflags |= IFEF_ENQUEUE_MULTI;
		ifp->if_start_delay_qlen = min(100, delay_qlen);
		ifp->if_start_delay_timeout = min(20000, delay_timeout);
		/* convert timeout to nanoseconds */
		ifp->if_start_delay_timeout *= 1000;
		kprintf("%s: forced IFEF_ENQUEUE_MULTI qlen %u timeout %u\n",
		    ifp->if_xname, (uint32_t)delay_qlen,
		    (uint32_t)delay_timeout);
	} else {
		ifp->if_eflags &= ~IFEF_ENQUEUE_MULTI;
	}
}
static inline errno_t
ifnet_enqueue_common(struct ifnet *ifp, void *p, classq_pkt_type_t ptype,
    boolean_t flush, boolean_t *pdrop)
{
	volatile uint64_t *fg_ts = NULL;
	volatile uint64_t *rt_ts = NULL;
	struct mbuf *m = p;
	struct timespec now;
	u_int64_t now_nsec = 0;
	int error = 0;

	ASSERT(ifp->if_eflags & IFEF_TXSTART);

	/*
	 * If packet already carries a timestamp, either from dlil_output()
	 * or from flowswitch, use it here.  Otherwise, record timestamp.
	 * PKTF_TS_VALID is always cleared prior to entering classq, i.e.
	 * the timestamp value is used internally there.
	 */
	switch (ptype) {
	case QP_MBUF:
		ASSERT(m->m_flags & M_PKTHDR);
		ASSERT(m->m_nextpkt == NULL);

		if (!(m->m_pkthdr.pkt_flags & PKTF_TS_VALID) ||
		    m->m_pkthdr.pkt_timestamp == 0) {
			nanouptime(&now);
			net_timernsec(&now, &now_nsec);
			m->m_pkthdr.pkt_timestamp = now_nsec;
		}
		m->m_pkthdr.pkt_flags &= ~PKTF_TS_VALID;
		/*
		 * If the packet service class is not background,
		 * update the timestamp to indicate recent activity
		 * on a foreground socket.
		 */
		if ((m->m_pkthdr.pkt_flags & PKTF_FLOW_ID) &&
		    m->m_pkthdr.pkt_flowsrc == FLOWSRC_INPCB) {
			if (!(m->m_pkthdr.pkt_flags & PKTF_SO_BACKGROUND)) {
				ifp->if_fg_sendts = _net_uptime;
				if (fg_ts != NULL)
					*fg_ts = _net_uptime;
			}
			if (m->m_pkthdr.pkt_flags & PKTF_SO_REALTIME) {
				ifp->if_rt_sendts = _net_uptime;
				if (rt_ts != NULL)
					*rt_ts = _net_uptime;
			}
		}
		break;

	default:
		VERIFY(0);
		/* NOTREACHED */
	}

	if (ifp->if_eflags & IFEF_ENQUEUE_MULTI) {
		if (now_nsec == 0) {
			nanouptime(&now);
			net_timernsec(&now, &now_nsec);
		}
		/*
		 * If the driver chose to delay start callback for
		 * coalescing multiple packets, then use the following
		 * heuristics to make sure that start callback will
		 * be delayed only when bulk data transfer is detected.
		 * 1. number of packets enqueued in (delay_win * 2) is
		 *    greater than or equal to the delay qlen.
		 * 2. If delay_start is enabled it will stay enabled for
		 *    another 10 idle windows.  This is to take into account
		 *    variable RTT and burst traffic.
		 * 3. If the time elapsed since last enqueue is more
		 *    than 200ms we disable delaying start callback.  This
		 *    is to take idle time into account.
		 */
		u_int64_t dwin = (ifp->if_start_delay_timeout << 1);
		if (ifp->if_start_delay_swin > 0) {
			if ((ifp->if_start_delay_swin + dwin) > now_nsec) {
				ifp->if_start_delay_cnt++;
			} else if ((now_nsec - ifp->if_start_delay_swin)
			    >= (200 * 1000 * 1000)) {
				ifp->if_start_delay_swin = now_nsec;
				ifp->if_start_delay_cnt = 1;
				ifp->if_start_delay_idle = 0;
				if (ifp->if_eflags & IFEF_DELAY_START) {
					ifp->if_eflags &=
					    ~(IFEF_DELAY_START);
					ifnet_delay_start_disabled++;
				}
			} else {
				if (ifp->if_start_delay_cnt >=
				    ifp->if_start_delay_qlen) {
					ifp->if_eflags |= IFEF_DELAY_START;
					ifp->if_start_delay_idle = 0;
				} else {
					if (ifp->if_start_delay_idle >= 10) {
						ifp->if_eflags &=
						    ~(IFEF_DELAY_START);
						ifnet_delay_start_disabled++;
					} else {
						ifp->if_start_delay_idle++;
					}
				}
				ifp->if_start_delay_swin = now_nsec;
				ifp->if_start_delay_cnt = 1;
			}
		} else {
			ifp->if_start_delay_swin = now_nsec;
			ifp->if_start_delay_cnt = 1;
			ifp->if_start_delay_idle = 0;
			ifp->if_eflags &= ~(IFEF_DELAY_START);
		}
	} else {
		ifp->if_eflags &= ~(IFEF_DELAY_START);
	}

	/* enqueue the packet (caller consumes object) */
	error = ifclassq_enqueue(&ifp->if_snd, m, QP_MBUF, pdrop);

	/*
	 * Tell the driver to start dequeueing; do this even when the queue
	 * for the packet is suspended (EQSUSPENDED), as the driver could still
	 * be dequeueing from other unsuspended queues.
	 */
	if (!(ifp->if_eflags & IFEF_ENQUEUE_MULTI) &&
	    ((error == 0 && flush) || error == EQFULL || error == EQSUSPENDED))
		ifnet_start(ifp);

	return (error);
}
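
/*
 * Worked example of the delay-start heuristic above (illustrative
 * numbers): with if_start_delay_qlen == 10 and a 2 msec delay timeout,
 * dwin is 4 msec.  Once 10 or more packets are enqueued within a 4 msec
 * window, IFEF_DELAY_START is set and the immediate ifnet_start() call
 * above is skipped; a gap of 200 msec or more since the window began
 * clears the flag, so an idle link reverts to immediate start callbacks.
 */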
errno_t
ifnet_enqueue(struct ifnet *ifp, struct mbuf *m)
{
	boolean_t pdrop;
	return (ifnet_enqueue_mbuf(ifp, m, TRUE, &pdrop));
}

errno_t
ifnet_enqueue_mbuf(struct ifnet *ifp, struct mbuf *m, boolean_t flush,
    boolean_t *pdrop)
{
	if (ifp == NULL || m == NULL || !(m->m_flags & M_PKTHDR) ||
	    m->m_nextpkt != NULL) {
		if (m != NULL) {
			m_freem_list(m);
			*pdrop = TRUE;
		}
		return (EINVAL);
	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    !IF_FULLY_ATTACHED(ifp)) {
		/* flag tested without lock for performance */
		m_freem(m);
		*pdrop = TRUE;
		return (ENXIO);
	} else if (!(ifp->if_flags & IFF_UP)) {
		m_freem(m);
		*pdrop = TRUE;
		return (ENETDOWN);
	}

	return (ifnet_enqueue_common(ifp, m, QP_MBUF, flush, pdrop));
}
errno_t
ifnet_dequeue(struct ifnet *ifp, struct mbuf **mp)
{
	errno_t rc;
	classq_pkt_type_t ptype;
	if (ifp == NULL || mp == NULL)
		return (EINVAL);
	else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX)
		return (ENXIO);
	if (!ifnet_is_attached(ifp, 1))
		return (ENXIO);

	rc = ifclassq_dequeue(&ifp->if_snd, 1, CLASSQ_DEQUEUE_MAX_BYTE_LIMIT,
	    (void **)mp, NULL, NULL, NULL, &ptype);
	VERIFY((*mp == NULL) || (ptype == QP_MBUF));
	ifnet_decr_iorefcnt(ifp);

	return (rc);
}
errno_t
ifnet_dequeue_service_class(struct ifnet *ifp, mbuf_svc_class_t sc,
    struct mbuf **mp)
{
	errno_t rc;
	classq_pkt_type_t ptype;
	if (ifp == NULL || mp == NULL || !MBUF_VALID_SC(sc))
		return (EINVAL);
	else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX)
		return (ENXIO);
	if (!ifnet_is_attached(ifp, 1))
		return (ENXIO);

	rc = ifclassq_dequeue_sc(&ifp->if_snd, sc, 1,
	    CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, (void **)mp, NULL, NULL,
	    NULL, &ptype);
	VERIFY((*mp == NULL) || (ptype == QP_MBUF));
	ifnet_decr_iorefcnt(ifp);

	return (rc);
}
errno_t
ifnet_dequeue_multi(struct ifnet *ifp, u_int32_t pkt_limit,
    struct mbuf **head, struct mbuf **tail, u_int32_t *cnt, u_int32_t *len)
{
	errno_t rc;
	classq_pkt_type_t ptype;
	if (ifp == NULL || head == NULL || pkt_limit < 1)
		return (EINVAL);
	else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX)
		return (ENXIO);
	if (!ifnet_is_attached(ifp, 1))
		return (ENXIO);

	rc = ifclassq_dequeue(&ifp->if_snd, pkt_limit,
	    CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, (void **)head, (void **)tail, cnt,
	    len, &ptype);
	VERIFY((*head == NULL) || (ptype == QP_MBUF));
	ifnet_decr_iorefcnt(ifp);

	return (rc);
}
errno_t
ifnet_dequeue_multi_bytes(struct ifnet *ifp, u_int32_t byte_limit,
    struct mbuf **head, struct mbuf **tail, u_int32_t *cnt, u_int32_t *len)
{
	errno_t rc;
	classq_pkt_type_t ptype;
	if (ifp == NULL || head == NULL || byte_limit < 1)
		return (EINVAL);
	else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX)
		return (ENXIO);
	if (!ifnet_is_attached(ifp, 1))
		return (ENXIO);

	rc = ifclassq_dequeue(&ifp->if_snd, CLASSQ_DEQUEUE_MAX_PKT_LIMIT,
	    byte_limit, (void **)head, (void **)tail, cnt, len, &ptype);
	VERIFY((*head == NULL) || (ptype == QP_MBUF));
	ifnet_decr_iorefcnt(ifp);

	return (rc);
}
errno_t
ifnet_dequeue_service_class_multi(struct ifnet *ifp, mbuf_svc_class_t sc,
    u_int32_t pkt_limit, struct mbuf **head, struct mbuf **tail, u_int32_t *cnt,
    u_int32_t *len)
{
	errno_t rc;
	classq_pkt_type_t ptype;
	if (ifp == NULL || head == NULL || pkt_limit < 1 ||
	    !MBUF_VALID_SC(sc))
		return (EINVAL);
	else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX)
		return (ENXIO);
	if (!ifnet_is_attached(ifp, 1))
		return (ENXIO);

	rc = ifclassq_dequeue_sc(&ifp->if_snd, sc, pkt_limit,
	    CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, (void **)head,
	    (void **)tail, cnt, len, &ptype);
	VERIFY((*head == NULL) || (ptype == QP_MBUF));
	ifnet_decr_iorefcnt(ifp);

	return (rc);
}
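
/*
 * Example (illustrative): a driver's start routine typically drains the
 * send queue through one of the dequeue variants above, e.g. pulling up
 * to 32 packets per pass:
 *
 *	struct mbuf *head, *tail;
 *	u_int32_t cnt, len;
 *	while (ifnet_dequeue_multi(ifp, 32, &head, &tail,
 *	    &cnt, &len) == 0) {
 *		// hand the "head" chain to the hardware
 *	}
 */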
#if !CONFIG_EMBEDDED
errno_t
ifnet_framer_stub(struct ifnet *ifp, struct mbuf **m,
    const struct sockaddr *dest, const char *dest_linkaddr,
    const char *frame_type, u_int32_t *pre, u_int32_t *post)
{
	if (pre != NULL)
		*pre = 0;
	if (post != NULL)
		*post = 0;

	return (ifp->if_framer_legacy(ifp, m, dest, dest_linkaddr, frame_type));
}
#endif /* !CONFIG_EMBEDDED */
static int
dlil_interface_filters_input(struct ifnet *ifp, struct mbuf **m_p,
    char **frame_header_p, protocol_family_t protocol_family)
{
	struct ifnet_filter *filter;

	/*
	 * Pass the inbound packet to the interface filters
	 */
	lck_mtx_lock_spin(&ifp->if_flt_lock);
	/* prevent filter list from changing in case we drop the lock */
	if_flt_monitor_busy(ifp);
	TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
		int result;

		if (!filter->filt_skip && filter->filt_input != NULL &&
		    (filter->filt_protocol == 0 ||
		    filter->filt_protocol == protocol_family)) {
			lck_mtx_unlock(&ifp->if_flt_lock);

			result = (*filter->filt_input)(filter->filt_cookie,
			    ifp, protocol_family, m_p, frame_header_p);

			lck_mtx_lock_spin(&ifp->if_flt_lock);
			if (result != 0) {
				/* we're done with the filter list */
				if_flt_monitor_unbusy(ifp);
				lck_mtx_unlock(&ifp->if_flt_lock);
				return (result);
			}
		}
	}
	/* we're done with the filter list */
	if_flt_monitor_unbusy(ifp);
	lck_mtx_unlock(&ifp->if_flt_lock);

	/*
	 * Strip away M_PROTO1 bit prior to sending packet up the stack as
	 * it is meant to be local to a subsystem -- if_bridge for M_PROTO1
	 */
	if (*m_p != NULL)
		(*m_p)->m_flags &= ~M_PROTO1;

	return (0);
}
static int
dlil_interface_filters_output(struct ifnet *ifp, struct mbuf **m_p,
    protocol_family_t protocol_family)
{
	struct ifnet_filter *filter;

	/*
	 * Pass the outbound packet to the interface filters
	 */
	lck_mtx_lock_spin(&ifp->if_flt_lock);
	/* prevent filter list from changing in case we drop the lock */
	if_flt_monitor_busy(ifp);
	TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
		int result;

		if (!filter->filt_skip && filter->filt_output != NULL &&
		    (filter->filt_protocol == 0 ||
		    filter->filt_protocol == protocol_family)) {
			lck_mtx_unlock(&ifp->if_flt_lock);

			result = filter->filt_output(filter->filt_cookie, ifp,
			    protocol_family, m_p);

			lck_mtx_lock_spin(&ifp->if_flt_lock);
			if (result != 0) {
				/* we're done with the filter list */
				if_flt_monitor_unbusy(ifp);
				lck_mtx_unlock(&ifp->if_flt_lock);
				return (result);
			}
		}
	}
	/* we're done with the filter list */
	if_flt_monitor_unbusy(ifp);
	lck_mtx_unlock(&ifp->if_flt_lock);

	return (0);
}
static void
dlil_ifproto_input(struct if_proto * ifproto, mbuf_t m)
{
	int error;

	if (ifproto->proto_kpi == kProtoKPI_v1) {
		/* Version 1 protocols get one packet at a time */
		while (m != NULL) {
			char *	frame_header;
			mbuf_t	next_packet;

			next_packet = m->m_nextpkt;
			m->m_nextpkt = NULL;
			frame_header = m->m_pkthdr.pkt_hdr;
			m->m_pkthdr.pkt_hdr = NULL;
			error = (*ifproto->kpi.v1.input)(ifproto->ifp,
			    ifproto->protocol_family, m, frame_header);
			if (error != 0 && error != EJUSTRETURN)
				m_freem(m);
			m = next_packet;
		}
	} else if (ifproto->proto_kpi == kProtoKPI_v2) {
		/* Version 2 protocols support packet lists */
		error = (*ifproto->kpi.v2.input)(ifproto->ifp,
		    ifproto->protocol_family, m);
		if (error != 0 && error != EJUSTRETURN)
			m_freem_list(m);
	}
}
static void
dlil_input_stats_add(const struct ifnet_stat_increment_param *s,
    struct dlil_threading_info *inp, boolean_t poll)
{
	struct ifnet_stat_increment_param *d = &inp->stats;

	if (s->packets_in != 0)
		d->packets_in += s->packets_in;
	if (s->bytes_in != 0)
		d->bytes_in += s->bytes_in;
	if (s->errors_in != 0)
		d->errors_in += s->errors_in;

	if (s->packets_out != 0)
		d->packets_out += s->packets_out;
	if (s->bytes_out != 0)
		d->bytes_out += s->bytes_out;
	if (s->errors_out != 0)
		d->errors_out += s->errors_out;

	if (s->collisions != 0)
		d->collisions += s->collisions;
	if (s->dropped != 0)
		d->dropped += s->dropped;

	if (poll)
		PKTCNTR_ADD(&inp->tstats, s->packets_in, s->bytes_in);
}
static void
dlil_input_stats_sync(struct ifnet *ifp, struct dlil_threading_info *inp)
{
	struct ifnet_stat_increment_param *s = &inp->stats;

	/*
	 * Use of atomic operations is unavoidable here because
	 * these stats may also be incremented elsewhere via KPIs.
	 */
	if (s->packets_in != 0) {
		atomic_add_64(&ifp->if_data.ifi_ipackets, s->packets_in);
		s->packets_in = 0;
	}
	if (s->bytes_in != 0) {
		atomic_add_64(&ifp->if_data.ifi_ibytes, s->bytes_in);
		s->bytes_in = 0;
	}
	if (s->errors_in != 0) {
		atomic_add_64(&ifp->if_data.ifi_ierrors, s->errors_in);
		s->errors_in = 0;
	}

	if (s->packets_out != 0) {
		atomic_add_64(&ifp->if_data.ifi_opackets, s->packets_out);
		s->packets_out = 0;
	}
	if (s->bytes_out != 0) {
		atomic_add_64(&ifp->if_data.ifi_obytes, s->bytes_out);
		s->bytes_out = 0;
	}
	if (s->errors_out != 0) {
		atomic_add_64(&ifp->if_data.ifi_oerrors, s->errors_out);
		s->errors_out = 0;
	}

	if (s->collisions != 0) {
		atomic_add_64(&ifp->if_data.ifi_collisions, s->collisions);
		s->collisions = 0;
	}
	if (s->dropped != 0) {
		atomic_add_64(&ifp->if_data.ifi_iqdrops, s->dropped);
		s->dropped = 0;
	}

	if (ifp->if_data_threshold != 0) {
		lck_mtx_convert_spin(&inp->input_lck);
		ifnet_notify_data_threshold(ifp);
	}

	/*
	 * No need for atomic operations as they are modified here
	 * only from within the DLIL input thread context.
	 */
	if (inp->tstats.packets != 0) {
		inp->pstats.ifi_poll_packets += inp->tstats.packets;
		inp->tstats.packets = 0;
	}
	if (inp->tstats.bytes != 0) {
		inp->pstats.ifi_poll_bytes += inp->tstats.bytes;
		inp->tstats.bytes = 0;
	}
}
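
/*
 * Note on the split between dlil_input_stats_add() and
 * dlil_input_stats_sync() above: it amortizes the cost of atomics.
 * Producers accumulate into the per-thread inp->stats with plain
 * additions under the input lock, and folding into the shared
 * ifp->if_data counters (which other KPIs also update) happens once
 * per wakeup via atomic_add_64().
 */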
__private_extern__ void
dlil_input_packet_list(struct ifnet *ifp, struct mbuf *m)
{
	return (dlil_input_packet_list_common(ifp, m, 0,
	    IFNET_MODEL_INPUT_POLL_OFF, FALSE));
}

__private_extern__ void
dlil_input_packet_list_extended(struct ifnet *ifp, struct mbuf *m,
    u_int32_t cnt, ifnet_model_t mode)
{
	return (dlil_input_packet_list_common(ifp, m, cnt, mode, TRUE));
}
3781 dlil_input_packet_list_common(struct ifnet
*ifp_param
, struct mbuf
*m
,
3782 u_int32_t cnt
, ifnet_model_t mode
, boolean_t ext
)
3785 protocol_family_t protocol_family
;
3787 ifnet_t ifp
= ifp_param
;
3788 char * frame_header
;
3789 struct if_proto
* last_ifproto
= NULL
;
3790 mbuf_t pkt_first
= NULL
;
3791 mbuf_t
* pkt_next
= NULL
;
3792 u_int32_t poll_thresh
= 0, poll_ival
= 0;
3794 KERNEL_DEBUG(DBG_FNC_DLIL_INPUT
| DBG_FUNC_START
, 0, 0, 0, 0, 0);
3796 if (ext
&& mode
== IFNET_MODEL_INPUT_POLL_ON
&& cnt
> 1 &&
3797 (poll_ival
	    (poll_ival = if_rxpoll_interval_pkts) > 0)
		poll_thresh = cnt;

	while (m != NULL) {
		struct if_proto *ifproto = NULL;
		int iorefcnt = 0;
		uint32_t pktf_mask;	/* pkt flags to preserve */

		if (ifp_param == NULL)
			ifp = m->m_pkthdr.rcvif;

		if ((ifp->if_eflags & IFEF_RXPOLL) && poll_thresh != 0 &&
		    poll_ival > 0 && (--poll_thresh % poll_ival) == 0)
			ifnet_poll(ifp);

		/* Check if this mbuf looks valid */
		MBUF_INPUT_CHECK(m, ifp);

		next_packet = m->m_nextpkt;
		m->m_nextpkt = NULL;
		frame_header = m->m_pkthdr.pkt_hdr;
		m->m_pkthdr.pkt_hdr = NULL;

		/*
		 * Get an IO reference count if the interface is not
		 * loopback (lo0) and it is attached; lo0 never goes
		 * away, so optimize for that.
		 */
		if (ifp != lo_ifp) {
			if (!ifnet_is_attached(ifp, 1)) {
				m_freem(m);
				goto next;
			}
			iorefcnt = 1;
			/*
			 * Preserve the time stamp if it was set.
			 */
			pktf_mask = PKTF_TS_VALID;
		} else {
			/*
			 * If this arrived on lo0, preserve interface addr
			 * info to allow for connectivity between loopback
			 * and local interface addresses.
			 */
			pktf_mask = (PKTF_LOOP|PKTF_IFAINFO);
		}

		/* make sure packet comes in clean */
		m_classifier_init(m, pktf_mask);

		ifp_inc_traffic_class_in(ifp, m);

		/* find which protocol family this packet is for */
		ifnet_lock_shared(ifp);
		error = (*ifp->if_demux)(ifp, m, frame_header,
		    &protocol_family);
		ifnet_lock_done(ifp);
		if (error != 0) {
			if (error == EJUSTRETURN)
				goto next;
			protocol_family = 0;
		}

		if (hwcksum_dbg != 0 && !(ifp->if_flags & IFF_LOOPBACK) &&
		    !(m->m_pkthdr.pkt_flags & PKTF_LOOP))
			dlil_input_cksum_dbg(ifp, m, frame_header,
			    protocol_family);

		/*
		 * For partial checksum offload, we expect the driver to
		 * set the start offset indicating the start of the span
		 * that is covered by the hardware-computed checksum;
		 * adjust this start offset accordingly because the data
		 * pointer has been advanced beyond the link-layer header.
		 *
		 * Don't adjust if the interface is a bridge member, as
		 * the adjustment will occur from the context of the
		 * bridge interface during input.
		 */
		if (ifp->if_bridge == NULL && (m->m_pkthdr.csum_flags &
		    (CSUM_DATA_VALID | CSUM_PARTIAL)) ==
		    (CSUM_DATA_VALID | CSUM_PARTIAL)) {
			int adj;

			if (frame_header == NULL ||
			    frame_header < (char *)mbuf_datastart(m) ||
			    frame_header > (char *)m->m_data ||
			    (adj = (m->m_data - frame_header)) >
			    m->m_pkthdr.csum_rx_start) {
				m->m_pkthdr.csum_data = 0;
				m->m_pkthdr.csum_flags &= ~CSUM_DATA_VALID;
				hwcksum_in_invalidated++;
			} else {
				m->m_pkthdr.csum_rx_start -= adj;
			}
		}

		pktap_input(ifp, protocol_family, m, frame_header);

		if (m->m_flags & (M_BCAST|M_MCAST))
			atomic_add_64(&ifp->if_imcasts, 1);

		/* run interface filters, exclude VLAN packets PR-3586856 */
		if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) == 0) {
			error = dlil_interface_filters_input(ifp, &m,
			    &frame_header, protocol_family);
			if (error != 0) {
				if (error != EJUSTRETURN)
					m_freem(m);
				goto next;
			}
		}
		if (error != 0 || ((m->m_flags & M_PROMISC) != 0)) {
			m_freem(m);
			goto next;
		}

		/* Lookup the protocol attachment to this interface */
		if (protocol_family == 0) {
			ifproto = NULL;
		} else if (last_ifproto != NULL && last_ifproto->ifp == ifp &&
		    (last_ifproto->protocol_family == protocol_family)) {
			VERIFY(ifproto == NULL);
			ifproto = last_ifproto;
			if_proto_ref(last_ifproto);
		} else {
			VERIFY(ifproto == NULL);
			ifnet_lock_shared(ifp);
			/* callee holds a proto refcnt upon success */
			ifproto = find_attached_proto(ifp, protocol_family);
			ifnet_lock_done(ifp);
		}
		if (ifproto == NULL) {
			/* no protocol for this packet, discard */
			m_freem(m);
			goto next;
		}
		if (ifproto != last_ifproto) {
			if (last_ifproto != NULL) {
				/* pass up the list for the previous protocol */
				dlil_ifproto_input(last_ifproto, pkt_first);
				pkt_first = NULL;
				if_proto_free(last_ifproto);
			}
			last_ifproto = ifproto;
			if_proto_ref(ifproto);
		}
		/* extend the list */
		m->m_pkthdr.pkt_hdr = frame_header;
		if (pkt_first == NULL) {
			pkt_first = m;
		} else {
			*pkt_next = m;
		}
		pkt_next = &m->m_nextpkt;

next:
		if (next_packet == NULL && last_ifproto != NULL) {
			/* pass up the last list of packets */
			dlil_ifproto_input(last_ifproto, pkt_first);
			if_proto_free(last_ifproto);
			last_ifproto = NULL;
		}
		if (ifproto != NULL) {
			if_proto_free(ifproto);
			ifproto = NULL;
		}

		m = next_packet;

		/* update the driver's multicast filter, if needed */
		if (ifp->if_updatemcasts > 0 && if_mcasts_update(ifp) == 0)
			ifp->if_updatemcasts = 0;
		if (iorefcnt == 1)
			ifnet_decr_iorefcnt(ifp);
	}

	KERNEL_DEBUG(DBG_FNC_DLIL_INPUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
}
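/*
 * Restore any link-layer multicast memberships that were suspended
 * (e.g. across an interface recycle) by poking the driver with
 * SIOCADDMULTI; the count of suspended memberships lives in
 * if_updatemcasts.
 */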
static int
if_mcasts_update(struct ifnet *ifp)
{
	int err;

	err = ifnet_ioctl(ifp, 0, SIOCADDMULTI, NULL);
	if (err == EAFNOSUPPORT)
		err = 0;
	printf("%s: %s %d suspended link-layer multicast membership(s) "
	    "(err=%d)\n", if_name(ifp),
	    (err == 0 ? "successfully restored" : "failed to restore"),
	    ifp->if_updatemcasts, err);

	/* just return success */
	return (0);
}
/* If ifp is set, we will increment the generation for the interface */
int
dlil_post_complete_msg(struct ifnet *ifp, struct kev_msg *event)
{
	if (ifp != NULL) {
		ifnet_increment_generation(ifp);
	}

#if NECP
	necp_update_all_clients();
#endif /* NECP */

	return (kev_post_msg(event));
}
#define	TMP_IF_PROTO_ARR_SIZE	10
static int
dlil_event_internal(struct ifnet *ifp, struct kev_msg *event,
    bool update_generation)
{
	struct ifnet_filter *filter = NULL;
	struct if_proto *proto = NULL;
	int if_proto_count = 0;
	struct if_proto **tmp_ifproto_arr = NULL;
	struct if_proto *tmp_ifproto_stack_arr[TMP_IF_PROTO_ARR_SIZE] = {NULL};
	int tmp_ifproto_arr_idx = 0;
	bool tmp_malloc = false;

	/*
	 * Pass the event to the interface filters
	 */
	lck_mtx_lock_spin(&ifp->if_flt_lock);
	/* prevent filter list from changing in case we drop the lock */
	if_flt_monitor_busy(ifp);
	TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
		if (filter->filt_event != NULL) {
			lck_mtx_unlock(&ifp->if_flt_lock);

			filter->filt_event(filter->filt_cookie, ifp,
			    filter->filt_protocol, event);

			lck_mtx_lock_spin(&ifp->if_flt_lock);
		}
	}
	/* we're done with the filter list */
	if_flt_monitor_unbusy(ifp);
	lck_mtx_unlock(&ifp->if_flt_lock);

	/* Get an io ref count if the interface is attached */
	if (!ifnet_is_attached(ifp, 1))
		goto done;

	/*
	 * An embedded tmp_list_entry in if_proto may still get
	 * over-written by another thread after giving up ifnet lock,
	 * therefore we are avoiding embedded pointers here.
	 */
	ifnet_lock_shared(ifp);
	if_proto_count = dlil_ifp_proto_count(ifp);
	if (if_proto_count) {
		int i;

		VERIFY(ifp->if_proto_hash != NULL);
		if (if_proto_count <= TMP_IF_PROTO_ARR_SIZE) {
			tmp_ifproto_arr = tmp_ifproto_stack_arr;
		} else {
			MALLOC(tmp_ifproto_arr, struct if_proto **,
			    sizeof (*tmp_ifproto_arr) * if_proto_count,
			    M_TEMP, M_ZERO);
			if (tmp_ifproto_arr == NULL) {
				ifnet_lock_done(ifp);
				goto cleanup;
			}
			tmp_malloc = true;
		}

		for (i = 0; i < PROTO_HASH_SLOTS; i++) {
			SLIST_FOREACH(proto, &ifp->if_proto_hash[i],
			    next_hash) {
				if_proto_ref(proto);
				tmp_ifproto_arr[tmp_ifproto_arr_idx] = proto;
				tmp_ifproto_arr_idx++;
			}
		}
		VERIFY(if_proto_count == tmp_ifproto_arr_idx);
	}
	ifnet_lock_done(ifp);

	for (tmp_ifproto_arr_idx = 0; tmp_ifproto_arr_idx < if_proto_count;
	    tmp_ifproto_arr_idx++) {
		proto = tmp_ifproto_arr[tmp_ifproto_arr_idx];
		VERIFY(proto != NULL);
		proto_media_event eventp =
		    (proto->proto_kpi == kProtoKPI_v1 ?
		    proto->kpi.v1.event :
		    proto->kpi.v2.event);

		if (eventp != NULL) {
			eventp(ifp, proto->protocol_family,
			    event);
		}
		if_proto_free(proto);
	}

cleanup:
	if (tmp_malloc) {
		FREE(tmp_ifproto_arr, M_TEMP);
	}

	/* Pass the event to the interface */
	if (ifp->if_event != NULL)
		ifp->if_event(ifp, event);

	/* Release the io ref count */
	ifnet_decr_iorefcnt(ifp);
done:
	return (dlil_post_complete_msg(update_generation ? ifp : NULL, event));
}
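/*
 * ifnet_event() is the exported KPI wrapper: it repackages the caller's
 * kern_event_msg into a kev_msg and hands it to dlil_event_internal(),
 * which in turn fans the event out to the interface filters, the
 * attached protocols and the interface itself.
 */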
errno_t
ifnet_event(ifnet_t ifp, struct kern_event_msg *event)
{
	struct kev_msg kev_msg;
	int result = 0;

	if (ifp == NULL || event == NULL)
		return (EINVAL);

	bzero(&kev_msg, sizeof (kev_msg));
	kev_msg.vendor_code = event->vendor_code;
	kev_msg.kev_class = event->kev_class;
	kev_msg.kev_subclass = event->kev_subclass;
	kev_msg.event_code = event->event_code;
	kev_msg.dv[0].data_ptr = &event->event_data[0];
	kev_msg.dv[0].data_length = event->total_size - KEV_MSG_HEADER_SIZE;
	kev_msg.dv[1].data_length = 0;

	result = dlil_event_internal(ifp, &kev_msg, TRUE);

	return (result);
}
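/*
 * Helper for the MAC framework transmit check in dlil_output(): peek at
 * the IP/IPv6 header to classify the outbound packet as SOCK_STREAM
 * (TCP), SOCK_DGRAM (UDP) or SOCK_RAW (everything else, and all raw
 * output).  m_pullup() may replace the leading mbuf, hence the double
 * indirection on the mbuf pointer.
 */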
#if CONFIG_MACF_NET
#include <netinet/ip6.h>
#include <netinet/ip.h>

static int
dlil_get_socket_type(struct mbuf **mp, int family, int raw)
{
	struct mbuf *m;
	struct ip *ip;
	struct ip6_hdr *ip6;
	int type = SOCK_RAW;

	if (!raw) {
		switch (family) {
		case PF_INET:
			m = m_pullup(*mp, sizeof(struct ip));
			if (m == NULL)
				break;
			*mp = m;
			ip = mtod(m, struct ip *);
			if (ip->ip_p == IPPROTO_TCP)
				type = SOCK_STREAM;
			else if (ip->ip_p == IPPROTO_UDP)
				type = SOCK_DGRAM;
			break;
		case PF_INET6:
			m = m_pullup(*mp, sizeof(struct ip6_hdr));
			if (m == NULL)
				break;
			*mp = m;
			ip6 = mtod(m, struct ip6_hdr *);
			if (ip6->ip6_nxt == IPPROTO_TCP)
				type = SOCK_STREAM;
			else if (ip6->ip6_nxt == IPPROTO_UDP)
				type = SOCK_DGRAM;
			break;
		}
	}

	return (type);
}
#endif /* CONFIG_MACF_NET */
static void
dlil_count_chain_len(mbuf_t m, struct chain_len_stats *cls)
{
	mbuf_t n = m;
	int chainlen = 0;

	while (n != NULL) {
		chainlen++;
		n = n->m_next;
	}
	switch (chainlen) {
	case 0:
		break;
	case 1:
		atomic_add_64(&cls->cls_one, 1);
		break;
	case 2:
		atomic_add_64(&cls->cls_two, 1);
		break;
	case 3:
		atomic_add_64(&cls->cls_three, 1);
		break;
	case 4:
		atomic_add_64(&cls->cls_four, 1);
		break;
	case 5:
	default:
		atomic_add_64(&cls->cls_five_or_more, 1);
		break;
	}
}
/*
 * dlil_output
 *
 * Caller should have a lock on the protocol domain if the protocol
 * doesn't support finer grained locking.  In most cases, the lock
 * will be held from the socket layer and won't be released until
 * we return back to the socket layer.
 *
 * This does mean that we must take a protocol lock before we take
 * an interface lock if we're going to take both.  This makes sense
 * because a protocol is likely to interact with an ifp while it
 * is under the protocol lock.
 *
 * An advisory code will be returned if adv is not null.  This
 * can be used to provide feedback about interface queues to the
 * application.
 */
errno_t
dlil_output(ifnet_t ifp, protocol_family_t proto_family, mbuf_t packetlist,
    void *route, const struct sockaddr *dest, int raw, struct flowadv *adv)
{
	char *frame_type = NULL;
	char *dst_linkaddr = NULL;
	int retval = 0;
	char frame_type_buffer[MAX_FRAME_TYPE_SIZE * 4];
	char dst_linkaddr_buffer[MAX_LINKADDR * 4];
	struct if_proto *proto = NULL;
	mbuf_t m;
	mbuf_t send_head = NULL;
	mbuf_t *send_tail = &send_head;
	int iorefcnt = 0;
	u_int32_t pre = 0, post = 0;
	u_int32_t fpkts = 0, fbytes = 0;
	int32_t flen = 0;
	struct timespec now;
	u_int64_t now_nsec;

	KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_START, 0, 0, 0, 0, 0);

	/*
	 * Get an io refcnt if the interface is attached to prevent ifnet_detach
	 * from happening while this operation is in progress
	 */
	if (!ifnet_is_attached(ifp, 1)) {
		retval = ENXIO;
		goto cleanup;
	}
	iorefcnt = 1;

	VERIFY(ifp->if_output_dlil != NULL);

	/* update the driver's multicast filter, if needed */
	if (ifp->if_updatemcasts > 0 && if_mcasts_update(ifp) == 0)
		ifp->if_updatemcasts = 0;

	frame_type = frame_type_buffer;
	dst_linkaddr = dst_linkaddr_buffer;

	if (raw == 0) {
		ifnet_lock_shared(ifp);
		/* callee holds a proto refcnt upon success */
		proto = find_attached_proto(ifp, proto_family);
		if (proto == NULL) {
			ifnet_lock_done(ifp);
			retval = ENXIO;
			goto cleanup;
		}
		ifnet_lock_done(ifp);
	}

preout_again:
	if (packetlist == NULL)
		goto cleanup;

	m = packetlist;
	packetlist = packetlist->m_nextpkt;
	m->m_nextpkt = NULL;

	if (raw == 0) {
		proto_media_preout preoutp = (proto->proto_kpi == kProtoKPI_v1 ?
		    proto->kpi.v1.pre_output : proto->kpi.v2.pre_output);
		retval = 0;
		if (preoutp != NULL) {
			retval = preoutp(ifp, proto_family, &m, dest, route,
			    frame_type, dst_linkaddr);

			if (retval != 0) {
				if (retval == EJUSTRETURN)
					goto preout_again;
				m_freem(m);
				goto cleanup;
			}
		}
	}

#if CONFIG_MACF_NET
	retval = mac_ifnet_check_transmit(ifp, m, proto_family,
	    dlil_get_socket_type(&m, proto_family, raw));
	if (retval != 0) {
		m_freem(m);
		goto cleanup;
	}
#endif /* CONFIG_MACF_NET */

	do {
#if CONFIG_DTRACE
		if (!raw && proto_family == PF_INET) {
			struct ip *ip = mtod(m, struct ip *);
			DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL,
			    struct ip *, ip, struct ifnet *, ifp,
			    struct ip *, ip, struct ip6_hdr *, NULL);

		} else if (!raw && proto_family == PF_INET6) {
			struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
			DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL,
			    struct ip6_hdr *, ip6, struct ifnet *, ifp,
			    struct ip *, NULL, struct ip6_hdr *, ip6);
		}
#endif /* CONFIG_DTRACE */

		if (raw == 0 && ifp->if_framer != NULL) {
			int rcvif_set = 0;

			/*
			 * If this is a broadcast packet that needs to be
			 * looped back into the system, set the inbound ifp
			 * to that of the outbound ifp.  This will allow
			 * us to determine that it is a legitimate packet
			 * for the system.  Only set the ifp if it's not
			 * already set, just to be safe.
			 */
			if ((m->m_flags & (M_BCAST | M_LOOP)) &&
			    m->m_pkthdr.rcvif == NULL) {
				m->m_pkthdr.rcvif = ifp;
				rcvif_set = 1;
			}

			retval = ifp->if_framer(ifp, &m, dest, dst_linkaddr,
			    frame_type, &pre, &post);
			if (retval != 0) {
				if (retval != EJUSTRETURN)
					m_freem(m);
				goto next;
			}

			/*
			 * For partial checksum offload, adjust the start
			 * and stuff offsets based on the prepended header.
			 */
			if ((m->m_pkthdr.csum_flags &
			    (CSUM_DATA_VALID | CSUM_PARTIAL)) ==
			    (CSUM_DATA_VALID | CSUM_PARTIAL)) {
				m->m_pkthdr.csum_tx_stuff += pre;
				m->m_pkthdr.csum_tx_start += pre;
			}

			if (hwcksum_dbg != 0 && !(ifp->if_flags & IFF_LOOPBACK))
				dlil_output_cksum_dbg(ifp, m, pre,
				    proto_family);

			/*
			 * Clear the ifp if it was set above, and to be
			 * safe, only if it is still the same as the
			 * outbound ifp we have in context.  If it was
			 * looped back, then a copy of it was sent to the
			 * loopback interface with the rcvif set, and we
			 * are clearing the one that will go down to the
			 * layer below.
			 */
			if (rcvif_set && m->m_pkthdr.rcvif == ifp)
				m->m_pkthdr.rcvif = NULL;
		}

		/*
		 * Let interface filters (if any) do their thing ...
		 */
		/* Do not pass VLAN tagged packets to filters PR-3586856 */
		if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) == 0) {
			retval = dlil_interface_filters_output(ifp,
			    &m, proto_family);
			if (retval != 0) {
				if (retval != EJUSTRETURN)
					m_freem(m);
				goto next;
			}
		}
		/*
		 * Strip away M_PROTO1 bit prior to sending packet
		 * to the driver as this field may be used by the driver
		 */
		m->m_flags &= ~M_PROTO1;

		/*
		 * If the underlying interface is not capable of handling a
		 * packet whose data portion spans across physically disjoint
		 * pages, we need to "normalize" the packet so that we pass
		 * down a chain of mbufs where each mbuf points to a span that
		 * resides in the system page boundary.  If the packet does
		 * not cross page(s), the following is a no-op.
		 */
		if (!(ifp->if_hwassist & IFNET_MULTIPAGES)) {
			if ((m = m_normalize(m)) == NULL)
				goto next;
		}

		/*
		 * If this is a TSO packet, make sure the interface still
		 * advertise TSO capability.
		 */
		if (TSO_IPV4_NOTOK(ifp, m) || TSO_IPV6_NOTOK(ifp, m)) {
			retval = EMSGSIZE;
			m_freem(m);
			goto cleanup;
		}

		ifp_inc_traffic_class_out(ifp, m);
		pktap_output(ifp, proto_family, m, pre, post);

		/*
		 * Count the number of elements in the mbuf chain
		 */
		if (tx_chain_len_count) {
			dlil_count_chain_len(m, &tx_chain_len_stats);
		}

		/*
		 * Record timestamp; ifnet_enqueue() will use this info
		 * rather than redoing the work.  An optimization could
		 * involve doing this just once at the top, if there are
		 * no interface filters attached, but that's probably
		 * not a big deal.
		 */
		nanouptime(&now);
		net_timernsec(&now, &now_nsec);
		(void) mbuf_set_timestamp(m, now_nsec, TRUE);

		/*
		 * Discard partial sum information if this packet originated
		 * from another interface; the packet would already have the
		 * final checksum and we shouldn't recompute it.
		 */
		if ((m->m_pkthdr.pkt_flags & PKTF_FORWARDED) &&
		    (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID|CSUM_PARTIAL)) ==
		    (CSUM_DATA_VALID|CSUM_PARTIAL)) {
			m->m_pkthdr.csum_flags &= ~CSUM_TX_FLAGS;
			m->m_pkthdr.csum_data = 0;
		}

		/*
		 * Finally, call the driver.
		 */
		if (ifp->if_eflags & (IFEF_SENDLIST | IFEF_ENQUEUE_MULTI)) {
			if (m->m_pkthdr.pkt_flags & PKTF_FORWARDED) {
				flen += (m_pktlen(m) - (pre + post));
				m->m_pkthdr.pkt_flags &= ~PKTF_FORWARDED;
			}
			*send_tail = m;
			send_tail = &m->m_nextpkt;
		} else {
			if (m->m_pkthdr.pkt_flags & PKTF_FORWARDED) {
				flen = (m_pktlen(m) - (pre + post));
				m->m_pkthdr.pkt_flags &= ~PKTF_FORWARDED;
			} else {
				flen = 0;
			}
			KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START,
			    0, 0, 0, 0, 0);
			retval = (*ifp->if_output_dlil)(ifp, m);
			if (retval == EQFULL || retval == EQSUSPENDED) {
				if (adv != NULL && adv->code == FADV_SUCCESS) {
					adv->code = (retval == EQFULL ?
					    FADV_FLOW_CONTROLLED :
					    FADV_SUSPENDED);
				}
				retval = 0;
			}
			if (retval == 0 && flen > 0) {
				fbytes += flen;
				fpkts++;
			}
			if (retval != 0 && dlil_verbose) {
				printf("%s: output error on %s retval = %d\n",
				    __func__, if_name(ifp),
				    retval);
			}
			KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END,
			    0, 0, 0, 0, 0);
		}
		KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_END, 0, 0, 0, 0, 0);

next:
		m = packetlist;
		if (m != NULL) {
			packetlist = packetlist->m_nextpkt;
			m->m_nextpkt = NULL;
		}
	} while (m != NULL);

	if (send_head != NULL) {
		KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START,
		    0, 0, 0, 0, 0);
		if (ifp->if_eflags & IFEF_SENDLIST) {
			retval = (*ifp->if_output_dlil)(ifp, send_head);
			if (retval == EQFULL || retval == EQSUSPENDED) {
				if (adv != NULL) {
					adv->code = (retval == EQFULL ?
					    FADV_FLOW_CONTROLLED :
					    FADV_SUSPENDED);
				}
				retval = 0;
			}
			if (retval == 0 && flen > 0) {
				fbytes += flen;
				fpkts++;
			}
			if (retval != 0 && dlil_verbose) {
				printf("%s: output error on %s retval = %d\n",
				    __func__, if_name(ifp), retval);
			}
		} else {
			struct mbuf *send_m;
			int enq_cnt = 0;

			VERIFY(ifp->if_eflags & IFEF_ENQUEUE_MULTI);
			while (send_head != NULL) {
				send_m = send_head;
				send_head = send_m->m_nextpkt;
				send_m->m_nextpkt = NULL;
				retval = (*ifp->if_output_dlil)(ifp, send_m);
				if (retval == EQFULL || retval == EQSUSPENDED) {
					if (adv != NULL) {
						adv->code = (retval == EQFULL ?
						    FADV_FLOW_CONTROLLED :
						    FADV_SUSPENDED);
					}
					retval = 0;
				}
				if (retval == 0) {
					enq_cnt++;
					if (flen > 0)
						fpkts++;
				}
				if (retval != 0 && dlil_verbose) {
					printf("%s: output error on %s "
					    "retval = %d\n",
					    __func__, if_name(ifp), retval);
				}
			}
			if (enq_cnt > 0) {
				fbytes += flen;
				ifnet_start(ifp);
			}
		}
		KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
	}

	KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_END, 0, 0, 0, 0, 0);

cleanup:
	if (fbytes > 0)
		ifp->if_fbytes += fbytes;
	if (fpkts > 0)
		ifp->if_fpackets += fpkts;
	if (proto != NULL)
		if_proto_free(proto);
	if (packetlist) /* if any packets are left, clean up */
		mbuf_freem_list(packetlist);
	if (retval == EJUSTRETURN)
		retval = 0;
	if (iorefcnt == 1)
		ifnet_decr_iorefcnt(ifp);

	return (retval);
}
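/*
 * ifnet_ioctl() dispatch order: interface filters first, then the
 * attached protocol (when proto_fam is non-zero), then the driver's
 * if_ioctl.  ENOTSUP/EOPNOTSUPP answers are treated as "not handled"
 * so that a later layer still gets a chance to claim the ioctl.
 */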
errno_t
ifnet_ioctl(ifnet_t ifp, protocol_family_t proto_fam, u_long ioctl_code,
    void *ioctl_arg)
{
	struct ifnet_filter *filter;
	int retval = EOPNOTSUPP;
	int result = 0;

	if (ifp == NULL || ioctl_code == 0)
		return (EINVAL);

	/* Get an io ref count if the interface is attached */
	if (!ifnet_is_attached(ifp, 1))
		return (EOPNOTSUPP);

	/*
	 * Run the interface filters first.
	 * We want to run all filters before calling the protocol,
	 * interface family, or interface.
	 */
	lck_mtx_lock_spin(&ifp->if_flt_lock);
	/* prevent filter list from changing in case we drop the lock */
	if_flt_monitor_busy(ifp);
	TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
		if (filter->filt_ioctl != NULL && (filter->filt_protocol == 0 ||
		    filter->filt_protocol == proto_fam)) {
			lck_mtx_unlock(&ifp->if_flt_lock);

			result = filter->filt_ioctl(filter->filt_cookie, ifp,
			    proto_fam, ioctl_code, ioctl_arg);

			lck_mtx_lock_spin(&ifp->if_flt_lock);

			/* Only update retval if no one has handled the ioctl */
			if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
				if (result == ENOTSUP)
					result = EOPNOTSUPP;
				retval = result;
				if (retval != 0 && retval != EOPNOTSUPP) {
					/* we're done with the filter list */
					if_flt_monitor_unbusy(ifp);
					lck_mtx_unlock(&ifp->if_flt_lock);
					goto cleanup;
				}
			}
		}
	}
	/* we're done with the filter list */
	if_flt_monitor_unbusy(ifp);
	lck_mtx_unlock(&ifp->if_flt_lock);

	/* Allow the protocol to handle the ioctl */
	if (proto_fam != 0) {
		struct if_proto *proto;

		/* callee holds a proto refcnt upon success */
		ifnet_lock_shared(ifp);
		proto = find_attached_proto(ifp, proto_fam);
		ifnet_lock_done(ifp);
		if (proto != NULL) {
			proto_media_ioctl ioctlp =
			    (proto->proto_kpi == kProtoKPI_v1 ?
			    proto->kpi.v1.ioctl : proto->kpi.v2.ioctl);
			result = EOPNOTSUPP;
			if (ioctlp != NULL)
				result = ioctlp(ifp, proto_fam, ioctl_code,
				    ioctl_arg);
			if_proto_free(proto);

			/* Only update retval if no one has handled the ioctl */
			if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
				if (result == ENOTSUP)
					result = EOPNOTSUPP;
				retval = result;
				if (retval && retval != EOPNOTSUPP)
					goto cleanup;
			}
		}
	}

	/* retval is either 0 or EOPNOTSUPP */

	/*
	 * Let the interface handle this ioctl.
	 * If it returns EOPNOTSUPP, ignore that, we may have
	 * already handled this in the protocol or family.
	 */
	if (ifp->if_ioctl)
		result = (*ifp->if_ioctl)(ifp, ioctl_code, ioctl_arg);

	/* Only update retval if no one has handled the ioctl */
	if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
		if (result == ENOTSUP)
			result = EOPNOTSUPP;
		retval = result;
		if (retval && retval != EOPNOTSUPP) {
			goto cleanup;
		}
	}

	if (retval == EJUSTRETURN)
		retval = 0;

cleanup:
	ifnet_decr_iorefcnt(ifp);

	return (retval);
}
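/*
 * Forward a BPF tap request to the driver, holding an IO reference
 * across the callback so the interface cannot detach mid-call.
 */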
__private_extern__ errno_t
dlil_set_bpf_tap(ifnet_t ifp, bpf_tap_mode mode, bpf_packet_func callback)
{
	errno_t error = 0;

	if (ifp->if_set_bpf_tap) {
		/* Get an io reference on the interface if it is attached */
		if (!ifnet_is_attached(ifp, 1))
			return (ENXIO);
		error = ifp->if_set_bpf_tap(ifp, mode, callback);
		ifnet_decr_iorefcnt(ifp);
	}
	return (error);
}
errno_t
dlil_resolve_multi(struct ifnet *ifp, const struct sockaddr *proto_addr,
    struct sockaddr *ll_addr, size_t ll_len)
{
	errno_t result = EOPNOTSUPP;
	struct if_proto *proto;
	const struct sockaddr *verify;
	proto_media_resolve_multi resolvep;

	if (!ifnet_is_attached(ifp, 1))
		return (result);

	bzero(ll_addr, ll_len);

	/* Call the protocol first; callee holds a proto refcnt upon success */
	ifnet_lock_shared(ifp);
	proto = find_attached_proto(ifp, proto_addr->sa_family);
	ifnet_lock_done(ifp);
	if (proto != NULL) {
		resolvep = (proto->proto_kpi == kProtoKPI_v1 ?
		    proto->kpi.v1.resolve_multi : proto->kpi.v2.resolve_multi);
		if (resolvep != NULL)
			result = resolvep(ifp, proto_addr,
			    (struct sockaddr_dl *)(void *)ll_addr, ll_len);
		if_proto_free(proto);
	}

	/* Let the interface verify the multicast address */
	if ((result == EOPNOTSUPP || result == 0) && ifp->if_check_multi) {
		if (result == 0)
			verify = ll_addr;
		else
			verify = proto_addr;
		result = ifp->if_check_multi(ifp, verify);
	}

	ifnet_decr_iorefcnt(ifp);
	return (result);
}
__private_extern__ errno_t
dlil_send_arp_internal(ifnet_t ifp, u_short arpop,
    const struct sockaddr_dl *sender_hw, const struct sockaddr *sender_proto,
    const struct sockaddr_dl *target_hw, const struct sockaddr *target_proto)
{
	struct if_proto *proto;
	errno_t result = 0;

	/* callee holds a proto refcnt upon success */
	ifnet_lock_shared(ifp);
	proto = find_attached_proto(ifp, target_proto->sa_family);
	ifnet_lock_done(ifp);
	if (proto == NULL) {
		result = ENOTSUP;
	} else {
		proto_media_send_arp arpp;
		arpp = (proto->proto_kpi == kProtoKPI_v1 ?
		    proto->kpi.v1.send_arp : proto->kpi.v2.send_arp);
		if (arpp == NULL) {
			result = ENOTSUP;
		} else {
			switch (arpop) {
			case ARPOP_REQUEST:
				arpstat.txrequests++;
				if (target_hw != NULL)
					arpstat.txurequests++;
				break;
			case ARPOP_REPLY:
				arpstat.txreplies++;
				break;
			}
			result = arpp(ifp, arpop, sender_hw, sender_proto,
			    target_hw, target_proto);
		}
		if_proto_free(proto);
	}

	return (result);
}
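/*
 * Per-thread network "marks": a bit set kept in the uthread's
 * uu_network_marks.  push() returns an opaque token recording which
 * bits it newly set, encoded as a small offset off the address of a
 * zero-sized static object; pop() uses that offset to clear exactly
 * those bits, so nested push/pop pairs compose without extra storage.
 *
 * Illustrative (hypothetical) usage, assuming a MARK_X bit:
 *
 *	net_thread_marks_t marks = net_thread_marks_push(MARK_X);
 *	...		// MARK_X now visible via net_thread_is_marked()
 *	net_thread_marks_pop(marks);	// clears only the bits we set
 */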
struct net_thread_marks { };
static const struct net_thread_marks net_thread_marks_base = { };

__private_extern__ const net_thread_marks_t net_thread_marks_none =
    &net_thread_marks_base;

__private_extern__ net_thread_marks_t
net_thread_marks_push(u_int32_t push)
{
	static const char *const base = (const void*)&net_thread_marks_base;
	u_int32_t pop = 0;

	if (push != 0) {
		struct uthread *uth = get_bsdthread_info(current_thread());

		pop = push & ~uth->uu_network_marks;
		if (pop != 0)
			uth->uu_network_marks |= pop;
	}

	return ((net_thread_marks_t)&base[pop]);
}

__private_extern__ net_thread_marks_t
net_thread_unmarks_push(u_int32_t unpush)
{
	static const char *const base = (const void*)&net_thread_marks_base;
	u_int32_t unpop = 0;

	if (unpush != 0) {
		struct uthread *uth = get_bsdthread_info(current_thread());

		unpop = unpush & uth->uu_network_marks;
		if (unpop != 0)
			uth->uu_network_marks &= ~unpop;
	}

	return ((net_thread_marks_t)&base[unpop]);
}

__private_extern__ void
net_thread_marks_pop(net_thread_marks_t popx)
{
	static const char *const base = (const void*)&net_thread_marks_base;
	const ptrdiff_t pop = (const char *)popx - (const char *)base;

	if (pop != 0) {
		static const ptrdiff_t ones = (ptrdiff_t)(u_int32_t)~0U;
		struct uthread *uth = get_bsdthread_info(current_thread());

		VERIFY((pop & ones) == pop);
		VERIFY((ptrdiff_t)(uth->uu_network_marks & pop) == pop);
		uth->uu_network_marks &= ~pop;
	}
}

__private_extern__ void
net_thread_unmarks_pop(net_thread_marks_t unpopx)
{
	static const char *const base = (const void*)&net_thread_marks_base;
	ptrdiff_t unpop = (const char *)unpopx - (const char *)base;

	if (unpop != 0) {
		static const ptrdiff_t ones = (ptrdiff_t)(u_int32_t)~0U;
		struct uthread *uth = get_bsdthread_info(current_thread());

		VERIFY((unpop & ones) == unpop);
		VERIFY((ptrdiff_t)(uth->uu_network_marks & unpop) == 0);
		uth->uu_network_marks |= unpop;
	}
}

__private_extern__ u_int32_t
net_thread_is_marked(u_int32_t check)
{
	if (check != 0) {
		struct uthread *uth = get_bsdthread_info(current_thread());
		return (uth->uu_network_marks & check);
	} else {
		return (0);
	}
}

__private_extern__ u_int32_t
net_thread_is_unmarked(u_int32_t check)
{
	if (check != 0) {
		struct uthread *uth = get_bsdthread_info(current_thread());
		return (~uth->uu_network_marks & check);
	} else {
		return (0);
	}
}
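/*
 * An ARP announcement (gratuitous ARP) has sender IP equal to target
 * IP; a NULL sender means this cannot be an announcement.
 */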
static __inline__ int
_is_announcement(const struct sockaddr_in *sender_sin,
    const struct sockaddr_in *target_sin)
{
	if (sender_sin == NULL) {
		return (FALSE);
	}
	return (sender_sin->sin_addr.s_addr == target_sin->sin_addr.s_addr);
}
__private_extern__ errno_t
dlil_send_arp(ifnet_t ifp, u_short arpop, const struct sockaddr_dl *sender_hw,
    const struct sockaddr *sender_proto, const struct sockaddr_dl *target_hw,
    const struct sockaddr *target_proto0, u_int32_t rtflags)
{
	errno_t result = 0;
	const struct sockaddr_in *sender_sin;
	const struct sockaddr_in *target_sin;
	struct sockaddr_inarp target_proto_sinarp;
	struct sockaddr *target_proto = (void *)(uintptr_t)target_proto0;

	if (target_proto == NULL || (sender_proto != NULL &&
	    sender_proto->sa_family != target_proto->sa_family))
		return (EINVAL);

	/*
	 * If the target is a (default) router, provide that
	 * information to the send_arp callback routine.
	 */
	if (rtflags & RTF_ROUTER) {
		bcopy(target_proto, &target_proto_sinarp,
		    sizeof (struct sockaddr_in));
		target_proto_sinarp.sin_other |= SIN_ROUTER;
		target_proto = (struct sockaddr *)&target_proto_sinarp;
	}

	/*
	 * If this is an ARP request and the target IP is IPv4LL,
	 * send the request on all interfaces.  The exception is
	 * an announcement, which must only appear on the specific
	 * interface.
	 */
	sender_sin = (struct sockaddr_in *)(void *)(uintptr_t)sender_proto;
	target_sin = (struct sockaddr_in *)(void *)(uintptr_t)target_proto;
	if (target_proto->sa_family == AF_INET &&
	    IN_LINKLOCAL(ntohl(target_sin->sin_addr.s_addr)) &&
	    ipv4_ll_arp_aware != 0 && arpop == ARPOP_REQUEST &&
	    !_is_announcement(target_sin, sender_sin)) {
		ifnet_t *ifp_list;
		u_int32_t count;
		u_int32_t ifp_on;

		result = ENOTSUP;

		if (ifnet_list_get(IFNET_FAMILY_ANY, &ifp_list, &count) == 0) {
			for (ifp_on = 0; ifp_on < count; ifp_on++) {
				errno_t new_result;
				ifaddr_t source_hw = NULL;
				ifaddr_t source_ip = NULL;
				struct sockaddr_in source_ip_copy;
				struct ifnet *cur_ifp = ifp_list[ifp_on];

				/*
				 * Only arp on interfaces marked for IPv4LL
				 * ARPing.  This may mean that we don't ARP on
				 * the interface the subnet route points to.
				 */
				if (!(cur_ifp->if_eflags & IFEF_ARPLL))
					continue;

				/* Find the source IP address */
				ifnet_lock_shared(cur_ifp);
				source_hw = cur_ifp->if_lladdr;
				TAILQ_FOREACH(source_ip, &cur_ifp->if_addrhead,
				    ifa_link) {
					IFA_LOCK(source_ip);
					if (source_ip->ifa_addr != NULL &&
					    source_ip->ifa_addr->sa_family ==
					    AF_INET) {
						/* Copy the source IP address */
						source_ip_copy =
						    *(struct sockaddr_in *)
						    (void *)source_ip->ifa_addr;
						IFA_UNLOCK(source_ip);
						break;
					}
					IFA_UNLOCK(source_ip);
				}

				/* No IP Source, don't arp */
				if (source_ip == NULL) {
					ifnet_lock_done(cur_ifp);
					continue;
				}

				IFA_ADDREF(source_hw);
				ifnet_lock_done(cur_ifp);

				/* Send the ARP */
				new_result = dlil_send_arp_internal(cur_ifp,
				    arpop, (struct sockaddr_dl *)(void *)
				    source_hw->ifa_addr,
				    (struct sockaddr *)&source_ip_copy, NULL,
				    target_proto);

				IFA_REMREF(source_hw);
				if (result == ENOTSUP) {
					result = new_result;
				}
			}
			ifnet_list_free(ifp_list);
		}
	} else {
		result = dlil_send_arp_internal(ifp, arpop, sender_hw,
		    sender_proto, target_hw, target_proto);
	}

	return (result);
}
/*
 * Caller must hold ifnet head lock.
 */
static int
ifnet_lookup(struct ifnet *ifp)
{
	struct ifnet *_ifp;

	LCK_RW_ASSERT(&ifnet_head_lock, LCK_RW_ASSERT_HELD);
	TAILQ_FOREACH(_ifp, &ifnet_head, if_link) {
		if (_ifp == ifp)
			break;
	}
	return (_ifp != NULL);
}
/*
 * Caller has to pass a non-zero refio argument to get a
 * IO reference count.  This will prevent ifnet_detach from
 * being called when there are outstanding io reference counts.
 */
int
ifnet_is_attached(struct ifnet *ifp, int refio)
{
	int ret;

	lck_mtx_lock_spin(&ifp->if_ref_lock);
	if ((ret = IF_FULLY_ATTACHED(ifp))) {
		if (refio > 0)
			ifp->if_refio++;
	}
	lck_mtx_unlock(&ifp->if_ref_lock);

	return (ret);
}
/*
 * Caller must ensure the interface is attached; the assumption is that
 * there is at least an outstanding IO reference count held already.
 * Most callers would call ifnet_is_attached() instead.
 */
void
ifnet_incr_iorefcnt(struct ifnet *ifp)
{
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	VERIFY(IF_FULLY_ATTACHED(ifp));
	VERIFY(ifp->if_refio > 0);
	ifp->if_refio++;
	lck_mtx_unlock(&ifp->if_ref_lock);
}
void
ifnet_decr_iorefcnt(struct ifnet *ifp)
{
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	VERIFY(ifp->if_refio > 0);
	VERIFY(ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING));
	ifp->if_refio--;

	/*
	 * if there are no more outstanding io references, wakeup the
	 * ifnet_detach thread if detaching flag is set.
	 */
	if (ifp->if_refio == 0 && (ifp->if_refflags & IFRF_DETACHING))
		wakeup(&(ifp->if_refio));
	lck_mtx_unlock(&ifp->if_ref_lock);
}
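/*
 * Debug-only refcount tracing: record the backtrace of each hold and
 * release into fixed-size ring buffers hanging off dlil_ifnet_dbg.
 */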
static void
dlil_if_trace(struct dlil_ifnet *dl_if, int refhold)
{
	struct dlil_ifnet_dbg *dl_if_dbg = (struct dlil_ifnet_dbg *)dl_if;
	ctrace_t *tr;
	u_int32_t idx;
	u_int16_t *cnt;

	if (!(dl_if->dl_if_flags & DLIF_DEBUG)) {
		panic("%s: dl_if %p has no debug structure", __func__, dl_if);
		/* NOTREACHED */
	}

	if (refhold) {
		cnt = &dl_if_dbg->dldbg_if_refhold_cnt;
		tr = dl_if_dbg->dldbg_if_refhold;
	} else {
		cnt = &dl_if_dbg->dldbg_if_refrele_cnt;
		tr = dl_if_dbg->dldbg_if_refrele;
	}

	idx = atomic_add_16_ov(cnt, 1) % IF_REF_TRACE_HIST_SIZE;
	ctrace_record(&tr[idx]);
}
static void
dlil_if_ref(struct ifnet *ifp)
{
	struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;

	if (dl_if == NULL)
		return;

	lck_mtx_lock_spin(&dl_if->dl_if_lock);
	++dl_if->dl_if_refcnt;
	if (dl_if->dl_if_refcnt == 0) {
		panic("%s: wraparound refcnt for ifp=%p", __func__, ifp);
		/* NOTREACHED */
	}
	if (dl_if->dl_if_trace != NULL)
		(*dl_if->dl_if_trace)(dl_if, TRUE);
	lck_mtx_unlock(&dl_if->dl_if_lock);
}
static void
dlil_if_free(struct ifnet *ifp)
{
	struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
	bool need_release = FALSE;

	if (dl_if == NULL)
		return;

	lck_mtx_lock_spin(&dl_if->dl_if_lock);
	switch (dl_if->dl_if_refcnt) {
	case 0:
		panic("%s: negative refcnt for ifp=%p", __func__, ifp);
		/* NOTREACHED */
		break;
	case 1:
		if ((ifp->if_refflags & IFRF_EMBRYONIC) != 0) {
			need_release = TRUE;
		}
		break;
	default:
		break;
	}
	--dl_if->dl_if_refcnt;
	if (dl_if->dl_if_trace != NULL)
		(*dl_if->dl_if_trace)(dl_if, FALSE);
	lck_mtx_unlock(&dl_if->dl_if_lock);
	if (need_release) {
		dlil_if_release(ifp);
	}
}
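/*
 * Common attach path for both KPI versions: let the family module
 * refine the demux descriptors, link the if_proto into the per-ifnet
 * hash, take the attach refcnt, and post KEV_DL_PROTO_ATTACHED along
 * with the number of protocols now attached.
 */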
static errno_t
dlil_attach_protocol_internal(struct if_proto *proto,
    const struct ifnet_demux_desc *demux_list, u_int32_t demux_count,
    uint32_t *proto_count)
{
	struct kev_dl_proto_data ev_pr_data;
	struct ifnet *ifp = proto->ifp;
	int retval = 0;
	u_int32_t hash_value = proto_hash_value(proto->protocol_family);
	struct if_proto *prev_proto;
	struct if_proto *_proto;

	/* callee holds a proto refcnt upon success */
	ifnet_lock_exclusive(ifp);
	_proto = find_attached_proto(ifp, proto->protocol_family);
	if (_proto != NULL) {
		ifnet_lock_done(ifp);
		if_proto_free(_proto);
		return (EEXIST);
	}

	/*
	 * Call family module add_proto routine so it can refine the
	 * demux descriptors as it wishes.
	 */
	retval = ifp->if_add_proto(ifp, proto->protocol_family, demux_list,
	    demux_count);
	if (retval) {
		ifnet_lock_done(ifp);
		return (retval);
	}

	/*
	 * Insert the protocol in the hash
	 */
	prev_proto = SLIST_FIRST(&ifp->if_proto_hash[hash_value]);
	while (prev_proto != NULL && SLIST_NEXT(prev_proto, next_hash) != NULL)
		prev_proto = SLIST_NEXT(prev_proto, next_hash);
	if (prev_proto)
		SLIST_INSERT_AFTER(prev_proto, proto, next_hash);
	else
		SLIST_INSERT_HEAD(&ifp->if_proto_hash[hash_value],
		    proto, next_hash);

	/* hold a proto refcnt for attach */
	if_proto_ref(proto);

	/*
	 * The reserved field carries the number of protocol still attached
	 * (subject to change)
	 */
	ev_pr_data.proto_family = proto->protocol_family;
	ev_pr_data.proto_remaining_count = dlil_ifp_proto_count(ifp);
	ifnet_lock_done(ifp);

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_ATTACHED,
	    (struct net_event_data *)&ev_pr_data,
	    sizeof (struct kev_dl_proto_data));
	if (proto_count != NULL) {
		*proto_count = ev_pr_data.proto_remaining_count;
	}
	return (retval);
}
errno_t
ifnet_attach_protocol(ifnet_t ifp, protocol_family_t protocol,
    const struct ifnet_attach_proto_param *proto_details)
{
	int retval = 0;
	struct if_proto *ifproto = NULL;
	uint32_t proto_count = 0;

	ifnet_head_lock_shared();
	if (ifp == NULL || protocol == 0 || proto_details == NULL) {
		retval = EINVAL;
		goto end;
	}
	/* Check that the interface is in the global list */
	if (!ifnet_lookup(ifp)) {
		retval = ENXIO;
		goto end;
	}

	ifproto = zalloc(dlif_proto_zone);
	if (ifproto == NULL) {
		retval = ENOMEM;
		goto end;
	}
	bzero(ifproto, dlif_proto_size);

	/* refcnt held above during lookup */
	ifproto->ifp = ifp;
	ifproto->protocol_family = protocol;
	ifproto->proto_kpi = kProtoKPI_v1;
	ifproto->kpi.v1.input = proto_details->input;
	ifproto->kpi.v1.pre_output = proto_details->pre_output;
	ifproto->kpi.v1.event = proto_details->event;
	ifproto->kpi.v1.ioctl = proto_details->ioctl;
	ifproto->kpi.v1.detached = proto_details->detached;
	ifproto->kpi.v1.resolve_multi = proto_details->resolve;
	ifproto->kpi.v1.send_arp = proto_details->send_arp;

	retval = dlil_attach_protocol_internal(ifproto,
	    proto_details->demux_list, proto_details->demux_count,
	    &proto_count);

end:
	if (retval != 0 && retval != EEXIST && ifp != NULL) {
		DLIL_PRINTF("%s: failed to attach v1 protocol %d (err=%d)\n",
		    if_name(ifp), protocol, retval);
	} else {
		if (dlil_verbose) {
			printf("%s: attached v1 protocol %d (count = %d)\n",
			    if_name(ifp),
			    protocol, proto_count);
		}
	}
	ifnet_head_done();
	if (retval == 0) {
		/* attach succeeded; ifproto is now owned by the ifnet */
	} else if (ifproto != NULL) {
		zfree(dlif_proto_zone, ifproto);
	}
	return (retval);
}
errno_t
ifnet_attach_protocol_v2(ifnet_t ifp, protocol_family_t protocol,
    const struct ifnet_attach_proto_param_v2 *proto_details)
{
	int retval = 0;
	struct if_proto *ifproto = NULL;
	uint32_t proto_count = 0;

	ifnet_head_lock_shared();
	if (ifp == NULL || protocol == 0 || proto_details == NULL) {
		retval = EINVAL;
		goto end;
	}
	/* Check that the interface is in the global list */
	if (!ifnet_lookup(ifp)) {
		retval = ENXIO;
		goto end;
	}

	ifproto = zalloc(dlif_proto_zone);
	if (ifproto == NULL) {
		retval = ENOMEM;
		goto end;
	}
	bzero(ifproto, sizeof(*ifproto));

	/* refcnt held above during lookup */
	ifproto->ifp = ifp;
	ifproto->protocol_family = protocol;
	ifproto->proto_kpi = kProtoKPI_v2;
	ifproto->kpi.v2.input = proto_details->input;
	ifproto->kpi.v2.pre_output = proto_details->pre_output;
	ifproto->kpi.v2.event = proto_details->event;
	ifproto->kpi.v2.ioctl = proto_details->ioctl;
	ifproto->kpi.v2.detached = proto_details->detached;
	ifproto->kpi.v2.resolve_multi = proto_details->resolve;
	ifproto->kpi.v2.send_arp = proto_details->send_arp;

	retval = dlil_attach_protocol_internal(ifproto,
	    proto_details->demux_list, proto_details->demux_count,
	    &proto_count);

end:
	if (retval != 0 && retval != EEXIST && ifp != NULL) {
		DLIL_PRINTF("%s: failed to attach v2 protocol %d (err=%d)\n",
		    if_name(ifp), protocol, retval);
	} else {
		if (dlil_verbose) {
			printf("%s: attached v2 protocol %d (count = %d)\n",
			    if_name(ifp),
			    protocol, proto_count);
		}
	}
	ifnet_head_done();
	if (retval == 0) {
		/* attach succeeded; ifproto is now owned by the ifnet */
	} else if (ifproto != NULL) {
		zfree(dlif_proto_zone, ifproto);
	}
	return (retval);
}
errno_t
ifnet_detach_protocol(ifnet_t ifp, protocol_family_t proto_family)
{
	struct if_proto *proto = NULL;
	int retval = 0;

	if (ifp == NULL || proto_family == 0) {
		retval = EINVAL;
		goto end;
	}

	ifnet_lock_exclusive(ifp);
	/* callee holds a proto refcnt upon success */
	proto = find_attached_proto(ifp, proto_family);
	if (proto == NULL) {
		retval = ENXIO;
		ifnet_lock_done(ifp);
		goto end;
	}

	/* call family module del_proto */
	if (ifp->if_del_proto)
		ifp->if_del_proto(ifp, proto->protocol_family);

	SLIST_REMOVE(&ifp->if_proto_hash[proto_hash_value(proto_family)],
	    proto, if_proto, next_hash);

	if (proto->proto_kpi == kProtoKPI_v1) {
		proto->kpi.v1.input = ifproto_media_input_v1;
		proto->kpi.v1.pre_output = ifproto_media_preout;
		proto->kpi.v1.event = ifproto_media_event;
		proto->kpi.v1.ioctl = ifproto_media_ioctl;
		proto->kpi.v1.resolve_multi = ifproto_media_resolve_multi;
		proto->kpi.v1.send_arp = ifproto_media_send_arp;
	} else {
		proto->kpi.v2.input = ifproto_media_input_v2;
		proto->kpi.v2.pre_output = ifproto_media_preout;
		proto->kpi.v2.event = ifproto_media_event;
		proto->kpi.v2.ioctl = ifproto_media_ioctl;
		proto->kpi.v2.resolve_multi = ifproto_media_resolve_multi;
		proto->kpi.v2.send_arp = ifproto_media_send_arp;
	}
	proto->detached = 1;
	ifnet_lock_done(ifp);

	if (dlil_verbose) {
		printf("%s: detached %s protocol %d\n", if_name(ifp),
		    (proto->proto_kpi == kProtoKPI_v1) ?
		    "v1" : "v2", proto_family);
	}

	/* release proto refcnt held during protocol attach */
	if_proto_free(proto);

	/*
	 * Release proto refcnt held during lookup; the rest of
	 * protocol detach steps will happen when the last proto
	 * reference is released.
	 */
	if_proto_free(proto);

end:
	return (retval);
}
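/*
 * Placeholder callbacks installed by ifnet_detach_protocol() above.
 * Any straggler still using the detached if_proto hits one of these
 * and gets a harmless error instead of jumping through a stale pointer.
 */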
static errno_t
ifproto_media_input_v1(struct ifnet *ifp, protocol_family_t protocol,
    struct mbuf *packet, char *header)
{
#pragma unused(ifp, protocol, packet, header)
	return (ENXIO);
}

static errno_t
ifproto_media_input_v2(struct ifnet *ifp, protocol_family_t protocol,
    struct mbuf *packet)
{
#pragma unused(ifp, protocol, packet)
	return (ENXIO);
}

static errno_t
ifproto_media_preout(struct ifnet *ifp, protocol_family_t protocol,
    mbuf_t *packet, const struct sockaddr *dest, void *route, char *frame_type,
    char *link_layer_dest)
{
#pragma unused(ifp, protocol, packet, dest, route, frame_type, link_layer_dest)
	return (ENXIO);
}

static void
ifproto_media_event(struct ifnet *ifp, protocol_family_t protocol,
    const struct kev_msg *event)
{
#pragma unused(ifp, protocol, event)
}

static errno_t
ifproto_media_ioctl(struct ifnet *ifp, protocol_family_t protocol,
    unsigned long command, void *argument)
{
#pragma unused(ifp, protocol, command, argument)
	return (ENXIO);
}

static errno_t
ifproto_media_resolve_multi(ifnet_t ifp, const struct sockaddr *proto_addr,
    struct sockaddr_dl *out_ll, size_t ll_len)
{
#pragma unused(ifp, proto_addr, out_ll, ll_len)
	return (ENXIO);
}

static errno_t
ifproto_media_send_arp(struct ifnet *ifp, u_short arpop,
    const struct sockaddr_dl *sender_hw, const struct sockaddr *sender_proto,
    const struct sockaddr_dl *target_hw, const struct sockaddr *target_proto)
{
#pragma unused(ifp, arpop, sender_hw, sender_proto, target_hw, target_proto)
	return (ENXIO);
}
extern int if_next_index(void);
extern int tcp_ecn_outbound;

errno_t
ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr)
{
	struct ifnet *tmp_if;
	struct ifaddr *ifa;
	struct if_data_internal if_data_saved;
	struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
	struct dlil_threading_info *dl_inp;
	u_int32_t sflags = 0;
	int err;

	if (ifp == NULL)
		return (EINVAL);

	/*
	 * Serialize ifnet attach using dlil_ifnet_lock, in order to
	 * prevent the interface from being configured while it is
	 * embryonic, as ifnet_head_lock is dropped and reacquired
	 * below prior to marking the ifnet with IFRF_ATTACHED.
	 */
	dlil_if_lock();
	ifnet_head_lock_exclusive();
	/* Verify we aren't already on the list */
	TAILQ_FOREACH(tmp_if, &ifnet_head, if_link) {
		if (tmp_if == ifp) {
			ifnet_head_done();
			dlil_if_unlock();
			return (EEXIST);
		}
	}

	lck_mtx_lock_spin(&ifp->if_ref_lock);
	if (!(ifp->if_refflags & IFRF_EMBRYONIC)) {
		panic_plain("%s: flags mismatch (embryonic not set) ifp=%p",
		    __func__, ifp);
		/* NOTREACHED */
	}
	lck_mtx_unlock(&ifp->if_ref_lock);

	ifnet_lock_exclusive(ifp);

	/* Sanity check */
	VERIFY(ifp->if_detaching_link.tqe_next == NULL);
	VERIFY(ifp->if_detaching_link.tqe_prev == NULL);

	if (ll_addr != NULL) {
		if (ifp->if_addrlen == 0) {
			ifp->if_addrlen = ll_addr->sdl_alen;
		} else if (ll_addr->sdl_alen != ifp->if_addrlen) {
			ifnet_lock_done(ifp);
			ifnet_head_done();
			dlil_if_unlock();
			return (EINVAL);
		}
	}

	/*
	 * Allow interfaces without protocol families to attach
	 * only if they have the necessary fields filled out.
	 */
	if (ifp->if_add_proto == NULL || ifp->if_del_proto == NULL) {
		DLIL_PRINTF("%s: Attempt to attach interface without "
		    "family module - %d\n", __func__, ifp->if_family);
		ifnet_lock_done(ifp);
		ifnet_head_done();
		dlil_if_unlock();
		return (ENODEV);
	}

	/* Allocate protocol hash table */
	VERIFY(ifp->if_proto_hash == NULL);
	ifp->if_proto_hash = zalloc(dlif_phash_zone);
	if (ifp->if_proto_hash == NULL) {
		ifnet_lock_done(ifp);
		ifnet_head_done();
		dlil_if_unlock();
		return (ENOBUFS);
	}
	bzero(ifp->if_proto_hash, dlif_phash_size);

	lck_mtx_lock_spin(&ifp->if_flt_lock);
	VERIFY(TAILQ_EMPTY(&ifp->if_flt_head));
	TAILQ_INIT(&ifp->if_flt_head);
	VERIFY(ifp->if_flt_busy == 0);
	VERIFY(ifp->if_flt_waiters == 0);
	lck_mtx_unlock(&ifp->if_flt_lock);

	if (!(dl_if->dl_if_flags & DLIF_REUSE)) {
		VERIFY(LIST_EMPTY(&ifp->if_multiaddrs));
		LIST_INIT(&ifp->if_multiaddrs);
	}

	VERIFY(ifp->if_allhostsinm == NULL);
	VERIFY(TAILQ_EMPTY(&ifp->if_addrhead));
	TAILQ_INIT(&ifp->if_addrhead);

	if (ifp->if_index == 0) {
		int idx = if_next_index();
		if (idx == -1) {
			ifp->if_index = 0;
			ifnet_lock_done(ifp);
			ifnet_head_done();
			dlil_if_unlock();
			return (ENOBUFS);
		}
		ifp->if_index = idx;
	}
	/* There should not be anything occupying this slot */
	VERIFY(ifindex2ifnet[ifp->if_index] == NULL);

	/* allocate (if needed) and initialize a link address */
	ifa = dlil_alloc_lladdr(ifp, ll_addr);
	if (ifa == NULL) {
		ifnet_lock_done(ifp);
		ifnet_head_done();
		dlil_if_unlock();
		return (ENOBUFS);
	}

	VERIFY(ifnet_addrs[ifp->if_index - 1] == NULL);
	ifnet_addrs[ifp->if_index - 1] = ifa;

	/* make this address the first on the list */
	IFA_LOCK(ifa);
	/* hold a reference for ifnet_addrs[] */
	IFA_ADDREF_LOCKED(ifa);
	/* if_attach_link_ifa() holds a reference for ifa_link */
	if_attach_link_ifa(ifp, ifa);
	IFA_UNLOCK(ifa);

#if CONFIG_MACF_NET
	mac_ifnet_label_associate(ifp);
#endif

	TAILQ_INSERT_TAIL(&ifnet_head, ifp, if_link);
	ifindex2ifnet[ifp->if_index] = ifp;

	/* Hold a reference to the underlying dlil_ifnet */
	ifnet_reference(ifp);

	/* Clear stats (save and restore other fields that we care) */
	if_data_saved = ifp->if_data;
	bzero(&ifp->if_data, sizeof (ifp->if_data));
	ifp->if_data.ifi_type = if_data_saved.ifi_type;
	ifp->if_data.ifi_typelen = if_data_saved.ifi_typelen;
	ifp->if_data.ifi_physical = if_data_saved.ifi_physical;
	ifp->if_data.ifi_addrlen = if_data_saved.ifi_addrlen;
	ifp->if_data.ifi_hdrlen = if_data_saved.ifi_hdrlen;
	ifp->if_data.ifi_mtu = if_data_saved.ifi_mtu;
	ifp->if_data.ifi_baudrate = if_data_saved.ifi_baudrate;
	ifp->if_data.ifi_hwassist = if_data_saved.ifi_hwassist;
	ifp->if_data.ifi_tso_v4_mtu = if_data_saved.ifi_tso_v4_mtu;
	ifp->if_data.ifi_tso_v6_mtu = if_data_saved.ifi_tso_v6_mtu;
	ifnet_touch_lastchange(ifp);

	VERIFY(ifp->if_output_sched_model == IFNET_SCHED_MODEL_NORMAL ||
	    ifp->if_output_sched_model == IFNET_SCHED_MODEL_DRIVER_MANAGED ||
	    ifp->if_output_sched_model == IFNET_SCHED_MODEL_FQ_CODEL);

	/* By default, use SFB and enable flow advisory */
	sflags = PKTSCHEDF_QALG_SFB;
	if (if_flowadv)
		sflags |= PKTSCHEDF_QALG_FLOWCTL;

	if (if_delaybased_queue)
		sflags |= PKTSCHEDF_QALG_DELAYBASED;

	if (ifp->if_output_sched_model ==
	    IFNET_SCHED_MODEL_DRIVER_MANAGED)
		sflags |= PKTSCHEDF_QALG_DRIVER_MANAGED;

	/* Initialize transmit queue(s) */
	err = ifclassq_setup(ifp, sflags, (dl_if->dl_if_flags & DLIF_REUSE));
	if (err != 0) {
		panic_plain("%s: ifp=%p couldn't initialize transmit queue; "
		    "err=%d", __func__, ifp, err);
		/* NOTREACHED */
	}

	/* Sanity checks on the input thread storage */
	dl_inp = &dl_if->dl_if_inpstorage;
	bzero(&dl_inp->stats, sizeof (dl_inp->stats));
	VERIFY(dl_inp->input_waiting == 0);
	VERIFY(dl_inp->wtot == 0);
	VERIFY(dl_inp->ifp == NULL);
	VERIFY(qhead(&dl_inp->rcvq_pkts) == NULL && qempty(&dl_inp->rcvq_pkts));
	VERIFY(qlimit(&dl_inp->rcvq_pkts) == 0);
	VERIFY(!dl_inp->net_affinity);
	VERIFY(ifp->if_inp == NULL);
	VERIFY(dl_inp->input_thr == THREAD_NULL);
	VERIFY(dl_inp->wloop_thr == THREAD_NULL);
	VERIFY(dl_inp->poll_thr == THREAD_NULL);
	VERIFY(dl_inp->tag == 0);
	VERIFY(dl_inp->mode == IFNET_MODEL_INPUT_POLL_OFF);
	bzero(&dl_inp->tstats, sizeof (dl_inp->tstats));
	bzero(&dl_inp->pstats, sizeof (dl_inp->pstats));
	bzero(&dl_inp->sstats, sizeof (dl_inp->sstats));
#if IFNET_INPUT_SANITY_CHK
	VERIFY(dl_inp->input_mbuf_cnt == 0);
#endif /* IFNET_INPUT_SANITY_CHK */

	/*
	 * A specific DLIL input thread is created per Ethernet/cellular
	 * interface or for an interface which supports opportunistic
	 * input polling.  Pseudo interfaces or other types of interfaces
	 * use the main input thread instead.
	 */
	if ((net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) ||
	    ifp->if_type == IFT_ETHER || ifp->if_type == IFT_CELLULAR) {
		ifp->if_inp = dl_inp;
		err = dlil_create_input_thread(ifp, ifp->if_inp);
		if (err != 0) {
			panic_plain("%s: ifp=%p couldn't get an input thread; "
			    "err=%d", __func__, ifp, err);
			/* NOTREACHED */
		}
	}

	if (ifp->if_inp != NULL && ifp->if_inp->input_mit_tcall == NULL) {
		ifp->if_inp->input_mit_tcall =
		    thread_call_allocate_with_priority(dlil_mit_tcall_fn,
		    ifp, THREAD_CALL_PRIORITY_KERNEL);
	}

	/*
	 * If the driver supports the new transmit model, calculate flow hash
	 * and create a workloop starter thread to invoke the if_start callback
	 * where the packets may be dequeued and transmitted.
	 */
	if (ifp->if_eflags & IFEF_TXSTART) {
		ifp->if_flowhash = ifnet_calc_flowhash(ifp);
		VERIFY(ifp->if_flowhash != 0);
		VERIFY(ifp->if_start_thread == THREAD_NULL);

		ifnet_set_start_cycle(ifp, NULL);
		ifp->if_start_active = 0;
		ifp->if_start_req = 0;
		ifp->if_start_flags = 0;
		VERIFY(ifp->if_start != NULL);
		if ((err = kernel_thread_start(ifnet_start_thread_fn,
		    ifp, &ifp->if_start_thread)) != KERN_SUCCESS) {
			panic_plain("%s: "
			    "ifp=%p couldn't get a start thread; "
			    "err=%d", __func__, ifp, err);
			/* NOTREACHED */
		}
		ml_thread_policy(ifp->if_start_thread, MACHINE_GROUP,
		    (MACHINE_NETWORK_GROUP|MACHINE_NETWORK_WORKLOOP));
	} else {
		ifp->if_flowhash = 0;
	}

	/*
	 * If the driver supports the new receive model, create a poller
	 * thread to invoke if_input_poll callback where the packets may
	 * be dequeued from the driver and processed for reception.
	 */
	if (ifp->if_eflags & IFEF_RXPOLL) {
		VERIFY(ifp->if_input_poll != NULL);
		VERIFY(ifp->if_input_ctl != NULL);
		VERIFY(ifp->if_poll_thread == THREAD_NULL);

		ifnet_set_poll_cycle(ifp, NULL);
		ifp->if_poll_update = 0;
		ifp->if_poll_active = 0;
		ifp->if_poll_req = 0;
		if ((err = kernel_thread_start(ifnet_poll_thread_fn, ifp,
		    &ifp->if_poll_thread)) != KERN_SUCCESS) {
			panic_plain("%s: ifp=%p couldn't get a poll thread; "
			    "err=%d", __func__, ifp, err);
			/* NOTREACHED */
		}
		ml_thread_policy(ifp->if_poll_thread, MACHINE_GROUP,
		    (MACHINE_NETWORK_GROUP|MACHINE_NETWORK_WORKLOOP));
	}

	VERIFY(ifp->if_desc.ifd_maxlen == IF_DESCSIZE);
	VERIFY(ifp->if_desc.ifd_len == 0);
	VERIFY(ifp->if_desc.ifd_desc != NULL);

	/* Record attach PC stacktrace */
	ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_attach);

	ifp->if_updatemcasts = 0;
	if (!LIST_EMPTY(&ifp->if_multiaddrs)) {
		struct ifmultiaddr *ifma;
		LIST_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
			IFMA_LOCK(ifma);
			if (ifma->ifma_addr->sa_family == AF_LINK ||
			    ifma->ifma_addr->sa_family == AF_UNSPEC)
				ifp->if_updatemcasts++;
			IFMA_UNLOCK(ifma);
		}

		printf("%s: attached with %d suspended link-layer multicast "
		    "membership(s)\n", if_name(ifp),
		    ifp->if_updatemcasts);
	}

	/* Clear logging parameters */
	bzero(&ifp->if_log, sizeof (ifp->if_log));

	/* Clear foreground/realtime activity timestamps */
	ifp->if_fg_sendts = 0;
	ifp->if_rt_sendts = 0;

	VERIFY(ifp->if_delegated.ifp == NULL);
	VERIFY(ifp->if_delegated.type == 0);
	VERIFY(ifp->if_delegated.family == 0);
	VERIFY(ifp->if_delegated.subfamily == 0);
	VERIFY(ifp->if_delegated.expensive == 0);

	VERIFY(ifp->if_agentids == NULL);
	VERIFY(ifp->if_agentcount == 0);

	/* Reset interface state */
	bzero(&ifp->if_interface_state, sizeof(ifp->if_interface_state));
	ifp->if_interface_state.valid_bitmask |=
	    IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
	ifp->if_interface_state.interface_availability =
	    IF_INTERFACE_STATE_INTERFACE_AVAILABLE;

	/* Initialize Link Quality Metric (loopback [lo0] is always good) */
	if (ifp == lo_ifp) {
		ifp->if_interface_state.lqm_state = IFNET_LQM_THRESH_GOOD;
		ifp->if_interface_state.valid_bitmask |=
		    IF_INTERFACE_STATE_LQM_STATE_VALID;
	} else {
		ifp->if_interface_state.lqm_state = IFNET_LQM_THRESH_UNKNOWN;
	}

	/*
	 * Enable ECN capability on this interface depending on the
	 * value of ECN global setting
	 */
	if (tcp_ecn_outbound == 2 && !IFNET_IS_CELLULAR(ifp)) {
		ifp->if_eflags |= IFEF_ECN_ENABLE;
		ifp->if_eflags &= ~IFEF_ECN_DISABLE;
	}

	/*
	 * Built-in Cyclops always on policy for WiFi infra
	 */
	if (IFNET_IS_WIFI_INFRA(ifp) && net_qos_policy_wifi_enabled != 0) {
		errno_t error;

		error = if_set_qosmarking_mode(ifp,
		    IFRTYPE_QOSMARKING_FASTLANE);
		if (error != 0) {
			printf("%s if_set_qosmarking_mode(%s) error %d\n",
			    __func__, ifp->if_xname, error);
		} else {
			ifp->if_eflags |= IFEF_QOSMARKING_ENABLED;
#if (DEVELOPMENT || DEBUG)
			printf("%s fastlane enabled on %s\n",
			    __func__, ifp->if_xname);
#endif /* (DEVELOPMENT || DEBUG) */
		}
	}

	ifnet_lock_done(ifp);
	ifnet_head_done();

	lck_mtx_lock(&ifp->if_cached_route_lock);
	/* Enable forwarding cached route */
	ifp->if_fwd_cacheok = 1;
	/* Clean up any existing cached routes */
	ROUTE_RELEASE(&ifp->if_fwd_route);
	bzero(&ifp->if_fwd_route, sizeof (ifp->if_fwd_route));
	ROUTE_RELEASE(&ifp->if_src_route);
	bzero(&ifp->if_src_route, sizeof (ifp->if_src_route));
	ROUTE_RELEASE(&ifp->if_src_route6);
	bzero(&ifp->if_src_route6, sizeof (ifp->if_src_route6));
	lck_mtx_unlock(&ifp->if_cached_route_lock);

	ifnet_llreach_ifattach(ifp, (dl_if->dl_if_flags & DLIF_REUSE));

	/*
	 * Allocate and attach IGMPv3/MLDv2 interface specific variables
	 * and trees; do this before the ifnet is marked as attached.
	 * The ifnet keeps the reference to the info structures even after
	 * the ifnet is detached, since the network-layer records still
	 * refer to the info structures even after that.  This also
	 * makes it possible for them to still function after the ifnet
	 * is recycled or reattached.
	 */
#if INET
	if (IGMP_IFINFO(ifp) == NULL) {
		IGMP_IFINFO(ifp) = igmp_domifattach(ifp, M_WAITOK);
		VERIFY(IGMP_IFINFO(ifp) != NULL);
	} else {
		VERIFY(IGMP_IFINFO(ifp)->igi_ifp == ifp);
		igmp_domifreattach(IGMP_IFINFO(ifp));
	}
#endif /* INET */
#if INET6
	if (MLD_IFINFO(ifp) == NULL) {
		MLD_IFINFO(ifp) = mld_domifattach(ifp, M_WAITOK);
		VERIFY(MLD_IFINFO(ifp) != NULL);
	} else {
		VERIFY(MLD_IFINFO(ifp)->mli_ifp == ifp);
		mld_domifreattach(MLD_IFINFO(ifp));
	}
#endif /* INET6 */

	VERIFY(ifp->if_data_threshold == 0);
	VERIFY(ifp->if_dt_tcall != NULL);

	/*
	 * Finally, mark this ifnet as attached.
	 */
	lck_mtx_lock(rnh_lock);
	ifnet_lock_exclusive(ifp);
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	ifp->if_refflags = IFRF_ATTACHED;	/* clears embryonic */
	lck_mtx_unlock(&ifp->if_ref_lock);
	if (net_rtref) {
		/* boot-args override; enable idle notification */
		(void) ifnet_set_idle_flags_locked(ifp, IFRF_IDLE_NOTIFY,
		    IFRF_IDLE_NOTIFY);
	} else {
		/* apply previous request(s) to set the idle flags, if any */
		(void) ifnet_set_idle_flags_locked(ifp, ifp->if_idle_new_flags,
		    ifp->if_idle_new_flags_mask);
	}
	ifnet_lock_done(ifp);
	lck_mtx_unlock(rnh_lock);
	dlil_if_unlock();

#if PF
	/*
	 * Attach packet filter to this interface, if enabled.
	 */
	pf_ifnet_hook(ifp, 1);
#endif /* PF */

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_ATTACHED, NULL, 0);

	if (dlil_verbose) {
		printf("%s: attached%s\n", if_name(ifp),
		    (dl_if->dl_if_flags & DLIF_REUSE) ? " (recycled)" : "");
	}

	return (0);
}
/*
 * Prepare the storage for the first/permanent link address, which must
 * have the same lifetime as the ifnet itself.  Although the link
 * address gets removed from if_addrhead and ifnet_addrs[] at detach time,
 * its location in memory must never change as it may still be referred
 * to by some parts of the system afterwards (unfortunate implementation
 * artifacts inherited from BSD.)
 *
 * Caller must hold ifnet lock as writer.
 */
static struct ifaddr *
dlil_alloc_lladdr(struct ifnet *ifp, const struct sockaddr_dl *ll_addr)
{
	struct ifaddr *ifa, *oifa;
	struct sockaddr_dl *asdl, *msdl;
	char workbuf[IFNAMSIZ*2];
	int namelen, masklen, socksize;
	struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;

	ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_EXCLUSIVE);
	VERIFY(ll_addr == NULL || ll_addr->sdl_alen == ifp->if_addrlen);

	namelen = snprintf(workbuf, sizeof (workbuf), "%s",
	    if_name(ifp));
	masklen = offsetof(struct sockaddr_dl, sdl_data[0])
	    + ((namelen > 0) ? namelen : 0);
	socksize = masklen + ifp->if_addrlen;
#define	ROUNDUP(a) (1 + (((a) - 1) | (sizeof (u_int32_t) - 1)))
	if ((u_int32_t)socksize < sizeof (struct sockaddr_dl))
		socksize = sizeof(struct sockaddr_dl);
	socksize = ROUNDUP(socksize);
#undef ROUNDUP

	ifa = ifp->if_lladdr;
	if (socksize > DLIL_SDLMAXLEN ||
	    (ifa != NULL && ifa != &dl_if->dl_if_lladdr.ifa)) {
		/*
		 * Rare, but in the event that the link address requires
		 * more storage space than DLIL_SDLMAXLEN, allocate the
		 * largest possible storages for address and mask, such
		 * that we can reuse the same space when if_addrlen grows.
		 * This same space will be used when if_addrlen shrinks.
		 */
		if (ifa == NULL || ifa == &dl_if->dl_if_lladdr.ifa) {
			int ifasize = sizeof (*ifa) + 2 * SOCK_MAXADDRLEN;
			ifa = _MALLOC(ifasize, M_IFADDR, M_WAITOK | M_ZERO);
			if (ifa == NULL)
				return (NULL);
			/* Don't set IFD_ALLOC, as this is permanent */
			ifa->ifa_debug = IFD_LINK;
		}
		IFA_LOCK(ifa);
		/* address and mask sockaddr_dl locations */
		asdl = (struct sockaddr_dl *)(ifa + 1);
		bzero(asdl, SOCK_MAXADDRLEN);
		msdl = (struct sockaddr_dl *)(void *)
		    ((char *)asdl + SOCK_MAXADDRLEN);
		bzero(msdl, SOCK_MAXADDRLEN);
	} else {
		VERIFY(ifa == NULL || ifa == &dl_if->dl_if_lladdr.ifa);
		/*
		 * Use the storage areas for address and mask within the
		 * dlil_ifnet structure.  This is the most common case.
		 */
		if (ifa == NULL) {
			ifa = &dl_if->dl_if_lladdr.ifa;
			/* Don't set IFD_ALLOC, as this is permanent */
			ifa->ifa_debug = IFD_LINK;
		}
		IFA_LOCK(ifa);
		/* address and mask sockaddr_dl locations */
		asdl = (struct sockaddr_dl *)(void *)&dl_if->dl_if_lladdr.asdl;
		bzero(asdl, sizeof (dl_if->dl_if_lladdr.asdl));
		msdl = (struct sockaddr_dl *)(void *)&dl_if->dl_if_lladdr.msdl;
		bzero(msdl, sizeof (dl_if->dl_if_lladdr.msdl));
	}

	/* hold a permanent reference for the ifnet itself */
	IFA_ADDREF_LOCKED(ifa);
	oifa = ifp->if_lladdr;
	ifp->if_lladdr = ifa;

	VERIFY(ifa->ifa_debug == IFD_LINK);
	ifa->ifa_ifp = ifp;
	ifa->ifa_rtrequest = link_rtrequest;
	ifa->ifa_addr = (struct sockaddr *)asdl;
	asdl->sdl_len = socksize;
	asdl->sdl_family = AF_LINK;
	if (namelen > 0) {
		bcopy(workbuf, asdl->sdl_data, min(namelen,
		    sizeof (asdl->sdl_data)));
		asdl->sdl_nlen = namelen;
	} else {
		asdl->sdl_nlen = 0;
	}
	asdl->sdl_index = ifp->if_index;
	asdl->sdl_type = ifp->if_type;
	if (ll_addr != NULL) {
		asdl->sdl_alen = ll_addr->sdl_alen;
		bcopy(CONST_LLADDR(ll_addr), LLADDR(asdl), asdl->sdl_alen);
	} else {
		asdl->sdl_alen = 0;
	}
	ifa->ifa_netmask = (struct sockaddr *)msdl;
	msdl->sdl_len = masklen;
	while (namelen > 0)
		msdl->sdl_data[--namelen] = 0xff;
	IFA_UNLOCK(ifa);

	if (oifa != NULL)
		IFA_REMREF(oifa);

	return (ifa);
}
static void
if_purgeaddrs(struct ifnet *ifp)
{
#if INET
	in_purgeaddrs(ifp);
#endif /* INET */
#if INET6
	in6_purgeaddrs(ifp);
#endif /* INET6 */
}
errno_t
ifnet_detach(ifnet_t ifp)
{
	struct ifnet *delegated_ifp;
	struct nd_ifinfo *ndi = NULL;

	if (ifp == NULL)
		return (EINVAL);

	ndi = ND_IFINFO(ifp);
	if (NULL != ndi)
		ndi->cga_initialized = FALSE;

	lck_mtx_lock(rnh_lock);
	ifnet_head_lock_exclusive();
	ifnet_lock_exclusive(ifp);

	/*
	 * Check to see if this interface has previously triggered
	 * aggressive protocol draining; if so, decrement the global
	 * refcnt and clear PR_AGGDRAIN on the route domain if
	 * there are no more of such an interface around.
	 */
	(void) ifnet_set_idle_flags_locked(ifp, 0, ~0);

	lck_mtx_lock_spin(&ifp->if_ref_lock);
	if (!(ifp->if_refflags & IFRF_ATTACHED)) {
		lck_mtx_unlock(&ifp->if_ref_lock);
		ifnet_lock_done(ifp);
		ifnet_head_done();
		lck_mtx_unlock(rnh_lock);
		return (EINVAL);
	} else if (ifp->if_refflags & IFRF_DETACHING) {
		/* Interface has already been detached */
		lck_mtx_unlock(&ifp->if_ref_lock);
		ifnet_lock_done(ifp);
		ifnet_head_done();
		lck_mtx_unlock(rnh_lock);
		return (ENXIO);
	}
	VERIFY(!(ifp->if_refflags & IFRF_EMBRYONIC));
	/* Indicate this interface is being detached */
	ifp->if_refflags &= ~IFRF_ATTACHED;
	ifp->if_refflags |= IFRF_DETACHING;
	lck_mtx_unlock(&ifp->if_ref_lock);

	if (dlil_verbose)
		printf("%s: detaching\n", if_name(ifp));

	/* clean up flow control entry object if there's any */
	if (ifp->if_eflags & IFEF_TXSTART) {
		ifnet_flowadv(ifp->if_flowhash);
	}

	/* Reset ECN enable/disable flags */
	ifp->if_eflags &= ~IFEF_ECN_DISABLE;
	ifp->if_eflags &= ~IFEF_ECN_ENABLE;

	/*
	 * Remove ifnet from the ifnet_head, ifindex2ifnet[]; it will
	 * no longer be visible during lookups from this point.
	 */
	VERIFY(ifindex2ifnet[ifp->if_index] == ifp);
	TAILQ_REMOVE(&ifnet_head, ifp, if_link);
	ifp->if_link.tqe_next = NULL;
	ifp->if_link.tqe_prev = NULL;
	if (ifp->if_ordered_link.tqe_next != NULL ||
	    ifp->if_ordered_link.tqe_prev != NULL) {
		ifnet_remove_from_ordered_list(ifp);
	}
	ifindex2ifnet[ifp->if_index] = NULL;

	/* 18717626 - reset IFEF_IPV4_ROUTER and IFEF_IPV6_ROUTER */
	ifp->if_eflags &= ~(IFEF_IPV4_ROUTER | IFEF_IPV6_ROUTER);

	/* Record detach PC stacktrace */
	ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_detach);

	/* Clear logging parameters */
	bzero(&ifp->if_log, sizeof (ifp->if_log));

	/* Clear delegated interface info (reference released below) */
	delegated_ifp = ifp->if_delegated.ifp;
	bzero(&ifp->if_delegated, sizeof (ifp->if_delegated));

	/* Reset interface state */
	bzero(&ifp->if_interface_state, sizeof(ifp->if_interface_state));

	ifnet_lock_done(ifp);
	ifnet_head_done();
	lck_mtx_unlock(rnh_lock);

	/* Release reference held on the delegated interface */
	if (delegated_ifp != NULL)
		ifnet_release(delegated_ifp);

	/* Reset Link Quality Metric (unless loopback [lo0]) */
	if (ifp != lo_ifp)
		if_lqm_update(ifp, IFNET_LQM_THRESH_OFF, 0);

	/* Reset TCP local statistics */
	if (ifp->if_tcp_stat != NULL)
		bzero(ifp->if_tcp_stat, sizeof(*ifp->if_tcp_stat));

	/* Reset UDP local statistics */
	if (ifp->if_udp_stat != NULL)
		bzero(ifp->if_udp_stat, sizeof(*ifp->if_udp_stat));

	/* Reset ifnet IPv4 stats */
	if (ifp->if_ipv4_stat != NULL)
		bzero(ifp->if_ipv4_stat, sizeof(*ifp->if_ipv4_stat));

	/* Reset ifnet IPv6 stats */
	if (ifp->if_ipv6_stat != NULL)
		bzero(ifp->if_ipv6_stat, sizeof(*ifp->if_ipv6_stat));

	/* Release memory held for interface link status report */
	if (ifp->if_link_status != NULL) {
		FREE(ifp->if_link_status, M_TEMP);
		ifp->if_link_status = NULL;
	}

	/* Clear agent IDs */
	if (ifp->if_agentids != NULL) {
		FREE(ifp->if_agentids, M_NETAGENT);
		ifp->if_agentids = NULL;
	}
	ifp->if_agentcount = 0;

	/* Let BPF know we're detaching */
	bpfdetach(ifp);

	/* Mark the interface as DOWN */
	if_down(ifp);

	/* Disable forwarding cached route */
	lck_mtx_lock(&ifp->if_cached_route_lock);
	ifp->if_fwd_cacheok = 0;
	lck_mtx_unlock(&ifp->if_cached_route_lock);

	/* Disable data threshold and wait for any pending event posting */
	ifp->if_data_threshold = 0;
	VERIFY(ifp->if_dt_tcall != NULL);
	(void) thread_call_cancel_wait(ifp->if_dt_tcall);

	/*
	 * Drain any deferred IGMPv3/MLDv2 query responses, but keep the
	 * references to the info structures and leave them attached to
	 * this ifnet.
	 */
#if INET
	igmp_domifdetach(ifp);
#endif /* INET */
#if INET6
	mld_domifdetach(ifp);
#endif /* INET6 */

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHING, NULL, 0);

	/* Let worker thread take care of the rest, to avoid reentrancy */
	dlil_if_lock();
	ifnet_detaching_enqueue(ifp);
	dlil_if_unlock();

	return (0);
}
static void
ifnet_detaching_enqueue(struct ifnet *ifp)
{
	dlil_if_lock_assert();

	++ifnet_detaching_cnt;
	VERIFY(ifnet_detaching_cnt != 0);
	TAILQ_INSERT_TAIL(&ifnet_detaching_head, ifp, if_detaching_link);
	wakeup((caddr_t)&ifnet_delayed_run);
}

static struct ifnet *
ifnet_detaching_dequeue(void)
{
	struct ifnet *ifp;

	dlil_if_lock_assert();

	ifp = TAILQ_FIRST(&ifnet_detaching_head);
	VERIFY(ifnet_detaching_cnt != 0 || ifp == NULL);
	if (ifp != NULL) {
		VERIFY(ifnet_detaching_cnt != 0);
		--ifnet_detaching_cnt;
		TAILQ_REMOVE(&ifnet_detaching_head, ifp, if_detaching_link);
		ifp->if_detaching_link.tqe_next = NULL;
		ifp->if_detaching_link.tqe_prev = NULL;
	}
	return (ifp);
}

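/*
 * The enqueue/dequeue pair above is a classic wakeup-driven work queue:
 * producers append under dlil_ifnet_lock and wakeup() the detacher
 * thread, which drains entries one at a time.  A minimal, hedged
 * illustration of the same pattern -- all names below ("work",
 * work_head, work_cnt) are hypothetical and not part of this file:
 */
#if 0	/* illustrative sketch only; caller holds the queue mutex */
struct work {
	TAILQ_ENTRY(work) w_link;
};
static TAILQ_HEAD(, work) work_head = TAILQ_HEAD_INITIALIZER(work_head);
static unsigned int work_cnt;

static void
work_enqueue(struct work *w)
{
	++work_cnt;
	TAILQ_INSERT_TAIL(&work_head, w, w_link);
	wakeup((caddr_t)&work_head);	/* rouse the worker thread */
}

static struct work *
work_dequeue(void)
{
	struct work *w = TAILQ_FIRST(&work_head);

	if (w != NULL) {
		--work_cnt;
		TAILQ_REMOVE(&work_head, w, w_link);
	}
	return (w);
}
#endif
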
static void
ifnet_detacher_thread_cont(int err)
{
#pragma unused(err)
	struct ifnet *ifp;

	for (;;) {
		dlil_if_lock_assert();
		while (ifnet_detaching_cnt == 0) {
			(void) msleep0(&ifnet_delayed_run, &dlil_ifnet_lock,
			    (PZERO - 1), "ifnet_detacher_cont", 0,
			    ifnet_detacher_thread_cont);
			/* NOTREACHED */
		}

		VERIFY(TAILQ_FIRST(&ifnet_detaching_head) != NULL);

		/* Take care of detaching ifnet */
		ifp = ifnet_detaching_dequeue();
		if (ifp != NULL) {
			dlil_if_unlock();
			ifnet_detach_final(ifp);
			dlil_if_lock();
		}
	}
}

static void
ifnet_detacher_thread_func(void *v, wait_result_t w)
{
#pragma unused(v, w)
	dlil_if_lock();
	(void) msleep0(&ifnet_delayed_run, &dlil_ifnet_lock,
	    (PZERO - 1), "ifnet_detacher", 0, ifnet_detacher_thread_cont);
	/*
	 * msleep0() shouldn't have returned as PCATCH was not set;
	 * therefore assert in this case.
	 */
	dlil_if_unlock();
	VERIFY(0);
	/* NOTREACHED */
}

static void
ifnet_detach_final(struct ifnet *ifp)
{
	struct ifnet_filter *filter, *filter_next;
	struct ifnet_filter_head fhead;
	struct dlil_threading_info *inp;
	struct ifaddr *ifa;
	ifnet_detached_func if_free;
	int i;

	lck_mtx_lock(&ifp->if_ref_lock);
	if (!(ifp->if_refflags & IFRF_DETACHING)) {
		panic("%s: flags mismatch (detaching not set) ifp=%p",
		    __func__, ifp);
		/* NOTREACHED */
	}

	/*
	 * Wait until the existing IO references get released
	 * before we proceed with ifnet_detach.  This is not a
	 * common case, so block without using a continuation.
	 */
	while (ifp->if_refio > 0) {
		printf("%s: Waiting for IO references on %s interface "
		    "to be released\n", __func__, if_name(ifp));
		(void) msleep(&(ifp->if_refio), &ifp->if_ref_lock,
		    (PZERO - 1), "ifnet_ioref_wait", NULL);
	}
	lck_mtx_unlock(&ifp->if_ref_lock);

	/* Drain and destroy send queue */
	ifclassq_teardown(ifp);

	/* Detach interface filters */
	lck_mtx_lock(&ifp->if_flt_lock);
	if_flt_monitor_enter(ifp);

	LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
	fhead = ifp->if_flt_head;
	TAILQ_INIT(&ifp->if_flt_head);

	for (filter = TAILQ_FIRST(&fhead); filter; filter = filter_next) {
		filter_next = TAILQ_NEXT(filter, filt_next);
		lck_mtx_unlock(&ifp->if_flt_lock);

		dlil_detach_filter_internal(filter, 1);
		lck_mtx_lock(&ifp->if_flt_lock);
	}
	if_flt_monitor_leave(ifp);
	lck_mtx_unlock(&ifp->if_flt_lock);

	/* Tell upper layers to drop their network addresses */
	if_purgeaddrs(ifp);

	ifnet_lock_exclusive(ifp);

	/* Uplumb all protocols */
	for (i = 0; i < PROTO_HASH_SLOTS; i++) {
		struct if_proto *proto;

		proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
		while (proto != NULL) {
			protocol_family_t family = proto->protocol_family;
			ifnet_lock_done(ifp);
			proto_unplumb(family, ifp);
			ifnet_lock_exclusive(ifp);
			proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
		}
		/* There should not be any protocols left */
		VERIFY(SLIST_EMPTY(&ifp->if_proto_hash[i]));
	}
	zfree(dlif_phash_zone, ifp->if_proto_hash);
	ifp->if_proto_hash = NULL;

	/* Detach (permanent) link address from if_addrhead */
	ifa = TAILQ_FIRST(&ifp->if_addrhead);
	VERIFY(ifnet_addrs[ifp->if_index - 1] == ifa);
	IFA_LOCK(ifa);
	if_detach_link_ifa(ifp, ifa);
	IFA_UNLOCK(ifa);
	IFA_REMREF(ifa);

	/* Remove (permanent) link address from ifnet_addrs[] */
	ifnet_addrs[ifp->if_index - 1] = NULL;

	/* This interface should not be on {ifnet_head,detaching} */
	VERIFY(ifp->if_link.tqe_next == NULL);
	VERIFY(ifp->if_link.tqe_prev == NULL);
	VERIFY(ifp->if_detaching_link.tqe_next == NULL);
	VERIFY(ifp->if_detaching_link.tqe_prev == NULL);
	VERIFY(ifp->if_ordered_link.tqe_next == NULL);
	VERIFY(ifp->if_ordered_link.tqe_prev == NULL);

	/* The slot should have been emptied */
	VERIFY(ifindex2ifnet[ifp->if_index] == NULL);

	/* There should not be any addresses left */
	VERIFY(TAILQ_EMPTY(&ifp->if_addrhead));

	/*
	 * Signal the starter thread to terminate itself.
	 */
	if (ifp->if_start_thread != THREAD_NULL) {
		lck_mtx_lock_spin(&ifp->if_start_lock);
		ifp->if_start_flags = 0;
		ifp->if_start_thread = THREAD_NULL;
		wakeup_one((caddr_t)&ifp->if_start_thread);
		lck_mtx_unlock(&ifp->if_start_lock);
	}

	/*
	 * Signal the poller thread to terminate itself.
	 */
	if (ifp->if_poll_thread != THREAD_NULL) {
		lck_mtx_lock_spin(&ifp->if_poll_lock);
		ifp->if_poll_thread = THREAD_NULL;
		wakeup_one((caddr_t)&ifp->if_poll_thread);
		lck_mtx_unlock(&ifp->if_poll_lock);
	}

	/*
	 * If thread affinity was set for the workloop thread, we will need
	 * to tear down the affinity and release the extra reference count
	 * taken at attach time.  Does not apply to lo0 or other interfaces
	 * without dedicated input threads.
	 */
	if ((inp = ifp->if_inp) != NULL) {
		VERIFY(inp != dlil_main_input_thread);

		if (inp->net_affinity) {
			struct thread *tp, *wtp, *ptp;

			lck_mtx_lock_spin(&inp->input_lck);
			wtp = inp->wloop_thr;
			inp->wloop_thr = THREAD_NULL;
			ptp = inp->poll_thr;
			inp->poll_thr = THREAD_NULL;
			tp = inp->input_thr;	/* don't nullify now */
			inp->tag = 0;
			inp->net_affinity = FALSE;
			lck_mtx_unlock(&inp->input_lck);

			/* Tear down poll thread affinity */
			if (ptp != NULL) {
				VERIFY(ifp->if_eflags & IFEF_RXPOLL);
				(void) dlil_affinity_set(ptp,
				    THREAD_AFFINITY_TAG_NULL);
				thread_deallocate(ptp);
			}

			/* Tear down workloop thread affinity */
			if (wtp != NULL) {
				(void) dlil_affinity_set(wtp,
				    THREAD_AFFINITY_TAG_NULL);
				thread_deallocate(wtp);
			}

			/* Tear down DLIL input thread affinity */
			(void) dlil_affinity_set(tp, THREAD_AFFINITY_TAG_NULL);
			thread_deallocate(tp);
		}

		/* disassociate ifp DLIL input thread */
		ifp->if_inp = NULL;

		/* tell the input thread to terminate */
		lck_mtx_lock_spin(&inp->input_lck);
		inp->input_waiting |= DLIL_INPUT_TERMINATE;
		if (!(inp->input_waiting & DLIL_INPUT_RUNNING)) {
			wakeup_one((caddr_t)&inp->input_waiting);
		}
		lck_mtx_unlock(&inp->input_lck);
		ifnet_lock_done(ifp);

		/* wait for the input thread to terminate */
		lck_mtx_lock_spin(&inp->input_lck);
		while ((inp->input_waiting & DLIL_INPUT_TERMINATE_COMPLETE)
		    == 0) {
			(void) msleep(&inp->input_waiting, &inp->input_lck,
			    (PZERO - 1) | PSPIN, inp->input_name, NULL);
		}
		lck_mtx_unlock(&inp->input_lck);
		ifnet_lock_exclusive(ifp);

		/* clean-up input thread state */
		dlil_clean_threading_info(inp);
	}

	/* The driver might unload, so point these to ourselves */
	if_free = ifp->if_free;
	ifp->if_output_dlil = ifp_if_output;
	ifp->if_output = ifp_if_output;
	ifp->if_pre_enqueue = ifp_if_output;
	ifp->if_start = ifp_if_start;
	ifp->if_output_ctl = ifp_if_ctl;
	ifp->if_input_dlil = ifp_if_input;
	ifp->if_input_poll = ifp_if_input_poll;
	ifp->if_input_ctl = ifp_if_ctl;
	ifp->if_ioctl = ifp_if_ioctl;
	ifp->if_set_bpf_tap = ifp_if_set_bpf_tap;
	ifp->if_free = ifp_if_free;
	ifp->if_demux = ifp_if_demux;
	ifp->if_event = ifp_if_event;
	ifp->if_framer_legacy = ifp_if_framer;
	ifp->if_framer = ifp_if_framer_extended;
	ifp->if_add_proto = ifp_if_add_proto;
	ifp->if_del_proto = ifp_if_del_proto;
	ifp->if_check_multi = ifp_if_check_multi;

	/* wipe out interface description */
	VERIFY(ifp->if_desc.ifd_maxlen == IF_DESCSIZE);
	ifp->if_desc.ifd_len = 0;
	VERIFY(ifp->if_desc.ifd_desc != NULL);
	bzero(ifp->if_desc.ifd_desc, IF_DESCSIZE);

	/* there shouldn't be any delegation by now */
	VERIFY(ifp->if_delegated.ifp == NULL);
	VERIFY(ifp->if_delegated.type == 0);
	VERIFY(ifp->if_delegated.family == 0);
	VERIFY(ifp->if_delegated.subfamily == 0);
	VERIFY(ifp->if_delegated.expensive == 0);

	/* QoS marking get cleared */
	ifp->if_eflags &= ~IFEF_QOSMARKING_ENABLED;
	if_set_qosmarking_mode(ifp, IFRTYPE_QOSMARKING_MODE_NONE);

	ifnet_lock_done(ifp);

#if PF
	/*
	 * Detach this interface from packet filter, if enabled.
	 */
	pf_ifnet_hook(ifp, 0);
#endif /* PF */

	/* Filter list should be empty */
	lck_mtx_lock_spin(&ifp->if_flt_lock);
	VERIFY(TAILQ_EMPTY(&ifp->if_flt_head));
	VERIFY(ifp->if_flt_busy == 0);
	VERIFY(ifp->if_flt_waiters == 0);
	lck_mtx_unlock(&ifp->if_flt_lock);

	/* Last chance to drain send queue */
	if_qflush(ifp, 0);

	/* Last chance to cleanup any cached route */
	lck_mtx_lock(&ifp->if_cached_route_lock);
	VERIFY(!ifp->if_fwd_cacheok);
	ROUTE_RELEASE(&ifp->if_fwd_route);
	bzero(&ifp->if_fwd_route, sizeof (ifp->if_fwd_route));
	ROUTE_RELEASE(&ifp->if_src_route);
	bzero(&ifp->if_src_route, sizeof (ifp->if_src_route));
	ROUTE_RELEASE(&ifp->if_src_route6);
	bzero(&ifp->if_src_route6, sizeof (ifp->if_src_route6));
	lck_mtx_unlock(&ifp->if_cached_route_lock);

	VERIFY(ifp->if_data_threshold == 0);
	VERIFY(ifp->if_dt_tcall != NULL);
	VERIFY(!thread_call_isactive(ifp->if_dt_tcall));

	ifnet_llreach_ifdetach(ifp);

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHED, NULL, 0);

	/*
	 * Finally, mark this ifnet as detached.
	 */
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	if (!(ifp->if_refflags & IFRF_DETACHING)) {
		panic("%s: flags mismatch (detaching not set) ifp=%p",
		    __func__, ifp);
		/* NOTREACHED */
	}
	ifp->if_refflags &= ~IFRF_DETACHING;
	lck_mtx_unlock(&ifp->if_ref_lock);
	if (if_free != NULL)
		if_free(ifp);

	if (dlil_verbose)
		printf("%s: detached\n", if_name(ifp));

	/* Release reference held during ifnet attach */
	ifnet_release(ifp);
}

static errno_t
ifp_if_output(struct ifnet *ifp, struct mbuf *m)
{
#pragma unused(ifp)
	m_freem_list(m);
	return (0);
}

static void
ifp_if_start(struct ifnet *ifp)
{
	ifnet_purge(ifp);
}

static errno_t
ifp_if_input(struct ifnet *ifp, struct mbuf *m_head,
    struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
    boolean_t poll, struct thread *tp)
{
#pragma unused(ifp, m_tail, s, poll, tp)
	m_freem_list(m_head);
	return (ENXIO);
}

static void
ifp_if_input_poll(struct ifnet *ifp, u_int32_t flags, u_int32_t max_cnt,
    struct mbuf **m_head, struct mbuf **m_tail, u_int32_t *cnt, u_int32_t *len)
{
#pragma unused(ifp, flags, max_cnt)
	if (m_head != NULL)
		*m_head = NULL;
	if (m_tail != NULL)
		*m_tail = NULL;
	if (cnt != NULL)
		*cnt = 0;
	if (len != NULL)
		*len = 0;
}

static errno_t
ifp_if_ctl(struct ifnet *ifp, ifnet_ctl_cmd_t cmd, u_int32_t arglen, void *arg)
{
#pragma unused(ifp, cmd, arglen, arg)
	return (EOPNOTSUPP);
}

static errno_t
ifp_if_demux(struct ifnet *ifp, struct mbuf *m, char *fh, protocol_family_t *pf)
{
#pragma unused(ifp, fh, pf)
	m_freem(m);
	return (EJUSTRETURN);
}

static errno_t
ifp_if_add_proto(struct ifnet *ifp, protocol_family_t pf,
    const struct ifnet_demux_desc *da, u_int32_t dc)
{
#pragma unused(ifp, pf, da, dc)
	return (EINVAL);
}

static errno_t
ifp_if_del_proto(struct ifnet *ifp, protocol_family_t pf)
{
#pragma unused(ifp, pf)
	return (EINVAL);
}

static errno_t
ifp_if_check_multi(struct ifnet *ifp, const struct sockaddr *sa)
{
#pragma unused(ifp, sa)
	return (EOPNOTSUPP);
}

#if CONFIG_EMBEDDED
static errno_t
ifp_if_framer(struct ifnet *ifp, struct mbuf **m,
    const struct sockaddr *sa, const char *ll, const char *t,
    u_int32_t *pre, u_int32_t *post)
#else
static errno_t
ifp_if_framer(struct ifnet *ifp, struct mbuf **m,
    const struct sockaddr *sa, const char *ll, const char *t)
#endif /* !CONFIG_EMBEDDED */
{
#pragma unused(ifp, m, sa, ll, t)
#if CONFIG_EMBEDDED
	return (ifp_if_framer_extended(ifp, m, sa, ll, t, pre, post));
#else
	return (ifp_if_framer_extended(ifp, m, sa, ll, t, NULL, NULL));
#endif /* !CONFIG_EMBEDDED */
}

static errno_t
ifp_if_framer_extended(struct ifnet *ifp, struct mbuf **m,
    const struct sockaddr *sa, const char *ll, const char *t,
    u_int32_t *pre, u_int32_t *post)
{
#pragma unused(ifp, sa, ll, t)
	m_freem(*m);
	*m = NULL;

	if (pre != NULL)
		*pre = 0;
	if (post != NULL)
		*post = 0;

	return (EJUSTRETURN);
}

static errno_t
ifp_if_ioctl(struct ifnet *ifp, unsigned long cmd, void *arg)
{
#pragma unused(ifp, cmd, arg)
	return (EOPNOTSUPP);
}

static errno_t
ifp_if_set_bpf_tap(struct ifnet *ifp, bpf_tap_mode tm, bpf_packet_func f)
{
#pragma unused(ifp, tm, f)
	/* XXX not sure what to do here */
	return (0);
}

static void
ifp_if_free(struct ifnet *ifp)
{
#pragma unused(ifp)
}

static void
ifp_if_event(struct ifnet *ifp, const struct kev_msg *e)
{
#pragma unused(ifp, e)
}

int dlil_if_acquire(u_int32_t family, const void *uniqueid,
    size_t uniqueid_len, struct ifnet **ifp)
{
	struct ifnet *ifp1 = NULL;
	struct dlil_ifnet *dlifp1 = NULL;
	void *buf, *base, **pbuf;
	int ret = 0;

	dlil_if_lock();
	TAILQ_FOREACH(dlifp1, &dlil_ifnet_head, dl_if_link) {
		ifp1 = (struct ifnet *)dlifp1;

		if (ifp1->if_family != family)
			continue;

		lck_mtx_lock(&dlifp1->dl_if_lock);
		/* same uniqueid and same len or no unique id specified */
		if ((uniqueid_len == dlifp1->dl_if_uniqueid_len) &&
		    bcmp(uniqueid, dlifp1->dl_if_uniqueid, uniqueid_len) == 0) {
			/* check for matching interface in use */
			if (dlifp1->dl_if_flags & DLIF_INUSE) {
				if (uniqueid_len) {
					ret = EBUSY;
					lck_mtx_unlock(&dlifp1->dl_if_lock);
					goto end;
				}
			} else {
				dlifp1->dl_if_flags |= (DLIF_INUSE|DLIF_REUSE);
				lck_mtx_unlock(&dlifp1->dl_if_lock);
				*ifp = ifp1;
				goto end;
			}
		}
		lck_mtx_unlock(&dlifp1->dl_if_lock);
	}

	/* no interface found, allocate a new one */
	buf = zalloc(dlif_zone);
	if (buf == NULL) {
		ret = ENOMEM;
		goto end;
	}
	bzero(buf, dlif_bufsize);

	/* Get the 64-bit aligned base address for this object */
	base = (void *)P2ROUNDUP((intptr_t)buf + sizeof (u_int64_t),
	    sizeof (u_int64_t));
	VERIFY(((intptr_t)base + dlif_size) <= ((intptr_t)buf + dlif_bufsize));

	/*
	 * Wind back a pointer size from the aligned base and
	 * save the original address so we can free it later.
	 */
	pbuf = (void **)((intptr_t)base - sizeof (void *));
	*pbuf = buf;
	dlifp1 = base;

	if (uniqueid_len) {
		MALLOC(dlifp1->dl_if_uniqueid, void *, uniqueid_len,
		    M_NKE, M_WAITOK);
		if (dlifp1->dl_if_uniqueid == NULL) {
			zfree(dlif_zone, buf);
			ret = ENOMEM;
			goto end;
		}
		bcopy(uniqueid, dlifp1->dl_if_uniqueid, uniqueid_len);
		dlifp1->dl_if_uniqueid_len = uniqueid_len;
	}

	ifp1 = (struct ifnet *)dlifp1;
	dlifp1->dl_if_flags = DLIF_INUSE;
	if (ifnet_debug) {
		dlifp1->dl_if_flags |= DLIF_DEBUG;
		dlifp1->dl_if_trace = dlil_if_trace;
	}
	ifp1->if_name = dlifp1->dl_if_namestorage;
	ifp1->if_xname = dlifp1->dl_if_xnamestorage;

	/* initialize interface description */
	ifp1->if_desc.ifd_maxlen = IF_DESCSIZE;
	ifp1->if_desc.ifd_len = 0;
	ifp1->if_desc.ifd_desc = dlifp1->dl_if_descstorage;

#if CONFIG_MACF_NET
	mac_ifnet_label_init(ifp1);
#endif

	if ((ret = dlil_alloc_local_stats(ifp1)) != 0) {
		DLIL_PRINTF("%s: failed to allocate if local stats, "
		    "error: %d\n", __func__, ret);
		/* This probably shouldn't be fatal */
		ret = 0;
	}

	lck_mtx_init(&dlifp1->dl_if_lock, ifnet_lock_group, ifnet_lock_attr);
	lck_rw_init(&ifp1->if_lock, ifnet_lock_group, ifnet_lock_attr);
	lck_mtx_init(&ifp1->if_ref_lock, ifnet_lock_group, ifnet_lock_attr);
	lck_mtx_init(&ifp1->if_flt_lock, ifnet_lock_group, ifnet_lock_attr);
	lck_mtx_init(&ifp1->if_addrconfig_lock, ifnet_lock_group,
	    ifnet_lock_attr);
	lck_rw_init(&ifp1->if_llreach_lock, ifnet_lock_group, ifnet_lock_attr);
#if INET
	lck_rw_init(&ifp1->if_inetdata_lock, ifnet_lock_group,
	    ifnet_lock_attr);
	ifp1->if_inetdata = NULL;
#endif
#if INET6
	lck_rw_init(&ifp1->if_inet6data_lock, ifnet_lock_group,
	    ifnet_lock_attr);
	ifp1->if_inet6data = NULL;
#endif
	lck_rw_init(&ifp1->if_link_status_lock, ifnet_lock_group,
	    ifnet_lock_attr);
	ifp1->if_link_status = NULL;

	/* for send data paths */
	lck_mtx_init(&ifp1->if_start_lock, ifnet_snd_lock_group,
	    ifnet_lock_attr);
	lck_mtx_init(&ifp1->if_cached_route_lock, ifnet_snd_lock_group,
	    ifnet_lock_attr);
	lck_mtx_init(&ifp1->if_snd.ifcq_lock, ifnet_snd_lock_group,
	    ifnet_lock_attr);

	/* for receive data paths */
	lck_mtx_init(&ifp1->if_poll_lock, ifnet_rcv_lock_group,
	    ifnet_lock_attr);

	/* thread call allocation is done with sleeping zalloc */
	ifp1->if_dt_tcall = thread_call_allocate_with_options(dlil_dt_tcall_fn,
	    ifp1, THREAD_CALL_PRIORITY_KERNEL, THREAD_CALL_OPTIONS_ONCE);
	if (ifp1->if_dt_tcall == NULL) {
		panic_plain("%s: couldn't create if_dt_tcall", __func__);
		/* NOTREACHED */
	}

	TAILQ_INSERT_TAIL(&dlil_ifnet_head, dlifp1, dl_if_link);

	*ifp = ifp1;

end:
	dlil_if_unlock();

	VERIFY(dlifp1 == NULL || (IS_P2ALIGNED(dlifp1, sizeof (u_int64_t)) &&
	    IS_P2ALIGNED(&ifp1->if_data, sizeof (u_int64_t))));

	return (ret);
}

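/*
 * The buffer juggling above is worth calling out: the zone element is
 * over-allocated, the dlil_ifnet object is placed at the next 64-bit
 * boundary past the first pointer-sized slot, and the original zalloc()
 * pointer is stashed immediately below the aligned base so the free path
 * can recover it.  A standalone, hedged sketch of the same idiom in
 * plain C (all names below are hypothetical):
 */
#if 0	/* illustrative sketch only */
#include <stdlib.h>
#include <stdint.h>

#define ALIGN_UP(x, a)	(((x) + ((a) - 1)) & ~((uintptr_t)(a) - 1))

void *
aligned_alloc_with_header(size_t size)
{
	void *buf, *base, **pbuf;

	/* room for the object, alignment slack, and the saved pointer */
	buf = malloc(size + sizeof (uint64_t) * 2);
	if (buf == NULL)
		return (NULL);

	base = (void *)ALIGN_UP((uintptr_t)buf + sizeof (uint64_t),
	    sizeof (uint64_t));
	pbuf = (void **)((uintptr_t)base - sizeof (void *));
	*pbuf = buf;			/* remember what to free later */
	return (base);
}

void
aligned_free_with_header(void *base)
{
	free(*(void **)((uintptr_t)base - sizeof (void *)));
}
#endif
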
__private_extern__ void
dlil_if_release(ifnet_t ifp)
{
	struct dlil_ifnet *dlifp = (struct dlil_ifnet *)ifp;

	VERIFY(OSDecrementAtomic64(&net_api_stats.nas_ifnet_alloc_count) > 0);
	if (!(ifp->if_xflags & IFXF_ALLOC_KPI)) {
		VERIFY(OSDecrementAtomic64(
		    &net_api_stats.nas_ifnet_alloc_os_count) > 0);
	}

	ifnet_lock_exclusive(ifp);
	lck_mtx_lock(&dlifp->dl_if_lock);
	dlifp->dl_if_flags &= ~DLIF_INUSE;
	strlcpy(dlifp->dl_if_namestorage, ifp->if_name, IFNAMSIZ);
	ifp->if_name = dlifp->dl_if_namestorage;
	/* Reset external name (name + unit) */
	ifp->if_xname = dlifp->dl_if_xnamestorage;
	snprintf(__DECONST(char *, ifp->if_xname), IFXNAMSIZ,
	    "%s?", ifp->if_name);
	lck_mtx_unlock(&dlifp->dl_if_lock);
#if CONFIG_MACF_NET
	/*
	 * We can either recycle the MAC label here or in dlil_if_acquire().
	 * It seems logical to do it here but this means that anything that
	 * still has a handle on ifp will now see it as unlabeled.
	 * Since the interface is "dead" that may be OK.  Revisit later.
	 */
	mac_ifnet_label_recycle(ifp);
#endif
	ifnet_lock_done(ifp);
}

__private_extern__ void
dlil_if_lock(void)
{
	lck_mtx_lock(&dlil_ifnet_lock);
}

__private_extern__ void
dlil_if_unlock(void)
{
	lck_mtx_unlock(&dlil_ifnet_lock);
}

__private_extern__ void
dlil_if_lock_assert(void)
{
	LCK_MTX_ASSERT(&dlil_ifnet_lock, LCK_MTX_ASSERT_OWNED);
}

__private_extern__ void
dlil_proto_unplumb_all(struct ifnet *ifp)
{
	/*
	 * if_proto_hash[0-2] are for PF_INET, PF_INET6 and PF_VLAN, where
	 * each bucket contains exactly one entry; PF_VLAN does not need an
	 * explicit unplumb.
	 *
	 * if_proto_hash[3] is for other protocols; we expect anything
	 * in this bucket to respond to the DETACHING event (which would
	 * have happened by now) and do the unplumb then.
	 */
	(void) proto_unplumb(PF_INET, ifp);
#if INET6
	(void) proto_unplumb(PF_INET6, ifp);
#endif /* INET6 */
}

static void
ifp_src_route_copyout(struct ifnet *ifp, struct route *dst)
{
	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
	lck_mtx_convert_spin(&ifp->if_cached_route_lock);

	route_copyout(dst, &ifp->if_src_route, sizeof (*dst));

	lck_mtx_unlock(&ifp->if_cached_route_lock);
}

static void
ifp_src_route_copyin(struct ifnet *ifp, struct route *src)
{
	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
	lck_mtx_convert_spin(&ifp->if_cached_route_lock);

	if (ifp->if_fwd_cacheok) {
		route_copyin(src, &ifp->if_src_route, sizeof (*src));
	} else {
		ROUTE_RELEASE(src);
	}
	lck_mtx_unlock(&ifp->if_cached_route_lock);
}

#if INET6
static void
ifp_src_route6_copyout(struct ifnet *ifp, struct route_in6 *dst)
{
	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
	lck_mtx_convert_spin(&ifp->if_cached_route_lock);

	route_copyout((struct route *)dst, (struct route *)&ifp->if_src_route6,
	    sizeof (*dst));

	lck_mtx_unlock(&ifp->if_cached_route_lock);
}

static void
ifp_src_route6_copyin(struct ifnet *ifp, struct route_in6 *src)
{
	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
	lck_mtx_convert_spin(&ifp->if_cached_route_lock);

	if (ifp->if_fwd_cacheok) {
		route_copyin((struct route *)src,
		    (struct route *)&ifp->if_src_route6, sizeof (*src));
	} else {
		ROUTE_RELEASE(src);
	}
	lck_mtx_unlock(&ifp->if_cached_route_lock);
}
#endif /* INET6 */

struct rtentry *
ifnet_cached_rtlookup_inet(struct ifnet *ifp, struct in_addr src_ip)
{
	struct route src_rt;
	struct sockaddr_in *dst;

	dst = (struct sockaddr_in *)(void *)(&src_rt.ro_dst);

	ifp_src_route_copyout(ifp, &src_rt);

	if (ROUTE_UNUSABLE(&src_rt) || src_ip.s_addr != dst->sin_addr.s_addr) {
		ROUTE_RELEASE(&src_rt);
		if (dst->sin_family != AF_INET) {
			bzero(&src_rt.ro_dst, sizeof (src_rt.ro_dst));
			dst->sin_len = sizeof (src_rt.ro_dst);
			dst->sin_family = AF_INET;
		}
		dst->sin_addr = src_ip;

		VERIFY(src_rt.ro_rt == NULL);
		src_rt.ro_rt = rtalloc1_scoped((struct sockaddr *)dst,
		    0, 0, ifp->if_index);

		if (src_rt.ro_rt != NULL) {
			/* retain a ref, copyin consumes one */
			struct rtentry *rte = src_rt.ro_rt;
			RT_ADDREF(rte);
			ifp_src_route_copyin(ifp, &src_rt);
			src_rt.ro_rt = rte;
		}
	}

	return (src_rt.ro_rt);
}

#if INET6
struct rtentry *
ifnet_cached_rtlookup_inet6(struct ifnet *ifp, struct in6_addr *src_ip6)
{
	struct route_in6 src_rt;

	ifp_src_route6_copyout(ifp, &src_rt);

	if (ROUTE_UNUSABLE(&src_rt) ||
	    !IN6_ARE_ADDR_EQUAL(src_ip6, &src_rt.ro_dst.sin6_addr)) {
		ROUTE_RELEASE(&src_rt);
		if (src_rt.ro_dst.sin6_family != AF_INET6) {
			bzero(&src_rt.ro_dst, sizeof (src_rt.ro_dst));
			src_rt.ro_dst.sin6_len = sizeof (src_rt.ro_dst);
			src_rt.ro_dst.sin6_family = AF_INET6;
		}
		src_rt.ro_dst.sin6_scope_id = in6_addr2scopeid(ifp, src_ip6);
		bcopy(src_ip6, &src_rt.ro_dst.sin6_addr,
		    sizeof (src_rt.ro_dst.sin6_addr));

		if (src_rt.ro_rt == NULL) {
			src_rt.ro_rt = rtalloc1_scoped(
			    (struct sockaddr *)&src_rt.ro_dst, 0, 0,
			    ifp->if_index);

			if (src_rt.ro_rt != NULL) {
				/* retain a ref, copyin consumes one */
				struct rtentry *rte = src_rt.ro_rt;
				RT_ADDREF(rte);
				ifp_src_route6_copyin(ifp, &src_rt);
				src_rt.ro_rt = rte;
			}
		}
	}

	return (src_rt.ro_rt);
}
#endif /* INET6 */

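/*
 * ifnet_cached_rtlookup_inet{,6}() hand back a route on which the caller
 * owns a reference; the per-ifnet cache keeps its own reference via the
 * copyin above.  A hedged caller sketch (assumes an attached ifp and a
 * valid source address):
 */
#if 0	/* illustrative sketch only */
	struct rtentry *rt;

	rt = ifnet_cached_rtlookup_inet(ifp, src_ip);
	if (rt != NULL) {
		/* ... use rt for the forwarding decision ... */
		rtfree(rt);	/* release the caller's reference */
	}
#endif
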
void
if_lqm_update(struct ifnet *ifp, int lqm, int locked)
{
	struct kev_dl_link_quality_metric_data ev_lqm_data;

	VERIFY(lqm >= IFNET_LQM_MIN && lqm <= IFNET_LQM_MAX);

	/* Normalize to edge */
	if (lqm >= 0 && lqm <= IFNET_LQM_THRESH_ABORT) {
		lqm = IFNET_LQM_THRESH_ABORT;
		atomic_bitset_32(&tcbinfo.ipi_flags,
		    INPCBINFO_HANDLE_LQM_ABORT);
		inpcb_timer_sched(&tcbinfo, INPCB_TIMER_FAST);
	} else if (lqm > IFNET_LQM_THRESH_ABORT &&
	    lqm <= IFNET_LQM_THRESH_MINIMALLY_VIABLE) {
		lqm = IFNET_LQM_THRESH_MINIMALLY_VIABLE;
	} else if (lqm > IFNET_LQM_THRESH_MINIMALLY_VIABLE &&
	    lqm <= IFNET_LQM_THRESH_POOR) {
		lqm = IFNET_LQM_THRESH_POOR;
	} else if (lqm > IFNET_LQM_THRESH_POOR &&
	    lqm <= IFNET_LQM_THRESH_GOOD) {
		lqm = IFNET_LQM_THRESH_GOOD;
	}

	/*
	 * Take the lock if needed
	 */
	if (!locked)
		ifnet_lock_exclusive(ifp);

	if (lqm == ifp->if_interface_state.lqm_state &&
	    (ifp->if_interface_state.valid_bitmask &
	    IF_INTERFACE_STATE_LQM_STATE_VALID)) {
		/*
		 * Release the lock if was not held by the caller
		 */
		if (!locked)
			ifnet_lock_done(ifp);
		return;		/* nothing to update */
	}
	ifp->if_interface_state.valid_bitmask |=
	    IF_INTERFACE_STATE_LQM_STATE_VALID;
	ifp->if_interface_state.lqm_state = lqm;

	/*
	 * Don't want to hold the lock when issuing kernel events
	 */
	ifnet_lock_done(ifp);

	bzero(&ev_lqm_data, sizeof (ev_lqm_data));
	ev_lqm_data.link_quality_metric = lqm;

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_LINK_QUALITY_METRIC_CHANGED,
	    (struct net_event_data *)&ev_lqm_data, sizeof (ev_lqm_data));

	/*
	 * Reacquire the lock for the caller
	 */
	if (locked)
		ifnet_lock_exclusive(ifp);
}

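/*
 * Normalization example: the raw metric is snapped up to the nearest
 * threshold edge.  A raw lqm that falls in (IFNET_LQM_THRESH_ABORT,
 * IFNET_LQM_THRESH_MINIMALLY_VIABLE] is reported as "minimally viable",
 * anything in (POOR, GOOD] as "good", and so on; only the edge values
 * ever reach if_interface_state.lqm_state.
 */
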
static void
if_rrc_state_update(struct ifnet *ifp, unsigned int rrc_state)
{
	struct kev_dl_rrc_state kev;

	if (rrc_state == ifp->if_interface_state.rrc_state &&
	    (ifp->if_interface_state.valid_bitmask &
	    IF_INTERFACE_STATE_RRC_STATE_VALID))
		return;

	ifp->if_interface_state.valid_bitmask |=
	    IF_INTERFACE_STATE_RRC_STATE_VALID;

	ifp->if_interface_state.rrc_state = rrc_state;

	/*
	 * Don't want to hold the lock when issuing kernel events
	 */
	ifnet_lock_done(ifp);

	bzero(&kev, sizeof(struct kev_dl_rrc_state));
	kev.rrc_state = rrc_state;

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_RRC_STATE_CHANGED,
	    (struct net_event_data *)&kev, sizeof(struct kev_dl_rrc_state));

	ifnet_lock_exclusive(ifp);
}

errno_t
if_state_update(struct ifnet *ifp,
    struct if_interface_state *if_interface_state)
{
	u_short if_index_available = 0;

	ifnet_lock_exclusive(ifp);

	if ((ifp->if_type != IFT_CELLULAR) &&
	    (if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_RRC_STATE_VALID)) {
		ifnet_lock_done(ifp);
		return (ENOTSUP);
	}
	if ((if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_LQM_STATE_VALID) &&
	    (if_interface_state->lqm_state < IFNET_LQM_MIN ||
	    if_interface_state->lqm_state > IFNET_LQM_MAX)) {
		ifnet_lock_done(ifp);
		return (EINVAL);
	}
	if ((if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_RRC_STATE_VALID) &&
	    if_interface_state->rrc_state !=
	    IF_INTERFACE_STATE_RRC_STATE_IDLE &&
	    if_interface_state->rrc_state !=
	    IF_INTERFACE_STATE_RRC_STATE_CONNECTED) {
		ifnet_lock_done(ifp);
		return (EINVAL);
	}

	if (if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_LQM_STATE_VALID) {
		if_lqm_update(ifp, if_interface_state->lqm_state, 1);
	}
	if (if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_RRC_STATE_VALID) {
		if_rrc_state_update(ifp, if_interface_state->rrc_state);
	}
	if (if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID) {
		ifp->if_interface_state.valid_bitmask |=
		    IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
		ifp->if_interface_state.interface_availability =
		    if_interface_state->interface_availability;

		if (ifp->if_interface_state.interface_availability ==
		    IF_INTERFACE_STATE_INTERFACE_AVAILABLE) {
			if_index_available = ifp->if_index;
		}
	}
	ifnet_lock_done(ifp);

	/*
	 * Check if the TCP connections going on this interface should be
	 * forced to send probe packets instead of waiting for TCP timers
	 * to fire.  This will be done when there is an explicit
	 * notification that the interface became available.
	 */
	if (if_index_available > 0)
		tcp_interface_send_probe(if_index_available);

	return (0);
}

void
if_get_state(struct ifnet *ifp,
    struct if_interface_state *if_interface_state)
{
	ifnet_lock_shared(ifp);

	if_interface_state->valid_bitmask = 0;

	if (ifp->if_interface_state.valid_bitmask &
	    IF_INTERFACE_STATE_RRC_STATE_VALID) {
		if_interface_state->valid_bitmask |=
		    IF_INTERFACE_STATE_RRC_STATE_VALID;
		if_interface_state->rrc_state =
		    ifp->if_interface_state.rrc_state;
	}
	if (ifp->if_interface_state.valid_bitmask &
	    IF_INTERFACE_STATE_LQM_STATE_VALID) {
		if_interface_state->valid_bitmask |=
		    IF_INTERFACE_STATE_LQM_STATE_VALID;
		if_interface_state->lqm_state =
		    ifp->if_interface_state.lqm_state;
	}
	if (ifp->if_interface_state.valid_bitmask &
	    IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID) {
		if_interface_state->valid_bitmask |=
		    IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
		if_interface_state->interface_availability =
		    ifp->if_interface_state.interface_availability;
	}

	ifnet_lock_done(ifp);
}

errno_t
if_probe_connectivity(struct ifnet *ifp, u_int32_t conn_probe)
{
	ifnet_lock_exclusive(ifp);
	if (conn_probe > 1) {
		ifnet_lock_done(ifp);
		return (EINVAL);
	}
	if (conn_probe == 0)
		ifp->if_eflags &= ~IFEF_PROBE_CONNECTIVITY;
	else
		ifp->if_eflags |= IFEF_PROBE_CONNECTIVITY;
	ifnet_lock_done(ifp);

#if NECP
	necp_update_all_clients();
#endif /* NECP */

	tcp_probe_connectivity(ifp, conn_probe);
	return (0);
}

static int
uuid_get_ethernet(u_int8_t *node)
{
	struct ifnet *ifp;
	struct sockaddr_dl *sdl;

	ifnet_head_lock_shared();
	TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
		ifnet_lock_shared(ifp);
		IFA_LOCK_SPIN(ifp->if_lladdr);
		sdl = (struct sockaddr_dl *)(void *)ifp->if_lladdr->ifa_addr;
		if (sdl->sdl_type == IFT_ETHER) {
			memcpy(node, LLADDR(sdl), ETHER_ADDR_LEN);
			IFA_UNLOCK(ifp->if_lladdr);
			ifnet_lock_done(ifp);
			ifnet_head_done();
			return (0);
		}
		IFA_UNLOCK(ifp->if_lladdr);
		ifnet_lock_done(ifp);
	}
	ifnet_head_done();

	return (-1);
}

static int
sysctl_rxpoll SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	uint32_t i;
	int err;

	i = if_rxpoll;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (net_rxpoll == 0)
		return (ENXIO);

	if_rxpoll = i;
	return (err);
}

static int
sysctl_rxpoll_mode_holdtime SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	uint64_t q;
	int err;

	q = if_rxpoll_mode_holdtime;

	err = sysctl_handle_quad(oidp, &q, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (q < IF_RXPOLL_MODE_HOLDTIME_MIN)
		q = IF_RXPOLL_MODE_HOLDTIME_MIN;

	if_rxpoll_mode_holdtime = q;

	return (err);
}

static int
sysctl_rxpoll_sample_holdtime SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	uint64_t q;
	int err;

	q = if_rxpoll_sample_holdtime;

	err = sysctl_handle_quad(oidp, &q, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (q < IF_RXPOLL_SAMPLETIME_MIN)
		q = IF_RXPOLL_SAMPLETIME_MIN;

	if_rxpoll_sample_holdtime = q;

	return (err);
}

static int
sysctl_rxpoll_interval_time SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	uint64_t q;
	int err;

	q = if_rxpoll_interval_time;

	err = sysctl_handle_quad(oidp, &q, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (q < IF_RXPOLL_INTERVALTIME_MIN)
		q = IF_RXPOLL_INTERVALTIME_MIN;

	if_rxpoll_interval_time = q;

	return (err);
}

static int
sysctl_rxpoll_wlowat SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	uint32_t i;
	int err;

	i = if_rxpoll_wlowat;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (i == 0 || i >= if_rxpoll_whiwat)
		return (EINVAL);

	if_rxpoll_wlowat = i;
	return (err);
}

static int
sysctl_rxpoll_whiwat SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	uint32_t i;
	int err;

	i = if_rxpoll_whiwat;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (i <= if_rxpoll_wlowat)
		return (EINVAL);

	if_rxpoll_whiwat = i;
	return (err);
}

static int
sysctl_sndq_maxlen SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	uint32_t i;
	int err;

	i = if_sndq_maxlen;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (i < IF_SNDQ_MINLEN)
		i = IF_SNDQ_MINLEN;

	if_sndq_maxlen = i;
	return (err);
}

static int
sysctl_rcvq_maxlen SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	uint32_t i;
	int err;

	i = if_rcvq_maxlen;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (i < IF_RCVQ_MINLEN)
		i = IF_RCVQ_MINLEN;

	if_rcvq_maxlen = i;
	return (err);
}

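/*
 * These handlers back tunables under the net.link.generic.system sysctl
 * node (the exact OID names are declared with the SYSCTL_PROC entries
 * earlier in this file).  From userland they are reached through
 * sysctlbyname(3); a hedged sketch, where the OID name shown is an
 * assumption for illustration:
 */
#if 0	/* illustrative userland sketch only */
#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>

int
main(void)
{
	int val;
	size_t len = sizeof (val);

	/* OID name assumed; adjust to the SYSCTL_PROC declaration */
	if (sysctlbyname("net.link.generic.system.rxpoll", &val, &len,
	    NULL, 0) == 0)
		printf("rxpoll=%d\n", val);
	return (0);
}
#endif
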
void
dlil_node_present(struct ifnet *ifp, struct sockaddr *sa,
    int32_t rssi, int lqm, int npm, u_int8_t srvinfo[48])
{
	struct kev_dl_node_presence kev;
	struct sockaddr_dl *sdl;
	struct sockaddr_in6 *sin6;

	VERIFY(ifp);
	VERIFY(sa);
	VERIFY(sa->sa_family == AF_LINK || sa->sa_family == AF_INET6);

	bzero(&kev, sizeof (kev));
	sin6 = &kev.sin6_node_address;
	sdl = &kev.sdl_node_address;
	nd6_alt_node_addr_decompose(ifp, sa, sdl, sin6);
	kev.rssi = rssi;
	kev.link_quality_metric = lqm;
	kev.node_proximity_metric = npm;
	bcopy(srvinfo, kev.node_service_info, sizeof (kev.node_service_info));

	nd6_alt_node_present(ifp, sin6, sdl, rssi, lqm, npm);
	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_PRESENCE,
	    &kev.link_data, sizeof (kev));
}

void
dlil_node_absent(struct ifnet *ifp, struct sockaddr *sa)
{
	struct kev_dl_node_absence kev;
	struct sockaddr_in6 *sin6;
	struct sockaddr_dl *sdl;

	VERIFY(ifp);
	VERIFY(sa);
	VERIFY(sa->sa_family == AF_LINK || sa->sa_family == AF_INET6);

	bzero(&kev, sizeof (kev));
	sin6 = &kev.sin6_node_address;
	sdl = &kev.sdl_node_address;
	nd6_alt_node_addr_decompose(ifp, sa, sdl, sin6);

	nd6_alt_node_absent(ifp, sin6);
	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_ABSENCE,
	    &kev.link_data, sizeof (kev));
}

const void *
dlil_ifaddr_bytes(const struct sockaddr_dl *sdl, size_t *sizep,
    kauth_cred_t *credp)
{
	const u_int8_t *bytes;
	size_t size;

	bytes = CONST_LLADDR(sdl);
	size = sdl->sdl_alen;

#if CONFIG_MACF
	if (dlil_lladdr_ckreq) {
		switch (sdl->sdl_type) {
		case IFT_ETHER:
		case IFT_IEEE1394:
			break;
		default:
			credp = NULL;
			break;
		};

		if (credp && mac_system_check_info(*credp, "net.link.addr")) {
			static const u_int8_t unspec[FIREWIRE_EUI64_LEN] = {
				[0] = 2
			};

			bytes = unspec;
		}
	}
#else
#pragma unused(credp)
#endif

	if (sizep != NULL) *sizep = size;
	return (bytes);
}

void
dlil_report_issues(struct ifnet *ifp, u_int8_t modid[DLIL_MODIDLEN],
    u_int8_t info[DLIL_MODARGLEN])
{
	struct kev_dl_issues kev;
	struct timeval tv;

	VERIFY(ifp != NULL);
	VERIFY(modid != NULL);
	_CASSERT(sizeof (kev.modid) == DLIL_MODIDLEN);
	_CASSERT(sizeof (kev.info) == DLIL_MODARGLEN);

	bzero(&kev, sizeof (kev));

	microtime(&tv);
	kev.timestamp = tv.tv_sec;
	bcopy(modid, &kev.modid, DLIL_MODIDLEN);
	if (info != NULL)
		bcopy(info, &kev.info, DLIL_MODARGLEN);

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_ISSUES,
	    &kev.link_data, sizeof (kev));
}

errno_t
ifnet_getset_opportunistic(ifnet_t ifp, u_long cmd, struct ifreq *ifr,
    struct proc *p)
{
	u_int32_t level = IFNET_THROTTLE_OFF;
	errno_t result = 0;

	VERIFY(cmd == SIOCSIFOPPORTUNISTIC || cmd == SIOCGIFOPPORTUNISTIC);

	if (cmd == SIOCSIFOPPORTUNISTIC) {
		/*
		 * XXX: Use priv_check_cred() instead of root check?
		 */
		if ((result = proc_suser(p)) != 0)
			return (result);

		if (ifr->ifr_opportunistic.ifo_flags ==
		    IFRIFOF_BLOCK_OPPORTUNISTIC)
			level = IFNET_THROTTLE_OPPORTUNISTIC;
		else if (ifr->ifr_opportunistic.ifo_flags == 0)
			level = IFNET_THROTTLE_OFF;
		else
			result = EINVAL;

		if (result == 0)
			result = ifnet_set_throttle(ifp, level);
	} else if ((result = ifnet_get_throttle(ifp, &level)) == 0) {
		ifr->ifr_opportunistic.ifo_flags = 0;
		if (level == IFNET_THROTTLE_OPPORTUNISTIC) {
			ifr->ifr_opportunistic.ifo_flags |=
			    IFRIFOF_BLOCK_OPPORTUNISTIC;
		}
	}

	/*
	 * Return the count of current opportunistic connections
	 * over the interface.
	 */
	if (result == 0) {
		uint32_t flags = 0;
		flags |= (cmd == SIOCSIFOPPORTUNISTIC) ?
		    INPCB_OPPORTUNISTIC_SETCMD : 0;
		flags |= (level == IFNET_THROTTLE_OPPORTUNISTIC) ?
		    INPCB_OPPORTUNISTIC_THROTTLEON : 0;
		ifr->ifr_opportunistic.ifo_inuse =
		    udp_count_opportunistic(ifp->if_index, flags) +
		    tcp_count_opportunistic(ifp->if_index, flags);
	}

	if (result == EALREADY)
		result = 0;

	return (result);
}

errno_t
ifnet_get_throttle(struct ifnet *ifp, u_int32_t *level)
{
	struct ifclassq *ifq;
	int err = 0;

	if (!(ifp->if_eflags & IFEF_TXSTART))
		return (ENXIO);

	*level = IFNET_THROTTLE_OFF;

	ifq = &ifp->if_snd;
	IFCQ_LOCK(ifq);
	/* Throttling works only for IFCQ, not ALTQ instances */
	if (IFCQ_IS_ENABLED(ifq))
		IFCQ_GET_THROTTLE(ifq, *level, err);
	IFCQ_UNLOCK(ifq);

	return (err);
}

errno_t
ifnet_set_throttle(struct ifnet *ifp, u_int32_t level)
{
	struct ifclassq *ifq;
	int err = 0;

	if (!(ifp->if_eflags & IFEF_TXSTART))
		return (ENXIO);

	ifq = &ifp->if_snd;

	switch (level) {
	case IFNET_THROTTLE_OFF:
	case IFNET_THROTTLE_OPPORTUNISTIC:
		break;
	default:
		return (EINVAL);
	}

	IFCQ_LOCK(ifq);
	if (IFCQ_IS_ENABLED(ifq))
		IFCQ_SET_THROTTLE(ifq, level, err);
	IFCQ_UNLOCK(ifq);

	if (err == 0) {
		printf("%s: throttling level set to %d\n", if_name(ifp),
		    level);
		if (level == IFNET_THROTTLE_OFF)
			ifnet_start(ifp);
	}

	return (err);
}

int
ifnet_getset_log(ifnet_t ifp, u_long cmd, struct ifreq *ifr,
    struct proc *p)
{
#pragma unused(p)
	errno_t result = 0;
	uint32_t flags;
	int level, category, subcategory;

	VERIFY(cmd == SIOCSIFLOG || cmd == SIOCGIFLOG);

	if (cmd == SIOCSIFLOG) {
		if ((result = priv_check_cred(kauth_cred_get(),
		    PRIV_NET_INTERFACE_CONTROL, 0)) != 0)
			return (result);

		level = ifr->ifr_log.ifl_level;
		if (level < IFNET_LOG_MIN || level > IFNET_LOG_MAX)
			result = EINVAL;

		flags = ifr->ifr_log.ifl_flags;
		if ((flags &= IFNET_LOGF_MASK) == 0)
			result = EINVAL;

		category = ifr->ifr_log.ifl_category;
		subcategory = ifr->ifr_log.ifl_subcategory;

		if (result == 0)
			result = ifnet_set_log(ifp, level, flags,
			    category, subcategory);
	} else {
		result = ifnet_get_log(ifp, &level, &flags, &category,
		    &subcategory);
		if (result == 0) {
			ifr->ifr_log.ifl_level = level;
			ifr->ifr_log.ifl_flags = flags;
			ifr->ifr_log.ifl_category = category;
			ifr->ifr_log.ifl_subcategory = subcategory;
		}
	}

	return (result);
}

int
ifnet_set_log(struct ifnet *ifp, int32_t level, uint32_t flags,
    int32_t category, int32_t subcategory)
{
	int err = 0;

	VERIFY(level >= IFNET_LOG_MIN && level <= IFNET_LOG_MAX);
	VERIFY(flags & IFNET_LOGF_MASK);

	/*
	 * The logging level applies to all facilities; make sure to
	 * update them all with the most current level.
	 */
	flags |= ifp->if_log.flags;

	if (ifp->if_output_ctl != NULL) {
		struct ifnet_log_params l;

		bzero(&l, sizeof (l));
		l.level = level;
		l.flags = flags;
		l.flags &= ~IFNET_LOGF_DLIL;
		l.category = category;
		l.subcategory = subcategory;

		/* Send this request to lower layers */
		if (l.flags != 0) {
			err = ifp->if_output_ctl(ifp, IFNET_CTL_SET_LOG,
			    sizeof (l), &l);
		}
	} else if ((flags & ~IFNET_LOGF_DLIL) && ifp->if_output_ctl == NULL) {
		/*
		 * If targeted to the lower layers without an output
		 * control callback registered on the interface, just
		 * silently ignore facilities other than ours.
		 */
		flags &= IFNET_LOGF_DLIL;
		if (flags == 0 && (!(ifp->if_log.flags & IFNET_LOGF_DLIL)))
			level = 0;
	}

	if (err == 0) {
		if ((ifp->if_log.level = level) == IFNET_LOG_DEFAULT)
			ifp->if_log.flags = 0;
		else
			ifp->if_log.flags |= flags;

		log(LOG_INFO, "%s: logging level set to %d flags=%b "
		    "arg=%b, category=%d subcategory=%d\n", if_name(ifp),
		    ifp->if_log.level, ifp->if_log.flags,
		    IFNET_LOGF_BITS, flags, IFNET_LOGF_BITS,
		    category, subcategory);
	}

	return (err);
}

int
ifnet_get_log(struct ifnet *ifp, int32_t *level, uint32_t *flags,
    int32_t *category, int32_t *subcategory)
{
	if (level != NULL)
		*level = ifp->if_log.level;
	if (flags != NULL)
		*flags = ifp->if_log.flags;
	if (category != NULL)
		*category = ifp->if_log.category;
	if (subcategory != NULL)
		*subcategory = ifp->if_log.subcategory;

	return (0);
}

errno_t
ifnet_notify_address(struct ifnet *ifp, int af)
{
	struct ifnet_notify_address_params na;

#if PF
	(void) pf_ifaddr_hook(ifp);
#endif /* PF */

	if (ifp->if_output_ctl == NULL)
		return (EOPNOTSUPP);

	bzero(&na, sizeof (na));
	na.address_family = af;

	return (ifp->if_output_ctl(ifp, IFNET_CTL_NOTIFY_ADDRESS,
	    sizeof (na), &na));
}

errno_t
ifnet_flowid(struct ifnet *ifp, uint32_t *flowid)
{
	if (ifp == NULL || flowid == NULL) {
		return (EINVAL);
	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    !IF_FULLY_ATTACHED(ifp)) {
		return (ENXIO);
	}

	*flowid = ifp->if_flowhash;

	return (0);
}

errno_t
ifnet_disable_output(struct ifnet *ifp)
{
	int err;

	if (ifp == NULL) {
		return (EINVAL);
	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    !IF_FULLY_ATTACHED(ifp)) {
		return (ENXIO);
	}

	if ((err = ifnet_fc_add(ifp)) == 0) {
		lck_mtx_lock_spin(&ifp->if_start_lock);
		ifp->if_start_flags |= IFSF_FLOW_CONTROLLED;
		lck_mtx_unlock(&ifp->if_start_lock);
	}
	return (err);
}

errno_t
ifnet_enable_output(struct ifnet *ifp)
{
	if (ifp == NULL) {
		return (EINVAL);
	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    !IF_FULLY_ATTACHED(ifp)) {
		return (ENXIO);
	}

	ifnet_start_common(ifp, TRUE);
	return (0);
}

void
ifnet_flowadv(uint32_t flowhash)
{
	struct ifnet_fc_entry *ifce;
	struct ifnet *ifp;

	ifce = ifnet_fc_get(flowhash);
	if (ifce == NULL)
		return;

	VERIFY(ifce->ifce_ifp != NULL);
	ifp = ifce->ifce_ifp;

	/* flow hash gets recalculated per attach, so check */
	if (ifnet_is_attached(ifp, 1)) {
		if (ifp->if_flowhash == flowhash)
			(void) ifnet_enable_output(ifp);
		ifnet_decr_iorefcnt(ifp);
	}
	ifnet_fc_entry_free(ifce);
}

/*
 * Function to compare ifnet_fc_entries in ifnet flow control tree
 */
static inline int
ifce_cmp(const struct ifnet_fc_entry *fc1, const struct ifnet_fc_entry *fc2)
{
	return (fc1->ifce_flowhash - fc2->ifce_flowhash);
}

static int
ifnet_fc_add(struct ifnet *ifp)
{
	struct ifnet_fc_entry keyfc, *ifce;
	uint32_t flowhash;

	VERIFY(ifp != NULL && (ifp->if_eflags & IFEF_TXSTART));
	VERIFY(ifp->if_flowhash != 0);
	flowhash = ifp->if_flowhash;

	bzero(&keyfc, sizeof (keyfc));
	keyfc.ifce_flowhash = flowhash;

	lck_mtx_lock_spin(&ifnet_fc_lock);
	ifce = RB_FIND(ifnet_fc_tree, &ifnet_fc_tree, &keyfc);
	if (ifce != NULL && ifce->ifce_ifp == ifp) {
		/* Entry is already in ifnet_fc_tree, return */
		lck_mtx_unlock(&ifnet_fc_lock);
		return (0);
	}

	if (ifce != NULL) {
		/*
		 * There is a different fc entry with the same flow hash
		 * but different ifp pointer.  There can be a collision
		 * on flow hash but the probability is low.  Let's just
		 * avoid adding a second one when there is a collision.
		 */
		lck_mtx_unlock(&ifnet_fc_lock);
		return (EAGAIN);
	}

	/* become regular mutex */
	lck_mtx_convert_spin(&ifnet_fc_lock);

	ifce = zalloc(ifnet_fc_zone);
	if (ifce == NULL) {
		/* memory allocation failed */
		lck_mtx_unlock(&ifnet_fc_lock);
		return (ENOMEM);
	}
	bzero(ifce, ifnet_fc_zone_size);

	ifce->ifce_flowhash = flowhash;
	ifce->ifce_ifp = ifp;

	RB_INSERT(ifnet_fc_tree, &ifnet_fc_tree, ifce);
	lck_mtx_unlock(&ifnet_fc_lock);
	return (0);
}

static struct ifnet_fc_entry *
ifnet_fc_get(uint32_t flowhash)
{
	struct ifnet_fc_entry keyfc, *ifce;
	struct ifnet *ifp;

	bzero(&keyfc, sizeof (keyfc));
	keyfc.ifce_flowhash = flowhash;

	lck_mtx_lock_spin(&ifnet_fc_lock);
	ifce = RB_FIND(ifnet_fc_tree, &ifnet_fc_tree, &keyfc);
	if (ifce == NULL) {
		/* Entry is not present in ifnet_fc_tree, return */
		lck_mtx_unlock(&ifnet_fc_lock);
		return (NULL);
	}

	RB_REMOVE(ifnet_fc_tree, &ifnet_fc_tree, ifce);

	VERIFY(ifce->ifce_ifp != NULL);
	ifp = ifce->ifce_ifp;

	/* become regular mutex */
	lck_mtx_convert_spin(&ifnet_fc_lock);

	if (!ifnet_is_attached(ifp, 0)) {
		/*
		 * This ifp is not attached or in the process of being
		 * detached; just don't process it.
		 */
		ifnet_fc_entry_free(ifce);
		ifce = NULL;
	}
	lck_mtx_unlock(&ifnet_fc_lock);

	return (ifce);
}

static void
ifnet_fc_entry_free(struct ifnet_fc_entry *ifce)
{
	zfree(ifnet_fc_zone, ifce);
}

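/*
 * The flow-control tree ties a transmit-stalled ifnet to its flowhash.
 * The intended sequence, sketched below in hedged form: a driver (or the
 * start path on its behalf) stalls output when its ring fills, which
 * registers the flowhash via ifnet_fc_add(); later, when the advisory
 * path sees that hash drain, ifnet_flowadv() re-enables output.
 */
#if 0	/* illustrative sketch only; tx_ring_full is hypothetical */
	/* on transmit-ring exhaustion: */
	if (tx_ring_full)
		(void) ifnet_disable_output(ifp);

	/* later, from the flow-advisory path, keyed by the flowhash: */
	ifnet_flowadv(ifp->if_flowhash);	/* re-enables output */
#endif
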
static uint32_t
ifnet_calc_flowhash(struct ifnet *ifp)
{
	struct ifnet_flowhash_key fh __attribute__((aligned(8)));
	uint32_t flowhash = 0;

	if (ifnet_flowhash_seed == 0)
		ifnet_flowhash_seed = RandomULong();

	bzero(&fh, sizeof (fh));

	(void) snprintf(fh.ifk_name, sizeof (fh.ifk_name), "%s", ifp->if_name);
	fh.ifk_unit = ifp->if_unit;
	fh.ifk_flags = ifp->if_flags;
	fh.ifk_eflags = ifp->if_eflags;
	fh.ifk_capabilities = ifp->if_capabilities;
	fh.ifk_capenable = ifp->if_capenable;
	fh.ifk_output_sched_model = ifp->if_output_sched_model;
	fh.ifk_rand1 = RandomULong();
	fh.ifk_rand2 = RandomULong();

try_again:
	flowhash = net_flowhash(&fh, sizeof (fh), ifnet_flowhash_seed);
	if (flowhash == 0) {
		/* try to get a non-zero flowhash */
		ifnet_flowhash_seed = RandomULong();
		goto try_again;
	}

	return (flowhash);
}

int
ifnet_set_netsignature(struct ifnet *ifp, uint8_t family, uint8_t len,
    uint16_t flags, uint8_t *data)
{
#pragma unused(flags)
	int error = 0;

	switch (family) {
	case AF_INET:
		if_inetdata_lock_exclusive(ifp);
		if (IN_IFEXTRA(ifp) != NULL) {
			if (len == 0) {
				/* Allow clearing the signature */
				IN_IFEXTRA(ifp)->netsig_len = 0;
				bzero(IN_IFEXTRA(ifp)->netsig,
				    sizeof (IN_IFEXTRA(ifp)->netsig));
				if_inetdata_lock_done(ifp);
				break;
			} else if (len > sizeof (IN_IFEXTRA(ifp)->netsig)) {
				error = EINVAL;
				if_inetdata_lock_done(ifp);
				break;
			}
			IN_IFEXTRA(ifp)->netsig_len = len;
			bcopy(data, IN_IFEXTRA(ifp)->netsig, len);
		} else {
			error = ENOMEM;
		}
		if_inetdata_lock_done(ifp);
		break;

	case AF_INET6:
		if_inet6data_lock_exclusive(ifp);
		if (IN6_IFEXTRA(ifp) != NULL) {
			if (len == 0) {
				/* Allow clearing the signature */
				IN6_IFEXTRA(ifp)->netsig_len = 0;
				bzero(IN6_IFEXTRA(ifp)->netsig,
				    sizeof (IN6_IFEXTRA(ifp)->netsig));
				if_inet6data_lock_done(ifp);
				break;
			} else if (len > sizeof (IN6_IFEXTRA(ifp)->netsig)) {
				error = EINVAL;
				if_inet6data_lock_done(ifp);
				break;
			}
			IN6_IFEXTRA(ifp)->netsig_len = len;
			bcopy(data, IN6_IFEXTRA(ifp)->netsig, len);
		} else {
			error = ENOMEM;
		}
		if_inet6data_lock_done(ifp);
		break;

	default:
		error = EINVAL;
		break;
	}

	return (error);
}

int
ifnet_get_netsignature(struct ifnet *ifp, uint8_t family, uint8_t *len,
    uint16_t *flags, uint8_t *data)
{
	int error = 0;

	if (ifp == NULL || len == NULL || data == NULL)
		return (EINVAL);

	switch (family) {
	case AF_INET:
		if_inetdata_lock_shared(ifp);
		if (IN_IFEXTRA(ifp) != NULL) {
			if (*len == 0 || *len < IN_IFEXTRA(ifp)->netsig_len) {
				error = EINVAL;
				if_inetdata_lock_done(ifp);
				break;
			}
			if ((*len = IN_IFEXTRA(ifp)->netsig_len) > 0)
				bcopy(IN_IFEXTRA(ifp)->netsig, data, *len);
			else
				error = ENOENT;
		} else {
			error = ENOMEM;
		}
		if_inetdata_lock_done(ifp);
		break;

	case AF_INET6:
		if_inet6data_lock_shared(ifp);
		if (IN6_IFEXTRA(ifp) != NULL) {
			if (*len == 0 || *len < IN6_IFEXTRA(ifp)->netsig_len) {
				error = EINVAL;
				if_inet6data_lock_done(ifp);
				break;
			}
			if ((*len = IN6_IFEXTRA(ifp)->netsig_len) > 0)
				bcopy(IN6_IFEXTRA(ifp)->netsig, data, *len);
			else
				error = ENOENT;
		} else {
			error = ENOMEM;
		}
		if_inet6data_lock_done(ifp);
		break;

	default:
		error = EINVAL;
		break;
	}

	if (error == 0 && flags != NULL)
		*flags = 0;

	return (error);
}

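/*
 * Set/get are symmetrical: the getter's *len is in/out -- callers pass
 * the capacity of "data" and get back the stored signature length.
 * Hedged usage sketch; the buffer size constant is an assumption and
 * should match the netsig field's declared size:
 */
#if 0	/* illustrative sketch only */
	uint8_t sig[IFNET_SIGNATURELEN];	/* size assumed */
	uint8_t len = sizeof (sig);
	uint16_t flags;

	if (ifnet_get_netsignature(ifp, AF_INET6, &len, &flags, sig) == 0) {
		/* sig[0..len-1] holds the IPv6 network signature */
	}
#endif
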
int
ifnet_set_nat64prefix(struct ifnet *ifp, struct ipv6_prefix *prefixes)
{
	int i, error = 0, one_set = 0;

	if_inet6data_lock_exclusive(ifp);

	if (IN6_IFEXTRA(ifp) == NULL) {
		error = ENOMEM;
		goto out;
	}

	for (i = 0; i < NAT64_MAX_NUM_PREFIXES; i++) {
		uint32_t prefix_len =
		    prefixes[i].prefix_len;
		struct in6_addr *prefix =
		    &prefixes[i].ipv6_prefix;

		if (prefix_len == 0) {
			/* Allow clearing the signature */
			IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len = 0;
			bzero(&IN6_IFEXTRA(ifp)->nat64_prefixes[i].ipv6_prefix,
			    sizeof(struct in6_addr));

			continue;
		} else if (prefix_len != NAT64_PREFIX_LEN_32 &&
		    prefix_len != NAT64_PREFIX_LEN_40 &&
		    prefix_len != NAT64_PREFIX_LEN_48 &&
		    prefix_len != NAT64_PREFIX_LEN_56 &&
		    prefix_len != NAT64_PREFIX_LEN_64 &&
		    prefix_len != NAT64_PREFIX_LEN_96) {
			error = EINVAL;
			goto out;
		}

		if (IN6_IS_SCOPE_EMBED(prefix)) {
			error = EINVAL;
			goto out;
		}

		IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len = prefix_len;
		bcopy(prefix, &IN6_IFEXTRA(ifp)->nat64_prefixes[i].ipv6_prefix,
		    sizeof(struct in6_addr));
		one_set = 1;
	}

out:
	if_inet6data_lock_done(ifp);

	if (error == 0 && one_set != 0)
		necp_update_all_clients();

	return (error);
}

int
ifnet_get_nat64prefix(struct ifnet *ifp, struct ipv6_prefix *prefixes)
{
	int i, found_one = 0, error = 0;

	if (ifp == NULL)
		return (EINVAL);

	if_inet6data_lock_shared(ifp);

	if (IN6_IFEXTRA(ifp) == NULL) {
		error = ENOMEM;
		goto out;
	}

	for (i = 0; i < NAT64_MAX_NUM_PREFIXES; i++) {
		if (IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len != 0)
			found_one = 1;
	}

	if (found_one == 0) {
		error = ENOENT;
		goto out;
	}

	if (prefixes)
		bcopy(IN6_IFEXTRA(ifp)->nat64_prefixes, prefixes,
		    sizeof(IN6_IFEXTRA(ifp)->nat64_prefixes));

out:
	if_inet6data_lock_done(ifp);

	return (error);
}

static void
dlil_output_cksum_dbg(struct ifnet *ifp, struct mbuf *m, uint32_t hoff,
    protocol_family_t pf)
{
#pragma unused(ifp)
	uint32_t did_sw;

	if (!(hwcksum_dbg_mode & HWCKSUM_DBG_FINALIZE_FORCED) ||
	    (m->m_pkthdr.csum_flags & (CSUM_TSO_IPV4|CSUM_TSO_IPV6)))
		return;

	switch (pf) {
	case PF_INET:
		did_sw = in_finalize_cksum(m, hoff, m->m_pkthdr.csum_flags);
		if (did_sw & CSUM_DELAY_IP)
			hwcksum_dbg_finalized_hdr++;
		if (did_sw & CSUM_DELAY_DATA)
			hwcksum_dbg_finalized_data++;
		break;
#if INET6
	case PF_INET6:
		/*
		 * Checksum offload should not have been enabled when
		 * extension headers exist; that also means that we
		 * cannot force-finalize packets with extension headers.
		 * Indicate to the callee should it skip such case by
		 * setting optlen to -1.
		 */
		did_sw = in6_finalize_cksum(m, hoff, -1, -1,
		    m->m_pkthdr.csum_flags);
		if (did_sw & CSUM_DELAY_IPV6_DATA)
			hwcksum_dbg_finalized_data++;
		break;
#endif /* INET6 */
	default:
		return;
	}
}

static void
dlil_input_cksum_dbg(struct ifnet *ifp, struct mbuf *m, char *frame_header,
    protocol_family_t pf)
{
	uint16_t sum = 0;
	uint32_t hlen;

	if (frame_header == NULL ||
	    frame_header < (char *)mbuf_datastart(m) ||
	    frame_header > (char *)m->m_data) {
		printf("%s: frame header pointer 0x%llx out of range "
		    "[0x%llx,0x%llx] for mbuf 0x%llx\n", if_name(ifp),
		    (uint64_t)VM_KERNEL_ADDRPERM(frame_header),
		    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_datastart(m)),
		    (uint64_t)VM_KERNEL_ADDRPERM(m->m_data),
		    (uint64_t)VM_KERNEL_ADDRPERM(m));
		return;
	}
	hlen = (m->m_data - frame_header);

	switch (pf) {
	case PF_INET:
#if INET6
	case PF_INET6:
#endif /* INET6 */
		break;
	default:
		return;
	}

	/*
	 * Force partial checksum offload; useful to simulate cases
	 * where the hardware does not support partial checksum offload,
	 * in order to validate correctness throughout the layers above.
	 */
	if (hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED) {
		uint32_t foff = hwcksum_dbg_partial_rxoff_forced;

		if (foff > (uint32_t)m->m_pkthdr.len)
			return;

		m->m_pkthdr.csum_flags &= ~CSUM_RX_FLAGS;

		/* Compute 16-bit 1's complement sum from forced offset */
		sum = m_sum16(m, foff, (m->m_pkthdr.len - foff));

		m->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PARTIAL);
		m->m_pkthdr.csum_rx_val = sum;
		m->m_pkthdr.csum_rx_start = (foff + hlen);

		hwcksum_dbg_partial_forced++;
		hwcksum_dbg_partial_forced_bytes += m->m_pkthdr.len;
	}

	/*
	 * Partial checksum offload verification (and adjustment);
	 * useful to validate and test cases where the hardware
	 * supports partial checksum offload.
	 */
	if ((m->m_pkthdr.csum_flags &
	    (CSUM_DATA_VALID | CSUM_PARTIAL | CSUM_PSEUDO_HDR)) ==
	    (CSUM_DATA_VALID | CSUM_PARTIAL)) {
		uint32_t rxoff;

		/* Start offset must begin after frame header */
		rxoff = m->m_pkthdr.csum_rx_start;
		if (hlen > rxoff) {
			hwcksum_dbg_bad_rxoff++;
			if (dlil_verbose) {
				printf("%s: partial cksum start offset %d "
				    "is less than frame header length %d for "
				    "mbuf 0x%llx\n", if_name(ifp), rxoff, hlen,
				    (uint64_t)VM_KERNEL_ADDRPERM(m));
			}
			return;
		}
		rxoff -= hlen;

		if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED)) {
			/*
			 * Compute the expected 16-bit 1's complement sum;
			 * skip this if we've already computed it above
			 * when partial checksum offload is forced.
			 */
			sum = m_sum16(m, rxoff, (m->m_pkthdr.len - rxoff));

			/* Hardware or driver is buggy */
			if (sum != m->m_pkthdr.csum_rx_val) {
				hwcksum_dbg_bad_cksum++;
				if (dlil_verbose) {
					printf("%s: bad partial cksum value "
					    "0x%x (expected 0x%x) for mbuf "
					    "0x%llx [rx_start %d]\n",
					    if_name(ifp),
					    m->m_pkthdr.csum_rx_val, sum,
					    (uint64_t)VM_KERNEL_ADDRPERM(m),
					    m->m_pkthdr.csum_rx_start);
				}
				return;
			}
		}
		hwcksum_dbg_verified++;

		/*
		 * This code allows us to emulate various hardwares that
		 * perform 16-bit 1's complement sum beginning at various
		 * start offset values.
		 */
		if (hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_RXOFF_ADJ) {
			uint32_t aoff = hwcksum_dbg_partial_rxoff_adj;

			if (aoff == rxoff || aoff > (uint32_t)m->m_pkthdr.len)
				return;

			sum = m_adj_sum16(m, rxoff, aoff,
			    m_pktlen(m) - aoff, sum);

			m->m_pkthdr.csum_rx_val = sum;
			m->m_pkthdr.csum_rx_start = (aoff + hlen);

			hwcksum_dbg_adjusted++;
		}
	}
}

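/*
 * Both debug paths above lean on 16-bit 1's complement sums (m_sum16()).
 * For reference, a minimal contiguous-buffer version of the same
 * arithmetic -- accumulate little-endian 16-bit words, then fold the
 * 32-bit accumulator twice to reinject the carries -- is sketched below.
 * This is a hedged illustration, not the optimized kernel b_sum16():
 */
#if 0	/* illustrative sketch only */
static uint16_t
ones_sum16(const uint8_t *buf, int len)
{
	uint32_t sum = 0;

	while (len > 1) {
		sum += (uint16_t)((buf[1] << 8) | buf[0]);	/* LE words */
		buf += 2;
		len -= 2;
	}
	if (len > 0)
		sum += buf[0];		/* odd trailing byte, low-order */

	sum = (sum >> 16) + (sum & 0xffff);	/* fold carries back in */
	sum += (sum >> 16);
	return ((uint16_t)sum);
}
#endif
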
static int
sysctl_hwcksum_dbg_mode SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	u_int32_t i;
	int err;

	i = hwcksum_dbg_mode;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (hwcksum_dbg == 0)
		return (ENODEV);

	if ((i & ~HWCKSUM_DBG_MASK) != 0)
		return (EINVAL);

	hwcksum_dbg_mode = (i & HWCKSUM_DBG_MASK);

	return (err);
}

static int
sysctl_hwcksum_dbg_partial_rxoff_forced SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	u_int32_t i;
	int err;

	i = hwcksum_dbg_partial_rxoff_forced;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED))
		return (ENODEV);

	hwcksum_dbg_partial_rxoff_forced = i;

	return (err);
}

static int
sysctl_hwcksum_dbg_partial_rxoff_adj SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	u_int32_t i;
	int err;

	i = hwcksum_dbg_partial_rxoff_adj;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_RXOFF_ADJ))
		return (ENODEV);

	hwcksum_dbg_partial_rxoff_adj = i;

	return (err);
}

static int
sysctl_tx_chain_len_stats SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
	int err;

	if (req->oldptr == USER_ADDR_NULL) {

	}
	if (req->newptr != USER_ADDR_NULL) {
		return (EPERM);
	}
	err = SYSCTL_OUT(req, &tx_chain_len_stats,
	    sizeof(struct chain_len_stats));

	return (err);
}

#if DEBUG || DEVELOPMENT
/* Blob for sum16 verification */
static uint8_t sumdata[] = {
	0x1f, 0x8b, 0x08, 0x08, 0x4c, 0xe5, 0x9a, 0x4f, 0x00, 0x03,
	0x5f, 0x00, 0x5d, 0x91, 0x41, 0x4e, 0xc4, 0x30, 0x0c, 0x45,
	0xf7, 0x9c, 0xc2, 0x07, 0x18, 0xf5, 0x0e, 0xb0, 0xe2, 0x00,
	0x48, 0x88, 0xa5, 0xdb, 0xba, 0x49, 0x34, 0x69, 0xdc, 0x71,
	0x92, 0xa9, 0xc2, 0x8a, 0x6b, 0x70, 0x3d, 0x4e, 0x82, 0x93,
	0xb4, 0x08, 0xd8, 0xc5, 0xb1, 0xfd, 0xff, 0xb3, 0xfd, 0x4c,
	0x42, 0x5f, 0x1f, 0x9f, 0x11, 0x12, 0x43, 0xb2, 0x04, 0x93,
	0xe0, 0x7b, 0x01, 0x0e, 0x14, 0x07, 0x78, 0xd1, 0x78, 0x75,
	0x71, 0x71, 0xe9, 0x08, 0x84, 0x46, 0xf2, 0xc7, 0x3b, 0x09,
	0xe7, 0xd1, 0xd3, 0x8a, 0x57, 0x92, 0x33, 0xcd, 0x39, 0xcc,
	0xb0, 0x91, 0x89, 0xe0, 0x42, 0x53, 0x8b, 0xb7, 0x8c, 0x42,
	0x60, 0xd9, 0x9f, 0x7a, 0x55, 0x19, 0x76, 0xcb, 0x10, 0x49,
	0x35, 0xac, 0x0b, 0x5a, 0x3c, 0xbb, 0x65, 0x51, 0x8c, 0x90,
	0x7c, 0x69, 0x45, 0x45, 0x81, 0xb4, 0x2b, 0x70, 0x82, 0x85,
	0x55, 0x91, 0x17, 0x90, 0xdc, 0x14, 0x1e, 0x35, 0x52, 0xdd,
	0x02, 0x16, 0xef, 0xb5, 0x40, 0x89, 0xe2, 0x46, 0x53, 0xad,
	0x93, 0x6e, 0x98, 0x30, 0xe5, 0x08, 0xb7, 0xcc, 0x03, 0xbc,
	0x71, 0x86, 0x09, 0x43, 0x0d, 0x52, 0xf5, 0xa2, 0xf5, 0xa2,
	0x56, 0x11, 0x8d, 0xa8, 0xf5, 0xee, 0x92, 0x3d, 0xfe, 0x8c,
	0x67, 0x71, 0x8b, 0x0e, 0x2d, 0x70, 0x77, 0xbe, 0xbe, 0xea,
	0xbf, 0x9a, 0x8d, 0x9c, 0x53, 0x53, 0xe5, 0xe0, 0x4b, 0x87,
	0x85, 0xd2, 0x45, 0x95, 0x30, 0xc1, 0xcc, 0xe0, 0x74, 0x54,
	0x13, 0x58, 0xe8, 0xe8, 0x79, 0xa2, 0x09, 0x73, 0xa4, 0x0e,
	0x39, 0x59, 0x0c, 0xe6, 0x9c, 0xb2, 0x4f, 0x06, 0x5b, 0x8e,
	0xcd, 0x17, 0x6c, 0x5e, 0x95, 0x4d, 0x70, 0xa2, 0x0a, 0xbf,
	0xa3, 0xcc, 0x03, 0xbc, 0x5a, 0xe7, 0x75, 0x06, 0x5e, 0x75,
	0xef, 0x58, 0x8e, 0x15, 0xd1, 0x0a, 0x18, 0xff, 0xdd, 0xe6,
	0x02, 0x3b, 0xb5, 0xb4, 0xa1, 0xe0, 0x72, 0xfc, 0xe3, 0xab,
	0x07, 0xe0, 0x4d, 0x65, 0xea, 0x92, 0xeb, 0xf2, 0x7b, 0x17,
	0x05, 0xce, 0xc6, 0xf6, 0x2b, 0xbb, 0x70, 0x3d, 0x00, 0x95,
	0xe0, 0x07, 0x52, 0x3b, 0x58, 0xfc, 0x7c, 0x69, 0x4d, 0xe9,
	0xf7, 0xa9, 0x66, 0x1e, 0x1e, 0xbe, 0x01, 0x69, 0x98, 0xfe,
	0xc8, 0x28, 0x02, 0x00, 0x00
};

/* Precomputed 16-bit 1's complement sums for various spans of the above data */
static struct {
	boolean_t	init;
	uint16_t	len;
	uint16_t	sumr;	/* reference */
	uint16_t	sumrp;	/* reference, precomputed */
} sumtbl[] = {
	{ FALSE, 0, 0, 0x0000 },
	{ FALSE, 1, 0, 0x001f },
	{ FALSE, 2, 0, 0x8b1f },
	{ FALSE, 3, 0, 0x8b27 },
	{ FALSE, 7, 0, 0x790e },
	{ FALSE, 11, 0, 0xcb6d },
	{ FALSE, 20, 0, 0x20dd },
	{ FALSE, 27, 0, 0xbabd },
	{ FALSE, 32, 0, 0xf3e8 },
	{ FALSE, 37, 0, 0x197d },
	{ FALSE, 43, 0, 0x9eae },
	{ FALSE, 64, 0, 0x4678 },
	{ FALSE, 127, 0, 0x9399 },
	{ FALSE, 256, 0, 0xd147 },
	{ FALSE, 325, 0, 0x0358 },
};

#define	SUMTBL_MAX	((int)sizeof (sumtbl) / (int)sizeof (sumtbl[0]))

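/*
 * Worked example of the precomputed sums: the blob starts 0x1f, 0x8b,
 * 0x08, ...  Summed as little-endian 16-bit words with end-around carry,
 * len=1 gives 0x001f (the lone byte in the low-order position), len=2
 * gives the word 0x8b1f, and len=3 adds the odd byte 0x08 for
 * 0x8b1f + 0x0008 = 0x8b27 -- matching the first sumrp entries above.
 */
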
static void
dlil_verify_sum16(void)
{
	struct mbuf *m;
	uint8_t *buf;
	int n;

	/* Make sure test data plus extra room for alignment fits in cluster */
	_CASSERT((sizeof (sumdata) + (sizeof (uint64_t) * 2)) <= MCLBYTES);

	kprintf("DLIL: running SUM16 self-tests ... ");

	m = m_getcl(M_WAITOK, MT_DATA, M_PKTHDR);
	MH_ALIGN(m, sizeof (uint32_t));		/* 32-bit starting alignment */
	buf = mtod(m, uint8_t *);		/* base address */

	for (n = 0; n < SUMTBL_MAX; n++) {
		uint16_t len = sumtbl[n].len;
		int i;

		/* Verify for all possible alignments */
		for (i = 0; i < (int)sizeof (uint64_t); i++) {
			uint16_t sum, sumr;
			uint8_t *c;

			/* Copy over test data to mbuf */
			VERIFY(len <= sizeof (sumdata));
			c = buf + i;
			bcopy(sumdata, c, len);

			/* Zero-offset test (align by data pointer) */
			m->m_data = (caddr_t)c;
			m->m_len = len;
			sum = m_sum16(m, 0, len);

			if (!sumtbl[n].init) {
				sumr = in_cksum_mbuf_ref(m, len, 0, 0);
				sumtbl[n].sumr = sumr;
				sumtbl[n].init = TRUE;
			} else {
				sumr = sumtbl[n].sumr;
			}

			/* Something is horribly broken; stop now */
			if (sumr != sumtbl[n].sumrp) {
				panic_plain("\n%s: broken in_cksum_mbuf_ref() "
				    "for len=%d align=%d sum=0x%04x "
				    "[expected=0x%04x]\n", __func__,
				    len, i, sumr, sumtbl[n].sumrp);
				/* NOTREACHED */
			} else if (sum != sumr) {
				panic_plain("\n%s: broken m_sum16() for len=%d "
				    "align=%d sum=0x%04x [expected=0x%04x]\n",
				    __func__, len, i, sum, sumr);
				/* NOTREACHED */
			}

			/* Alignment test by offset (fixed data pointer) */
			m->m_data = (caddr_t)buf;
			m->m_len = i + len;
			sum = m_sum16(m, i, len);

			/* Something is horribly broken; stop now */
			if (sum != sumr) {
				panic_plain("\n%s: broken m_sum16() for len=%d "
				    "offset=%d sum=0x%04x [expected=0x%04x]\n",
				    __func__, len, i, sum, sumr);
				/* NOTREACHED */
			}

			/* Simple sum16 contiguous buffer test by aligment */
			sum = b_sum16(c, len);

			/* Something is horribly broken; stop now */
			if (sum != sumr) {
				panic_plain("\n%s: broken b_sum16() for len=%d "
				    "align=%d sum=0x%04x [expected=0x%04x]\n",
				    __func__, len, i, sum, sumr);
				/* NOTREACHED */
			}
		}
	}

	m_freem(m);

	kprintf("PASSED\n");
}
#endif /* DEBUG || DEVELOPMENT */

#define	CASE_STRINGIFY(x) case x: return #x
__private_extern__ const char *
dlil_kev_dl_code_str(u_int32_t event_code)
{
	switch (event_code) {
	CASE_STRINGIFY(KEV_DL_SIFFLAGS);
	CASE_STRINGIFY(KEV_DL_SIFMETRICS);
	CASE_STRINGIFY(KEV_DL_SIFMTU);
	CASE_STRINGIFY(KEV_DL_SIFPHYS);
	CASE_STRINGIFY(KEV_DL_SIFMEDIA);
	CASE_STRINGIFY(KEV_DL_SIFGENERIC);
	CASE_STRINGIFY(KEV_DL_ADDMULTI);
	CASE_STRINGIFY(KEV_DL_DELMULTI);
	CASE_STRINGIFY(KEV_DL_IF_ATTACHED);
	CASE_STRINGIFY(KEV_DL_IF_DETACHING);
	CASE_STRINGIFY(KEV_DL_IF_DETACHED);
	CASE_STRINGIFY(KEV_DL_LINK_OFF);
	CASE_STRINGIFY(KEV_DL_LINK_ON);
	CASE_STRINGIFY(KEV_DL_PROTO_ATTACHED);
	CASE_STRINGIFY(KEV_DL_PROTO_DETACHED);
	CASE_STRINGIFY(KEV_DL_LINK_ADDRESS_CHANGED);
	CASE_STRINGIFY(KEV_DL_WAKEFLAGS_CHANGED);
	CASE_STRINGIFY(KEV_DL_IF_IDLE_ROUTE_REFCNT);
	CASE_STRINGIFY(KEV_DL_IFCAP_CHANGED);
	CASE_STRINGIFY(KEV_DL_LINK_QUALITY_METRIC_CHANGED);
	CASE_STRINGIFY(KEV_DL_NODE_PRESENCE);
	CASE_STRINGIFY(KEV_DL_NODE_ABSENCE);
	CASE_STRINGIFY(KEV_DL_MASTER_ELECTED);
	CASE_STRINGIFY(KEV_DL_ISSUES);
	CASE_STRINGIFY(KEV_DL_IFDELEGATE_CHANGED);
	default:
		break;
	}
	return ("");
}
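
/*
 * For reference, each CASE_STRINGIFY() above expands to a plain switch
 * case; e.g. CASE_STRINGIFY(KEV_DL_LINK_ON) becomes:
 *
 *	case KEV_DL_LINK_ON: return "KEV_DL_LINK_ON";
 */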
/*
 * Mirror the arguments of ifnet_get_local_ports_extended()
 *  ifindex
 *  protocol
 *  flags
 */
static int
sysctl_get_ports_used SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp)
	int *name = (int *)arg1;
	int namelen = arg2;
	int error = 0;
	int idx;
	protocol_family_t protocol;
	u_int32_t flags;
	ifnet_t ifp = NULL;
	u_int8_t *bitfield = NULL;

	if (req->newptr != USER_ADDR_NULL) {
		error = EPERM;
		goto done;
	}
	if (namelen != 3) {
		error = ENOENT;
		goto done;
	}

	if (req->oldptr == USER_ADDR_NULL) {
		req->oldidx = bitstr_size(65536);
		goto done;
	}
	if (req->oldlen < bitstr_size(65536)) {
		error = ENOMEM;
		goto done;
	}

	idx = name[0];
	protocol = name[1];
	flags = name[2];

	ifnet_head_lock_shared();
	if (!IF_INDEX_IN_RANGE(idx)) {
		ifnet_head_done();
		error = ENOENT;
		goto done;
	}
	ifp = ifindex2ifnet[idx];
	ifnet_head_done();

	bitfield = _MALLOC(bitstr_size(65536), M_TEMP, M_WAITOK | M_ZERO);
	if (bitfield == NULL) {
		error = ENOMEM;
		goto done;
	}
	error = ifnet_get_local_ports_extended(ifp, protocol, flags, bitfield);
	if (error != 0) {
		printf("%s: ifnet_get_local_ports_extended() error %d\n",
		    __func__, error);
		goto done;
	}
	error = SYSCTL_OUT(req, bitfield, bitstr_size(65536));
done:
	if (bitfield != NULL)
		_FREE(bitfield, M_TEMP);

	return (error);
}
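
/*
 * Hedged userland sketch (illustration only, not kernel code).  The OID
 * under which this handler is registered is declared elsewhere in this
 * file; "net.link.generic.system...." below is a placeholder, not a
 * verified name.  A caller resolves the MIB, appends
 * { ifindex, protocol, flags }, then reads back the 65536-bit bitmap:
 *
 *	int mib[CTL_MAXNAME];
 *	size_t miblen = CTL_MAXNAME;
 *	uint8_t buf[bitstr_size(65536)];
 *	size_t len = sizeof (buf);
 *
 *	sysctlnametomib("net.link.generic.system....", mib, &miblen);
 *	mib[miblen++] = ifindex;
 *	mib[miblen++] = PF_INET;	// protocol family of interest
 *	mib[miblen++] = 0;		// flags
 *	if (sysctl(mib, (u_int)miblen, buf, &len, NULL, 0) == 0) {
 *		// bit N set => local port N in use on that interface
 *	}
 */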
static void
dlil_dt_tcall_fn(thread_call_param_t arg0, thread_call_param_t arg1)
{
#pragma unused(arg1)
	struct ifnet *ifp = arg0;

	if (ifnet_is_attached(ifp, 1)) {
		nstat_ifnet_threshold_reached(ifp->if_index);
		ifnet_decr_iorefcnt(ifp);
	}
}
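
/*
 * Note: a nonzero second argument makes ifnet_is_attached() take an I/O
 * reference on success, which is what the ifnet_decr_iorefcnt() above
 * releases; the interface therefore cannot detach while the nstat
 * threshold notification is being delivered.
 */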
void
ifnet_notify_data_threshold(struct ifnet *ifp)
{
	uint64_t bytes = (ifp->if_ibytes + ifp->if_obytes);
	uint64_t oldbytes = ifp->if_dt_bytes;

	ASSERT(ifp->if_dt_tcall != NULL);

	/*
	 * If we went over the threshold, notify NetworkStatistics.
	 * We rate-limit it based on the threshold interval value.
	 */
	if (threshold_notify && (bytes - oldbytes) > ifp->if_data_threshold &&
	    OSCompareAndSwap64(oldbytes, bytes, &ifp->if_dt_bytes) &&
	    !thread_call_isactive(ifp->if_dt_tcall)) {
		uint64_t tival = (threshold_interval * NSEC_PER_SEC);
		uint64_t now = mach_absolute_time(), deadline = now;
		uint64_t ival;

		if (tival != 0) {
			nanoseconds_to_absolutetime(tival, &ival);
			clock_deadline_for_periodic_event(ival, now,
			    &deadline);
			(void) thread_call_enter_delayed(ifp->if_dt_tcall,
			    deadline);
		} else {
			(void) thread_call_enter(ifp->if_dt_tcall);
		}
	}
}
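
/*
 * Design note: the OSCompareAndSwap64() above doubles as the rate
 * limiter.  Of the threads that concurrently observe the threshold
 * crossing, only the one that wins the CAS on if_dt_bytes goes on to
 * arm if_dt_tcall, so at most one notification is scheduled per
 * crossing; the thread_call_isactive() check then suppresses re-arming
 * while a notification is still pending.
 */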
#if (DEVELOPMENT || DEBUG)
/*
 * The sysctl variable name contains the input parameters of
 * ifnet_get_keepalive_offload_frames()
 *  ifp (interface index): name[0]
 *  frames_array_count: name[1]
 *  frame_data_offset: name[2]
 * The return length gives used_frames_count
 */
static int
sysctl_get_kao_frames SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp)
	int *name = (int *)arg1;
	u_int namelen = arg2;
	int idx;
	ifnet_t ifp = NULL;
	u_int32_t frames_array_count;
	size_t frame_data_offset;
	u_int32_t used_frames_count;
	struct ifnet_keepalive_offload_frame *frames_array = NULL;
	int error = 0;
	u_int32_t i;

	/*
	 * Only root may look at other processes' TCP frames
	 */
	error = proc_suser(current_proc());
	if (error != 0)
		goto done;
	/*
	 * Validate the input parameters
	 */
	if (req->newptr != USER_ADDR_NULL) {
		error = EPERM;
		goto done;
	}
	if (namelen != 3) {
		error = EINVAL;
		goto done;
	}
	if (req->oldptr == USER_ADDR_NULL) {
		error = EINVAL;
		goto done;
	}
	if (req->oldlen == 0) {
		error = EINVAL;
		goto done;
	}
	idx = name[0];
	frames_array_count = name[1];
	frame_data_offset = name[2];

	/* Make sure the passed buffer is large enough */
	if (frames_array_count * sizeof(struct ifnet_keepalive_offload_frame) >
	    req->oldlen) {
		error = ENOMEM;
		goto done;
	}

	ifnet_head_lock_shared();
	if (!IF_INDEX_IN_RANGE(idx)) {
		ifnet_head_done();
		error = ENOENT;
		goto done;
	}
	ifp = ifindex2ifnet[idx];
	ifnet_head_done();

	frames_array = _MALLOC(frames_array_count *
	    sizeof(struct ifnet_keepalive_offload_frame), M_TEMP, M_WAITOK);
	if (frames_array == NULL) {
		error = ENOMEM;
		goto done;
	}

	error = ifnet_get_keepalive_offload_frames(ifp, frames_array,
	    frames_array_count, frame_data_offset, &used_frames_count);
	if (error != 0) {
		printf("%s: ifnet_get_keepalive_offload_frames error %d\n",
		    __func__, error);
		goto done;
	}

	for (i = 0; i < used_frames_count; i++) {
		error = SYSCTL_OUT(req, frames_array + i,
		    sizeof(struct ifnet_keepalive_offload_frame));
		if (error != 0)
			goto done;
	}
done:
	if (frames_array != NULL)
		_FREE(frames_array, M_TEMP);

	return (error);
}
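
/*
 * Worked example of the validation above (hedged): with name[] =
 * { if_index, 8, 0 }, at most 8 frames are requested with no frame-data
 * offset, so the caller's oldlen must be at least
 * 8 * sizeof (struct ifnet_keepalive_offload_frame); dividing the
 * returned length by that same structure size recovers
 * used_frames_count.
 */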
#endif /* DEVELOPMENT || DEBUG */
void
ifnet_update_stats_per_flow(struct ifnet_stats_per_flow *ifs,
    struct ifnet *ifp)
{
	tcp_update_stats_per_flow(ifs, ifp);
}
static void
dlil_mit_tcall_fn(thread_call_param_t arg0, thread_call_param_t arg1)
{
#pragma unused(arg1)
	struct ifnet *ifp = (struct ifnet *)arg0;
	struct dlil_threading_info *inp = ifp->if_inp;

	ifnet_lock_shared(ifp);
	if (!IF_FULLY_ATTACHED(ifp) || inp == NULL) {
		ifnet_lock_done(ifp);
		return;
	}

	lck_mtx_lock_spin(&inp->input_lck);
	inp->input_waiting |= DLIL_INPUT_WAITING;
	if (!(inp->input_waiting & DLIL_INPUT_RUNNING) ||
	    !qempty(&inp->rcvq_pkts)) {
		inp->wtot++;
		wakeup_one((caddr_t)&inp->input_waiting);
	}
	lck_mtx_unlock(&inp->input_lck);
	ifnet_lock_done(ifp);
}
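
/*
 * Note on the wakeup above: the DLIL input thread sleeps on
 * &inp->input_waiting.  Marking DLIL_INPUT_WAITING and issuing
 * wakeup_one() when the thread is not already running, or when packets
 * remain queued in rcvq_pkts, is what flushes input that was deferred
 * by receive mitigation once this thread call fires.
 */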