/*
 * Copyright (c) 1999-2016 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
 * support for mandatory and extensible security protections.  This notice
 * is included in support of clause 2.2 (b) of the Apple Public License,
 * Version 2.0.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/socket.h>
#include <sys/domain.h>
#include <sys/random.h>
#include <sys/socketvar.h>
#include <net/if_dl.h>
#include <net/route.h>
#include <net/if_var.h>
#include <net/if_arp.h>
#include <net/iptap.h>
#include <net/pktap.h>
#include <sys/kern_event.h>
#include <sys/kdebug.h>
#include <sys/mcache.h>
#include <sys/syslog.h>
#include <sys/protosw.h>

#include <kern/assert.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/sched_prim.h>
#include <kern/locks.h>
#include <kern/zalloc.h>

#include <net/kpi_protocol.h>
#include <net/if_types.h>
#include <net/if_llreach.h>
#include <net/kpi_interfacefilter.h>
#include <net/classq/classq.h>
#include <net/classq/classq_sfb.h>
#include <net/flowhash.h>
#include <net/ntstat.h>

#include <netinet/in_var.h>
#include <netinet/igmp_var.h>
#include <netinet/ip_var.h>
#include <netinet/tcp.h>
#include <netinet/tcp_var.h>
#include <netinet/udp.h>
#include <netinet/udp_var.h>
#include <netinet/if_ether.h>
#include <netinet/in_pcb.h>
#include <netinet/in_tclass.h>

#include <netinet6/in6_var.h>
#include <netinet6/nd6.h>
#include <netinet6/mld6_var.h>
#include <netinet6/scope6_var.h>

#include <libkern/OSAtomic.h>
#include <libkern/tree.h>

#include <dev/random/randomdev.h>
#include <machine/machine_routines.h>

#include <mach/thread_act.h>
#include <mach/sdt.h>

#include <sys/kauth.h>
#include <security/mac_framework.h>
#include <net/ethernet.h>
#include <net/firewire.h>

#include <net/pfvar.h>

#include <net/altq/altq.h>

#include <net/pktsched/pktsched.h>

#include <net/necp.h>
#define	DBG_LAYER_BEG		DLILDBG_CODE(DBG_DLIL_STATIC, 0)
#define	DBG_LAYER_END		DLILDBG_CODE(DBG_DLIL_STATIC, 2)
#define	DBG_FNC_DLIL_INPUT	DLILDBG_CODE(DBG_DLIL_STATIC, (1 << 8))
#define	DBG_FNC_DLIL_OUTPUT	DLILDBG_CODE(DBG_DLIL_STATIC, (2 << 8))
#define	DBG_FNC_DLIL_IFOUT	DLILDBG_CODE(DBG_DLIL_STATIC, (3 << 8))

#define	MAX_FRAME_TYPE_SIZE	4	/* LONGWORDS */
#define	MAX_LINKADDR		4	/* LONGWORDS */
#define	M_NKE			M_IFADDR

#if 1
#define	DLIL_PRINTF	printf
#else
#define	DLIL_PRINTF	kprintf
#endif

#define	IF_DATA_REQUIRE_ALIGNED_64(f)	\
	_CASSERT(!(offsetof(struct if_data_internal, f) % sizeof (u_int64_t)))

#define	IFNET_IF_DATA_REQUIRE_ALIGNED_64(f)	\
	_CASSERT(!(offsetof(struct ifnet, if_data.f) % sizeof (u_int64_t)))
/*
 * List of if_proto structures in if_proto_hash[] is protected by
 * the ifnet lock.  The rest of the fields are initialized at protocol
 * attach time and never change, thus no lock required as long as
 * a reference to it is valid, via if_proto_ref().
 */
struct if_proto {
	SLIST_ENTRY(if_proto)	next_hash;
	u_int32_t		refcount;
	u_int32_t		detached;
	struct ifnet		*ifp;
	protocol_family_t	protocol_family;
	int			proto_kpi;
	union {
		struct {
			proto_media_input	input;
			proto_media_preout	pre_output;
			proto_media_event	event;
			proto_media_ioctl	ioctl;
			proto_media_detached	detached;
			proto_media_resolve_multi resolve_multi;
			proto_media_send_arp	send_arp;
		} v1;
		struct {
			proto_media_input_v2	input;
			proto_media_preout	pre_output;
			proto_media_event	event;
			proto_media_ioctl	ioctl;
			proto_media_detached	detached;
			proto_media_resolve_multi resolve_multi;
			proto_media_send_arp	send_arp;
		} v2;
	} kpi;
};

SLIST_HEAD(proto_hash_entry, if_proto);
#define	DLIL_SDLMAXLEN	64
#define	DLIL_SDLDATALEN \
	(DLIL_SDLMAXLEN - offsetof(struct sockaddr_dl, sdl_data[0]))

struct dlil_ifnet {
	struct ifnet	dl_if;			/* public ifnet */
	/*
	 * DLIL private fields, protected by dl_if_lock
	 */
	decl_lck_mtx_data(, dl_if_lock);
	TAILQ_ENTRY(dlil_ifnet) dl_if_link;	/* dlil_ifnet link */
	u_int32_t dl_if_flags;			/* flags (below) */
	u_int32_t dl_if_refcnt;			/* refcnt */
	void (*dl_if_trace)(struct dlil_ifnet *, int); /* ref trace callback */
	void	*dl_if_uniqueid;		/* unique interface id */
	size_t	dl_if_uniqueid_len;		/* length of the unique id */
	char	dl_if_namestorage[IFNAMSIZ];	/* interface name storage */
	char	dl_if_xnamestorage[IFXNAMSIZ];	/* external name storage */
	struct {
		struct ifaddr	ifa;		/* lladdr ifa */
		u_int8_t	asdl[DLIL_SDLMAXLEN]; /* addr storage */
		u_int8_t	msdl[DLIL_SDLMAXLEN]; /* mask storage */
	} dl_if_lladdr;
	u_int8_t dl_if_descstorage[IF_DESCSIZE]; /* desc storage */
	struct dlil_threading_info dl_if_inpstorage; /* input thread storage */
	ctrace_t	dl_if_attach;		/* attach PC stacktrace */
	ctrace_t	dl_if_detach;		/* detach PC stacktrace */
};
/* Values for dl_if_flags (private to DLIL) */
#define	DLIF_INUSE	0x1	/* DLIL ifnet recycler, ifnet in use */
#define	DLIF_REUSE	0x2	/* DLIL ifnet recycles, ifnet is not new */
#define	DLIF_DEBUG	0x4	/* has debugging info */

#define	IF_REF_TRACE_HIST_SIZE	8	/* size of ref trace history */

__private_extern__ unsigned int if_ref_trace_hist_size = IF_REF_TRACE_HIST_SIZE;

struct dlil_ifnet_dbg {
	struct dlil_ifnet	dldbg_dlif;		/* dlil_ifnet */
	u_int16_t		dldbg_if_refhold_cnt;	/* # ifnet references */
	u_int16_t		dldbg_if_refrele_cnt;	/* # ifnet releases */
	/*
	 * Circular lists of ifnet_{reference,release} callers.
	 */
	ctrace_t		dldbg_if_refhold[IF_REF_TRACE_HIST_SIZE];
	ctrace_t		dldbg_if_refrele[IF_REF_TRACE_HIST_SIZE];
};

#define	DLIL_TO_IFP(s)	(&s->dl_if)
#define	IFP_TO_DLIL(s)	((struct dlil_ifnet *)s)
struct ifnet_filter {
	TAILQ_ENTRY(ifnet_filter)	filt_next;
	u_int32_t			filt_skip;
	u_int32_t			filt_flags;
	ifnet_t				filt_ifp;
	const char			*filt_name;
	void				*filt_cookie;
	protocol_family_t		filt_protocol;
	iff_input_func			filt_input;
	iff_output_func			filt_output;
	iff_event_func			filt_event;
	iff_ioctl_func			filt_ioctl;
	iff_detached_func		filt_detached;
};

struct proto_input_entry;
static TAILQ_HEAD(, dlil_ifnet) dlil_ifnet_head;
static lck_grp_t *dlil_lock_group;
lck_grp_t *ifnet_lock_group;
static lck_grp_t *ifnet_head_lock_group;
static lck_grp_t *ifnet_snd_lock_group;
static lck_grp_t *ifnet_rcv_lock_group;
lck_attr_t *ifnet_lock_attr;
decl_lck_rw_data(static, ifnet_head_lock);
decl_lck_mtx_data(static, dlil_ifnet_lock);
u_int32_t dlil_filter_disable_tso_count = 0;
#if DEBUG
static unsigned int ifnet_debug = 1;	/* debugging (enabled) */
#else
static unsigned int ifnet_debug;	/* debugging (disabled) */
#endif /* !DEBUG */
static unsigned int dlif_size;		/* size of dlil_ifnet to allocate */
static unsigned int dlif_bufsize;	/* size of dlif_size + headroom */
static struct zone *dlif_zone;		/* zone for dlil_ifnet */

#define	DLIF_ZONE_MAX		64		/* maximum elements in zone */
#define	DLIF_ZONE_NAME		"ifnet"		/* zone name */

static unsigned int dlif_filt_size;	/* size of ifnet_filter */
static struct zone *dlif_filt_zone;	/* zone for ifnet_filter */

#define	DLIF_FILT_ZONE_MAX	8		/* maximum elements in zone */
#define	DLIF_FILT_ZONE_NAME	"ifnet_filter"	/* zone name */

static unsigned int dlif_phash_size;	/* size of ifnet proto hash table */
static struct zone *dlif_phash_zone;	/* zone for ifnet proto hash table */

#define	DLIF_PHASH_ZONE_MAX	DLIF_ZONE_MAX	/* maximum elements in zone */
#define	DLIF_PHASH_ZONE_NAME	"ifnet_proto_hash" /* zone name */

static unsigned int dlif_proto_size;	/* size of if_proto */
static struct zone *dlif_proto_zone;	/* zone for if_proto */

#define	DLIF_PROTO_ZONE_MAX	(DLIF_ZONE_MAX*2)	/* maximum elements in zone */
#define	DLIF_PROTO_ZONE_NAME	"ifnet_proto"	/* zone name */

static unsigned int dlif_tcpstat_size;	  /* size of tcpstat_local to allocate */
static unsigned int dlif_tcpstat_bufsize; /* size of dlif_tcpstat_size + headroom */
static struct zone *dlif_tcpstat_zone;	  /* zone for tcpstat_local */

#define	DLIF_TCPSTAT_ZONE_MAX	1		/* maximum elements in zone */
#define	DLIF_TCPSTAT_ZONE_NAME	"ifnet_tcpstat"	/* zone name */

static unsigned int dlif_udpstat_size;	  /* size of udpstat_local to allocate */
static unsigned int dlif_udpstat_bufsize; /* size of dlif_udpstat_size + headroom */
static struct zone *dlif_udpstat_zone;	  /* zone for udpstat_local */

#define	DLIF_UDPSTAT_ZONE_MAX	1		/* maximum elements in zone */
#define	DLIF_UDPSTAT_ZONE_NAME	"ifnet_udpstat"	/* zone name */

static u_int32_t net_rtref;
static struct dlil_main_threading_info dlil_main_input_thread_info;
__private_extern__ struct dlil_threading_info *dlil_main_input_thread =
    (struct dlil_threading_info *)&dlil_main_input_thread_info;
static int dlil_event_internal(struct ifnet *ifp, struct kev_msg *msg,
    bool update_generation);
static int dlil_detach_filter_internal(interface_filter_t filter,
    int detached);
static void dlil_if_trace(struct dlil_ifnet *, int);
static void if_proto_ref(struct if_proto *);
static void if_proto_free(struct if_proto *);
static struct if_proto *find_attached_proto(struct ifnet *, u_int32_t);
static int dlil_ifp_proto_count(struct ifnet *);
static void if_flt_monitor_busy(struct ifnet *);
static void if_flt_monitor_unbusy(struct ifnet *);
static void if_flt_monitor_enter(struct ifnet *);
static void if_flt_monitor_leave(struct ifnet *);
static int dlil_interface_filters_input(struct ifnet *, struct mbuf **,
    char **, protocol_family_t);
static int dlil_interface_filters_output(struct ifnet *, struct mbuf **,
    protocol_family_t);
static struct ifaddr *dlil_alloc_lladdr(struct ifnet *,
    const struct sockaddr_dl *);
static int ifnet_lookup(struct ifnet *);
static void if_purgeaddrs(struct ifnet *);

static errno_t ifproto_media_input_v1(struct ifnet *, protocol_family_t,
    struct mbuf *, char *);
static errno_t ifproto_media_input_v2(struct ifnet *, protocol_family_t,
    struct mbuf *);
static errno_t ifproto_media_preout(struct ifnet *, protocol_family_t,
    mbuf_t *, const struct sockaddr *, void *, char *, char *);
static void ifproto_media_event(struct ifnet *, protocol_family_t,
    const struct kev_msg *);
static errno_t ifproto_media_ioctl(struct ifnet *, protocol_family_t,
    unsigned long, void *);
static errno_t ifproto_media_resolve_multi(ifnet_t, const struct sockaddr *,
    struct sockaddr_dl *, size_t);
static errno_t ifproto_media_send_arp(struct ifnet *, u_short,
    const struct sockaddr_dl *, const struct sockaddr *,
    const struct sockaddr_dl *, const struct sockaddr *);
static errno_t ifp_if_output(struct ifnet *, struct mbuf *);
static void ifp_if_start(struct ifnet *);
static errno_t ifp_if_input(struct ifnet *ifp, struct mbuf *m_head,
    struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
    boolean_t poll, struct thread *tp);
static void ifp_if_input_poll(struct ifnet *, u_int32_t, u_int32_t,
    struct mbuf **, struct mbuf **, u_int32_t *, u_int32_t *);
static errno_t ifp_if_ctl(struct ifnet *, ifnet_ctl_cmd_t, u_int32_t, void *);
static errno_t ifp_if_demux(struct ifnet *, struct mbuf *, char *,
    protocol_family_t *);
static errno_t ifp_if_add_proto(struct ifnet *, protocol_family_t,
    const struct ifnet_demux_desc *, u_int32_t);
static errno_t ifp_if_del_proto(struct ifnet *, protocol_family_t);
static errno_t ifp_if_check_multi(struct ifnet *, const struct sockaddr *);
static errno_t ifp_if_framer(struct ifnet *, struct mbuf **,
    const struct sockaddr *, const char *, const char *);
static errno_t ifp_if_framer_extended(struct ifnet *, struct mbuf **,
    const struct sockaddr *, const char *, const char *,
    u_int32_t *, u_int32_t *);
static errno_t ifp_if_set_bpf_tap(struct ifnet *, bpf_tap_mode,
    bpf_packet_func);
static void ifp_if_free(struct ifnet *);
static void ifp_if_event(struct ifnet *, const struct kev_msg *);
static __inline void ifp_inc_traffic_class_in(struct ifnet *, struct mbuf *);
static __inline void ifp_inc_traffic_class_out(struct ifnet *, struct mbuf *);

static void dlil_main_input_thread_func(void *, wait_result_t);
static void dlil_input_thread_func(void *, wait_result_t);
static void dlil_rxpoll_input_thread_func(void *, wait_result_t);
static int dlil_create_input_thread(ifnet_t, struct dlil_threading_info *);
static void dlil_terminate_input_thread(struct dlil_threading_info *);
static void dlil_input_stats_add(const struct ifnet_stat_increment_param *,
    struct dlil_threading_info *, boolean_t);
static void dlil_input_stats_sync(struct ifnet *, struct dlil_threading_info *);
static void dlil_input_packet_list_common(struct ifnet *, struct mbuf *,
    u_int32_t, ifnet_model_t, boolean_t);
static errno_t ifnet_input_common(struct ifnet *, struct mbuf *, struct mbuf *,
    const struct ifnet_stat_increment_param *, boolean_t, boolean_t);
static void dlil_verify_sum16(void);
static void dlil_output_cksum_dbg(struct ifnet *, struct mbuf *, uint32_t,
    protocol_family_t);
static void dlil_input_cksum_dbg(struct ifnet *, struct mbuf *, char *,
    protocol_family_t);

static void ifnet_detacher_thread_func(void *, wait_result_t);
static int ifnet_detacher_thread_cont(int);
static void ifnet_detach_final(struct ifnet *);
static void ifnet_detaching_enqueue(struct ifnet *);
static struct ifnet *ifnet_detaching_dequeue(void);

static void ifnet_start_thread_fn(void *, wait_result_t);
static void ifnet_poll_thread_fn(void *, wait_result_t);
static void ifnet_poll(struct ifnet *);
static void ifp_src_route_copyout(struct ifnet *, struct route *);
static void ifp_src_route_copyin(struct ifnet *, struct route *);
static void ifp_src_route6_copyout(struct ifnet *, struct route_in6 *);
static void ifp_src_route6_copyin(struct ifnet *, struct route_in6 *);
static int sysctl_rxpoll SYSCTL_HANDLER_ARGS;
static int sysctl_rxpoll_mode_holdtime SYSCTL_HANDLER_ARGS;
static int sysctl_rxpoll_sample_holdtime SYSCTL_HANDLER_ARGS;
static int sysctl_rxpoll_interval_time SYSCTL_HANDLER_ARGS;
static int sysctl_rxpoll_wlowat SYSCTL_HANDLER_ARGS;
static int sysctl_rxpoll_whiwat SYSCTL_HANDLER_ARGS;
static int sysctl_sndq_maxlen SYSCTL_HANDLER_ARGS;
static int sysctl_rcvq_maxlen SYSCTL_HANDLER_ARGS;
static int sysctl_hwcksum_dbg_mode SYSCTL_HANDLER_ARGS;
static int sysctl_hwcksum_dbg_partial_rxoff_forced SYSCTL_HANDLER_ARGS;
static int sysctl_hwcksum_dbg_partial_rxoff_adj SYSCTL_HANDLER_ARGS;
static int sysctl_get_ports_used SYSCTL_HANDLER_ARGS;

struct chain_len_stats tx_chain_len_stats;
static int sysctl_tx_chain_len_stats SYSCTL_HANDLER_ARGS;
/* The following are protected by dlil_ifnet_lock */
static TAILQ_HEAD(, ifnet) ifnet_detaching_head;
static u_int32_t ifnet_detaching_cnt;
static void *ifnet_delayed_run;	/* wait channel for detaching thread */

decl_lck_mtx_data(static, ifnet_fc_lock);

static uint32_t ifnet_flowhash_seed;
struct ifnet_flowhash_key {
	char		ifk_name[IFNAMSIZ];
	uint32_t	ifk_unit;
	uint32_t	ifk_flags;
	uint32_t	ifk_eflags;
	uint32_t	ifk_capabilities;
	uint32_t	ifk_capenable;
	uint32_t	ifk_output_sched_model;
	uint32_t	ifk_rand1;
	uint32_t	ifk_rand2;
};
/* Flow control entry per interface */
struct ifnet_fc_entry {
	RB_ENTRY(ifnet_fc_entry) ifce_entry;
	u_int32_t	ifce_flowhash;
	struct ifnet	*ifce_ifp;
};

static uint32_t ifnet_calc_flowhash(struct ifnet *);
static int ifce_cmp(const struct ifnet_fc_entry *,
    const struct ifnet_fc_entry *);
static int ifnet_fc_add(struct ifnet *);
static struct ifnet_fc_entry *ifnet_fc_get(u_int32_t);
static void ifnet_fc_entry_free(struct ifnet_fc_entry *);
/* protected by ifnet_fc_lock */
RB_HEAD(ifnet_fc_tree, ifnet_fc_entry) ifnet_fc_tree;
RB_PROTOTYPE(ifnet_fc_tree, ifnet_fc_entry, ifce_entry, ifce_cmp);
RB_GENERATE(ifnet_fc_tree, ifnet_fc_entry, ifce_entry, ifce_cmp);

static unsigned int ifnet_fc_zone_size;		/* sizeof ifnet_fc_entry */
static struct zone *ifnet_fc_zone;		/* ifnet_fc_entry zone */

#define	IFNET_FC_ZONE_NAME	"ifnet_fc_zone"
#define	IFNET_FC_ZONE_MAX	32
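
/*
 * Illustrative sketch (hypothetical helper, not part of the original
 * file): entries in the red-black tree above are looked up with the
 * standard <libkern/tree.h> RB_FIND and a stack-allocated key;
 * ifnet_fc_get() implements the real lookup under ifnet_fc_lock.
 */
#if 0
static struct ifnet_fc_entry *
ifnet_fc_find_example(u_int32_t flowhash)
{
	struct ifnet_fc_entry keyfc;

	bzero(&keyfc, sizeof (keyfc));
	keyfc.ifce_flowhash = flowhash;
	/* nodes are ordered by ifce_cmp(), registered via RB_GENERATE */
	return (RB_FIND(ifnet_fc_tree, &ifnet_fc_tree, &keyfc));
}
#endif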
extern void bpfdetach(struct ifnet *);
extern void proto_input_run(void);

extern uint32_t udp_count_opportunistic(unsigned int ifindex,
    u_int32_t flags);
extern uint32_t tcp_count_opportunistic(unsigned int ifindex,
    u_int32_t flags);

__private_extern__ void link_rtrequest(int, struct rtentry *,
    struct sockaddr *);

#if CONFIG_MACF
int dlil_lladdr_ckreq = 0;
#endif

#if DEBUG
int dlil_verbose = 1;
#else
int dlil_verbose = 0;
#endif /* DEBUG */
#if IFNET_INPUT_SANITY_CHK
/* sanity checking of input packet lists received */
static u_int32_t dlil_input_sanity_check = 0;
#endif /* IFNET_INPUT_SANITY_CHK */
/* rate limit debug messages */
struct timespec dlil_dbgrate = { 1, 0 };

SYSCTL_DECL(_net_link_generic_system);
#if CONFIG_MACF
SYSCTL_INT(_net_link_generic_system, OID_AUTO, dlil_lladdr_ckreq,
    CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_lladdr_ckreq, 0,
    "Require MACF system info check to expose link-layer address");
#endif

SYSCTL_INT(_net_link_generic_system, OID_AUTO, dlil_verbose,
    CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_verbose, 0, "Log DLIL error messages");
#define	IF_SNDQ_MINLEN	32
u_int32_t if_sndq_maxlen = IFQ_MAXLEN;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, sndq_maxlen,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_sndq_maxlen, IFQ_MAXLEN,
    sysctl_sndq_maxlen, "I", "Default transmit queue max length");

#define	IF_RCVQ_MINLEN	32
#define	IF_RCVQ_MAXLEN	256
u_int32_t if_rcvq_maxlen = IF_RCVQ_MAXLEN;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rcvq_maxlen,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rcvq_maxlen, IFQ_MAXLEN,
    sysctl_rcvq_maxlen, "I", "Default receive queue max length");
#define	IF_RXPOLL_DECAY	2	/* ilog2 of EWMA decay rate (4) */
static u_int32_t if_rxpoll_decay = IF_RXPOLL_DECAY;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_decay,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_decay, IF_RXPOLL_DECAY,
    "ilog2 of EWMA decay rate of avg inbound packets");
#define	IF_RXPOLL_MODE_HOLDTIME_MIN	(10ULL * 1000 * 1000)	/* 10 ms */
#define	IF_RXPOLL_MODE_HOLDTIME		(1000ULL * 1000 * 1000)	/* 1 sec */
static u_int64_t if_rxpoll_mode_holdtime = IF_RXPOLL_MODE_HOLDTIME;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_freeze_time,
    CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_mode_holdtime,
    IF_RXPOLL_MODE_HOLDTIME, sysctl_rxpoll_mode_holdtime,
    "Q", "input poll mode freeze time");

#define	IF_RXPOLL_SAMPLETIME_MIN	(1ULL * 1000 * 1000)	/* 1 ms */
#define	IF_RXPOLL_SAMPLETIME		(10ULL * 1000 * 1000)	/* 10 ms */
static u_int64_t if_rxpoll_sample_holdtime = IF_RXPOLL_SAMPLETIME;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_sample_time,
    CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_sample_holdtime,
    IF_RXPOLL_SAMPLETIME, sysctl_rxpoll_sample_holdtime,
    "Q", "input poll sampling time");

#define	IF_RXPOLL_INTERVALTIME_MIN	(1ULL * 1000)		/* 1 us */
#define	IF_RXPOLL_INTERVALTIME		(1ULL * 1000 * 1000)	/* 1 ms */
static u_int64_t if_rxpoll_interval_time = IF_RXPOLL_INTERVALTIME;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_interval_time,
    CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_interval_time,
    IF_RXPOLL_INTERVALTIME, sysctl_rxpoll_interval_time,
    "Q", "input poll interval (time)");

#define	IF_RXPOLL_INTERVAL_PKTS	0	/* 0 (disabled) */
static u_int32_t if_rxpoll_interval_pkts = IF_RXPOLL_INTERVAL_PKTS;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_interval_pkts,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_interval_pkts,
    IF_RXPOLL_INTERVAL_PKTS, "input poll interval (packets)");
#define	IF_RXPOLL_WLOWAT	10
static u_int32_t if_rxpoll_wlowat = IF_RXPOLL_WLOWAT;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_wakeups_lowat,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_wlowat,
    IF_RXPOLL_WLOWAT, sysctl_rxpoll_wlowat,
    "I", "input poll wakeup low watermark");

#define	IF_RXPOLL_WHIWAT	100
static u_int32_t if_rxpoll_whiwat = IF_RXPOLL_WHIWAT;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_wakeups_hiwat,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_whiwat,
    IF_RXPOLL_WHIWAT, sysctl_rxpoll_whiwat,
    "I", "input poll wakeup high watermark");
static u_int32_t if_rxpoll_max = 0;	/* 0 (automatic) */
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_max,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_max, 0,
    "max packets per poll call");

static u_int32_t if_rxpoll = 1;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll, 0,
    sysctl_rxpoll, "I", "enable opportunistic input polling");
u_int32_t if_bw_smoothing_val = 3;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, if_bw_smoothing_val,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_bw_smoothing_val, 0, "");

u_int32_t if_bw_measure_size = 10;
SYSCTL_INT(_net_link_generic_system, OID_AUTO, if_bw_measure_size,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_bw_measure_size, 0, "");
static u_int32_t cur_dlil_input_threads = 0;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, dlil_input_threads,
    CTLFLAG_RD | CTLFLAG_LOCKED, &cur_dlil_input_threads, 0,
    "Current number of DLIL input threads");

#if IFNET_INPUT_SANITY_CHK
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, dlil_input_sanity_check,
    CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_input_sanity_check, 0,
    "Turn on sanity checking in DLIL input");
#endif /* IFNET_INPUT_SANITY_CHK */
static u_int32_t if_flowadv = 1;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, flow_advisory,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_flowadv, 1,
    "enable flow-advisory mechanism");

static u_int32_t if_delaybased_queue = 1;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, delaybased_queue,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_delaybased_queue, 1,
    "enable delay based dynamic queue sizing");
static uint64_t hwcksum_in_invalidated = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_in_invalidated, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_in_invalidated, "inbound packets with invalidated hardware cksum");

uint32_t hwcksum_dbg = 0;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_dbg,
    CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg, 0,
    "enable hardware cksum debugging");

u_int32_t ifnet_start_delayed = 0;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, start_delayed,
    CTLFLAG_RW | CTLFLAG_LOCKED, &ifnet_start_delayed, 0,
    "number of times start was delayed");

u_int32_t ifnet_delay_start_disabled = 0;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, start_delay_disabled,
    CTLFLAG_RW | CTLFLAG_LOCKED, &ifnet_delay_start_disabled, 0,
    "number of times delayed start was disabled");

#define	HWCKSUM_DBG_PARTIAL_FORCED	0x1	/* forced partial checksum */
#define	HWCKSUM_DBG_PARTIAL_RXOFF_ADJ	0x2	/* adjust start offset */
#define	HWCKSUM_DBG_FINALIZE_FORCED	0x10	/* forced finalize */
#define	HWCKSUM_DBG_MASK \
	(HWCKSUM_DBG_PARTIAL_FORCED | HWCKSUM_DBG_PARTIAL_RXOFF_ADJ |	\
	HWCKSUM_DBG_FINALIZE_FORCED)

static uint32_t hwcksum_dbg_mode = 0;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, hwcksum_dbg_mode,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg_mode,
    0, sysctl_hwcksum_dbg_mode, "I", "hardware cksum debugging mode");
static uint64_t hwcksum_dbg_partial_forced = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_partial_forced, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_partial_forced, "packets forced using partial cksum");

static uint64_t hwcksum_dbg_partial_forced_bytes = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_partial_forced_bytes, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_partial_forced_bytes, "bytes forced using partial cksum");

static uint32_t hwcksum_dbg_partial_rxoff_forced = 0;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_partial_rxoff_forced, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    &hwcksum_dbg_partial_rxoff_forced, 0,
    sysctl_hwcksum_dbg_partial_rxoff_forced, "I",
    "forced partial cksum rx offset");

static uint32_t hwcksum_dbg_partial_rxoff_adj = 0;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, hwcksum_dbg_partial_rxoff_adj,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg_partial_rxoff_adj,
    0, sysctl_hwcksum_dbg_partial_rxoff_adj, "I",
    "adjusted partial cksum rx offset");
static uint64_t hwcksum_dbg_verified = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_verified, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_verified, "packets verified for having good checksum");

static uint64_t hwcksum_dbg_bad_cksum = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_bad_cksum, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_bad_cksum, "packets with bad hardware calculated checksum");

static uint64_t hwcksum_dbg_bad_rxoff = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_bad_rxoff, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_bad_rxoff, "packets with invalid rxoff");

static uint64_t hwcksum_dbg_adjusted = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_adjusted, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_adjusted, "packets with rxoff adjusted");

static uint64_t hwcksum_dbg_finalized_hdr = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_finalized_hdr, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_finalized_hdr, "finalized headers");

static uint64_t hwcksum_dbg_finalized_data = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_finalized_data, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_finalized_data, "finalized payloads");
uint32_t hwcksum_tx = 1;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_tx,
    CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_tx, 0,
    "enable transmit hardware checksum offload");

uint32_t hwcksum_rx = 1;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_rx,
    CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_rx, 0,
    "enable receive hardware checksum offload");

SYSCTL_PROC(_net_link_generic_system, OID_AUTO, tx_chain_len_stats,
    CTLFLAG_RD | CTLFLAG_LOCKED, 0, 9,
    sysctl_tx_chain_len_stats, "S", "");

uint32_t tx_chain_len_count = 0;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, tx_chain_len_count,
    CTLFLAG_RW | CTLFLAG_LOCKED, &tx_chain_len_count, 0, "");

SYSCTL_NODE(_net_link_generic_system, OID_AUTO, get_ports_used,
    CTLFLAG_RD | CTLFLAG_LOCKED, sysctl_get_ports_used, "");
#if (DEVELOPMENT || DEBUG)
static int sysctl_get_kao_frames SYSCTL_HANDLER_ARGS;
SYSCTL_NODE(_net_link_generic_system, OID_AUTO, get_kao_frames,
    CTLFLAG_RD | CTLFLAG_LOCKED, sysctl_get_kao_frames, "");
#endif /* DEVELOPMENT || DEBUG */
unsigned int net_rxpoll = 1;
unsigned int net_affinity = 1;
static kern_return_t dlil_affinity_set(struct thread *, u_int32_t);

extern u_int32_t inject_buckets;

static lck_grp_attr_t *dlil_grp_attributes = NULL;
static lck_attr_t *dlil_lck_attributes = NULL;
#define	DLIL_INPUT_CHECK(m, ifp) {					\
	struct ifnet *_rcvif = mbuf_pkthdr_rcvif(m);			\
	if (_rcvif == NULL || (ifp != lo_ifp && _rcvif != ifp) ||	\
	    !(mbuf_flags(m) & MBUF_PKTHDR)) {				\
		panic_plain("%s: invalid mbuf %p\n", __func__, m);	\
		/* NOTREACHED */					\
	}								\
}

#define	DLIL_EWMA(old, new, decay) do {					\
	u_int32_t _avg;							\
	if ((_avg = (old)) > 0)						\
		_avg = (((_avg << (decay)) - _avg) + (new)) >> (decay);	\
	else								\
		_avg = (new);						\
	(old) = _avg;							\
} while (0)
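
/*
 * Worked example (illustrative): with decay = 2 the macro computes
 * avg = (4*avg - avg + new) / 4, i.e. avg = 0.75*avg + 0.25*new.
 * Starting from old = 40 and new = 80: ((40 << 2) - 40 + 80) >> 2 = 50,
 * so each sample moves the average a quarter of the way toward it.
 */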
#define	MBPS	(1ULL * 1000 * 1000)
#define	GBPS	(MBPS * 1000)

struct rxpoll_time_tbl {
	u_int64_t	speed;		/* downlink speed */
	u_int32_t	plowat;		/* packets low watermark */
	u_int32_t	phiwat;		/* packets high watermark */
	u_int32_t	blowat;		/* bytes low watermark */
	u_int32_t	bhiwat;		/* bytes high watermark */
};

static struct rxpoll_time_tbl rxpoll_tbl[] = {
	{ 10 * MBPS,	2,	8,	(1 * 1024),	(6 * 1024)	},
	{ 100 * MBPS,	10,	40,	(4 * 1024),	(64 * 1024)	},
	{ 1 * GBPS,	10,	40,	(4 * 1024),	(64 * 1024)	},
	{ 10 * GBPS,	10,	40,	(4 * 1024),	(64 * 1024)	},
	{ 100 * GBPS,	10,	40,	(4 * 1024),	(64 * 1024)	},
	{ 0, 0, 0, 0, 0 }
};
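
/*
 * Illustrative sketch (hypothetical helper, not part of the original
 * file): the table above is meant to be scanned by downlink speed,
 * selecting the watermarks of the fastest row whose speed does not
 * exceed the measured rate; the all-zero row terminates the scan.
 */
#if 0
static const struct rxpoll_time_tbl *
rxpoll_tbl_lookup_example(u_int64_t link_rate)
{
	int i;

	/* start at the slowest profile; upgrade while the rate allows */
	for (i = 0; rxpoll_tbl[i + 1].speed != 0 &&
	    link_rate >= rxpoll_tbl[i + 1].speed; i++)
		;
	return (&rxpoll_tbl[i]);
}
#endif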
static inline u_int32_t
proto_hash_value(u_int32_t protocol_family)
{
	/*
	 * dlil_proto_unplumb_all() depends on the mapping between
	 * the hash bucket index and the protocol family defined
	 * here; future changes must be applied there as well.
	 */
	switch (protocol_family) {
	case PF_INET:
		return (0);
	case PF_INET6:
		return (1);
	case PF_VLAN:
		return (2);
	case PF_UNSPEC:
	default:
		return (3);
	}
}
/*
 * Caller must already be holding ifnet lock.
 */
static struct if_proto *
find_attached_proto(struct ifnet *ifp, u_int32_t protocol_family)
{
	struct if_proto *proto = NULL;
	u_int32_t i = proto_hash_value(protocol_family);

	ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);

	if (ifp->if_proto_hash != NULL)
		proto = SLIST_FIRST(&ifp->if_proto_hash[i]);

	while (proto != NULL && proto->protocol_family != protocol_family)
		proto = SLIST_NEXT(proto, next_hash);

	if (proto != NULL)
		if_proto_ref(proto);

	return (proto);
}
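
/*
 * Usage sketch (illustrative): callers pair the reference taken by
 * find_attached_proto() with if_proto_free() once they are done:
 *
 *	ifnet_lock_shared(ifp);
 *	proto = find_attached_proto(ifp, PF_INET);
 *	ifnet_lock_done(ifp);
 *	if (proto != NULL) {
 *		... use proto ...
 *		if_proto_free(proto);
 *	}
 */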
static void
if_proto_ref(struct if_proto *proto)
{
	atomic_add_32(&proto->refcount, 1);
}

extern void if_rtproto_del(struct ifnet *ifp, int protocol);
static void
if_proto_free(struct if_proto *proto)
{
	u_int32_t oldval;
	struct ifnet *ifp = proto->ifp;
	u_int32_t proto_family = proto->protocol_family;
	struct kev_dl_proto_data ev_pr_data;

	oldval = atomic_add_32_ov(&proto->refcount, -1);
	if (oldval > 1)
		return;

	/* No more reference on this, protocol must have been detached */
	VERIFY(proto->detached);

	if (proto->proto_kpi == kProtoKPI_v1) {
		if (proto->kpi.v1.detached)
			proto->kpi.v1.detached(ifp, proto->protocol_family);
	}
	if (proto->proto_kpi == kProtoKPI_v2) {
		if (proto->kpi.v2.detached)
			proto->kpi.v2.detached(ifp, proto->protocol_family);
	}

	/*
	 * Cleanup routes that may still be in the routing table for that
	 * interface/protocol pair.
	 */
	if_rtproto_del(ifp, proto_family);

	/*
	 * The reserved field carries the number of protocols still attached
	 * (subject to change)
	 */
	ifnet_lock_shared(ifp);
	ev_pr_data.proto_family = proto_family;
	ev_pr_data.proto_remaining_count = dlil_ifp_proto_count(ifp);
	ifnet_lock_done(ifp);

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_DETACHED,
	    (struct net_event_data *)&ev_pr_data,
	    sizeof (struct kev_dl_proto_data));

	zfree(dlif_proto_zone, proto);
}
__private_extern__ void
ifnet_lock_assert(struct ifnet *ifp, ifnet_lock_assert_t what)
{
	unsigned int type = 0;

	switch (what) {
	case IFNET_LCK_ASSERT_EXCLUSIVE:
		type = LCK_RW_ASSERT_EXCLUSIVE;
		break;

	case IFNET_LCK_ASSERT_SHARED:
		type = LCK_RW_ASSERT_SHARED;
		break;

	case IFNET_LCK_ASSERT_OWNED:
		type = LCK_RW_ASSERT_HELD;
		break;

	case IFNET_LCK_ASSERT_NOTOWNED:
		/* nothing to do here for RW lock; bypass assert */
		return;

	default:
		panic("bad ifnet assert type: %d", what);
		/* NOTREACHED */
	}
	lck_rw_assert(&ifp->if_lock, type);
}
__private_extern__ void
ifnet_lock_shared(struct ifnet *ifp)
{
	lck_rw_lock_shared(&ifp->if_lock);
}

__private_extern__ void
ifnet_lock_exclusive(struct ifnet *ifp)
{
	lck_rw_lock_exclusive(&ifp->if_lock);
}

__private_extern__ void
ifnet_lock_done(struct ifnet *ifp)
{
	lck_rw_done(&ifp->if_lock);
}
__private_extern__ void
if_inetdata_lock_shared(struct ifnet *ifp)
{
	lck_rw_lock_shared(&ifp->if_inetdata_lock);
}

__private_extern__ void
if_inetdata_lock_exclusive(struct ifnet *ifp)
{
	lck_rw_lock_exclusive(&ifp->if_inetdata_lock);
}

__private_extern__ void
if_inetdata_lock_done(struct ifnet *ifp)
{
	lck_rw_done(&ifp->if_inetdata_lock);
}

__private_extern__ void
if_inet6data_lock_shared(struct ifnet *ifp)
{
	lck_rw_lock_shared(&ifp->if_inet6data_lock);
}

__private_extern__ void
if_inet6data_lock_exclusive(struct ifnet *ifp)
{
	lck_rw_lock_exclusive(&ifp->if_inet6data_lock);
}

__private_extern__ void
if_inet6data_lock_done(struct ifnet *ifp)
{
	lck_rw_done(&ifp->if_inet6data_lock);
}
__private_extern__ void
ifnet_head_lock_shared(void)
{
	lck_rw_lock_shared(&ifnet_head_lock);
}

__private_extern__ void
ifnet_head_lock_exclusive(void)
{
	lck_rw_lock_exclusive(&ifnet_head_lock);
}

__private_extern__ void
ifnet_head_done(void)
{
	lck_rw_done(&ifnet_head_lock);
}

__private_extern__ void
ifnet_head_assert_exclusive(void)
{
	lck_rw_assert(&ifnet_head_lock, LCK_RW_ASSERT_EXCLUSIVE);
}
/*
 * Caller must already be holding ifnet lock.
 */
static int
dlil_ifp_proto_count(struct ifnet *ifp)
{
	int i, count = 0;

	ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);

	if (ifp->if_proto_hash == NULL)
		goto done;

	for (i = 0; i < PROTO_HASH_SLOTS; i++) {
		struct if_proto *proto;
		SLIST_FOREACH(proto, &ifp->if_proto_hash[i], next_hash) {
			count++;
		}
	}
done:
	return (count);
}
__private_extern__ void
dlil_post_msg(struct ifnet *ifp, u_int32_t event_subclass,
    u_int32_t event_code, struct net_event_data *event_data,
    u_int32_t event_data_len)
{
	struct net_event_data ev_data;
	struct kev_msg ev_msg;

	bzero(&ev_msg, sizeof (ev_msg));
	bzero(&ev_data, sizeof (ev_data));
	/*
	 * A net event always starts with a net_event_data structure,
	 * but the caller can generate a simple net event or
	 * provide a longer event structure to post.
	 */
	ev_msg.vendor_code	= KEV_VENDOR_APPLE;
	ev_msg.kev_class	= KEV_NETWORK_CLASS;
	ev_msg.kev_subclass	= event_subclass;
	ev_msg.event_code	= event_code;

	if (event_data == NULL) {
		event_data = &ev_data;
		event_data_len = sizeof (struct net_event_data);
	}

	strlcpy(&event_data->if_name[0], ifp->if_name, IFNAMSIZ);
	event_data->if_family = ifp->if_family;
	event_data->if_unit = (u_int32_t)ifp->if_unit;

	ev_msg.dv[0].data_length = event_data_len;
	ev_msg.dv[0].data_ptr = event_data;
	ev_msg.dv[1].data_length = 0;

	/* Don't update interface generation for quality and RRC state changes */
	bool update_generation = (event_subclass != KEV_DL_SUBCLASS ||
	    (event_code != KEV_DL_LINK_QUALITY_METRIC_CHANGED &&
	    event_code != KEV_DL_RRC_STATE_CHANGED));

	dlil_event_internal(ifp, &ev_msg, update_generation);
}
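
/*
 * Usage sketch (illustrative): passing a NULL payload lets
 * dlil_post_msg() synthesize the minimal net_event_data from the
 * ifnet itself, e.g. to post a simple link-status event:
 *
 *	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_LINK_ON, NULL, 0);
 */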
__private_extern__ int
dlil_alloc_local_stats(struct ifnet *ifp)
{
	int ret = EINVAL;
	void *buf, *base, **pbuf;

	if (ifp == NULL)
		goto end;

	if (ifp->if_tcp_stat == NULL && ifp->if_udp_stat == NULL) {
		/* allocate tcpstat_local structure */
		buf = zalloc(dlif_tcpstat_zone);
		if (buf == NULL) {
			ret = ENOMEM;
			goto end;
		}
		bzero(buf, dlif_tcpstat_bufsize);

		/* Get the 64-bit aligned base address for this object */
		base = (void *)P2ROUNDUP((intptr_t)buf + sizeof (u_int64_t),
		    sizeof (u_int64_t));
		VERIFY(((intptr_t)base + dlif_tcpstat_size) <=
		    ((intptr_t)buf + dlif_tcpstat_bufsize));

		/*
		 * Wind back a pointer size from the aligned base and
		 * save the original address so we can free it later.
		 */
		pbuf = (void **)((intptr_t)base - sizeof (void *));
		*pbuf = buf;
		ifp->if_tcp_stat = base;

		/* allocate udpstat_local structure */
		buf = zalloc(dlif_udpstat_zone);
		if (buf == NULL) {
			ret = ENOMEM;
			goto end;
		}
		bzero(buf, dlif_udpstat_bufsize);

		/* Get the 64-bit aligned base address for this object */
		base = (void *)P2ROUNDUP((intptr_t)buf + sizeof (u_int64_t),
		    sizeof (u_int64_t));
		VERIFY(((intptr_t)base + dlif_udpstat_size) <=
		    ((intptr_t)buf + dlif_udpstat_bufsize));

		/*
		 * Wind back a pointer size from the aligned base and
		 * save the original address so we can free it later.
		 */
		pbuf = (void **)((intptr_t)base - sizeof (void *));
		*pbuf = buf;
		ifp->if_udp_stat = base;

		VERIFY(IS_P2ALIGNED(ifp->if_tcp_stat, sizeof (u_int64_t)) &&
		    IS_P2ALIGNED(ifp->if_udp_stat, sizeof (u_int64_t)));

		ret = 0;
	}

	if (ifp->if_ipv4_stat == NULL) {
		MALLOC(ifp->if_ipv4_stat, struct if_tcp_ecn_stat *,
		    sizeof (struct if_tcp_ecn_stat), M_TEMP, M_WAITOK|M_ZERO);
		if (ifp->if_ipv4_stat == NULL) {
			ret = ENOMEM;
			goto end;
		}
	}

	if (ifp->if_ipv6_stat == NULL) {
		MALLOC(ifp->if_ipv6_stat, struct if_tcp_ecn_stat *,
		    sizeof (struct if_tcp_ecn_stat), M_TEMP, M_WAITOK|M_ZERO);
		if (ifp->if_ipv6_stat == NULL) {
			ret = ENOMEM;
			goto end;
		}
	}
end:
	if (ret != 0) {
		if (ifp->if_tcp_stat != NULL) {
			pbuf = (void **)
			    ((intptr_t)ifp->if_tcp_stat - sizeof (void *));
			zfree(dlif_tcpstat_zone, *pbuf);
			ifp->if_tcp_stat = NULL;
		}
		if (ifp->if_udp_stat != NULL) {
			pbuf = (void **)
			    ((intptr_t)ifp->if_udp_stat - sizeof (void *));
			zfree(dlif_udpstat_zone, *pbuf);
			ifp->if_udp_stat = NULL;
		}
		if (ifp->if_ipv4_stat != NULL) {
			FREE(ifp->if_ipv4_stat, M_TEMP);
			ifp->if_ipv4_stat = NULL;
		}
		if (ifp->if_ipv6_stat != NULL) {
			FREE(ifp->if_ipv6_stat, M_TEMP);
			ifp->if_ipv6_stat = NULL;
		}
	}

	return (ret);
}
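
/*
 * Illustrative layout of the aligned-allocation trick above, assuming
 * 8-byte pointers: for a zone buffer at address B, the object base is
 * P2ROUNDUP(B + 8, 8), which always leaves at least one pointer-sized
 * slot below the base:
 *
 *	B ... [saved address B][64-bit aligned object ...]
 *	       ^pbuf           ^base = ifp->if_tcp_stat
 *
 * The cleanup path reads *(void **)((intptr_t)base - sizeof (void *))
 * to recover B before returning the buffer to its zone.
 */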
static int
dlil_create_input_thread(ifnet_t ifp, struct dlil_threading_info *inp)
{
	thread_continue_t func;
	u_int32_t limit;
	int error;

	/* NULL ifp indicates the main input thread, called at dlil_init time */
	if (ifp == NULL) {
		func = dlil_main_input_thread_func;
		VERIFY(inp == dlil_main_input_thread);
		(void) strlcat(inp->input_name,
		    "main_input", DLIL_THREADNAME_LEN);
	} else if (net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) {
		func = dlil_rxpoll_input_thread_func;
		VERIFY(inp != dlil_main_input_thread);
		(void) snprintf(inp->input_name, DLIL_THREADNAME_LEN,
		    "%s_input_poll", if_name(ifp));
	} else {
		func = dlil_input_thread_func;
		VERIFY(inp != dlil_main_input_thread);
		(void) snprintf(inp->input_name, DLIL_THREADNAME_LEN,
		    "%s_input", if_name(ifp));
	}
	VERIFY(inp->input_thr == THREAD_NULL);

	inp->lck_grp = lck_grp_alloc_init(inp->input_name, dlil_grp_attributes);
	lck_mtx_init(&inp->input_lck, inp->lck_grp, dlil_lck_attributes);

	inp->mode = IFNET_MODEL_INPUT_POLL_OFF;
	inp->ifp = ifp;		/* NULL for main input thread */

	net_timerclear(&inp->mode_holdtime);
	net_timerclear(&inp->mode_lasttime);
	net_timerclear(&inp->sample_holdtime);
	net_timerclear(&inp->sample_lasttime);
	net_timerclear(&inp->dbg_lasttime);

	/*
	 * For interfaces that support opportunistic polling, set the
	 * low and high watermarks for outstanding inbound packets/bytes.
	 * Also define freeze times for transitioning between modes
	 * and updating the average.
	 */
	if (ifp != NULL && net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) {
		limit = MAX(if_rcvq_maxlen, IF_RCVQ_MINLEN);
		(void) dlil_rxpoll_set_params(ifp, NULL, FALSE);
	} else {
		limit = (u_int32_t)-1;
	}

	_qinit(&inp->rcvq_pkts, Q_DROPTAIL, limit);
	if (inp == dlil_main_input_thread) {
		struct dlil_main_threading_info *inpm =
		    (struct dlil_main_threading_info *)inp;
		_qinit(&inpm->lo_rcvq_pkts, Q_DROPTAIL, limit);
	}

	error = kernel_thread_start(func, inp, &inp->input_thr);
	if (error == KERN_SUCCESS) {
		ml_thread_policy(inp->input_thr, MACHINE_GROUP,
		    (MACHINE_NETWORK_GROUP|MACHINE_NETWORK_NETISR));
		/*
		 * We create an affinity set so that the matching workloop
		 * thread or the starter thread (for loopback) can be
		 * scheduled on the same processor set as the input thread.
		 */
		if (net_affinity) {
			struct thread *tp = inp->input_thr;
			u_int32_t tag;
			/*
			 * Randomize to reduce the probability
			 * of affinity tag namespace collision.
			 */
			read_random(&tag, sizeof (tag));
			if (dlil_affinity_set(tp, tag) == KERN_SUCCESS) {
				thread_reference(tp);
				inp->tag = tag;
				inp->net_affinity = TRUE;
			}
		}
	} else if (inp == dlil_main_input_thread) {
		panic_plain("%s: couldn't create main input thread", __func__);
		/* NOTREACHED */
	} else {
		panic_plain("%s: couldn't create %s input thread", __func__,
		    if_name(ifp));
		/* NOTREACHED */
	}
	OSAddAtomic(1, &cur_dlil_input_threads);

	return (error);
}
static void
dlil_terminate_input_thread(struct dlil_threading_info *inp)
{
	struct ifnet *ifp;

	VERIFY(current_thread() == inp->input_thr);
	VERIFY(inp != dlil_main_input_thread);

	OSAddAtomic(-1, &cur_dlil_input_threads);

	lck_mtx_destroy(&inp->input_lck, inp->lck_grp);
	lck_grp_free(inp->lck_grp);

	inp->input_waiting = 0;
	inp->wtot = 0;
	bzero(inp->input_name, sizeof (inp->input_name));
	ifp = inp->ifp;
	inp->ifp = NULL;
	VERIFY(qhead(&inp->rcvq_pkts) == NULL && qempty(&inp->rcvq_pkts));
	qlimit(&inp->rcvq_pkts) = 0;
	bzero(&inp->stats, sizeof (inp->stats));

	VERIFY(!inp->net_affinity);
	inp->input_thr = THREAD_NULL;
	VERIFY(inp->wloop_thr == THREAD_NULL);
	VERIFY(inp->poll_thr == THREAD_NULL);
	VERIFY(inp->tag == 0);

	inp->mode = IFNET_MODEL_INPUT_POLL_OFF;
	bzero(&inp->tstats, sizeof (inp->tstats));
	bzero(&inp->pstats, sizeof (inp->pstats));
	bzero(&inp->sstats, sizeof (inp->sstats));

	net_timerclear(&inp->mode_holdtime);
	net_timerclear(&inp->mode_lasttime);
	net_timerclear(&inp->sample_holdtime);
	net_timerclear(&inp->sample_lasttime);
	net_timerclear(&inp->dbg_lasttime);

#if IFNET_INPUT_SANITY_CHK
	inp->input_mbuf_cnt = 0;
#endif /* IFNET_INPUT_SANITY_CHK */

	if (dlil_verbose) {
		printf("%s: input thread terminated\n",
		    if_name(ifp));
	}

	/* for the extra refcnt from kernel_thread_start() */
	thread_deallocate(current_thread());

	/* this is the end */
	thread_terminate(current_thread());
	/* NOTREACHED */
}
static kern_return_t
dlil_affinity_set(struct thread *tp, u_int32_t tag)
{
	thread_affinity_policy_data_t policy;

	bzero(&policy, sizeof (policy));
	policy.affinity_tag = tag;
	return (thread_policy_set(tp, THREAD_AFFINITY_POLICY,
	    (thread_policy_t)&policy, THREAD_AFFINITY_POLICY_COUNT));
}
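
/*
 * Note (informational): threads that share the same non-zero affinity
 * tag are scheduled on the same processor set; dlil_create_input_thread()
 * above draws the tag from read_random() precisely to keep unrelated
 * affinity sets from colliding in the tag namespace.
 */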
void
dlil_init(void)
{
	thread_t thread = THREAD_NULL;
	/*
	 * The following fields must be 64-bit aligned for atomic operations.
	 */
	IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_opackets);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_oerrors);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_collisions);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_obytes);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_imcasts);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_omcasts);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_alignerrs);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_dt_bytes);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_fpackets);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_fbytes);

	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_opackets);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_oerrors);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_collisions);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_obytes);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_imcasts);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_omcasts);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_alignerrs);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_dt_bytes);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_fpackets);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_fbytes);

	/*
	 * These IF_HWASSIST_ flags must be equal to their IFNET_* counterparts.
	 */
	_CASSERT(IF_HWASSIST_CSUM_IP == IFNET_CSUM_IP);
	_CASSERT(IF_HWASSIST_CSUM_TCP == IFNET_CSUM_TCP);
	_CASSERT(IF_HWASSIST_CSUM_UDP == IFNET_CSUM_UDP);
	_CASSERT(IF_HWASSIST_CSUM_IP_FRAGS == IFNET_CSUM_FRAGMENT);
	_CASSERT(IF_HWASSIST_CSUM_FRAGMENT == IFNET_IP_FRAGMENT);
	_CASSERT(IF_HWASSIST_CSUM_TCPIPV6 == IFNET_CSUM_TCPIPV6);
	_CASSERT(IF_HWASSIST_CSUM_UDPIPV6 == IFNET_CSUM_UDPIPV6);
	_CASSERT(IF_HWASSIST_CSUM_FRAGMENT_IPV6 == IFNET_IPV6_FRAGMENT);
	_CASSERT(IF_HWASSIST_CSUM_PARTIAL == IFNET_CSUM_PARTIAL);
	_CASSERT(IF_HWASSIST_VLAN_TAGGING == IFNET_VLAN_TAGGING);
	_CASSERT(IF_HWASSIST_VLAN_MTU == IFNET_VLAN_MTU);
	_CASSERT(IF_HWASSIST_TSO_V4 == IFNET_TSO_IPV4);
	_CASSERT(IF_HWASSIST_TSO_V6 == IFNET_TSO_IPV6);

	/*
	 * ... as well as the mbuf checksum flags counterparts.
	 */
	_CASSERT(CSUM_IP == IF_HWASSIST_CSUM_IP);
	_CASSERT(CSUM_TCP == IF_HWASSIST_CSUM_TCP);
	_CASSERT(CSUM_UDP == IF_HWASSIST_CSUM_UDP);
	_CASSERT(CSUM_IP_FRAGS == IF_HWASSIST_CSUM_IP_FRAGS);
	_CASSERT(CSUM_FRAGMENT == IF_HWASSIST_CSUM_FRAGMENT);
	_CASSERT(CSUM_TCPIPV6 == IF_HWASSIST_CSUM_TCPIPV6);
	_CASSERT(CSUM_UDPIPV6 == IF_HWASSIST_CSUM_UDPIPV6);
	_CASSERT(CSUM_FRAGMENT_IPV6 == IF_HWASSIST_CSUM_FRAGMENT_IPV6);
	_CASSERT(CSUM_PARTIAL == IF_HWASSIST_CSUM_PARTIAL);
	_CASSERT(CSUM_VLAN_TAG_VALID == IF_HWASSIST_VLAN_TAGGING);

	/*
	 * Make sure we have at least IF_LLREACH_MAXLEN in the llreach info.
	 */
	_CASSERT(IF_LLREACH_MAXLEN <= IF_LLREACHINFO_ADDRLEN);
	_CASSERT(IFNET_LLREACHINFO_ADDRLEN == IF_LLREACHINFO_ADDRLEN);

	_CASSERT(IFRLOGF_DLIL == IFNET_LOGF_DLIL);
	_CASSERT(IFRLOGF_FAMILY == IFNET_LOGF_FAMILY);
	_CASSERT(IFRLOGF_DRIVER == IFNET_LOGF_DRIVER);
	_CASSERT(IFRLOGF_FIRMWARE == IFNET_LOGF_FIRMWARE);

	_CASSERT(IFRLOGCAT_CONNECTIVITY == IFNET_LOGCAT_CONNECTIVITY);
	_CASSERT(IFRLOGCAT_QUALITY == IFNET_LOGCAT_QUALITY);
	_CASSERT(IFRLOGCAT_PERFORMANCE == IFNET_LOGCAT_PERFORMANCE);

	_CASSERT(IFRTYPE_FAMILY_ANY == IFNET_FAMILY_ANY);
	_CASSERT(IFRTYPE_FAMILY_LOOPBACK == IFNET_FAMILY_LOOPBACK);
	_CASSERT(IFRTYPE_FAMILY_ETHERNET == IFNET_FAMILY_ETHERNET);
	_CASSERT(IFRTYPE_FAMILY_SLIP == IFNET_FAMILY_SLIP);
	_CASSERT(IFRTYPE_FAMILY_TUN == IFNET_FAMILY_TUN);
	_CASSERT(IFRTYPE_FAMILY_VLAN == IFNET_FAMILY_VLAN);
	_CASSERT(IFRTYPE_FAMILY_PPP == IFNET_FAMILY_PPP);
	_CASSERT(IFRTYPE_FAMILY_PVC == IFNET_FAMILY_PVC);
	_CASSERT(IFRTYPE_FAMILY_DISC == IFNET_FAMILY_DISC);
	_CASSERT(IFRTYPE_FAMILY_MDECAP == IFNET_FAMILY_MDECAP);
	_CASSERT(IFRTYPE_FAMILY_GIF == IFNET_FAMILY_GIF);
	_CASSERT(IFRTYPE_FAMILY_FAITH == IFNET_FAMILY_FAITH);
	_CASSERT(IFRTYPE_FAMILY_STF == IFNET_FAMILY_STF);
	_CASSERT(IFRTYPE_FAMILY_FIREWIRE == IFNET_FAMILY_FIREWIRE);
	_CASSERT(IFRTYPE_FAMILY_BOND == IFNET_FAMILY_BOND);
	_CASSERT(IFRTYPE_FAMILY_CELLULAR == IFNET_FAMILY_CELLULAR);

	_CASSERT(IFRTYPE_SUBFAMILY_ANY == IFNET_SUBFAMILY_ANY);
	_CASSERT(IFRTYPE_SUBFAMILY_USB == IFNET_SUBFAMILY_USB);
	_CASSERT(IFRTYPE_SUBFAMILY_BLUETOOTH == IFNET_SUBFAMILY_BLUETOOTH);
	_CASSERT(IFRTYPE_SUBFAMILY_WIFI == IFNET_SUBFAMILY_WIFI);
	_CASSERT(IFRTYPE_SUBFAMILY_THUNDERBOLT == IFNET_SUBFAMILY_THUNDERBOLT);
	_CASSERT(IFRTYPE_SUBFAMILY_RESERVED == IFNET_SUBFAMILY_RESERVED);
	_CASSERT(IFRTYPE_SUBFAMILY_INTCOPROC == IFNET_SUBFAMILY_INTCOPROC);

	_CASSERT(DLIL_MODIDLEN == IFNET_MODIDLEN);
	_CASSERT(DLIL_MODARGLEN == IFNET_MODARGLEN);
	PE_parse_boot_argn("net_affinity", &net_affinity,
	    sizeof (net_affinity));

	PE_parse_boot_argn("net_rxpoll", &net_rxpoll, sizeof (net_rxpoll));

	PE_parse_boot_argn("net_rtref", &net_rtref, sizeof (net_rtref));

	PE_parse_boot_argn("ifnet_debug", &ifnet_debug, sizeof (ifnet_debug));
	dlif_size = (ifnet_debug == 0) ? sizeof (struct dlil_ifnet) :
	    sizeof (struct dlil_ifnet_dbg);
	/* Enforce 64-bit alignment for dlil_ifnet structure */
	dlif_bufsize = dlif_size + sizeof (void *) + sizeof (u_int64_t);
	dlif_bufsize = P2ROUNDUP(dlif_bufsize, sizeof (u_int64_t));
	dlif_zone = zinit(dlif_bufsize, DLIF_ZONE_MAX * dlif_bufsize,
	    0, DLIF_ZONE_NAME);
	if (dlif_zone == NULL) {
		panic_plain("%s: failed allocating %s", __func__,
		    DLIF_ZONE_NAME);
		/* NOTREACHED */
	}
	zone_change(dlif_zone, Z_EXPAND, TRUE);
	zone_change(dlif_zone, Z_CALLERACCT, FALSE);

	dlif_filt_size = sizeof (struct ifnet_filter);
	dlif_filt_zone = zinit(dlif_filt_size,
	    DLIF_FILT_ZONE_MAX * dlif_filt_size, 0, DLIF_FILT_ZONE_NAME);
	if (dlif_filt_zone == NULL) {
		panic_plain("%s: failed allocating %s", __func__,
		    DLIF_FILT_ZONE_NAME);
		/* NOTREACHED */
	}
	zone_change(dlif_filt_zone, Z_EXPAND, TRUE);
	zone_change(dlif_filt_zone, Z_CALLERACCT, FALSE);

	dlif_phash_size = sizeof (struct proto_hash_entry) * PROTO_HASH_SLOTS;
	dlif_phash_zone = zinit(dlif_phash_size,
	    DLIF_PHASH_ZONE_MAX * dlif_phash_size, 0, DLIF_PHASH_ZONE_NAME);
	if (dlif_phash_zone == NULL) {
		panic_plain("%s: failed allocating %s", __func__,
		    DLIF_PHASH_ZONE_NAME);
		/* NOTREACHED */
	}
	zone_change(dlif_phash_zone, Z_EXPAND, TRUE);
	zone_change(dlif_phash_zone, Z_CALLERACCT, FALSE);

	dlif_proto_size = sizeof (struct if_proto);
	dlif_proto_zone = zinit(dlif_proto_size,
	    DLIF_PROTO_ZONE_MAX * dlif_proto_size, 0, DLIF_PROTO_ZONE_NAME);
	if (dlif_proto_zone == NULL) {
		panic_plain("%s: failed allocating %s", __func__,
		    DLIF_PROTO_ZONE_NAME);
		/* NOTREACHED */
	}
	zone_change(dlif_proto_zone, Z_EXPAND, TRUE);
	zone_change(dlif_proto_zone, Z_CALLERACCT, FALSE);

	dlif_tcpstat_size = sizeof (struct tcpstat_local);
	/* Enforce 64-bit alignment for tcpstat_local structure */
	dlif_tcpstat_bufsize =
	    dlif_tcpstat_size + sizeof (void *) + sizeof (u_int64_t);
	dlif_tcpstat_bufsize =
	    P2ROUNDUP(dlif_tcpstat_bufsize, sizeof (u_int64_t));
	dlif_tcpstat_zone = zinit(dlif_tcpstat_bufsize,
	    DLIF_TCPSTAT_ZONE_MAX * dlif_tcpstat_bufsize, 0,
	    DLIF_TCPSTAT_ZONE_NAME);
	if (dlif_tcpstat_zone == NULL) {
		panic_plain("%s: failed allocating %s", __func__,
		    DLIF_TCPSTAT_ZONE_NAME);
		/* NOTREACHED */
	}
	zone_change(dlif_tcpstat_zone, Z_EXPAND, TRUE);
	zone_change(dlif_tcpstat_zone, Z_CALLERACCT, FALSE);

	dlif_udpstat_size = sizeof (struct udpstat_local);
	/* Enforce 64-bit alignment for udpstat_local structure */
	dlif_udpstat_bufsize =
	    dlif_udpstat_size + sizeof (void *) + sizeof (u_int64_t);
	dlif_udpstat_bufsize =
	    P2ROUNDUP(dlif_udpstat_bufsize, sizeof (u_int64_t));
	dlif_udpstat_zone = zinit(dlif_udpstat_bufsize,
	    DLIF_UDPSTAT_ZONE_MAX * dlif_udpstat_bufsize, 0,
	    DLIF_UDPSTAT_ZONE_NAME);
	if (dlif_udpstat_zone == NULL) {
		panic_plain("%s: failed allocating %s", __func__,
		    DLIF_UDPSTAT_ZONE_NAME);
		/* NOTREACHED */
	}
	zone_change(dlif_udpstat_zone, Z_EXPAND, TRUE);
	zone_change(dlif_udpstat_zone, Z_CALLERACCT, FALSE);
	ifnet_llreach_init();

	TAILQ_INIT(&dlil_ifnet_head);
	TAILQ_INIT(&ifnet_head);
	TAILQ_INIT(&ifnet_detaching_head);
	TAILQ_INIT(&ifnet_ordered_head);

	/* Setup the lock groups we will use */
	dlil_grp_attributes = lck_grp_attr_alloc_init();

	dlil_lock_group = lck_grp_alloc_init("DLIL internal locks",
	    dlil_grp_attributes);
	ifnet_lock_group = lck_grp_alloc_init("ifnet locks",
	    dlil_grp_attributes);
	ifnet_head_lock_group = lck_grp_alloc_init("ifnet head lock",
	    dlil_grp_attributes);
	ifnet_rcv_lock_group = lck_grp_alloc_init("ifnet rcv locks",
	    dlil_grp_attributes);
	ifnet_snd_lock_group = lck_grp_alloc_init("ifnet snd locks",
	    dlil_grp_attributes);

	/* Setup the lock attributes we will use */
	dlil_lck_attributes = lck_attr_alloc_init();

	ifnet_lock_attr = lck_attr_alloc_init();

	lck_rw_init(&ifnet_head_lock, ifnet_head_lock_group,
	    dlil_lck_attributes);
	lck_mtx_init(&dlil_ifnet_lock, dlil_lock_group, dlil_lck_attributes);

	/* Setup interface flow control related items */
	lck_mtx_init(&ifnet_fc_lock, dlil_lock_group, dlil_lck_attributes);

	ifnet_fc_zone_size = sizeof (struct ifnet_fc_entry);
	ifnet_fc_zone = zinit(ifnet_fc_zone_size,
	    IFNET_FC_ZONE_MAX * ifnet_fc_zone_size, 0, IFNET_FC_ZONE_NAME);
	if (ifnet_fc_zone == NULL) {
		panic_plain("%s: failed allocating %s", __func__,
		    IFNET_FC_ZONE_NAME);
		/* NOTREACHED */
	}
	zone_change(ifnet_fc_zone, Z_EXPAND, TRUE);
	zone_change(ifnet_fc_zone, Z_CALLERACCT, FALSE);

	/* Initialize interface address subsystem */
	ifa_init();

	/* Initialize the packet filter */
	pf_init();

	/* Initialize queue algorithms */
	classq_init();

	/* Initialize packet schedulers */
	pktsched_init();

	/* Initialize flow advisory subsystem */
	flowadv_init();

	/* Initialize the pktap virtual interface */
	pktap_init();

	/* Initialize the service class to dscp map */
	net_qos_map_init();

	/* Run self-tests */
	dlil_verify_sum16();

	/*
	 * Create and start up the main DLIL input thread and the interface
	 * detacher threads once everything is initialized.
	 */
	dlil_create_input_thread(NULL, dlil_main_input_thread);

	if (kernel_thread_start(ifnet_detacher_thread_func,
	    NULL, &thread) != KERN_SUCCESS) {
		panic_plain("%s: couldn't create detacher thread", __func__);
		/* NOTREACHED */
	}
	thread_deallocate(thread);
}
static void
if_flt_monitor_busy(struct ifnet *ifp)
{
	lck_mtx_assert(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);

	++ifp->if_flt_busy;
	VERIFY(ifp->if_flt_busy != 0);
}

static void
if_flt_monitor_unbusy(struct ifnet *ifp)
{
	if_flt_monitor_leave(ifp);
}

static void
if_flt_monitor_enter(struct ifnet *ifp)
{
	lck_mtx_assert(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);

	while (ifp->if_flt_busy) {
		++ifp->if_flt_waiters;
		(void) msleep(&ifp->if_flt_head, &ifp->if_flt_lock,
		    (PZERO - 1), "if_flt_monitor", NULL);
	}
	if_flt_monitor_busy(ifp);
}

static void
if_flt_monitor_leave(struct ifnet *ifp)
{
	lck_mtx_assert(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);

	VERIFY(ifp->if_flt_busy != 0);
	--ifp->if_flt_busy;

	if (ifp->if_flt_busy == 0 && ifp->if_flt_waiters > 0) {
		ifp->if_flt_waiters = 0;
		wakeup(&ifp->if_flt_head);
	}
}
__private_extern__ int
dlil_attach_filter(struct ifnet *ifp, const struct iff_filter *if_filter,
    interface_filter_t *filter_ref, u_int32_t flags)
{
	int retval = 0;
	struct ifnet_filter *filter = NULL;

	ifnet_head_lock_shared();
	/* Check that the interface is in the global list */
	if (!ifnet_lookup(ifp)) {
		retval = ENXIO;
		goto done;
	}

	filter = zalloc(dlif_filt_zone);
	if (filter == NULL) {
		retval = ENOMEM;
		goto done;
	}
	bzero(filter, dlif_filt_size);

	/* refcnt held above during lookup */
	filter->filt_flags = flags;
	filter->filt_ifp = ifp;
	filter->filt_cookie = if_filter->iff_cookie;
	filter->filt_name = if_filter->iff_name;
	filter->filt_protocol = if_filter->iff_protocol;
	filter->filt_input = if_filter->iff_input;
	filter->filt_output = if_filter->iff_output;
	filter->filt_event = if_filter->iff_event;
	filter->filt_ioctl = if_filter->iff_ioctl;
	filter->filt_detached = if_filter->iff_detached;

	lck_mtx_lock(&ifp->if_flt_lock);
	if_flt_monitor_enter(ifp);

	lck_mtx_assert(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
	TAILQ_INSERT_TAIL(&ifp->if_flt_head, filter, filt_next);

	if_flt_monitor_leave(ifp);
	lck_mtx_unlock(&ifp->if_flt_lock);

	*filter_ref = filter;

	/*
	 * Bump filter count and route_generation ID to let TCP
	 * know it shouldn't do TSO on this connection
	 */
	if ((filter->filt_flags & DLIL_IFF_TSO) == 0) {
		OSAddAtomic(1, &dlil_filter_disable_tso_count);
		routegenid_update();
	}
	if (dlil_verbose) {
		printf("%s: %s filter attached\n", if_name(ifp),
		    if_filter->iff_name);
	}
done:
	ifnet_head_done();
	if (retval != 0 && ifp != NULL) {
		DLIL_PRINTF("%s: failed to attach %s (err=%d)\n",
		    if_name(ifp), if_filter->iff_name, retval);
	}
	if (retval != 0 && filter != NULL)
		zfree(dlif_filt_zone, filter);

	return (retval);
}
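
/*
 * For illustration only (not part of the original source): a caller
 * attaching a filter fills out a struct iff_filter with the callbacks
 * copied above; unused callbacks may be left NULL, and passing
 * DLIL_IFF_TSO in flags declares the filter TSO-compatible so the TSO
 * disable count is not bumped.  The names my_cookie/my_input/
 * my_detached below are hypothetical:
 *
 *	struct iff_filter flt = {
 *		.iff_cookie   = my_cookie,
 *		.iff_name     = "com.example.myfilt",
 *		.iff_protocol = 0,		// 0 matches all protocols
 *		.iff_input    = my_input,
 *		.iff_detached = my_detached,
 *	};
 *	interface_filter_t ref;
 *	errno_t err = dlil_attach_filter(ifp, &flt, &ref, DLIL_IFF_TSO);
 */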
static int
dlil_detach_filter_internal(interface_filter_t filter, int detached)
{
	int retval = 0;

	if (detached == 0) {
		ifnet_t ifp = NULL;

		ifnet_head_lock_shared();
		TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
			interface_filter_t entry = NULL;

			lck_mtx_lock(&ifp->if_flt_lock);
			TAILQ_FOREACH(entry, &ifp->if_flt_head, filt_next) {
				if (entry != filter || entry->filt_skip)
					continue;
				/*
				 * We've found a match; since it's possible
				 * that the thread gets blocked in the monitor,
				 * we do the lock dance.  Interface should
				 * not be detached since we still have a use
				 * count held during filter attach.
				 */
				entry->filt_skip = 1;	/* skip input/output */
				lck_mtx_unlock(&ifp->if_flt_lock);
				ifnet_head_done();

				lck_mtx_lock(&ifp->if_flt_lock);
				if_flt_monitor_enter(ifp);
				lck_mtx_assert(&ifp->if_flt_lock,
				    LCK_MTX_ASSERT_OWNED);

				/* Remove the filter from the list */
				TAILQ_REMOVE(&ifp->if_flt_head, filter,
				    filt_next);

				if_flt_monitor_leave(ifp);
				lck_mtx_unlock(&ifp->if_flt_lock);
				if (dlil_verbose) {
					printf("%s: %s filter detached\n",
					    if_name(ifp), filter->filt_name);
				}
				goto destroy;
			}
			lck_mtx_unlock(&ifp->if_flt_lock);
		}
		ifnet_head_done();

		/* filter parameter is not a valid filter ref */
		retval = EINVAL;
		goto done;
	}

	if (dlil_verbose)
		printf("%s filter detached\n", filter->filt_name);

destroy:
	/* Call the detached function if there is one */
	if (filter->filt_detached)
		filter->filt_detached(filter->filt_cookie, filter->filt_ifp);

	/*
	 * Decrease filter count and route_generation ID to let TCP
	 * know it should reevaluate doing TSO or not
	 */
	if ((filter->filt_flags & DLIL_IFF_TSO) == 0) {
		OSAddAtomic(-1, &dlil_filter_disable_tso_count);
		routegenid_update();
	}

	/* Free the filter */
	zfree(dlif_filt_zone, filter);
	filter = NULL;
done:
	if (retval != 0 && filter != NULL) {
		DLIL_PRINTF("failed to detach %s filter (err=%d)\n",
		    filter->filt_name, retval);
	}

	return (retval);
}
__private_extern__ void
dlil_detach_filter(interface_filter_t filter)
{
	if (filter == NULL)
		return;
	dlil_detach_filter_internal(filter, 0);
}
/*
 * Main input thread:
 *
 *   a) handles all inbound packets for lo0
 *   b) handles all inbound packets for interfaces with no dedicated
 *	input thread (e.g. anything but Ethernet/PDP or those that support
 *	opportunistic polling.)
 *   c) protocol registrations
 *   d) packet injections
 */
__attribute__((noreturn))
static void
dlil_main_input_thread_func(void *v, wait_result_t w)
{
#pragma unused(w)
	struct dlil_main_threading_info *inpm = v;
	struct dlil_threading_info *inp = v;

	VERIFY(inp == dlil_main_input_thread);
	VERIFY(inp->ifp == NULL);
	VERIFY(inp->mode == IFNET_MODEL_INPUT_POLL_OFF);

	for (;;) {
		struct mbuf *m = NULL, *m_loop = NULL;
		u_int32_t m_cnt, m_cnt_loop;
		boolean_t proto_req;

		lck_mtx_lock_spin(&inp->input_lck);

		/* Wait until there is work to be done */
		while (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) {
			inp->input_waiting &= ~DLIL_INPUT_RUNNING;
			(void) msleep(&inp->input_waiting, &inp->input_lck,
			    (PZERO - 1) | PSPIN, inp->input_name, NULL);
		}

		inp->input_waiting |= DLIL_INPUT_RUNNING;
		inp->input_waiting &= ~DLIL_INPUT_WAITING;

		/* Main input thread cannot be terminated */
		VERIFY(!(inp->input_waiting & DLIL_INPUT_TERMINATE));

		proto_req = (inp->input_waiting &
		    (DLIL_PROTO_WAITING | DLIL_PROTO_REGISTER));

		/* Packets for non-dedicated interfaces other than lo0 */
		m_cnt = qlen(&inp->rcvq_pkts);
		m = _getq_all(&inp->rcvq_pkts, NULL, NULL, NULL);

		/* Packets exclusive to lo0 */
		m_cnt_loop = qlen(&inpm->lo_rcvq_pkts);
		m_loop = _getq_all(&inpm->lo_rcvq_pkts, NULL, NULL, NULL);

		inp->wtot = 0;

		lck_mtx_unlock(&inp->input_lck);

		/*
		 * NOTE warning %%% attention !!!!
		 * We should think about putting some thread starvation
		 * safeguards if we deal with long chains of packets.
		 */
		if (m_loop != NULL)
			dlil_input_packet_list_extended(lo_ifp, m_loop,
			    m_cnt_loop, inp->mode);

		if (m != NULL)
			dlil_input_packet_list_extended(NULL, m,
			    m_cnt, inp->mode);

		if (proto_req)
			proto_input_run();
	}

	/* NOTREACHED */
	VERIFY(0);	/* we should never get here */
}
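
/*
 * The sleep/wakeup handshake above works entirely on inp->input_waiting:
 * producers (see dlil_input_handler() below) set DLIL_INPUT_WAITING and
 * issue wakeup_one() only when DLIL_INPUT_RUNNING is clear, while this
 * thread clears DLIL_INPUT_RUNNING just before going back to msleep().
 * A request that arrives while packets are still being drained is
 * therefore noticed on the next loop iteration rather than lost.
 */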
/*
 * Input thread for interfaces with legacy input model.
 */
static void
dlil_input_thread_func(void *v, wait_result_t w)
{
#pragma unused(w)
	char thread_name[MAXTHREADNAMESIZE];
	struct dlil_threading_info *inp = v;
	struct ifnet *ifp = inp->ifp;

	/* Construct the name for this thread, and then apply it. */
	bzero(thread_name, sizeof(thread_name));
	snprintf(thread_name, sizeof(thread_name), "dlil_input_%s", ifp->if_xname);
	thread_set_thread_name(inp->input_thr, thread_name);

	VERIFY(inp != dlil_main_input_thread);
	VERIFY(ifp != NULL);
	VERIFY(!(ifp->if_eflags & IFEF_RXPOLL) || !net_rxpoll);
	VERIFY(inp->mode == IFNET_MODEL_INPUT_POLL_OFF);

	for (;;) {
		struct mbuf *m = NULL;
		u_int32_t m_cnt;

		lck_mtx_lock_spin(&inp->input_lck);

		/* Wait until there is work to be done */
		while (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) {
			inp->input_waiting &= ~DLIL_INPUT_RUNNING;
			(void) msleep(&inp->input_waiting, &inp->input_lck,
			    (PZERO - 1) | PSPIN, inp->input_name, NULL);
		}

		inp->input_waiting |= DLIL_INPUT_RUNNING;
		inp->input_waiting &= ~DLIL_INPUT_WAITING;

		/*
		 * Protocol registration and injection must always use
		 * the main input thread; in theory the latter can utilize
		 * the corresponding input thread where the packet arrived
		 * on, but that requires our knowing the interface in advance
		 * (and the benefits might not be worth the trouble.)
		 */
		VERIFY(!(inp->input_waiting &
		    (DLIL_PROTO_WAITING|DLIL_PROTO_REGISTER)));

		/* Packets for this interface */
		m_cnt = qlen(&inp->rcvq_pkts);
		m = _getq_all(&inp->rcvq_pkts, NULL, NULL, NULL);

		if (inp->input_waiting & DLIL_INPUT_TERMINATE) {
			lck_mtx_unlock(&inp->input_lck);

			/* Free up pending packets */
			if (m != NULL)
				mbuf_freem_list(m);

			dlil_terminate_input_thread(inp);
			/* NOTREACHED */
			return;
		}

		inp->wtot = 0;

		dlil_input_stats_sync(ifp, inp);

		lck_mtx_unlock(&inp->input_lck);

		/*
		 * NOTE warning %%% attention !!!!
		 * We should think about putting some thread starvation
		 * safeguards if we deal with long chains of packets.
		 */
		if (m != NULL)
			dlil_input_packet_list_extended(NULL, m,
			    m_cnt, inp->mode);
	}

	/* NOTREACHED */
	VERIFY(0);	/* we should never get here */
}
/*
 * Input thread for interfaces with opportunistic polling input model.
 */
static void
dlil_rxpoll_input_thread_func(void *v, wait_result_t w)
{
#pragma unused(w)
	struct dlil_threading_info *inp = v;
	struct ifnet *ifp = inp->ifp;
	struct timespec ts;

	VERIFY(inp != dlil_main_input_thread);
	VERIFY(ifp != NULL && (ifp->if_eflags & IFEF_RXPOLL));

	for (;;) {
		struct mbuf *m = NULL;
		u_int32_t m_cnt, m_size, poll_req = 0;
		ifnet_model_t mode;
		struct timespec now, delta;
		u_int64_t ival;

		lck_mtx_lock_spin(&inp->input_lck);

		if ((ival = inp->rxpoll_ival) < IF_RXPOLL_INTERVALTIME_MIN)
			ival = IF_RXPOLL_INTERVALTIME_MIN;

		/* Link parameters changed? */
		if (ifp->if_poll_update != 0) {
			ifp->if_poll_update = 0;
			(void) dlil_rxpoll_set_params(ifp, NULL, TRUE);
		}

		/* Current operating mode */
		mode = inp->mode;

		/* Wait until there is work to be done */
		while (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) {
			inp->input_waiting &= ~DLIL_INPUT_RUNNING;
			(void) msleep(&inp->input_waiting, &inp->input_lck,
			    (PZERO - 1) | PSPIN, inp->input_name, NULL);
		}

		inp->input_waiting |= DLIL_INPUT_RUNNING;
		inp->input_waiting &= ~DLIL_INPUT_WAITING;

		/*
		 * Protocol registration and injection must always use
		 * the main input thread; in theory the latter can utilize
		 * the corresponding input thread where the packet arrived
		 * on, but that requires our knowing the interface in advance
		 * (and the benefits might not be worth the trouble.)
		 */
		VERIFY(!(inp->input_waiting &
		    (DLIL_PROTO_WAITING|DLIL_PROTO_REGISTER)));

		if (inp->input_waiting & DLIL_INPUT_TERMINATE) {
			/* Free up pending packets */
			_flushq(&inp->rcvq_pkts);
			lck_mtx_unlock(&inp->input_lck);

			dlil_terminate_input_thread(inp);
			/* NOTREACHED */
			return;
		}

		/* Total count of all packets */
		m_cnt = qlen(&inp->rcvq_pkts);

		/* Total bytes of all packets */
		m_size = qsize(&inp->rcvq_pkts);

		/* Packets for this interface */
		m = _getq_all(&inp->rcvq_pkts, NULL, NULL, NULL);
		VERIFY(m != NULL || m_cnt == 0);

		nanouptime(&now);
		if (!net_timerisset(&inp->sample_lasttime))
			*(&inp->sample_lasttime) = *(&now);

		net_timersub(&now, &inp->sample_lasttime, &delta);
		if (if_rxpoll && net_timerisset(&inp->sample_holdtime)) {
			u_int32_t ptot, btot;

			/* Accumulate statistics for current sampling */
			PKTCNTR_ADD(&inp->sstats, m_cnt, m_size);

			if (net_timercmp(&delta, &inp->sample_holdtime, <))
				goto skip;

			*(&inp->sample_lasttime) = *(&now);

			/* Calculate min/max of inbound bytes */
			btot = (u_int32_t)inp->sstats.bytes;
			if (inp->rxpoll_bmin == 0 || inp->rxpoll_bmin > btot)
				inp->rxpoll_bmin = btot;
			if (btot > inp->rxpoll_bmax)
				inp->rxpoll_bmax = btot;

			/* Calculate EWMA of inbound bytes */
			DLIL_EWMA(inp->rxpoll_bavg, btot, if_rxpoll_decay);

			/* Calculate min/max of inbound packets */
			ptot = (u_int32_t)inp->sstats.packets;
			if (inp->rxpoll_pmin == 0 || inp->rxpoll_pmin > ptot)
				inp->rxpoll_pmin = ptot;
			if (ptot > inp->rxpoll_pmax)
				inp->rxpoll_pmax = ptot;

			/* Calculate EWMA of inbound packets */
			DLIL_EWMA(inp->rxpoll_pavg, ptot, if_rxpoll_decay);

			/* Reset sampling statistics */
			PKTCNTR_CLEAR(&inp->sstats);

			/* Calculate EWMA of wakeup requests */
			DLIL_EWMA(inp->rxpoll_wavg, inp->wtot, if_rxpoll_decay);
			inp->wtot = 0;

			if (dlil_verbose) {
				if (!net_timerisset(&inp->dbg_lasttime))
					*(&inp->dbg_lasttime) = *(&now);
				net_timersub(&now, &inp->dbg_lasttime, &delta);
				if (net_timercmp(&delta, &dlil_dbgrate, >=)) {
					*(&inp->dbg_lasttime) = *(&now);
					printf("%s: [%s] pkts avg %d max %d "
					    "limits [%d/%d], wreq avg %d "
					    "limits [%d/%d], bytes avg %d "
					    "limits [%d/%d]\n", if_name(ifp),
					    (inp->mode ==
					    IFNET_MODEL_INPUT_POLL_ON) ?
					    "ON" : "OFF", inp->rxpoll_pavg,
					    inp->rxpoll_pmax,
					    inp->rxpoll_plowat,
					    inp->rxpoll_phiwat,
					    inp->rxpoll_wavg,
					    inp->rxpoll_wlowat,
					    inp->rxpoll_whiwat,
					    inp->rxpoll_bavg,
					    inp->rxpoll_blowat,
					    inp->rxpoll_bhiwat);
				}
			}

			/* Perform mode transition, if necessary */
			if (!net_timerisset(&inp->mode_lasttime))
				*(&inp->mode_lasttime) = *(&now);

			net_timersub(&now, &inp->mode_lasttime, &delta);
			if (net_timercmp(&delta, &inp->mode_holdtime, <))
				goto skip;

			if (inp->rxpoll_pavg <= inp->rxpoll_plowat &&
			    inp->rxpoll_bavg <= inp->rxpoll_blowat &&
			    inp->mode != IFNET_MODEL_INPUT_POLL_OFF) {
				mode = IFNET_MODEL_INPUT_POLL_OFF;
			} else if (inp->rxpoll_pavg >= inp->rxpoll_phiwat &&
			    (inp->rxpoll_bavg >= inp->rxpoll_bhiwat ||
			    inp->rxpoll_wavg >= inp->rxpoll_whiwat) &&
			    inp->mode != IFNET_MODEL_INPUT_POLL_ON) {
				mode = IFNET_MODEL_INPUT_POLL_ON;
			}

			if (mode != inp->mode) {
				inp->mode = mode;
				*(&inp->mode_lasttime) = *(&now);
				poll_req++;
			}
		}
skip:
		dlil_input_stats_sync(ifp, inp);

		lck_mtx_unlock(&inp->input_lck);

		/*
		 * If there's a mode change and interface is still attached,
		 * perform a downcall to the driver for the new mode.  Also
		 * hold an IO refcnt on the interface to prevent it from
		 * being detached (will be released below.)
		 */
		if (poll_req != 0 && ifnet_is_attached(ifp, 1)) {
			struct ifnet_model_params p = { mode, { 0 } };
			errno_t err;

			if (dlil_verbose) {
				printf("%s: polling is now %s, "
				    "pkts avg %d max %d limits [%d/%d], "
				    "wreq avg %d limits [%d/%d], "
				    "bytes avg %d limits [%d/%d]\n",
				    if_name(ifp),
				    (mode == IFNET_MODEL_INPUT_POLL_ON) ?
				    "ON" : "OFF", inp->rxpoll_pavg,
				    inp->rxpoll_pmax, inp->rxpoll_plowat,
				    inp->rxpoll_phiwat, inp->rxpoll_wavg,
				    inp->rxpoll_wlowat, inp->rxpoll_whiwat,
				    inp->rxpoll_bavg, inp->rxpoll_blowat,
				    inp->rxpoll_bhiwat);
			}

			if ((err = ((*ifp->if_input_ctl)(ifp,
			    IFNET_CTL_SET_INPUT_MODEL, sizeof (p), &p))) != 0) {
				printf("%s: error setting polling mode "
				    "to %s (%d)\n", if_name(ifp),
				    (mode == IFNET_MODEL_INPUT_POLL_ON) ?
				    "ON" : "OFF", err);
			}

			switch (mode) {
			case IFNET_MODEL_INPUT_POLL_OFF:
				ifnet_set_poll_cycle(ifp, NULL);
				inp->rxpoll_offreq++;
				if (err != 0)
					inp->rxpoll_offerr++;
				break;

			case IFNET_MODEL_INPUT_POLL_ON:
				net_nsectimer(&ival, &ts);
				ifnet_set_poll_cycle(ifp, &ts);
				ifnet_poll(ifp);
				inp->rxpoll_onreq++;
				if (err != 0)
					inp->rxpoll_onerr++;
				break;

			default:
				VERIFY(0);
				/* NOTREACHED */
			}

			/* Release the IO refcnt */
			ifnet_decr_iorefcnt(ifp);
		}

		/*
		 * NOTE warning %%% attention !!!!
		 * We should think about putting some thread starvation
		 * safeguards if we deal with long chains of packets.
		 */
		if (m != NULL)
			dlil_input_packet_list_extended(NULL, m, m_cnt, mode);
	}

	/* NOTREACHED */
	VERIFY(0);	/* we should never get here */
}
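
/*
 * The poller's mode transitions above are deliberately hysteretic:
 * polling turns ON only when the packet EWMA reaches rxpoll_phiwat
 * *and* either the byte or wakeup EWMA crosses its high watermark, but
 * turns OFF only when both packet and byte EWMAs fall to their low
 * watermarks, with mode_holdtime gating how often a transition may
 * occur.  As a hypothetical illustration, with plowat/phiwat of 10/100
 * packets per sample, a steady 50 pkts/sample stream keeps whichever
 * mode is currently in effect.  DLIL_EWMA() itself (defined earlier in
 * this file) folds each sample into the running average with an
 * exponential decay controlled by if_rxpoll_decay.
 */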
/*
 * Must be called on an attached ifnet (caller is expected to check.)
 * Caller may pass NULL for poll parameters to indicate "auto-tuning."
 */
errno_t
dlil_rxpoll_set_params(struct ifnet *ifp, struct ifnet_poll_params *p,
    boolean_t locked)
{
	struct dlil_threading_info *inp;
	u_int64_t sample_holdtime, inbw;

	VERIFY(ifp != NULL);
	if (!(ifp->if_eflags & IFEF_RXPOLL) || (inp = ifp->if_inp) == NULL)
		return (ENXIO);

	if (p != NULL) {
		if ((p->packets_lowat == 0 && p->packets_hiwat != 0) ||
		    (p->packets_lowat != 0 && p->packets_hiwat == 0))
			return (EINVAL);
		if (p->packets_lowat != 0 &&	/* hiwat must be non-zero */
		    p->packets_lowat >= p->packets_hiwat)
			return (EINVAL);
		if ((p->bytes_lowat == 0 && p->bytes_hiwat != 0) ||
		    (p->bytes_lowat != 0 && p->bytes_hiwat == 0))
			return (EINVAL);
		if (p->bytes_lowat != 0 &&	/* hiwat must be non-zero */
		    p->bytes_lowat >= p->bytes_hiwat)
			return (EINVAL);
		if (p->interval_time != 0 &&
		    p->interval_time < IF_RXPOLL_INTERVALTIME_MIN)
			p->interval_time = IF_RXPOLL_INTERVALTIME_MIN;
	}

	if (!locked)
		lck_mtx_lock(&inp->input_lck);

	lck_mtx_assert(&inp->input_lck, LCK_MTX_ASSERT_OWNED);

	/*
	 * Normally, we'd reset the parameters to the auto-tuned values
	 * if the input thread detects a change in link rate.  If the
	 * driver provides its own parameters right after a link rate
	 * changes, but before the input thread gets to run, we want to
	 * make sure to keep the driver's values.  Clearing if_poll_update
	 * will achieve that.
	 */
	if (p != NULL && !locked && ifp->if_poll_update != 0)
		ifp->if_poll_update = 0;

	if ((inbw = ifnet_input_linkrate(ifp)) == 0 && p == NULL) {
		sample_holdtime = 0;	/* polling is disabled */
		inp->rxpoll_wlowat = inp->rxpoll_plowat =
		    inp->rxpoll_blowat = 0;
		inp->rxpoll_whiwat = inp->rxpoll_phiwat =
		    inp->rxpoll_bhiwat = (u_int32_t)-1;
		inp->rxpoll_plim = 0;
		inp->rxpoll_ival = IF_RXPOLL_INTERVALTIME_MIN;
	} else {
		u_int32_t plowat, phiwat, blowat, bhiwat, plim;
		u_int64_t ival;
		unsigned int n, i;

		for (n = 0, i = 0; rxpoll_tbl[i].speed != 0; i++) {
			if (inbw < rxpoll_tbl[i].speed)
				break;
			n = i;
		}

		/* auto-tune if caller didn't specify a value */
		plowat = ((p == NULL || p->packets_lowat == 0) ?
		    rxpoll_tbl[n].plowat : p->packets_lowat);
		phiwat = ((p == NULL || p->packets_hiwat == 0) ?
		    rxpoll_tbl[n].phiwat : p->packets_hiwat);
		blowat = ((p == NULL || p->bytes_lowat == 0) ?
		    rxpoll_tbl[n].blowat : p->bytes_lowat);
		bhiwat = ((p == NULL || p->bytes_hiwat == 0) ?
		    rxpoll_tbl[n].bhiwat : p->bytes_hiwat);
		plim = ((p == NULL || p->packets_limit == 0) ?
		    if_rxpoll_max : p->packets_limit);
		ival = ((p == NULL || p->interval_time == 0) ?
		    if_rxpoll_interval_time : p->interval_time);

		VERIFY(plowat != 0 && phiwat != 0);
		VERIFY(blowat != 0 && bhiwat != 0);
		VERIFY(ival >= IF_RXPOLL_INTERVALTIME_MIN);

		sample_holdtime = if_rxpoll_sample_holdtime;
		inp->rxpoll_wlowat = if_rxpoll_wlowat;
		inp->rxpoll_whiwat = if_rxpoll_whiwat;
		inp->rxpoll_plowat = plowat;
		inp->rxpoll_phiwat = phiwat;
		inp->rxpoll_blowat = blowat;
		inp->rxpoll_bhiwat = bhiwat;
		inp->rxpoll_plim = plim;
		inp->rxpoll_ival = ival;
	}

	net_nsectimer(&if_rxpoll_mode_holdtime, &inp->mode_holdtime);
	net_nsectimer(&sample_holdtime, &inp->sample_holdtime);

	if (dlil_verbose) {
		printf("%s: speed %llu bps, sample per %llu nsec, "
		    "poll interval %llu nsec, pkts per poll %u, "
		    "pkt limits [%u/%u], wreq limits [%u/%u], "
		    "bytes limits [%u/%u]\n", if_name(ifp),
		    inbw, sample_holdtime, inp->rxpoll_ival, inp->rxpoll_plim,
		    inp->rxpoll_plowat, inp->rxpoll_phiwat, inp->rxpoll_wlowat,
		    inp->rxpoll_whiwat, inp->rxpoll_blowat,
		    inp->rxpoll_bhiwat);
	}

	if (!locked)
		lck_mtx_unlock(&inp->input_lck);

	return (0);
}
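
/*
 * Illustrative only (values hypothetical): a driver overriding just the
 * packet watermarks while leaving everything else auto-tuned can pass a
 * mostly-zeroed parameter block, since each zero field above falls back
 * to the rxpoll_tbl/if_rxpoll_* defaults:
 *
 *	struct ifnet_poll_params p = {
 *		.packets_lowat = 8,
 *		.packets_hiwat = 64,
 *	};
 *	(void) dlil_rxpoll_set_params(ifp, &p, FALSE);
 */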
/*
 * Must be called on an attached ifnet (caller is expected to check.)
 */
errno_t
dlil_rxpoll_get_params(struct ifnet *ifp, struct ifnet_poll_params *p)
{
	struct dlil_threading_info *inp;

	VERIFY(ifp != NULL && p != NULL);
	if (!(ifp->if_eflags & IFEF_RXPOLL) || (inp = ifp->if_inp) == NULL)
		return (ENXIO);

	bzero(p, sizeof (*p));

	lck_mtx_lock(&inp->input_lck);
	p->packets_limit = inp->rxpoll_plim;
	p->packets_lowat = inp->rxpoll_plowat;
	p->packets_hiwat = inp->rxpoll_phiwat;
	p->bytes_lowat = inp->rxpoll_blowat;
	p->bytes_hiwat = inp->rxpoll_bhiwat;
	p->interval_time = inp->rxpoll_ival;
	lck_mtx_unlock(&inp->input_lck);

	return (0);
}
errno_t
ifnet_input(struct ifnet *ifp, struct mbuf *m_head,
    const struct ifnet_stat_increment_param *s)
{
	return (ifnet_input_common(ifp, m_head, NULL, s, FALSE, FALSE));
}

errno_t
ifnet_input_extended(struct ifnet *ifp, struct mbuf *m_head,
    struct mbuf *m_tail, const struct ifnet_stat_increment_param *s)
{
	return (ifnet_input_common(ifp, m_head, m_tail, s, TRUE, FALSE));
}

static errno_t
ifnet_input_common(struct ifnet *ifp, struct mbuf *m_head, struct mbuf *m_tail,
    const struct ifnet_stat_increment_param *s, boolean_t ext, boolean_t poll)
{
	ifnet_input_handler_func handler_func;
	struct ifnet_stat_increment_param _s;
	u_int32_t m_cnt = 0, m_size = 0;
	struct mbuf *last;
	errno_t err = 0;

	if ((m_head == NULL && !poll) || (s == NULL && ext)) {
		if (m_head != NULL)
			mbuf_freem_list(m_head);
		return (EINVAL);
	}

	VERIFY(m_head != NULL || (s == NULL && m_tail == NULL && !ext && poll));
	VERIFY(m_tail == NULL || ext);
	VERIFY(s != NULL || !ext);

	/*
	 * Drop the packet(s) if the parameters are invalid, or if the
	 * interface is no longer attached; else hold an IO refcnt to
	 * prevent it from being detached (will be released below.)
	 */
	if (ifp == NULL || (ifp != lo_ifp && !ifnet_is_attached(ifp, 1))) {
		if (m_head != NULL)
			mbuf_freem_list(m_head);
		return (EINVAL);
	}

	handler_func = ifp->if_input_handler;
	VERIFY(handler_func != NULL);

	if (m_tail == NULL) {
		last = m_head;
		while (m_head != NULL) {
#if IFNET_INPUT_SANITY_CHK
			if (dlil_input_sanity_check != 0)
				DLIL_INPUT_CHECK(last, ifp);
#endif /* IFNET_INPUT_SANITY_CHK */
			m_cnt++;
			m_size += m_length(last);
			if (mbuf_nextpkt(last) == NULL)
				break;
			last = mbuf_nextpkt(last);
		}
		m_tail = last;
	} else {
#if IFNET_INPUT_SANITY_CHK
		if (dlil_input_sanity_check != 0) {
			last = m_head;
			while (1) {
				DLIL_INPUT_CHECK(last, ifp);
				m_cnt++;
				m_size += m_length(last);
				if (mbuf_nextpkt(last) == NULL)
					break;
				last = mbuf_nextpkt(last);
			}
		} else {
			m_cnt = s->packets_in;
			m_size = s->bytes_in;
			last = m_tail;
		}
#else
		m_cnt = s->packets_in;
		m_size = s->bytes_in;
		last = m_tail;
#endif /* IFNET_INPUT_SANITY_CHK */
	}

	if (last != m_tail) {
		panic_plain("%s: invalid input packet chain for %s, "
		    "tail mbuf %p instead of %p\n", __func__, if_name(ifp),
		    m_tail, last);
	}

	/*
	 * Assert packet count only for the extended variant, for backwards
	 * compatibility, since this came directly from the device driver.
	 * Relax this assertion for input bytes, as the driver may have
	 * included the link-layer headers in the computation; hence
	 * m_size is just an approximation.
	 */
	if (ext && s->packets_in != m_cnt) {
		panic_plain("%s: input packet count mismatch for %s, "
		    "%d instead of %d\n", __func__, if_name(ifp),
		    s->packets_in, m_cnt);
	}

	bzero(&_s, sizeof (_s));
	if (s != NULL)
		_s = *s;
	_s.packets_in = m_cnt;
	_s.bytes_in = m_size;

	err = (*handler_func)(ifp, m_head, m_tail, &_s, poll, current_thread());

	if (ifp != lo_ifp) {
		/* Release the IO refcnt */
		ifnet_decr_iorefcnt(ifp);
	}

	return (err);
}
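
/*
 * Illustrative driver usage (rx_head/rx_tail/rx_cnt/rx_bytes are
 * hypothetical names): a driver that has assembled a chain of received
 * packets, linked via m_nextpkt with head, tail, count and byte totals
 * known, hands it to the stack as follows; the extended variant
 * requires the stat increment block:
 *
 *	struct ifnet_stat_increment_param s;
 *	bzero(&s, sizeof (s));
 *	s.packets_in = rx_cnt;
 *	s.bytes_in = rx_bytes;
 *	(void) ifnet_input_extended(ifp, rx_head, rx_tail, &s);
 */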
errno_t
ifnet_set_input_handler(struct ifnet *ifp, ifnet_input_handler_func fn)
{
	return (atomic_test_set_ptr(&ifp->if_input_handler,
	    dlil_input_handler, fn) ? 0 : EBUSY);
}

void
ifnet_reset_input_handler(struct ifnet *ifp)
{
	atomic_set_ptr(&ifp->if_input_handler, dlil_input_handler);
}

errno_t
ifnet_set_output_handler(struct ifnet *ifp, ifnet_output_handler_func fn)
{
	return (atomic_test_set_ptr(&ifp->if_output_handler,
	    dlil_output_handler, fn) ? 0 : EBUSY);
}

void
ifnet_reset_output_handler(struct ifnet *ifp)
{
	atomic_set_ptr(&ifp->if_output_handler, dlil_output_handler);
}

static errno_t
dlil_output_handler(struct ifnet *ifp, struct mbuf *m)
{
	return (ifp->if_output(ifp, m));
}

static errno_t
dlil_input_handler(struct ifnet *ifp, struct mbuf *m_head,
    struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
    boolean_t poll, struct thread *tp)
{
	struct dlil_threading_info *inp;
	u_int32_t m_cnt = s->packets_in;
	u_int32_t m_size = s->bytes_in;

	if ((inp = ifp->if_inp) == NULL)
		inp = dlil_main_input_thread;

	/*
	 * If there is a matching DLIL input thread associated with an
	 * affinity set, associate this thread with the same set.  We
	 * will only do this once.
	 */
	lck_mtx_lock_spin(&inp->input_lck);
	if (inp != dlil_main_input_thread && inp->net_affinity && tp != NULL &&
	    ((!poll && inp->wloop_thr == THREAD_NULL) ||
	    (poll && inp->poll_thr == THREAD_NULL))) {
		u_int32_t tag = inp->tag;

		if (poll) {
			VERIFY(inp->poll_thr == THREAD_NULL);
			inp->poll_thr = tp;
		} else {
			VERIFY(inp->wloop_thr == THREAD_NULL);
			inp->wloop_thr = tp;
		}
		lck_mtx_unlock(&inp->input_lck);

		/* Associate the current thread with the new affinity tag */
		(void) dlil_affinity_set(tp, tag);

		/*
		 * Take a reference on the current thread; during detach,
		 * we will need to refer to it in order to tear down its
		 * affinity.
		 */
		thread_reference(tp);
		lck_mtx_lock_spin(&inp->input_lck);
	}

	VERIFY(m_head != NULL || (m_tail == NULL && m_cnt == 0));

	/*
	 * Because of loopbacked multicast we cannot stuff the ifp in
	 * the rcvif of the packet header: loopback (lo0) packets use a
	 * dedicated list so that we can later associate them with lo_ifp
	 * on their way up the stack.  Packets for other interfaces without
	 * dedicated input threads go to the regular list.
	 */
	if (m_head != NULL) {
		if (inp == dlil_main_input_thread && ifp == lo_ifp) {
			struct dlil_main_threading_info *inpm =
			    (struct dlil_main_threading_info *)inp;
			_addq_multi(&inpm->lo_rcvq_pkts, m_head, m_tail,
			    m_cnt, m_size);
		} else {
			_addq_multi(&inp->rcvq_pkts, m_head, m_tail,
			    m_cnt, m_size);
		}
	}

#if IFNET_INPUT_SANITY_CHK
	if (dlil_input_sanity_check != 0) {
		u_int32_t count;
		struct mbuf *m0;

		for (m0 = m_head, count = 0; m0; m0 = mbuf_nextpkt(m0))
			count++;

		if (count != m_cnt) {
			panic_plain("%s: invalid packet count %d "
			    "(expected %d)\n", if_name(ifp),
			    count, m_cnt);
			/* NOTREACHED */
		}

		inp->input_mbuf_cnt += m_cnt;
	}
#endif /* IFNET_INPUT_SANITY_CHK */

	dlil_input_stats_add(s, inp, poll);
	/*
	 * If we're using the main input thread, synchronize the
	 * stats now since we have the interface context.  All
	 * other cases involving dedicated input threads will
	 * have their stats synchronized there.
	 */
	if (inp == dlil_main_input_thread)
		dlil_input_stats_sync(ifp, inp);

	inp->input_waiting |= DLIL_INPUT_WAITING;
	if (!(inp->input_waiting & DLIL_INPUT_RUNNING)) {
		inp->wtot++;
		wakeup_one((caddr_t)&inp->input_waiting);
	}
	lck_mtx_unlock(&inp->input_lck);

	return (0);
}
static void
ifnet_start_common(struct ifnet *ifp, int resetfc)
{
	if (!(ifp->if_eflags & IFEF_TXSTART))
		return;
	/*
	 * If the starter thread is inactive, signal it to do work,
	 * unless the interface is being flow controlled from below,
	 * e.g. a virtual interface being flow controlled by a real
	 * network interface beneath it.
	 */
	lck_mtx_lock_spin(&ifp->if_start_lock);
	if (resetfc) {
		ifp->if_start_flags &= ~IFSF_FLOW_CONTROLLED;
	} else if (ifp->if_start_flags & IFSF_FLOW_CONTROLLED) {
		lck_mtx_unlock(&ifp->if_start_lock);
		return;
	}
	ifp->if_start_req++;
	if (!ifp->if_start_active && ifp->if_start_thread != THREAD_NULL &&
	    (resetfc || !(ifp->if_eflags & IFEF_ENQUEUE_MULTI) ||
	    IFCQ_LEN(&ifp->if_snd) >= ifp->if_start_delay_qlen ||
	    ifp->if_start_delayed == 0)) {
		wakeup_one((caddr_t)&ifp->if_start_thread);
	}
	lck_mtx_unlock(&ifp->if_start_lock);
}

void
ifnet_start(struct ifnet *ifp)
{
	ifnet_start_common(ifp, 0);
}
static void
ifnet_start_thread_fn(void *v, wait_result_t w)
{
#pragma unused(w)
	struct ifnet *ifp = v;
	char ifname[IFNAMSIZ + 1];
	char thread_name[MAXTHREADNAMESIZE];
	struct timespec *ts = NULL;
	struct ifclassq *ifq = &ifp->if_snd;
	struct timespec delay_start_ts;

	/* Construct the name for this thread, and then apply it. */
	bzero(thread_name, sizeof(thread_name));
	snprintf(thread_name, sizeof(thread_name), "ifnet_start_%s", ifp->if_xname);
	thread_set_thread_name(ifp->if_start_thread, thread_name);

	/*
	 * Treat the dedicated starter thread for lo0 as equivalent to
	 * the driver workloop thread; if net_affinity is enabled for
	 * the main input thread, associate this starter thread to it
	 * by binding them with the same affinity tag.  This is done
	 * only once (as we only have one lo_ifp which never goes away.)
	 */
	if (ifp == lo_ifp) {
		struct dlil_threading_info *inp = dlil_main_input_thread;
		struct thread *tp = current_thread();

		lck_mtx_lock(&inp->input_lck);
		if (inp->net_affinity) {
			u_int32_t tag = inp->tag;

			VERIFY(inp->wloop_thr == THREAD_NULL);
			VERIFY(inp->poll_thr == THREAD_NULL);
			inp->wloop_thr = tp;
			lck_mtx_unlock(&inp->input_lck);

			/* Associate this thread with the affinity tag */
			(void) dlil_affinity_set(tp, tag);
		} else {
			lck_mtx_unlock(&inp->input_lck);
		}
	}

	snprintf(ifname, sizeof (ifname), "%s_starter", if_name(ifp));

	lck_mtx_lock_spin(&ifp->if_start_lock);

	for (;;) {
		if (ifp->if_start_thread != NULL)
			(void) msleep(&ifp->if_start_thread,
			    &ifp->if_start_lock,
			    (PZERO - 1) | PSPIN, ifname, ts);

		/* interface is detached? */
		if (ifp->if_start_thread == THREAD_NULL) {
			ifnet_set_start_cycle(ifp, NULL);
			lck_mtx_unlock(&ifp->if_start_lock);
			ifnet_purge(ifp);

			if (dlil_verbose) {
				printf("%s: starter thread terminated\n",
				    if_name(ifp));
			}

			/* for the extra refcnt from kernel_thread_start() */
			thread_deallocate(current_thread());
			/* this is the end */
			thread_terminate(current_thread());
			/* NOTREACHED */
			return;
		}

		ifp->if_start_active = 1;

		for (;;) {
			u_int32_t req = ifp->if_start_req;

			if (!IFCQ_IS_EMPTY(ifq) &&
			    (ifp->if_eflags & IFEF_ENQUEUE_MULTI) &&
			    ifp->if_start_delayed == 0 &&
			    IFCQ_LEN(ifq) < ifp->if_start_delay_qlen &&
			    (ifp->if_eflags & IFEF_DELAY_START)) {
				ifp->if_start_delayed = 1;
				ifnet_start_delayed++;
				break;
			} else {
				ifp->if_start_delayed = 0;
			}
			lck_mtx_unlock(&ifp->if_start_lock);

			/*
			 * If no longer attached, don't call start because ifp
			 * is being destroyed; else hold an IO refcnt to
			 * prevent the interface from being detached (will be
			 * released below.)
			 */
			if (!ifnet_is_attached(ifp, 1)) {
				lck_mtx_lock_spin(&ifp->if_start_lock);
				break;
			}

			/* invoke the driver's start routine */
			((*ifp->if_start)(ifp));

			/*
			 * Release the io ref count taken by ifnet_is_attached.
			 */
			ifnet_decr_iorefcnt(ifp);

			lck_mtx_lock_spin(&ifp->if_start_lock);

			/* if there's no pending request, we're done */
			if (req == ifp->if_start_req)
				break;
		}

		ifp->if_start_req = 0;
		ifp->if_start_active = 0;
		/*
		 * Wakeup N ns from now if rate-controlled by TBR, and if
		 * there are still packets in the send queue which haven't
		 * been dequeued so far; else sleep indefinitely (ts = NULL)
		 * until ifnet_start() is called again.
		 */
		ts = ((IFCQ_TBR_IS_ENABLED(ifq) && !IFCQ_IS_EMPTY(ifq)) ?
		    &ifp->if_start_cycle : NULL);

		if (ts == NULL && ifp->if_start_delayed == 1) {
			delay_start_ts.tv_sec = 0;
			delay_start_ts.tv_nsec = ifp->if_start_delay_timeout;
			ts = &delay_start_ts;
		}

		if (ts != NULL && ts->tv_sec == 0 && ts->tv_nsec == 0)
			ts = NULL;
	}

	/* NOTREACHED */
}
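
/*
 * Note the req snapshot pattern in the inner loop above: if_start_req
 * is captured before dropping if_start_lock to call the driver, and
 * compared afterwards; any ifnet_start_common() that ran in between
 * bumped the counter, so the loop calls the driver again instead of
 * sleeping on a request it would otherwise have missed.
 */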
void
ifnet_set_start_cycle(struct ifnet *ifp, struct timespec *ts)
{
	if (ts == NULL)
		bzero(&ifp->if_start_cycle, sizeof (ifp->if_start_cycle));
	else
		*(&ifp->if_start_cycle) = *ts;

	if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose)
		printf("%s: restart interval set to %lu nsec\n",
		    if_name(ifp), ts->tv_nsec);
}
static void
ifnet_poll(struct ifnet *ifp)
{
	/*
	 * If the poller thread is inactive, signal it to do work.
	 */
	lck_mtx_lock_spin(&ifp->if_poll_lock);
	ifp->if_poll_req++;
	if (!ifp->if_poll_active && ifp->if_poll_thread != THREAD_NULL) {
		wakeup_one((caddr_t)&ifp->if_poll_thread);
	}
	lck_mtx_unlock(&ifp->if_poll_lock);
}
static void
ifnet_poll_thread_fn(void *v, wait_result_t w)
{
#pragma unused(w)
	struct dlil_threading_info *inp;
	struct ifnet *ifp = v;
	char ifname[IFNAMSIZ + 1];
	struct timespec *ts = NULL;
	struct ifnet_stat_increment_param s;

	snprintf(ifname, sizeof (ifname), "%s_poller", if_name(ifp));
	bzero(&s, sizeof (s));

	lck_mtx_lock_spin(&ifp->if_poll_lock);

	inp = ifp->if_inp;
	VERIFY(inp != NULL);

	for (;;) {
		if (ifp->if_poll_thread != THREAD_NULL) {
			(void) msleep(&ifp->if_poll_thread, &ifp->if_poll_lock,
			    (PZERO - 1) | PSPIN, ifname, ts);
		}

		/* interface is detached (maybe while asleep)? */
		if (ifp->if_poll_thread == THREAD_NULL) {
			ifnet_set_poll_cycle(ifp, NULL);
			lck_mtx_unlock(&ifp->if_poll_lock);

			if (dlil_verbose) {
				printf("%s: poller thread terminated\n",
				    if_name(ifp));
			}

			/* for the extra refcnt from kernel_thread_start() */
			thread_deallocate(current_thread());
			/* this is the end */
			thread_terminate(current_thread());
			/* NOTREACHED */
			return;
		}

		ifp->if_poll_active = 1;
		for (;;) {
			struct mbuf *m_head, *m_tail;
			u_int32_t m_lim, m_cnt, m_totlen;
			u_int16_t req = ifp->if_poll_req;

			lck_mtx_unlock(&ifp->if_poll_lock);

			/*
			 * If no longer attached, there's nothing to do;
			 * else hold an IO refcnt to prevent the interface
			 * from being detached (will be released below.)
			 */
			if (!ifnet_is_attached(ifp, 1)) {
				lck_mtx_lock_spin(&ifp->if_poll_lock);
				break;
			}

			m_lim = (inp->rxpoll_plim != 0) ? inp->rxpoll_plim :
			    MAX((qlimit(&inp->rcvq_pkts)),
			    (inp->rxpoll_phiwat << 2));

			if (dlil_verbose > 1) {
				printf("%s: polling up to %d pkts, "
				    "pkts avg %d max %d, wreq avg %d, "
				    "bytes avg %d\n",
				    if_name(ifp), m_lim,
				    inp->rxpoll_pavg, inp->rxpoll_pmax,
				    inp->rxpoll_wavg, inp->rxpoll_bavg);
			}

			/* invoke the driver's input poll routine */
			((*ifp->if_input_poll)(ifp, 0, m_lim, &m_head, &m_tail,
			    &m_cnt, &m_totlen));

			if (m_head != NULL) {
				VERIFY(m_tail != NULL && m_cnt > 0);

				if (dlil_verbose > 1) {
					printf("%s: polled %d pkts, "
					    "pkts avg %d max %d, wreq avg %d, "
					    "bytes avg %d\n",
					    if_name(ifp), m_cnt,
					    inp->rxpoll_pavg, inp->rxpoll_pmax,
					    inp->rxpoll_wavg, inp->rxpoll_bavg);
				}

				/* stats are required for extended variant */
				s.packets_in = m_cnt;
				s.bytes_in = m_totlen;

				(void) ifnet_input_common(ifp, m_head, m_tail,
				    &s, TRUE, TRUE);
			} else {
				if (dlil_verbose > 1) {
					printf("%s: no packets, "
					    "pkts avg %d max %d, wreq avg %d, "
					    "bytes avg %d\n",
					    if_name(ifp), inp->rxpoll_pavg,
					    inp->rxpoll_pmax, inp->rxpoll_wavg,
					    inp->rxpoll_bavg);
				}

				(void) ifnet_input_common(ifp, NULL, NULL,
				    NULL, FALSE, TRUE);
			}

			/* Release the io ref count */
			ifnet_decr_iorefcnt(ifp);

			lck_mtx_lock_spin(&ifp->if_poll_lock);

			/* if there's no pending request, we're done */
			if (req == ifp->if_poll_req)
				break;
		}
		ifp->if_poll_req = 0;
		ifp->if_poll_active = 0;

		/*
		 * Wakeup N ns from now, else sleep indefinitely (ts = NULL)
		 * until ifnet_poll() is called again.
		 */
		ts = &ifp->if_poll_cycle;
		if (ts->tv_sec == 0 && ts->tv_nsec == 0)
			ts = NULL;
	}

	/* NOTREACHED */
}
void
ifnet_set_poll_cycle(struct ifnet *ifp, struct timespec *ts)
{
	if (ts == NULL)
		bzero(&ifp->if_poll_cycle, sizeof (ifp->if_poll_cycle));
	else
		*(&ifp->if_poll_cycle) = *ts;

	if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose)
		printf("%s: poll interval set to %lu nsec\n",
		    if_name(ifp), ts->tv_nsec);
}
void
ifnet_purge(struct ifnet *ifp)
{
	if (ifp != NULL && (ifp->if_eflags & IFEF_TXSTART))
		if_qflush(ifp, 0);
}
void
ifnet_update_sndq(struct ifclassq *ifq, cqev_t ev)
{
	IFCQ_LOCK_ASSERT_HELD(ifq);

	if (!(IFCQ_IS_READY(ifq)))
		return;

	if (IFCQ_TBR_IS_ENABLED(ifq)) {
		struct tb_profile tb = { ifq->ifcq_tbr.tbr_rate_raw,
		    ifq->ifcq_tbr.tbr_percent, 0 };
		(void) ifclassq_tbr_set(ifq, &tb, FALSE);
	}

	ifclassq_update(ifq, ev);
}
void
ifnet_update_rcv(struct ifnet *ifp, cqev_t ev)
{
	switch (ev) {
	case CLASSQ_EV_LINK_BANDWIDTH:
		if (net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL))
			ifp->if_poll_update++;
		break;

	default:
		break;
	}
}
errno_t
ifnet_set_output_sched_model(struct ifnet *ifp, u_int32_t model)
{
	struct ifclassq *ifq;
	u_int32_t omodel;
	errno_t err;

	if (ifp == NULL || model >= IFNET_SCHED_MODEL_MAX)
		return (EINVAL);
	else if (!(ifp->if_eflags & IFEF_TXSTART))
		return (ENXIO);

	ifq = &ifp->if_snd;
	IFCQ_LOCK(ifq);
	omodel = ifp->if_output_sched_model;
	ifp->if_output_sched_model = model;
	if ((err = ifclassq_pktsched_setup(ifq)) != 0)
		ifp->if_output_sched_model = omodel;
	IFCQ_UNLOCK(ifq);

	return (err);
}
errno_t
ifnet_set_sndq_maxlen(struct ifnet *ifp, u_int32_t maxqlen)
{
	if (ifp == NULL)
		return (EINVAL);
	else if (!(ifp->if_eflags & IFEF_TXSTART))
		return (ENXIO);

	ifclassq_set_maxlen(&ifp->if_snd, maxqlen);

	return (0);
}
errno_t
ifnet_get_sndq_maxlen(struct ifnet *ifp, u_int32_t *maxqlen)
{
	if (ifp == NULL || maxqlen == NULL)
		return (EINVAL);
	else if (!(ifp->if_eflags & IFEF_TXSTART))
		return (ENXIO);

	*maxqlen = ifclassq_get_maxlen(&ifp->if_snd);

	return (0);
}
errno_t
ifnet_get_sndq_len(struct ifnet *ifp, u_int32_t *pkts)
{
	errno_t err;

	if (ifp == NULL || pkts == NULL)
		err = EINVAL;
	else if (!(ifp->if_eflags & IFEF_TXSTART))
		err = ENXIO;
	else
		err = ifclassq_get_len(&ifp->if_snd, MBUF_SC_UNSPEC,
		    pkts, NULL);

	return (err);
}
errno_t
ifnet_get_service_class_sndq_len(struct ifnet *ifp, mbuf_svc_class_t sc,
    u_int32_t *pkts, u_int32_t *bytes)
{
	errno_t err;

	if (ifp == NULL || !MBUF_VALID_SC(sc) ||
	    (pkts == NULL && bytes == NULL))
		err = EINVAL;
	else if (!(ifp->if_eflags & IFEF_TXSTART))
		err = ENXIO;
	else
		err = ifclassq_get_len(&ifp->if_snd, sc, pkts, bytes);

	return (err);
}
errno_t
ifnet_set_rcvq_maxlen(struct ifnet *ifp, u_int32_t maxqlen)
{
	struct dlil_threading_info *inp;

	if (ifp == NULL)
		return (EINVAL);
	else if (!(ifp->if_eflags & IFEF_RXPOLL) || ifp->if_inp == NULL)
		return (ENXIO);

	if (maxqlen == 0)
		maxqlen = if_rcvq_maxlen;
	else if (maxqlen < IF_RCVQ_MINLEN)
		maxqlen = IF_RCVQ_MINLEN;

	inp = ifp->if_inp;
	lck_mtx_lock(&inp->input_lck);
	qlimit(&inp->rcvq_pkts) = maxqlen;
	lck_mtx_unlock(&inp->input_lck);

	return (0);
}
errno_t
ifnet_get_rcvq_maxlen(struct ifnet *ifp, u_int32_t *maxqlen)
{
	struct dlil_threading_info *inp;

	if (ifp == NULL || maxqlen == NULL)
		return (EINVAL);
	else if (!(ifp->if_eflags & IFEF_RXPOLL) || ifp->if_inp == NULL)
		return (ENXIO);

	inp = ifp->if_inp;
	lck_mtx_lock(&inp->input_lck);
	*maxqlen = qlimit(&inp->rcvq_pkts);
	lck_mtx_unlock(&inp->input_lck);

	return (0);
}
errno_t
ifnet_enqueue(struct ifnet *ifp, struct mbuf *m)
{
	int error;
	struct timespec now;
	u_int64_t now_nsec;

	if (ifp == NULL || m == NULL || !(m->m_flags & M_PKTHDR) ||
	    m->m_nextpkt != NULL) {
		if (m != NULL)
			m_freem_list(m);
		return (EINVAL);
	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    !(ifp->if_refflags & IFRF_ATTACHED)) {
		/* flag tested without lock for performance */
		m_freem(m);
		return (ENXIO);
	} else if (!(ifp->if_flags & IFF_UP)) {
		m_freem(m);
		return (ENETDOWN);
	}

	nanouptime(&now);
	net_timernsec(&now, &now_nsec);
	m->m_pkthdr.pkt_timestamp = now_nsec;
	m->m_pkthdr.pkt_flags &= ~PKTF_DRV_TS_VALID;

	if (ifp->if_eflags & IFEF_ENQUEUE_MULTI) {
		/*
		 * If the driver chose to delay start callback for
		 * coalescing multiple packets, then use the following
		 * heuristics to make sure that start callback will
		 * be delayed only when bulk data transfer is detected.
		 * 1. number of packets enqueued in (delay_win * 2) is
		 *    greater than or equal to the delay qlen.
		 * 2. If delay_start is enabled it will stay enabled for
		 *    another 10 idle windows.  This is to take into account
		 *    variable RTT and burst traffic.
		 * 3. If the time elapsed since last enqueue is more
		 *    than 200ms we disable delaying start callback.  This
		 *    is to take idle time into account.
		 */
		u_int64_t dwin = (ifp->if_start_delay_timeout << 1);
		if (ifp->if_start_delay_swin > 0) {
			if ((ifp->if_start_delay_swin + dwin) > now_nsec) {
				ifp->if_start_delay_cnt++;
			} else if ((now_nsec - ifp->if_start_delay_swin)
			    >= (200 * 1000 * 1000)) {
				ifp->if_start_delay_swin = now_nsec;
				ifp->if_start_delay_cnt = 1;
				ifp->if_start_delay_idle = 0;
				if (ifp->if_eflags & IFEF_DELAY_START) {
					ifp->if_eflags &=
					    ~(IFEF_DELAY_START);
					ifnet_delay_start_disabled++;
				}
			} else {
				if (ifp->if_start_delay_cnt >=
				    ifp->if_start_delay_qlen) {
					ifp->if_eflags |= IFEF_DELAY_START;
					ifp->if_start_delay_idle = 0;
				} else {
					if (ifp->if_start_delay_idle >= 10) {
						ifp->if_eflags &=
						    ~(IFEF_DELAY_START);
						ifnet_delay_start_disabled++;
					} else {
						ifp->if_start_delay_idle++;
					}
				}
				ifp->if_start_delay_swin = now_nsec;
				ifp->if_start_delay_cnt = 1;
			}
		} else {
			ifp->if_start_delay_swin = now_nsec;
			ifp->if_start_delay_cnt = 1;
			ifp->if_start_delay_idle = 0;
			ifp->if_eflags &= ~(IFEF_DELAY_START);
		}
	} else {
		ifp->if_eflags &= ~(IFEF_DELAY_START);
	}

	/* enqueue the packet */
	error = ifclassq_enqueue(&ifp->if_snd, m);

	/*
	 * Tell the driver to start dequeueing; do this even when the queue
	 * for the packet is suspended (EQSUSPENDED), as the driver could still
	 * be dequeueing from other unsuspended queues.
	 */
	if (!(ifp->if_eflags & IFEF_ENQUEUE_MULTI) &&
	    (error == 0 || error == EQFULL || error == EQSUSPENDED))
		ifnet_start(ifp);

	return (error);
}
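
/*
 * Worked example of the delay-start heuristic above, with a
 * hypothetical 10ms if_start_delay_timeout: dwin is then 20ms.  A
 * burst that enqueues if_start_delay_qlen packets within one 20ms
 * window sets IFEF_DELAY_START, so subsequent enqueues defer the start
 * callback and let the driver coalesce.  Once enqueues arrive more
 * than 200ms apart, or the window stays below the threshold for 10
 * rounds, the flag is cleared and every enqueue kicks ifnet_start()
 * immediately again.
 */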
errno_t
ifnet_dequeue(struct ifnet *ifp, struct mbuf **mp)
{
	errno_t rc;

	if (ifp == NULL || mp == NULL)
		return (EINVAL);
	else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX)
		return (ENXIO);
	if (!ifnet_is_attached(ifp, 1))
		return (ENXIO);

	rc = ifclassq_dequeue(&ifp->if_snd, 1, CLASSQ_DEQUEUE_MAX_BYTE_LIMIT,
	    mp, NULL, NULL, NULL);
	ifnet_decr_iorefcnt(ifp);

	return (rc);
}
errno_t
ifnet_dequeue_service_class(struct ifnet *ifp, mbuf_svc_class_t sc,
    struct mbuf **mp)
{
	errno_t rc;

	if (ifp == NULL || mp == NULL || !MBUF_VALID_SC(sc))
		return (EINVAL);
	else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX)
		return (ENXIO);
	if (!ifnet_is_attached(ifp, 1))
		return (ENXIO);

	rc = ifclassq_dequeue_sc(&ifp->if_snd, sc, 1, mp, NULL, NULL, NULL);
	ifnet_decr_iorefcnt(ifp);

	return (rc);
}
errno_t
ifnet_dequeue_multi(struct ifnet *ifp, u_int32_t pkt_limit,
    struct mbuf **head, struct mbuf **tail, u_int32_t *cnt, u_int32_t *len)
{
	errno_t rc;

	if (ifp == NULL || head == NULL || pkt_limit < 1)
		return (EINVAL);
	else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX)
		return (ENXIO);
	if (!ifnet_is_attached(ifp, 1))
		return (ENXIO);

	rc = ifclassq_dequeue(&ifp->if_snd, pkt_limit,
	    CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, head, tail, cnt, len);
	ifnet_decr_iorefcnt(ifp);

	return (rc);
}
errno_t
ifnet_dequeue_multi_bytes(struct ifnet *ifp, u_int32_t byte_limit,
    struct mbuf **head, struct mbuf **tail, u_int32_t *cnt, u_int32_t *len)
{
	errno_t rc;

	if (ifp == NULL || head == NULL || byte_limit < 1)
		return (EINVAL);
	else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX)
		return (ENXIO);
	if (!ifnet_is_attached(ifp, 1))
		return (ENXIO);

	rc = ifclassq_dequeue(&ifp->if_snd, CLASSQ_DEQUEUE_MAX_PKT_LIMIT,
	    byte_limit, head, tail, cnt, len);
	ifnet_decr_iorefcnt(ifp);

	return (rc);
}
errno_t
ifnet_dequeue_service_class_multi(struct ifnet *ifp, mbuf_svc_class_t sc,
    u_int32_t pkt_limit, struct mbuf **head, struct mbuf **tail, u_int32_t *cnt,
    u_int32_t *len)
{
	errno_t rc;

	if (ifp == NULL || head == NULL || pkt_limit < 1 ||
	    !MBUF_VALID_SC(sc))
		return (EINVAL);
	else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX)
		return (ENXIO);
	if (!ifnet_is_attached(ifp, 1))
		return (ENXIO);

	rc = ifclassq_dequeue_sc(&ifp->if_snd, sc, pkt_limit, head,
	    tail, cnt, len);
	ifnet_decr_iorefcnt(ifp);

	return (rc);
}
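
/*
 * Illustrative only (mydrv_start/mydrv_tx are hypothetical): a driver's
 * if_start routine, invoked from ifnet_start_thread_fn() above, would
 * typically drain the send queue with one of the dequeue variants until
 * the queue is empty:
 *
 *	static void
 *	mydrv_start(struct ifnet *ifp)
 *	{
 *		struct mbuf *m;
 *
 *		while (ifnet_dequeue(ifp, &m) == 0)
 *			mydrv_tx(ifp, m);	// hand to hardware
 *	}
 */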
static errno_t
ifnet_framer_stub(struct ifnet *ifp, struct mbuf **m,
    const struct sockaddr *dest, const char *dest_linkaddr,
    const char *frame_type, u_int32_t *pre, u_int32_t *post)
{
	if (pre != NULL)
		*pre = 0;
	if (post != NULL)
		*post = 0;

	return (ifp->if_framer_legacy(ifp, m, dest, dest_linkaddr, frame_type));
}
static int
dlil_interface_filters_input(struct ifnet *ifp, struct mbuf **m_p,
    char **frame_header_p, protocol_family_t protocol_family)
{
	struct ifnet_filter *filter;

	/*
	 * Pass the inbound packet to the interface filters
	 */
	lck_mtx_lock_spin(&ifp->if_flt_lock);
	/* prevent filter list from changing in case we drop the lock */
	if_flt_monitor_busy(ifp);
	TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
		int result;

		if (!filter->filt_skip && filter->filt_input != NULL &&
		    (filter->filt_protocol == 0 ||
		    filter->filt_protocol == protocol_family)) {
			lck_mtx_unlock(&ifp->if_flt_lock);

			result = (*filter->filt_input)(filter->filt_cookie,
			    ifp, protocol_family, m_p, frame_header_p);

			lck_mtx_lock_spin(&ifp->if_flt_lock);
			if (result != 0) {
				/* we're done with the filter list */
				if_flt_monitor_unbusy(ifp);
				lck_mtx_unlock(&ifp->if_flt_lock);
				return (result);
			}
		}
	}
	/* we're done with the filter list */
	if_flt_monitor_unbusy(ifp);
	lck_mtx_unlock(&ifp->if_flt_lock);

	/*
	 * Strip away M_PROTO1 bit prior to sending packet up the stack as
	 * it is meant to be local to a subsystem -- if_bridge for M_PROTO1
	 */
	if (*m_p != NULL)
		(*m_p)->m_flags &= ~M_PROTO1;

	return (0);
}
static int
dlil_interface_filters_output(struct ifnet *ifp, struct mbuf **m_p,
    protocol_family_t protocol_family)
{
	struct ifnet_filter *filter;

	/*
	 * Pass the outbound packet to the interface filters
	 */
	lck_mtx_lock_spin(&ifp->if_flt_lock);
	/* prevent filter list from changing in case we drop the lock */
	if_flt_monitor_busy(ifp);
	TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
		int result;

		if (!filter->filt_skip && filter->filt_output != NULL &&
		    (filter->filt_protocol == 0 ||
		    filter->filt_protocol == protocol_family)) {
			lck_mtx_unlock(&ifp->if_flt_lock);

			result = filter->filt_output(filter->filt_cookie, ifp,
			    protocol_family, m_p);

			lck_mtx_lock_spin(&ifp->if_flt_lock);
			if (result != 0) {
				/* we're done with the filter list */
				if_flt_monitor_unbusy(ifp);
				lck_mtx_unlock(&ifp->if_flt_lock);
				return (result);
			}
		}
	}
	/* we're done with the filter list */
	if_flt_monitor_unbusy(ifp);
	lck_mtx_unlock(&ifp->if_flt_lock);

	return (0);
}
static void
dlil_ifproto_input(struct if_proto * ifproto, mbuf_t m)
{
	int error;

	if (ifproto->proto_kpi == kProtoKPI_v1) {
		/* Version 1 protocols get one packet at a time */
		while (m != NULL) {
			char *frame_header;
			mbuf_t next_packet;

			next_packet = m->m_nextpkt;
			m->m_nextpkt = NULL;
			frame_header = m->m_pkthdr.pkt_hdr;
			m->m_pkthdr.pkt_hdr = NULL;
			error = (*ifproto->kpi.v1.input)(ifproto->ifp,
			    ifproto->protocol_family, m, frame_header);
			if (error != 0 && error != EJUSTRETURN)
				m_freem(m);
			m = next_packet;
		}
	} else if (ifproto->proto_kpi == kProtoKPI_v2) {
		/* Version 2 protocols support packet lists */
		error = (*ifproto->kpi.v2.input)(ifproto->ifp,
		    ifproto->protocol_family, m);
		if (error != 0 && error != EJUSTRETURN)
			m_freem_list(m);
	}
}
static void
dlil_input_stats_add(const struct ifnet_stat_increment_param *s,
    struct dlil_threading_info *inp, boolean_t poll)
{
	struct ifnet_stat_increment_param *d = &inp->stats;

	if (s->packets_in != 0)
		d->packets_in += s->packets_in;
	if (s->bytes_in != 0)
		d->bytes_in += s->bytes_in;
	if (s->errors_in != 0)
		d->errors_in += s->errors_in;

	if (s->packets_out != 0)
		d->packets_out += s->packets_out;
	if (s->bytes_out != 0)
		d->bytes_out += s->bytes_out;
	if (s->errors_out != 0)
		d->errors_out += s->errors_out;

	if (s->collisions != 0)
		d->collisions += s->collisions;
	if (s->dropped != 0)
		d->dropped += s->dropped;

	if (poll)
		PKTCNTR_ADD(&inp->tstats, s->packets_in, s->bytes_in);
}
static void
dlil_input_stats_sync(struct ifnet *ifp, struct dlil_threading_info *inp)
{
	struct ifnet_stat_increment_param *s = &inp->stats;

	/*
	 * Use of atomic operations is unavoidable here because
	 * these stats may also be incremented elsewhere via KPIs.
	 */
	if (s->packets_in != 0) {
		atomic_add_64(&ifp->if_data.ifi_ipackets, s->packets_in);
		s->packets_in = 0;
	}
	if (s->bytes_in != 0) {
		atomic_add_64(&ifp->if_data.ifi_ibytes, s->bytes_in);
		s->bytes_in = 0;
	}
	if (s->errors_in != 0) {
		atomic_add_64(&ifp->if_data.ifi_ierrors, s->errors_in);
		s->errors_in = 0;
	}

	if (s->packets_out != 0) {
		atomic_add_64(&ifp->if_data.ifi_opackets, s->packets_out);
		s->packets_out = 0;
	}
	if (s->bytes_out != 0) {
		atomic_add_64(&ifp->if_data.ifi_obytes, s->bytes_out);
		s->bytes_out = 0;
	}
	if (s->errors_out != 0) {
		atomic_add_64(&ifp->if_data.ifi_oerrors, s->errors_out);
		s->errors_out = 0;
	}

	if (s->collisions != 0) {
		atomic_add_64(&ifp->if_data.ifi_collisions, s->collisions);
		s->collisions = 0;
	}
	if (s->dropped != 0) {
		atomic_add_64(&ifp->if_data.ifi_iqdrops, s->dropped);
		s->dropped = 0;
	}

	/*
	 * If we went over the threshold, notify NetworkStatistics.
	 */
	if (ifp->if_data_threshold &&
	    (ifp->if_ibytes + ifp->if_obytes) - ifp->if_dt_bytes >
	    ifp->if_data_threshold) {
		ifp->if_dt_bytes = ifp->if_ibytes + ifp->if_obytes;

		lck_mtx_convert_spin(&inp->input_lck);
		nstat_ifnet_threshold_reached(ifp->if_index);
	}

	/*
	 * No need for atomic operations as they are modified here
	 * only from within the DLIL input thread context.
	 */
	if (inp->tstats.packets != 0) {
		inp->pstats.ifi_poll_packets += inp->tstats.packets;
		inp->tstats.packets = 0;
	}
	if (inp->tstats.bytes != 0) {
		inp->pstats.ifi_poll_bytes += inp->tstats.bytes;
		inp->tstats.bytes = 0;
	}
}
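
/*
 * One subtlety above: inp->input_lck is typically held in spin mode on
 * this path, so lck_mtx_convert_spin() promotes it to a full mutex
 * before calling out to nstat_ifnet_threshold_reached(), which may
 * block; calling a potentially blocking function while spinning would
 * not be safe.
 */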
__private_extern__ void
dlil_input_packet_list(struct ifnet *ifp, struct mbuf *m)
{
	return (dlil_input_packet_list_common(ifp, m, 0,
	    IFNET_MODEL_INPUT_POLL_OFF, FALSE));
}

__private_extern__ void
dlil_input_packet_list_extended(struct ifnet *ifp, struct mbuf *m,
    u_int32_t cnt, ifnet_model_t mode)
{
	return (dlil_input_packet_list_common(ifp, m, cnt, mode, TRUE));
}
static void
dlil_input_packet_list_common(struct ifnet *ifp_param, struct mbuf *m,
    u_int32_t cnt, ifnet_model_t mode, boolean_t ext)
{
	int error = 0;
	protocol_family_t protocol_family;
	mbuf_t next_packet;
	ifnet_t ifp = ifp_param;
	char *frame_header;
	struct if_proto *last_ifproto = NULL;
	mbuf_t pkt_first = NULL;
	mbuf_t *pkt_next = NULL;
	u_int32_t poll_thresh = 0, poll_ival = 0;

	KERNEL_DEBUG(DBG_FNC_DLIL_INPUT | DBG_FUNC_START, 0, 0, 0, 0, 0);

	if (ext && mode == IFNET_MODEL_INPUT_POLL_ON && cnt > 1 &&
	    (poll_ival = if_rxpoll_interval_pkts) > 0)
		poll_thresh = cnt;

	while (m != NULL) {
		struct if_proto *ifproto = NULL;
		int iorefcnt = 0;
		uint32_t pktf_mask;	/* pkt flags to preserve */

		if (ifp_param == NULL)
			ifp = m->m_pkthdr.rcvif;

		if ((ifp->if_eflags & IFEF_RXPOLL) && poll_thresh != 0 &&
		    poll_ival > 0 && (--poll_thresh % poll_ival) == 0)
			ifnet_poll(ifp);

		/* Check if this mbuf looks valid */
		MBUF_INPUT_CHECK(m, ifp);

		next_packet = m->m_nextpkt;
		m->m_nextpkt = NULL;
		frame_header = m->m_pkthdr.pkt_hdr;
		m->m_pkthdr.pkt_hdr = NULL;

		/*
		 * Get an IO reference count if the interface is not
		 * loopback (lo0) and it is attached; lo0 never goes
		 * away, so optimize for that.
		 */
		if (ifp != lo_ifp) {
			if (!ifnet_is_attached(ifp, 1)) {
				m_freem(m);
				goto next;
			}
			iorefcnt = 1;
			pktf_mask = 0;
		} else {
			/*
			 * If this arrived on lo0, preserve interface addr
			 * info to allow for connectivity between loopback
			 * and local interface addresses.
			 */
			pktf_mask = (PKTF_LOOP|PKTF_IFAINFO);
		}

		/* make sure packet comes in clean */
		m_classifier_init(m, pktf_mask);

		ifp_inc_traffic_class_in(ifp, m);

		/* find which protocol family this packet is for */
		ifnet_lock_shared(ifp);
		error = (*ifp->if_demux)(ifp, m, frame_header,
		    &protocol_family);
		ifnet_lock_done(ifp);
		if (error != 0) {
			if (error == EJUSTRETURN)
				goto next;
			protocol_family = 0;
		}

		if (hwcksum_dbg != 0 && !(ifp->if_flags & IFF_LOOPBACK) &&
		    !(m->m_pkthdr.pkt_flags & PKTF_LOOP))
			dlil_input_cksum_dbg(ifp, m, frame_header,
			    protocol_family);

		/*
		 * For partial checksum offload, we expect the driver to
		 * set the start offset indicating the start of the span
		 * that is covered by the hardware-computed checksum;
		 * adjust this start offset accordingly because the data
		 * pointer has been advanced beyond the link-layer header.
		 *
		 * Don't adjust if the interface is a bridge member, as
		 * the adjustment will occur from the context of the
		 * bridge interface during input.
		 */
		if (ifp->if_bridge == NULL && (m->m_pkthdr.csum_flags &
		    (CSUM_DATA_VALID | CSUM_PARTIAL)) ==
		    (CSUM_DATA_VALID | CSUM_PARTIAL)) {
			int adj;

			if (frame_header == NULL ||
			    frame_header < (char *)mbuf_datastart(m) ||
			    frame_header > (char *)m->m_data ||
			    (adj = (m->m_data - frame_header)) >
			    m->m_pkthdr.csum_rx_start) {
				m->m_pkthdr.csum_data = 0;
				m->m_pkthdr.csum_flags &= ~CSUM_DATA_VALID;
				hwcksum_in_invalidated++;
			} else {
				m->m_pkthdr.csum_rx_start -= adj;
			}
		}

		pktap_input(ifp, protocol_family, m, frame_header);

		if (m->m_flags & (M_BCAST|M_MCAST))
			atomic_add_64(&ifp->if_imcasts, 1);

		/* run interface filters, exclude VLAN packets PR-3586856 */
		if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) == 0) {
			error = dlil_interface_filters_input(ifp, &m,
			    &frame_header, protocol_family);
			if (error != 0) {
				if (error != EJUSTRETURN)
					m_freem(m);
				goto next;
			}
		}
		if (error != 0 || ((m->m_flags & M_PROMISC) != 0)) {
			m_freem(m);
			goto next;
		}

		/* Lookup the protocol attachment to this interface */
		if (protocol_family == 0) {
			ifproto = NULL;
		} else if (last_ifproto != NULL && last_ifproto->ifp == ifp &&
		    (last_ifproto->protocol_family == protocol_family)) {
			VERIFY(ifproto == NULL);
			ifproto = last_ifproto;
			if_proto_ref(last_ifproto);
		} else {
			VERIFY(ifproto == NULL);
			ifnet_lock_shared(ifp);
			/* callee holds a proto refcnt upon success */
			ifproto = find_attached_proto(ifp, protocol_family);
			ifnet_lock_done(ifp);
		}
		if (ifproto == NULL) {
			/* no protocol for this packet, discard */
			m_freem(m);
			goto next;
		}
		if (ifproto != last_ifproto) {
			if (last_ifproto != NULL) {
				/* pass up the list for the previous protocol */
				dlil_ifproto_input(last_ifproto, pkt_first);
				pkt_first = NULL;
				if_proto_free(last_ifproto);
			}
			last_ifproto = ifproto;
			if_proto_ref(ifproto);
		}
		/* extend the list */
		m->m_pkthdr.pkt_hdr = frame_header;
		if (pkt_first == NULL) {
			pkt_first = m;
		} else {
			*pkt_next = m;
		}
		pkt_next = &m->m_nextpkt;

next:
		if (next_packet == NULL && last_ifproto != NULL) {
			/* pass up the last list of packets */
			dlil_ifproto_input(last_ifproto, pkt_first);
			if_proto_free(last_ifproto);
			last_ifproto = NULL;
		}
		if (ifproto != NULL) {
			if_proto_free(ifproto);
			ifproto = NULL;
		}

		m = next_packet;

		/* update the driver's multicast filter, if needed */
		if (ifp->if_updatemcasts > 0 && if_mcasts_update(ifp) == 0)
			ifp->if_updatemcasts = 0;
		if (iorefcnt == 1)
			ifnet_decr_iorefcnt(ifp);
	}

	KERNEL_DEBUG(DBG_FNC_DLIL_INPUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
}
int
if_mcasts_update(struct ifnet *ifp)
{
	int err;

	err = ifnet_ioctl(ifp, 0, SIOCADDMULTI, NULL);
	if (err == EAFNOSUPPORT)
		err = 0;
	printf("%s: %s %d suspended link-layer multicast membership(s) "
	    "(err=%d)\n", if_name(ifp),
	    (err == 0 ? "successfully restored" : "failed to restore"),
	    ifp->if_updatemcasts, err);

	/* just return success */
	return (0);
}
/* If ifp is set, we will increment the generation for the interface */
int
dlil_post_complete_msg(struct ifnet *ifp, struct kev_msg *event)
{
	if (ifp != NULL)
		ifnet_increment_generation(ifp);

#if NECP
	necp_update_all_clients();
#endif /* NECP */

	return (kev_post_msg(event));
}
#define	TMP_IF_PROTO_ARR_SIZE	10
static int
dlil_event_internal(struct ifnet *ifp, struct kev_msg *event, bool update_generation)
{
	struct ifnet_filter *filter = NULL;
	struct if_proto *proto = NULL;
	int if_proto_count = 0;
	struct if_proto **tmp_ifproto_arr = NULL;
	struct if_proto *tmp_ifproto_stack_arr[TMP_IF_PROTO_ARR_SIZE] = {NULL};
	int tmp_ifproto_arr_idx = 0;
	bool tmp_malloc = false;

	/*
	 * Pass the event to the interface filters
	 */
	lck_mtx_lock_spin(&ifp->if_flt_lock);
	/* prevent filter list from changing in case we drop the lock */
	if_flt_monitor_busy(ifp);
	TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
		if (filter->filt_event != NULL) {
			lck_mtx_unlock(&ifp->if_flt_lock);

			filter->filt_event(filter->filt_cookie, ifp,
			    filter->filt_protocol, event);

			lck_mtx_lock_spin(&ifp->if_flt_lock);
		}
	}
	/* we're done with the filter list */
	if_flt_monitor_unbusy(ifp);
	lck_mtx_unlock(&ifp->if_flt_lock);

	/* Get an io ref count if the interface is attached */
	if (!ifnet_is_attached(ifp, 1))
		goto done;

	/*
	 * An embedded tmp_list_entry in if_proto may still get
	 * over-written by another thread after giving up ifnet lock,
	 * therefore we are avoiding embedded pointers here.
	 */
	ifnet_lock_shared(ifp);
	if_proto_count = dlil_ifp_proto_count(ifp);
	if (if_proto_count) {
		int i;
		VERIFY(ifp->if_proto_hash != NULL);
		if (if_proto_count <= TMP_IF_PROTO_ARR_SIZE) {
			tmp_ifproto_arr = tmp_ifproto_stack_arr;
		} else {
			MALLOC(tmp_ifproto_arr, struct if_proto **,
			    sizeof (*tmp_ifproto_arr) * if_proto_count,
			    M_TEMP, M_WAITOK);
			if (tmp_ifproto_arr == NULL) {
				ifnet_lock_done(ifp);
				goto cleanup;
			}
			tmp_malloc = true;
		}

		for (i = 0; i < PROTO_HASH_SLOTS; i++) {
			SLIST_FOREACH(proto, &ifp->if_proto_hash[i],
			    next_hash) {
				if_proto_ref(proto);
				tmp_ifproto_arr[tmp_ifproto_arr_idx] = proto;
				tmp_ifproto_arr_idx++;
			}
		}
		VERIFY(if_proto_count == tmp_ifproto_arr_idx);
	}
	ifnet_lock_done(ifp);

	for (tmp_ifproto_arr_idx = 0; tmp_ifproto_arr_idx < if_proto_count;
	    tmp_ifproto_arr_idx++) {
		proto = tmp_ifproto_arr[tmp_ifproto_arr_idx];
		VERIFY(proto != NULL);
		proto_media_event eventp =
		    (proto->proto_kpi == kProtoKPI_v1 ?
		    proto->kpi.v1.event :
		    proto->kpi.v2.event);

		if (eventp != NULL) {
			eventp(ifp, proto->protocol_family,
			    event);
		}
		if_proto_free(proto);
	}

cleanup:
	if (tmp_malloc) {
		FREE(tmp_ifproto_arr, M_TEMP);
	}

	/* Pass the event to the interface */
	if (ifp->if_event != NULL)
		ifp->if_event(ifp, event);

	/* Release the io ref count */
	ifnet_decr_iorefcnt(ifp);

done:
	return (dlil_post_complete_msg(update_generation ? ifp : NULL, event));
}
errno_t
ifnet_event(ifnet_t ifp, struct kern_event_msg *event)
{
	struct kev_msg kev_msg;
	int result = 0;

	if (ifp == NULL || event == NULL)
		return (EINVAL);

	bzero(&kev_msg, sizeof (kev_msg));
	kev_msg.vendor_code = event->vendor_code;
	kev_msg.kev_class = event->kev_class;
	kev_msg.kev_subclass = event->kev_subclass;
	kev_msg.event_code = event->event_code;
	kev_msg.dv[0].data_ptr = &event->event_data[0];
	kev_msg.dv[0].data_length = event->total_size - KEV_MSG_HEADER_SIZE;
	kev_msg.dv[1].data_length = 0;

	result = dlil_event_internal(ifp, &kev_msg, TRUE);

	return (result);
}
#if CONFIG_MACF_NET
#include <netinet/ip6.h>
#include <netinet/ip.h>
static int
dlil_get_socket_type(struct mbuf **mp, int family, int raw)
{
	struct mbuf *m;
	struct ip *ip;
	struct ip6_hdr *ip6;
	int type = SOCK_RAW;

	if (!raw) {
		switch (family) {
		case PF_INET:
			m = m_pullup(*mp, sizeof(struct ip));
			if (m == NULL)
				break;
			*mp = m;
			ip = mtod(m, struct ip *);
			if (ip->ip_p == IPPROTO_TCP)
				type = SOCK_STREAM;
			else if (ip->ip_p == IPPROTO_UDP)
				type = SOCK_DGRAM;
			break;
		case PF_INET6:
			m = m_pullup(*mp, sizeof(struct ip6_hdr));
			if (m == NULL)
				break;
			*mp = m;
			ip6 = mtod(m, struct ip6_hdr *);
			if (ip6->ip6_nxt == IPPROTO_TCP)
				type = SOCK_STREAM;
			else if (ip6->ip6_nxt == IPPROTO_UDP)
				type = SOCK_DGRAM;
			break;
		}
	}

	return (type);
}
#endif /* CONFIG_MACF_NET */
/*
 * This is mostly called from the context of the DLIL input thread;
 * because of that there is no need for atomic operations.
 */
static __inline void
ifp_inc_traffic_class_in(struct ifnet *ifp, struct mbuf *m)
{
	if (!(m->m_flags & M_PKTHDR))
		return;

	switch (m_get_traffic_class(m)) {
	case MBUF_TC_BE:
		ifp->if_tc.ifi_ibepackets++;
		ifp->if_tc.ifi_ibebytes += m->m_pkthdr.len;
		break;
	case MBUF_TC_BK:
		ifp->if_tc.ifi_ibkpackets++;
		ifp->if_tc.ifi_ibkbytes += m->m_pkthdr.len;
		break;
	case MBUF_TC_VI:
		ifp->if_tc.ifi_ivipackets++;
		ifp->if_tc.ifi_ivibytes += m->m_pkthdr.len;
		break;
	case MBUF_TC_VO:
		ifp->if_tc.ifi_ivopackets++;
		ifp->if_tc.ifi_ivobytes += m->m_pkthdr.len;
		break;
	default:
		break;
	}

	if (mbuf_is_traffic_class_privileged(m)) {
		ifp->if_tc.ifi_ipvpackets++;
		ifp->if_tc.ifi_ipvbytes += m->m_pkthdr.len;
	}
}
/*
 * This is called from DLIL output, hence multiple threads could end
 * up modifying the statistics.  We trade off accuracy for performance
 * by not using atomic operations here.
 */
static __inline void
ifp_inc_traffic_class_out(struct ifnet *ifp, struct mbuf *m)
{
	if (!(m->m_flags & M_PKTHDR))
		return;

	switch (m_get_traffic_class(m)) {
	case MBUF_TC_BE:
		ifp->if_tc.ifi_obepackets++;
		ifp->if_tc.ifi_obebytes += m->m_pkthdr.len;
		break;
	case MBUF_TC_BK:
		ifp->if_tc.ifi_obkpackets++;
		ifp->if_tc.ifi_obkbytes += m->m_pkthdr.len;
		break;
	case MBUF_TC_VI:
		ifp->if_tc.ifi_ovipackets++;
		ifp->if_tc.ifi_ovibytes += m->m_pkthdr.len;
		break;
	case MBUF_TC_VO:
		ifp->if_tc.ifi_ovopackets++;
		ifp->if_tc.ifi_ovobytes += m->m_pkthdr.len;
		break;
	default:
		break;
	}

	if (mbuf_is_traffic_class_privileged(m)) {
		ifp->if_tc.ifi_opvpackets++;
		ifp->if_tc.ifi_opvbytes += m->m_pkthdr.len;
	}
}
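/*
 * Illustrative sketch (not compiled): the comment above trades accuracy
 * for speed by using plain increments.  If exact counts were required,
 * each bump would have to be atomic, e.g. with C11 atomics as below.
 * This is a generic example, not the kernel's actual mechanism.
 */
#if 0
#include <stdatomic.h>
#include <stdint.h>

struct out_stats {
	_Atomic uint64_t packets;
	_Atomic uint64_t bytes;
};

static inline void
out_stats_inc(struct out_stats *st, uint32_t len)
{
	/* relaxed ordering is sufficient for statistics counters */
	atomic_fetch_add_explicit(&st->packets, 1, memory_order_relaxed);
	atomic_fetch_add_explicit(&st->bytes, len, memory_order_relaxed);
}
#endif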
void
dlil_count_chain_len(mbuf_t m, struct chain_len_stats *cls)
{
	mbuf_t n = m;
	int chainlen = 0;

	while (n != NULL) {
		chainlen++;
		n = n->m_next;
	}
	switch (chainlen) {
	case 0:
		break;
	case 1:
		atomic_add_64(&cls->cls_one, 1);
		break;
	case 2:
		atomic_add_64(&cls->cls_two, 1);
		break;
	case 3:
		atomic_add_64(&cls->cls_three, 1);
		break;
	case 4:
		atomic_add_64(&cls->cls_four, 1);
		break;
	case 5:
	default:
		atomic_add_64(&cls->cls_five_or_more, 1);
		break;
	}
}
/*
 * dlil_output
 *
 * Caller should have a lock on the protocol domain if the protocol
 * doesn't support finer grained locking.  In most cases, the lock
 * will be held from the socket layer and won't be released until
 * we return back to the socket layer.
 *
 * This does mean that we must take a protocol lock before we take
 * an interface lock if we're going to take both.  This makes sense
 * because a protocol is likely to interact with an ifp while it
 * is under the protocol lock.
 *
 * An advisory code will be returned if adv is not null.  This
 * can be used to provide feedback about interface queues to the
 * application.
 */
errno_t
dlil_output(ifnet_t ifp, protocol_family_t proto_family, mbuf_t packetlist,
    void *route, const struct sockaddr *dest, int raw, struct flowadv *adv)
{
	ifnet_output_handler_func handler_func;
	char *frame_type = NULL;
	char *dst_linkaddr = NULL;
	int retval = 0;
	char frame_type_buffer[MAX_FRAME_TYPE_SIZE * 4];
	char dst_linkaddr_buffer[MAX_LINKADDR * 4];
	struct if_proto *proto = NULL;
	mbuf_t m;
	mbuf_t send_head = NULL;
	mbuf_t *send_tail = &send_head;
	int iorefcnt = 0;
	u_int32_t pre = 0, post = 0;
	u_int32_t fpkts = 0, fbytes = 0;
	int32_t flen = 0;

	KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_START, 0, 0, 0, 0, 0);
	/*
	 * Get an io refcnt if the interface is attached to prevent ifnet_detach
	 * from happening while this operation is in progress.
	 */
	if (!ifnet_is_attached(ifp, 1)) {
		retval = ENXIO;
		goto cleanup;
	}
	iorefcnt = 1;

	handler_func = ifp->if_output_handler;
	VERIFY(handler_func != NULL);

	/* update the driver's multicast filter, if needed */
	if (ifp->if_updatemcasts > 0 && if_mcasts_update(ifp) == 0)
		ifp->if_updatemcasts = 0;

	frame_type = frame_type_buffer;
	dst_linkaddr = dst_linkaddr_buffer;

	if (raw == 0) {
		ifnet_lock_shared(ifp);
		/* callee holds a proto refcnt upon success */
		proto = find_attached_proto(ifp, proto_family);
		if (proto == NULL) {
			ifnet_lock_done(ifp);
			retval = ENXIO;
			goto cleanup;
		}
		ifnet_lock_done(ifp);
	}

preout_again:
	if (packetlist == NULL)
		goto cleanup;

	m = packetlist;
	packetlist = packetlist->m_nextpkt;
	m->m_nextpkt = NULL;

	if (raw == 0) {
		proto_media_preout preoutp = (proto->proto_kpi == kProtoKPI_v1 ?
		    proto->kpi.v1.pre_output : proto->kpi.v2.pre_output);
		retval = 0;
		if (preoutp != NULL) {
			retval = preoutp(ifp, proto_family, &m, dest, route,
			    frame_type, dst_linkaddr);

			if (retval != 0) {
				if (retval == EJUSTRETURN)
					goto preout_again;
				m_freem(m);
				goto cleanup;
			}
		}
	}

#if CONFIG_MACF_NET
	retval = mac_ifnet_check_transmit(ifp, m, proto_family,
	    dlil_get_socket_type(&m, proto_family, raw));
	if (retval != 0) {
		m_freem(m);
		goto cleanup;
	}
#endif
	do {
#if CONFIG_DTRACE
		if (!raw && proto_family == PF_INET) {
			struct ip *ip = mtod(m, struct ip *);
			DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL,
			    struct ip *, ip, struct ifnet *, ifp,
			    struct ip *, ip, struct ip6_hdr *, NULL);

		} else if (!raw && proto_family == PF_INET6) {
			struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
			DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL,
			    struct ip6_hdr *, ip6, struct ifnet *, ifp,
			    struct ip *, NULL, struct ip6_hdr *, ip6);
		}
#endif /* CONFIG_DTRACE */
		if (raw == 0 && ifp->if_framer != NULL) {
			int rcvif_set = 0;

			/*
			 * If this is a broadcast packet that needs to be
			 * looped back into the system, set the inbound ifp
			 * to that of the outbound ifp.  This will allow
			 * us to determine that it is a legitimate packet
			 * for the system.  Only set the ifp if it's not
			 * already set, just to be safe.
			 */
			if ((m->m_flags & (M_BCAST | M_LOOP)) &&
			    m->m_pkthdr.rcvif == NULL) {
				m->m_pkthdr.rcvif = ifp;
				rcvif_set = 1;
			}

			retval = ifp->if_framer(ifp, &m, dest, dst_linkaddr,
			    frame_type, &pre, &post);
			if (retval != 0) {
				if (retval != EJUSTRETURN)
					m_freem(m);
				goto next;
			}

			/*
			 * For partial checksum offload, adjust the start
			 * and stuff offsets based on the prepended header.
			 */
			if ((m->m_pkthdr.csum_flags &
			    (CSUM_DATA_VALID | CSUM_PARTIAL)) ==
			    (CSUM_DATA_VALID | CSUM_PARTIAL)) {
				m->m_pkthdr.csum_tx_stuff += pre;
				m->m_pkthdr.csum_tx_start += pre;
			}

			if (hwcksum_dbg != 0 && !(ifp->if_flags & IFF_LOOPBACK))
				dlil_output_cksum_dbg(ifp, m, pre,
				    proto_family);
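			/*
			 * Worked example of the adjustment above: if the
			 * framer prepended a 14-byte Ethernet header
			 * (pre = 14), a partial checksum that was to be
			 * computed from payload offset S with its result
			 * stuffed at offset T is now computed from S + 14
			 * and stuffed at T + 14, keeping both offsets
			 * relative to the new frame head.
			 */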
			/*
			 * Clear the ifp if it was set above, and to be
			 * safe, only if it is still the same as the
			 * outbound ifp we have in context.  If it was
			 * looped back, then a copy of it was sent to the
			 * loopback interface with the rcvif set, and we
			 * are clearing the one that will go down to the
			 * layer below.
			 */
			if (rcvif_set && m->m_pkthdr.rcvif == ifp)
				m->m_pkthdr.rcvif = NULL;
		}
		/*
		 * Let interface filters (if any) do their thing ...
		 */
		/* Do not pass VLAN tagged packets to filters PR-3586856 */
		if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) == 0) {
			retval = dlil_interface_filters_output(ifp,
			    &m, proto_family);
			if (retval != 0) {
				if (retval != EJUSTRETURN)
					m_freem(m);
				goto next;
			}
		}
		/*
		 * Strip away M_PROTO1 bit prior to sending packet
		 * to the driver as this field may be used by the driver
		 */
		m->m_flags &= ~M_PROTO1;
		/*
		 * If the underlying interface is not capable of handling a
		 * packet whose data portion spans across physically disjoint
		 * pages, we need to "normalize" the packet so that we pass
		 * down a chain of mbufs where each mbuf points to a span that
		 * resides in the system page boundary.  If the packet does
		 * not cross page(s), the following is a no-op.
		 */
		if (!(ifp->if_hwassist & IFNET_MULTIPAGES)) {
			if ((m = m_normalize(m)) == NULL)
				goto next;
		}

		/*
		 * If this is a TSO packet, make sure the interface still
		 * advertises TSO capability.
		 */
		if (TSO_IPV4_NOTOK(ifp, m) || TSO_IPV6_NOTOK(ifp, m)) {
			retval = EMSGSIZE;
			m_freem(m);
			goto cleanup;
		}
		/*
		 * If the packet service class is not background,
		 * update the timestamp to indicate recent activity
		 * on a foreground socket.
		 */
		if ((m->m_pkthdr.pkt_flags & PKTF_FLOW_ID) &&
		    m->m_pkthdr.pkt_flowsrc == FLOWSRC_INPCB) {
			if (!(m->m_pkthdr.pkt_flags & PKTF_SO_BACKGROUND))
				ifp->if_fg_sendts = net_uptime();

			if (m->m_pkthdr.pkt_flags & PKTF_SO_REALTIME)
				ifp->if_rt_sendts = net_uptime();
		}

		ifp_inc_traffic_class_out(ifp, m);
		pktap_output(ifp, proto_family, m, pre, post);

		/*
		 * Count the number of elements in the mbuf chain
		 */
		if (tx_chain_len_count) {
			dlil_count_chain_len(m, &tx_chain_len_stats);
		}
		/*
		 * Finally, call the driver.
		 */
		if (ifp->if_eflags & (IFEF_SENDLIST | IFEF_ENQUEUE_MULTI)) {
			if (m->m_pkthdr.pkt_flags & PKTF_FORWARDED) {
				flen += (m_pktlen(m) - (pre + post));
				m->m_pkthdr.pkt_flags &= ~PKTF_FORWARDED;
			}
			*send_tail = m;
			send_tail = &m->m_nextpkt;
		} else {
			if (m->m_pkthdr.pkt_flags & PKTF_FORWARDED) {
				flen = (m_pktlen(m) - (pre + post));
				m->m_pkthdr.pkt_flags &= ~PKTF_FORWARDED;
			} else {
				flen = 0;
			}
			KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START,
			    0, 0, 0, 0, 0);
			retval = (*handler_func)(ifp, m);
			if (retval == EQFULL || retval == EQSUSPENDED) {
				if (adv != NULL && adv->code == FADV_SUCCESS) {
					adv->code = (retval == EQFULL ?
					    FADV_FLOW_CONTROLLED :
					    FADV_SUSPENDED);
				}
				retval = 0;
			}
			if (retval == 0 && flen > 0) {
				fbytes += flen;
				fpkts++;
			}
			if (retval != 0 && dlil_verbose) {
				printf("%s: output error on %s retval = %d\n",
				    __func__, if_name(ifp),
				    retval);
			}
			KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END,
			    0, 0, 0, 0, 0);
		}
		KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0, 0, 0, 0, 0);

next:
		m = packetlist;
		if (m != NULL) {
			packetlist = packetlist->m_nextpkt;
			m->m_nextpkt = NULL;
		}
	} while (m != NULL);
	if (send_head != NULL) {
		KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START,
		    0, 0, 0, 0, 0);
		if (ifp->if_eflags & IFEF_SENDLIST) {
			retval = (*handler_func)(ifp, send_head);
			if (retval == EQFULL || retval == EQSUSPENDED) {
				if (adv != NULL && adv->code == FADV_SUCCESS) {
					adv->code = (retval == EQFULL ?
					    FADV_FLOW_CONTROLLED :
					    FADV_SUSPENDED);
				}
				retval = 0;
			}
			if (retval == 0 && flen > 0) {
				fbytes += flen;
				fpkts++;
			}
			if (retval != 0 && dlil_verbose) {
				printf("%s: output error on %s retval = %d\n",
				    __func__, if_name(ifp), retval);
			}
		} else {
			struct mbuf *send_m;

			VERIFY(ifp->if_eflags & IFEF_ENQUEUE_MULTI);
			while (send_head != NULL) {
				send_m = send_head;
				send_head = send_m->m_nextpkt;
				send_m->m_nextpkt = NULL;
				retval = (*handler_func)(ifp, send_m);
				if (retval == EQFULL ||
				    retval == EQSUSPENDED) {
					if (adv != NULL && adv->code ==
					    FADV_SUCCESS) {
						adv->code = (retval == EQFULL ?
						    FADV_FLOW_CONTROLLED :
						    FADV_SUSPENDED);
					}
					retval = 0;
				}
				if (retval != 0 && dlil_verbose) {
					printf("%s: output error on %s "
					    "retval = %d\n",
					    __func__, if_name(ifp), retval);
				}
			}
		}
		KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
	}

	KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
	if (fbytes > 0)
		ifp->if_fbytes += fbytes;
	if (fpkts > 0)
		ifp->if_fpackets += fpkts;

cleanup:
	if (proto != NULL)
		if_proto_free(proto);
	if (packetlist) /* if any packets are left, clean up */
		mbuf_freem_list(packetlist);
	if (retval == EJUSTRETURN)
		retval = 0;
	if (iorefcnt == 1)
		ifnet_decr_iorefcnt(ifp);

	return (retval);
}
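/*
 * Illustrative sketch (not compiled): how a caller that passes a non-NULL
 * flowadv can react to the advisory code dlil_output() fills in when the
 * driver queue returns EQFULL or EQSUSPENDED.  The function here is a
 * hypothetical usage example, not a routine from this file.
 */
#if 0
static void
send_with_advisory(ifnet_t ifp, protocol_family_t family, mbuf_t m,
    const struct sockaddr *dest)
{
	struct flowadv adv;

	adv.code = FADV_SUCCESS;
	(void) dlil_output(ifp, family, m, NULL, dest, 0, &adv);
	if (adv.code == FADV_FLOW_CONTROLLED || adv.code == FADV_SUSPENDED) {
		/* queue is congested: pause the flow that produced m */
	}
}
#endif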
errno_t
ifnet_ioctl(ifnet_t ifp, protocol_family_t proto_fam, u_long ioctl_code,
    void *ioctl_arg)
{
	struct ifnet_filter *filter;
	int retval = EOPNOTSUPP;
	int result = 0;

	if (ifp == NULL || ioctl_code == 0)
		return (EINVAL);

	/* Get an io ref count if the interface is attached */
	if (!ifnet_is_attached(ifp, 1))
		return (EOPNOTSUPP);

	/*
	 * Run the interface filters first.
	 * We want to run all filters before calling the protocol,
	 * interface family, or interface.
	 */
	lck_mtx_lock_spin(&ifp->if_flt_lock);
	/* prevent filter list from changing in case we drop the lock */
	if_flt_monitor_busy(ifp);
	TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
		if (filter->filt_ioctl != NULL && (filter->filt_protocol == 0 ||
		    filter->filt_protocol == proto_fam)) {
			lck_mtx_unlock(&ifp->if_flt_lock);

			result = filter->filt_ioctl(filter->filt_cookie, ifp,
			    proto_fam, ioctl_code, ioctl_arg);

			lck_mtx_lock_spin(&ifp->if_flt_lock);

			/* Only update retval if no one has handled the ioctl */
			if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
				if (result == ENOTSUP)
					result = EOPNOTSUPP;
				retval = result;
				if (retval != 0 && retval != EOPNOTSUPP) {
					/* we're done with the filter list */
					if_flt_monitor_unbusy(ifp);
					lck_mtx_unlock(&ifp->if_flt_lock);
					goto cleanup;
				}
			}
		}
	}
	/* we're done with the filter list */
	if_flt_monitor_unbusy(ifp);
	lck_mtx_unlock(&ifp->if_flt_lock);

	/* Allow the protocol to handle the ioctl */
	if (proto_fam != 0) {
		struct if_proto *proto;

		/* callee holds a proto refcnt upon success */
		ifnet_lock_shared(ifp);
		proto = find_attached_proto(ifp, proto_fam);
		ifnet_lock_done(ifp);
		if (proto != NULL) {
			proto_media_ioctl ioctlp =
			    (proto->proto_kpi == kProtoKPI_v1 ?
			    proto->kpi.v1.ioctl : proto->kpi.v2.ioctl);
			result = EOPNOTSUPP;
			if (ioctlp != NULL)
				result = ioctlp(ifp, proto_fam, ioctl_code,
				    ioctl_arg);
			if_proto_free(proto);

			/* Only update retval if no one has handled the ioctl */
			if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
				if (result == ENOTSUP)
					result = EOPNOTSUPP;
				retval = result;
				if (retval && retval != EOPNOTSUPP)
					goto cleanup;
			}
		}
	}

	/* retval is either 0 or EOPNOTSUPP */

	/*
	 * Let the interface handle this ioctl.
	 * If it returns EOPNOTSUPP, ignore that, we may have
	 * already handled this in the protocol or family.
	 */
	if (ifp->if_ioctl)
		result = (*ifp->if_ioctl)(ifp, ioctl_code, ioctl_arg);

	/* Only update retval if no one has handled the ioctl */
	if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
		if (result == ENOTSUP)
			result = EOPNOTSUPP;
		retval = result;
		if (retval && retval != EOPNOTSUPP) {
			goto cleanup;
		}
	}

cleanup:
	if (retval == EJUSTRETURN)
		retval = 0;

	ifnet_decr_iorefcnt(ifp);

	return (retval);
}
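/*
 * Illustrative sketch (not compiled): the retval/result merging rule used
 * three times in ifnet_ioctl() above, lifted into a standalone helper.
 * A handler's ENOTSUP is normalized to EOPNOTSUPP, and its verdict only
 * replaces retval while no earlier layer has claimed the ioctl (retval
 * still EOPNOTSUPP) or when it returns EJUSTRETURN to short-circuit.
 * EJUSTRETURN is stubbed with a placeholder value for the example.
 */
#if 0
#include <errno.h>

#define EJUSTRETURN	(-2)	/* hypothetical stand-in for the kernel value */

static int
merge_ioctl_result(int retval, int result)
{
	if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
		if (result == ENOTSUP)
			result = EOPNOTSUPP;
		retval = result;
	}
	return (retval);
}
#endif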
__private_extern__ errno_t
dlil_set_bpf_tap(ifnet_t ifp, bpf_tap_mode mode, bpf_packet_func callback)
{
	errno_t error = 0;

	if (ifp->if_set_bpf_tap) {
		/* Get an io reference on the interface if it is attached */
		if (!ifnet_is_attached(ifp, 1))
			return (ENXIO);
		error = ifp->if_set_bpf_tap(ifp, mode, callback);
		ifnet_decr_iorefcnt(ifp);
	}
	return (error);
}
errno_t
dlil_resolve_multi(struct ifnet *ifp, const struct sockaddr *proto_addr,
    struct sockaddr *ll_addr, size_t ll_len)
{
	errno_t result = EOPNOTSUPP;
	struct if_proto *proto;
	const struct sockaddr *verify;
	proto_media_resolve_multi resolvep;

	if (!ifnet_is_attached(ifp, 1))
		return (result);

	bzero(ll_addr, ll_len);

	/* Call the protocol first; callee holds a proto refcnt upon success */
	ifnet_lock_shared(ifp);
	proto = find_attached_proto(ifp, proto_addr->sa_family);
	ifnet_lock_done(ifp);
	if (proto != NULL) {
		resolvep = (proto->proto_kpi == kProtoKPI_v1 ?
		    proto->kpi.v1.resolve_multi : proto->kpi.v2.resolve_multi);
		if (resolvep != NULL)
			result = resolvep(ifp, proto_addr,
			    (struct sockaddr_dl *)(void *)ll_addr, ll_len);
		if_proto_free(proto);
	}

	/* Let the interface verify the multicast address */
	if ((result == EOPNOTSUPP || result == 0) && ifp->if_check_multi) {
		if (result == 0)
			verify = ll_addr;
		else
			verify = proto_addr;
		result = ifp->if_check_multi(ifp, verify);
	}

	ifnet_decr_iorefcnt(ifp);
	return (result);
}
__private_extern__ errno_t
dlil_send_arp_internal(ifnet_t ifp, u_short arpop,
    const struct sockaddr_dl *sender_hw, const struct sockaddr *sender_proto,
    const struct sockaddr_dl *target_hw, const struct sockaddr *target_proto)
{
	struct if_proto *proto;
	errno_t result = 0;

	/* callee holds a proto refcnt upon success */
	ifnet_lock_shared(ifp);
	proto = find_attached_proto(ifp, target_proto->sa_family);
	ifnet_lock_done(ifp);
	if (proto == NULL) {
		result = ENOTSUP;
	} else {
		proto_media_send_arp arpp;
		arpp = (proto->proto_kpi == kProtoKPI_v1 ?
		    proto->kpi.v1.send_arp : proto->kpi.v2.send_arp);
		if (arpp == NULL) {
			result = ENOTSUP;
		} else {
			switch (arpop) {
			case ARPOP_REQUEST:
				arpstat.txrequests++;
				if (target_hw != NULL)
					arpstat.txurequests++;
				break;
			case ARPOP_REPLY:
				arpstat.txreplies++;
				break;
			}
			result = arpp(ifp, arpop, sender_hw, sender_proto,
			    target_hw, target_proto);
		}
		if_proto_free(proto);
	}

	return (result);
}
struct net_thread_marks { };
static const struct net_thread_marks net_thread_marks_base = { };

__private_extern__ const net_thread_marks_t net_thread_marks_none =
    &net_thread_marks_base;

__private_extern__ net_thread_marks_t
net_thread_marks_push(u_int32_t push)
{
	static const char *const base = (const void*)&net_thread_marks_base;
	u_int32_t pop = 0;

	if (push != 0) {
		struct uthread *uth = get_bsdthread_info(current_thread());

		pop = push & ~uth->uu_network_marks;
		if (pop != 0)
			uth->uu_network_marks |= pop;
	}

	return ((net_thread_marks_t)&base[pop]);
}

__private_extern__ net_thread_marks_t
net_thread_unmarks_push(u_int32_t unpush)
{
	static const char *const base = (const void*)&net_thread_marks_base;
	u_int32_t unpop = 0;

	if (unpush != 0) {
		struct uthread *uth = get_bsdthread_info(current_thread());

		unpop = unpush & uth->uu_network_marks;
		if (unpop != 0)
			uth->uu_network_marks &= ~unpop;
	}

	return ((net_thread_marks_t)&base[unpop]);
}

__private_extern__ void
net_thread_marks_pop(net_thread_marks_t popx)
{
	static const char *const base = (const void*)&net_thread_marks_base;
	const ptrdiff_t pop = (const char *)popx - (const char *)base;

	if (pop != 0) {
		static const ptrdiff_t ones = (ptrdiff_t)(u_int32_t)~0U;
		struct uthread *uth = get_bsdthread_info(current_thread());

		VERIFY((pop & ones) == pop);
		VERIFY((ptrdiff_t)(uth->uu_network_marks & pop) == pop);
		uth->uu_network_marks &= ~pop;
	}
}

__private_extern__ void
net_thread_unmarks_pop(net_thread_marks_t unpopx)
{
	static const char *const base = (const void*)&net_thread_marks_base;
	ptrdiff_t unpop = (const char *)unpopx - (const char *)base;

	if (unpop != 0) {
		static const ptrdiff_t ones = (ptrdiff_t)(u_int32_t)~0U;
		struct uthread *uth = get_bsdthread_info(current_thread());

		VERIFY((unpop & ones) == unpop);
		VERIFY((ptrdiff_t)(uth->uu_network_marks & unpop) == 0);
		uth->uu_network_marks |= unpop;
	}
}

__private_extern__ u_int32_t
net_thread_is_marked(u_int32_t check)
{
	if (check != 0) {
		struct uthread *uth = get_bsdthread_info(current_thread());
		return (uth->uu_network_marks & check);
	} else
		return (0);
}

__private_extern__ u_int32_t
net_thread_is_unmarked(u_int32_t check)
{
	if (check != 0) {
		struct uthread *uth = get_bsdthread_info(current_thread());
		return (~uth->uu_network_marks & check);
	} else
		return (0);
}
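/*
 * Illustrative sketch (not compiled): the intended pairing of the mark
 * push/pop primitives above.  The mark bit used here is a hypothetical
 * example value.
 */
#if 0
#define NET_THREAD_EXAMPLE_MARK	0x1	/* hypothetical mark bit */

static void
marked_section(void)
{
	net_thread_marks_t marks;

	marks = net_thread_marks_push(NET_THREAD_EXAMPLE_MARK);
	/*
	 * Re-entrant code can now call
	 * net_thread_is_marked(NET_THREAD_EXAMPLE_MARK) to detect that
	 * this thread is already inside the section.
	 */
	net_thread_marks_pop(marks);	/* clears only the bits this push set */
}
#endif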
static __inline__ int
_is_announcement(const struct sockaddr_in * sender_sin,
    const struct sockaddr_in * target_sin)
{
	if (sender_sin == NULL) {
		return (FALSE);
	}

	return (sender_sin->sin_addr.s_addr == target_sin->sin_addr.s_addr);
}
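/*
 * Example: a gratuitous ARP announcing 169.254.1.5 carries that address
 * as both the sender and the target, so _is_announcement() returns true
 * and dlil_send_arp() below keeps the request on the originating
 * interface instead of fanning it out to every IPv4LL-capable interface.
 */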
__private_extern__ errno_t
dlil_send_arp(ifnet_t ifp, u_short arpop, const struct sockaddr_dl *sender_hw,
    const struct sockaddr *sender_proto, const struct sockaddr_dl *target_hw,
    const struct sockaddr *target_proto0, u_int32_t rtflags)
{
	errno_t result = 0;
	const struct sockaddr_in * sender_sin;
	const struct sockaddr_in * target_sin;
	struct sockaddr_inarp target_proto_sinarp;
	struct sockaddr *target_proto = (void *)(uintptr_t)target_proto0;

	if (target_proto == NULL || (sender_proto != NULL &&
	    sender_proto->sa_family != target_proto->sa_family))
		return (EINVAL);

	/*
	 * If the target is a (default) router, provide that
	 * information to the send_arp callback routine.
	 */
	if (rtflags & RTF_ROUTER) {
		bcopy(target_proto, &target_proto_sinarp,
		    sizeof (struct sockaddr_in));
		target_proto_sinarp.sin_other |= SIN_ROUTER;
		target_proto = (struct sockaddr *)&target_proto_sinarp;
	}

	/*
	 * If this is an ARP request and the target IP is IPv4LL,
	 * send the request on all interfaces.  The exception is
	 * an announcement, which must only appear on the specific
	 * interface.
	 */
	sender_sin = (struct sockaddr_in *)(void *)(uintptr_t)sender_proto;
	target_sin = (struct sockaddr_in *)(void *)(uintptr_t)target_proto;
	if (target_proto->sa_family == AF_INET &&
	    IN_LINKLOCAL(ntohl(target_sin->sin_addr.s_addr)) &&
	    ipv4_ll_arp_aware != 0 && arpop == ARPOP_REQUEST &&
	    !_is_announcement(target_sin, sender_sin)) {
		ifnet_t *ifp_list;
		u_int32_t count;
		u_int32_t ifp_on;

		result = ENOTSUP;

		if (ifnet_list_get(IFNET_FAMILY_ANY, &ifp_list, &count) == 0) {
			for (ifp_on = 0; ifp_on < count; ifp_on++) {
				errno_t new_result;
				ifaddr_t source_hw = NULL;
				ifaddr_t source_ip = NULL;
				struct sockaddr_in source_ip_copy;
				struct ifnet *cur_ifp = ifp_list[ifp_on];

				/*
				 * Only arp on interfaces marked for IPv4LL
				 * ARPing.  This may mean that we don't ARP on
				 * the interface the subnet route points to.
				 */
				if (!(cur_ifp->if_eflags & IFEF_ARPLL))
					continue;

				/* Find the source IP address */
				ifnet_lock_shared(cur_ifp);
				source_hw = cur_ifp->if_lladdr;
				TAILQ_FOREACH(source_ip, &cur_ifp->if_addrhead,
				    ifa_link) {
					IFA_LOCK(source_ip);
					if (source_ip->ifa_addr != NULL &&
					    source_ip->ifa_addr->sa_family ==
					    AF_INET) {
						/* Copy the source IP address */
						source_ip_copy =
						    *(struct sockaddr_in *)
						    (void *)source_ip->ifa_addr;
						IFA_UNLOCK(source_ip);
						break;
					}
					IFA_UNLOCK(source_ip);
				}

				/* No IP Source, don't arp */
				if (source_ip == NULL) {
					ifnet_lock_done(cur_ifp);
					continue;
				}

				IFA_ADDREF(source_hw);
				ifnet_lock_done(cur_ifp);

				/* Send the ARP */
				new_result = dlil_send_arp_internal(cur_ifp,
				    arpop, (struct sockaddr_dl *)(void *)
				    source_hw->ifa_addr,
				    (struct sockaddr *)&source_ip_copy, NULL,
				    target_proto);

				IFA_REMREF(source_hw);
				if (result == ENOTSUP) {
					result = new_result;
				}
			}
			ifnet_list_free(ifp_list);
		}
	} else {
		result = dlil_send_arp_internal(ifp, arpop, sender_hw,
		    sender_proto, target_hw, target_proto);
	}

	return (result);
}
/*
 * Caller must hold ifnet head lock.
 */
static int
ifnet_lookup(struct ifnet *ifp)
{
	struct ifnet *_ifp;

	lck_rw_assert(&ifnet_head_lock, LCK_RW_ASSERT_HELD);
	TAILQ_FOREACH(_ifp, &ifnet_head, if_link) {
		if (_ifp == ifp)
			break;
	}
	return (_ifp != NULL);
}
/*
 * Caller has to pass a non-zero refio argument to get an
 * IO reference count.  This will prevent ifnet_detach from
 * being called when there are outstanding io reference counts.
 */
int
ifnet_is_attached(struct ifnet *ifp, int refio)
{
	int ret;

	lck_mtx_lock_spin(&ifp->if_ref_lock);
	if ((ret = ((ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING)) ==
	    IFRF_ATTACHED))) {
		if (refio > 0)
			ifp->if_refio++;
	}
	lck_mtx_unlock(&ifp->if_ref_lock);

	return (ret);
}
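/*
 * Illustrative sketch (not compiled): the canonical caller pattern for
 * the attach check above paired with ifnet_decr_iorefcnt() below -- take
 * an IO reference while checking the attach state, do the work, then
 * drop the reference.  The function name is hypothetical.
 */
#if 0
static errno_t
do_work_on_ifp(struct ifnet *ifp)
{
	if (!ifnet_is_attached(ifp, 1))
		return (ENXIO);		/* detached or detaching: refuse */

	/* ... safe to use ifp here; detach waits for the refcount ... */

	ifnet_decr_iorefcnt(ifp);	/* may wake the detacher thread */
	return (0);
}
#endif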
/*
 * Caller must ensure the interface is attached; the assumption is that
 * there is at least an outstanding IO reference count held already.
 * Most callers would call ifnet_is_attached() instead.
 */
void
ifnet_incr_iorefcnt(struct ifnet *ifp)
{
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	VERIFY((ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING)) ==
	    IFRF_ATTACHED);
	VERIFY(ifp->if_refio > 0);
	ifp->if_refio++;
	lck_mtx_unlock(&ifp->if_ref_lock);
}
void
ifnet_decr_iorefcnt(struct ifnet *ifp)
{
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	VERIFY(ifp->if_refio > 0);
	VERIFY((ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING)) != 0);
	ifp->if_refio--;

	/*
	 * if there are no more outstanding io references, wakeup the
	 * ifnet_detach thread if detaching flag is set.
	 */
	if (ifp->if_refio == 0 &&
	    (ifp->if_refflags & IFRF_DETACHING) != 0) {
		wakeup(&(ifp->if_refio));
	}
	lck_mtx_unlock(&ifp->if_ref_lock);
}
static void
dlil_if_trace(struct dlil_ifnet *dl_if, int refhold)
{
	struct dlil_ifnet_dbg *dl_if_dbg = (struct dlil_ifnet_dbg *)dl_if;
	ctrace_t *tr;
	u_int32_t idx;
	u_int16_t *cnt;

	if (!(dl_if->dl_if_flags & DLIF_DEBUG)) {
		panic("%s: dl_if %p has no debug structure", __func__, dl_if);
		/* NOTREACHED */
	}

	if (refhold) {
		cnt = &dl_if_dbg->dldbg_if_refhold_cnt;
		tr = dl_if_dbg->dldbg_if_refhold;
	} else {
		cnt = &dl_if_dbg->dldbg_if_refrele_cnt;
		tr = dl_if_dbg->dldbg_if_refrele;
	}

	idx = atomic_add_16_ov(cnt, 1) % IF_REF_TRACE_HIST_SIZE;
	ctrace_record(&tr[idx]);
}
errno_t
dlil_if_ref(struct ifnet *ifp)
{
	struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;

	if (dl_if == NULL)
		return (EINVAL);

	lck_mtx_lock_spin(&dl_if->dl_if_lock);
	++dl_if->dl_if_refcnt;
	if (dl_if->dl_if_refcnt == 0) {
		panic("%s: wraparound refcnt for ifp=%p", __func__, ifp);
		/* NOTREACHED */
	}
	if (dl_if->dl_if_trace != NULL)
		(*dl_if->dl_if_trace)(dl_if, TRUE);
	lck_mtx_unlock(&dl_if->dl_if_lock);

	return (0);
}
errno_t
dlil_if_free(struct ifnet *ifp)
{
	struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;

	if (dl_if == NULL)
		return (EINVAL);

	lck_mtx_lock_spin(&dl_if->dl_if_lock);
	if (dl_if->dl_if_refcnt == 0) {
		panic("%s: negative refcnt for ifp=%p", __func__, ifp);
		/* NOTREACHED */
	}
	--dl_if->dl_if_refcnt;
	if (dl_if->dl_if_trace != NULL)
		(*dl_if->dl_if_trace)(dl_if, FALSE);
	lck_mtx_unlock(&dl_if->dl_if_lock);

	return (0);
}
static errno_t
dlil_attach_protocol_internal(struct if_proto *proto,
    const struct ifnet_demux_desc *demux_list, u_int32_t demux_count)
{
	struct kev_dl_proto_data ev_pr_data;
	struct ifnet *ifp = proto->ifp;
	int retval = 0;
	u_int32_t hash_value = proto_hash_value(proto->protocol_family);
	struct if_proto *prev_proto;
	struct if_proto *_proto;

	/* callee holds a proto refcnt upon success */
	ifnet_lock_exclusive(ifp);
	_proto = find_attached_proto(ifp, proto->protocol_family);
	if (_proto != NULL) {
		ifnet_lock_done(ifp);
		if_proto_free(_proto);
		return (EEXIST);
	}

	/*
	 * Call family module add_proto routine so it can refine the
	 * demux descriptors as it wishes.
	 */
	retval = ifp->if_add_proto(ifp, proto->protocol_family, demux_list,
	    demux_count);
	if (retval) {
		ifnet_lock_done(ifp);
		return (retval);
	}

	/*
	 * Insert the protocol in the hash
	 */
	prev_proto = SLIST_FIRST(&ifp->if_proto_hash[hash_value]);
	while (prev_proto != NULL && SLIST_NEXT(prev_proto, next_hash) != NULL)
		prev_proto = SLIST_NEXT(prev_proto, next_hash);
	if (prev_proto)
		SLIST_INSERT_AFTER(prev_proto, proto, next_hash);
	else
		SLIST_INSERT_HEAD(&ifp->if_proto_hash[hash_value],
		    proto, next_hash);

	/* hold a proto refcnt for attach */
	if_proto_ref(proto);

	/*
	 * The reserved field carries the number of protocols still attached
	 * (subject to change)
	 */
	ev_pr_data.proto_family = proto->protocol_family;
	ev_pr_data.proto_remaining_count = dlil_ifp_proto_count(ifp);
	ifnet_lock_done(ifp);

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_ATTACHED,
	    (struct net_event_data *)&ev_pr_data,
	    sizeof (struct kev_dl_proto_data));

	return (retval);
}
errno_t
ifnet_attach_protocol(ifnet_t ifp, protocol_family_t protocol,
    const struct ifnet_attach_proto_param *proto_details)
{
	int retval = 0;
	struct if_proto *ifproto = NULL;

	ifnet_head_lock_shared();
	if (ifp == NULL || protocol == 0 || proto_details == NULL) {
		retval = EINVAL;
		goto end;
	}
	/* Check that the interface is in the global list */
	if (!ifnet_lookup(ifp)) {
		retval = ENXIO;
		goto end;
	}

	ifproto = zalloc(dlif_proto_zone);
	if (ifproto == NULL) {
		retval = ENOMEM;
		goto end;
	}
	bzero(ifproto, dlif_proto_size);

	/* refcnt held above during lookup */
	ifproto->ifp = ifp;
	ifproto->protocol_family = protocol;
	ifproto->proto_kpi = kProtoKPI_v1;
	ifproto->kpi.v1.input = proto_details->input;
	ifproto->kpi.v1.pre_output = proto_details->pre_output;
	ifproto->kpi.v1.event = proto_details->event;
	ifproto->kpi.v1.ioctl = proto_details->ioctl;
	ifproto->kpi.v1.detached = proto_details->detached;
	ifproto->kpi.v1.resolve_multi = proto_details->resolve;
	ifproto->kpi.v1.send_arp = proto_details->send_arp;

	retval = dlil_attach_protocol_internal(ifproto,
	    proto_details->demux_list, proto_details->demux_count);

	if (dlil_verbose) {
		printf("%s: attached v1 protocol %d\n", if_name(ifp),
		    protocol);
	}

end:
	if (retval != 0 && retval != EEXIST && ifp != NULL) {
		DLIL_PRINTF("%s: failed to attach v1 protocol %d (err=%d)\n",
		    if_name(ifp), protocol, retval);
	}
	ifnet_head_done();
	if (retval != 0 && ifproto != NULL)
		zfree(dlif_proto_zone, ifproto);
	return (retval);
}
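/*
 * Illustrative sketch (not compiled): filling in an
 * ifnet_attach_proto_param for the v1 attach above.  The protocol
 * constant, the EtherType value and the callback names are hypothetical
 * placeholders, not real definitions from this tree.
 */
#if 0
static errno_t example_proto_input(ifnet_t, protocol_family_t, mbuf_t,
    char *);
static errno_t example_proto_pre_output(ifnet_t, protocol_family_t, mbuf_t *,
    const struct sockaddr *, void *, char *, char *);

static errno_t
attach_example_proto(ifnet_t ifp)
{
	struct ifnet_attach_proto_param param;
	struct ifnet_demux_desc demux;
	u_int16_t etype = 0x88b5;		/* example EtherType */

	bzero(&demux, sizeof (demux));
	demux.type = DLIL_DESC_ETYPE2;		/* demux on EtherType */
	demux.data = &etype;
	demux.datalen = sizeof (etype);

	bzero(&param, sizeof (param));
	param.demux_list = &demux;
	param.demux_count = 1;
	param.input = example_proto_input;	/* hypothetical callbacks */
	param.pre_output = example_proto_pre_output;

	return (ifnet_attach_protocol(ifp, PF_EXAMPLE, &param));
}
#endif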
errno_t
ifnet_attach_protocol_v2(ifnet_t ifp, protocol_family_t protocol,
    const struct ifnet_attach_proto_param_v2 *proto_details)
{
	int retval = 0;
	struct if_proto *ifproto = NULL;

	ifnet_head_lock_shared();
	if (ifp == NULL || protocol == 0 || proto_details == NULL) {
		retval = EINVAL;
		goto end;
	}
	/* Check that the interface is in the global list */
	if (!ifnet_lookup(ifp)) {
		retval = ENXIO;
		goto end;
	}

	ifproto = zalloc(dlif_proto_zone);
	if (ifproto == NULL) {
		retval = ENOMEM;
		goto end;
	}
	bzero(ifproto, sizeof(*ifproto));

	/* refcnt held above during lookup */
	ifproto->ifp = ifp;
	ifproto->protocol_family = protocol;
	ifproto->proto_kpi = kProtoKPI_v2;
	ifproto->kpi.v2.input = proto_details->input;
	ifproto->kpi.v2.pre_output = proto_details->pre_output;
	ifproto->kpi.v2.event = proto_details->event;
	ifproto->kpi.v2.ioctl = proto_details->ioctl;
	ifproto->kpi.v2.detached = proto_details->detached;
	ifproto->kpi.v2.resolve_multi = proto_details->resolve;
	ifproto->kpi.v2.send_arp = proto_details->send_arp;

	retval = dlil_attach_protocol_internal(ifproto,
	    proto_details->demux_list, proto_details->demux_count);

	if (dlil_verbose) {
		printf("%s: attached v2 protocol %d\n", if_name(ifp),
		    protocol);
	}

end:
	if (retval != 0 && retval != EEXIST && ifp != NULL) {
		DLIL_PRINTF("%s: failed to attach v2 protocol %d (err=%d)\n",
		    if_name(ifp), protocol, retval);
	}
	ifnet_head_done();
	if (retval != 0 && ifproto != NULL)
		zfree(dlif_proto_zone, ifproto);
	return (retval);
}
errno_t
ifnet_detach_protocol(ifnet_t ifp, protocol_family_t proto_family)
{
	struct if_proto *proto = NULL;
	int retval = 0;

	if (ifp == NULL || proto_family == 0) {
		retval = EINVAL;
		goto end;
	}

	ifnet_lock_exclusive(ifp);
	/* callee holds a proto refcnt upon success */
	proto = find_attached_proto(ifp, proto_family);
	if (proto == NULL) {
		retval = ENXIO;
		ifnet_lock_done(ifp);
		goto end;
	}

	/* call family module del_proto */
	if (ifp->if_del_proto)
		ifp->if_del_proto(ifp, proto->protocol_family);

	SLIST_REMOVE(&ifp->if_proto_hash[proto_hash_value(proto_family)],
	    proto, if_proto, next_hash);

	if (proto->proto_kpi == kProtoKPI_v1) {
		proto->kpi.v1.input = ifproto_media_input_v1;
		proto->kpi.v1.pre_output = ifproto_media_preout;
		proto->kpi.v1.event = ifproto_media_event;
		proto->kpi.v1.ioctl = ifproto_media_ioctl;
		proto->kpi.v1.resolve_multi = ifproto_media_resolve_multi;
		proto->kpi.v1.send_arp = ifproto_media_send_arp;
	} else {
		proto->kpi.v2.input = ifproto_media_input_v2;
		proto->kpi.v2.pre_output = ifproto_media_preout;
		proto->kpi.v2.event = ifproto_media_event;
		proto->kpi.v2.ioctl = ifproto_media_ioctl;
		proto->kpi.v2.resolve_multi = ifproto_media_resolve_multi;
		proto->kpi.v2.send_arp = ifproto_media_send_arp;
	}
	proto->detached = 1;
	ifnet_lock_done(ifp);

	if (dlil_verbose) {
		printf("%s: detached %s protocol %d\n", if_name(ifp),
		    (proto->proto_kpi == kProtoKPI_v1) ?
		    "v1" : "v2", proto_family);
	}

	/* release proto refcnt held during protocol attach */
	if_proto_free(proto);

	/*
	 * Release proto refcnt held during lookup; the rest of
	 * protocol detach steps will happen when the last proto
	 * reference is released.
	 */
	if_proto_free(proto);

end:
	return (retval);
}
static errno_t
ifproto_media_input_v1(struct ifnet *ifp, protocol_family_t protocol,
    struct mbuf *packet, char *header)
{
#pragma unused(ifp, protocol, packet, header)
	return (ENXIO);
}

static errno_t
ifproto_media_input_v2(struct ifnet *ifp, protocol_family_t protocol,
    struct mbuf *packet)
{
#pragma unused(ifp, protocol, packet)
	return (ENXIO);
}

static errno_t
ifproto_media_preout(struct ifnet *ifp, protocol_family_t protocol,
    mbuf_t *packet, const struct sockaddr *dest, void *route, char *frame_type,
    char *link_layer_dest)
{
#pragma unused(ifp, protocol, packet, dest, route, frame_type, link_layer_dest)
	return (ENXIO);
}

static void
ifproto_media_event(struct ifnet *ifp, protocol_family_t protocol,
    const struct kev_msg *event)
{
#pragma unused(ifp, protocol, event)
}

static errno_t
ifproto_media_ioctl(struct ifnet *ifp, protocol_family_t protocol,
    unsigned long command, void *argument)
{
#pragma unused(ifp, protocol, command, argument)
	return (ENXIO);
}

static errno_t
ifproto_media_resolve_multi(ifnet_t ifp, const struct sockaddr *proto_addr,
    struct sockaddr_dl *out_ll, size_t ll_len)
{
#pragma unused(ifp, proto_addr, out_ll, ll_len)
	return (ENXIO);
}

static errno_t
ifproto_media_send_arp(struct ifnet *ifp, u_short arpop,
    const struct sockaddr_dl *sender_hw, const struct sockaddr *sender_proto,
    const struct sockaddr_dl *target_hw, const struct sockaddr *target_proto)
{
#pragma unused(ifp, arpop, sender_hw, sender_proto, target_hw, target_proto)
	return (ENXIO);
}
extern int if_next_index(void);
extern int tcp_ecn_outbound;
errno_t
ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr)
{
	struct ifnet *tmp_if;
	struct ifaddr *ifa;
	struct if_data_internal if_data_saved;
	struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
	struct dlil_threading_info *dl_inp;
	u_int32_t sflags = 0;
	int err;

	if (ifp == NULL)
		return (EINVAL);

	/*
	 * Serialize ifnet attach using dlil_ifnet_lock, in order to
	 * prevent the interface from being configured while it is
	 * embryonic, as ifnet_head_lock is dropped and reacquired
	 * below prior to marking the ifnet with IFRF_ATTACHED.
	 */
	dlil_if_lock();
	ifnet_head_lock_exclusive();
	/* Verify we aren't already on the list */
	TAILQ_FOREACH(tmp_if, &ifnet_head, if_link) {
		if (tmp_if == ifp) {
			ifnet_head_done();
			dlil_if_unlock();
			return (EEXIST);
		}
	}

	lck_mtx_lock_spin(&ifp->if_ref_lock);
	if (ifp->if_refflags & IFRF_ATTACHED) {
		panic_plain("%s: flags mismatch (attached set) ifp=%p",
		    __func__, ifp);
		/* NOTREACHED */
	}
	lck_mtx_unlock(&ifp->if_ref_lock);

	ifnet_lock_exclusive(ifp);

	/* Sanity check */
	VERIFY(ifp->if_detaching_link.tqe_next == NULL);
	VERIFY(ifp->if_detaching_link.tqe_prev == NULL);

	if (ll_addr != NULL) {
		if (ifp->if_addrlen == 0) {
			ifp->if_addrlen = ll_addr->sdl_alen;
		} else if (ll_addr->sdl_alen != ifp->if_addrlen) {
			ifnet_lock_done(ifp);
			ifnet_head_done();
			dlil_if_unlock();
			return (EINVAL);
		}
	}

	/*
	 * Allow interfaces without protocol families to attach
	 * only if they have the necessary fields filled out.
	 */
	if (ifp->if_add_proto == NULL || ifp->if_del_proto == NULL) {
		DLIL_PRINTF("%s: Attempt to attach interface without "
		    "family module - %d\n", __func__, ifp->if_family);
		ifnet_lock_done(ifp);
		ifnet_head_done();
		dlil_if_unlock();
		return (ENODEV);
	}

	/* Allocate protocol hash table */
	VERIFY(ifp->if_proto_hash == NULL);
	ifp->if_proto_hash = zalloc(dlif_phash_zone);
	if (ifp->if_proto_hash == NULL) {
		ifnet_lock_done(ifp);
		ifnet_head_done();
		dlil_if_unlock();
		return (ENOBUFS);
	}
	bzero(ifp->if_proto_hash, dlif_phash_size);

	lck_mtx_lock_spin(&ifp->if_flt_lock);
	VERIFY(TAILQ_EMPTY(&ifp->if_flt_head));
	TAILQ_INIT(&ifp->if_flt_head);
	VERIFY(ifp->if_flt_busy == 0);
	VERIFY(ifp->if_flt_waiters == 0);
	lck_mtx_unlock(&ifp->if_flt_lock);

	if (!(dl_if->dl_if_flags & DLIF_REUSE)) {
		VERIFY(LIST_EMPTY(&ifp->if_multiaddrs));
		LIST_INIT(&ifp->if_multiaddrs);
	}

	VERIFY(ifp->if_allhostsinm == NULL);
	VERIFY(TAILQ_EMPTY(&ifp->if_addrhead));
	TAILQ_INIT(&ifp->if_addrhead);

	if (ifp->if_index == 0) {
		int idx = if_next_index();

		if (idx == -1) {
			ifp->if_index = 0;
			ifnet_lock_done(ifp);
			ifnet_head_done();
			dlil_if_unlock();
			return (ENOBUFS);
		}
		ifp->if_index = idx;
	}
	/* There should not be anything occupying this slot */
	VERIFY(ifindex2ifnet[ifp->if_index] == NULL);

	/* allocate (if needed) and initialize a link address */
	VERIFY(!(dl_if->dl_if_flags & DLIF_REUSE) || ifp->if_lladdr != NULL);
	ifa = dlil_alloc_lladdr(ifp, ll_addr);
	if (ifa == NULL) {
		ifnet_lock_done(ifp);
		ifnet_head_done();
		dlil_if_unlock();
		return (ENOBUFS);
	}

	VERIFY(ifnet_addrs[ifp->if_index - 1] == NULL);
	ifnet_addrs[ifp->if_index - 1] = ifa;

	/* make this address the first on the list */
	IFA_LOCK(ifa);
	/* hold a reference for ifnet_addrs[] */
	IFA_ADDREF_LOCKED(ifa);
	/* if_attach_link_ifa() holds a reference for ifa_link */
	if_attach_link_ifa(ifp, ifa);
	IFA_UNLOCK(ifa);

#if CONFIG_MACF_NET
	mac_ifnet_label_associate(ifp);
#endif

	TAILQ_INSERT_TAIL(&ifnet_head, ifp, if_link);
	ifindex2ifnet[ifp->if_index] = ifp;

	/* Hold a reference to the underlying dlil_ifnet */
	ifnet_reference(ifp);

	/* Clear stats (save and restore other fields that we care about) */
	if_data_saved = ifp->if_data;
	bzero(&ifp->if_data, sizeof (ifp->if_data));
	ifp->if_data.ifi_type = if_data_saved.ifi_type;
	ifp->if_data.ifi_typelen = if_data_saved.ifi_typelen;
	ifp->if_data.ifi_physical = if_data_saved.ifi_physical;
	ifp->if_data.ifi_addrlen = if_data_saved.ifi_addrlen;
	ifp->if_data.ifi_hdrlen = if_data_saved.ifi_hdrlen;
	ifp->if_data.ifi_mtu = if_data_saved.ifi_mtu;
	ifp->if_data.ifi_baudrate = if_data_saved.ifi_baudrate;
	ifp->if_data.ifi_hwassist = if_data_saved.ifi_hwassist;
	ifp->if_data.ifi_tso_v4_mtu = if_data_saved.ifi_tso_v4_mtu;
	ifp->if_data.ifi_tso_v6_mtu = if_data_saved.ifi_tso_v6_mtu;
	ifnet_touch_lastchange(ifp);

	VERIFY(ifp->if_output_sched_model == IFNET_SCHED_MODEL_NORMAL ||
	    ifp->if_output_sched_model == IFNET_SCHED_MODEL_DRIVER_MANAGED ||
	    ifp->if_output_sched_model == IFNET_SCHED_MODEL_FQ_CODEL);
	/* By default, use SFB and enable flow advisory */
	sflags = PKTSCHEDF_QALG_SFB;
	if (if_flowadv)
		sflags |= PKTSCHEDF_QALG_FLOWCTL;

	if (if_delaybased_queue)
		sflags |= PKTSCHEDF_QALG_DELAYBASED;

	/* Initialize transmit queue(s) */
	err = ifclassq_setup(ifp, sflags, (dl_if->dl_if_flags & DLIF_REUSE));
	if (err != 0) {
		panic_plain("%s: ifp=%p couldn't initialize transmit queue; "
		    "err=%d", __func__, ifp, err);
		/* NOTREACHED */
	}

	/* Sanity checks on the input thread storage */
	dl_inp = &dl_if->dl_if_inpstorage;
	bzero(&dl_inp->stats, sizeof (dl_inp->stats));
	VERIFY(dl_inp->input_waiting == 0);
	VERIFY(dl_inp->wtot == 0);
	VERIFY(dl_inp->ifp == NULL);
	VERIFY(qhead(&dl_inp->rcvq_pkts) == NULL && qempty(&dl_inp->rcvq_pkts));
	VERIFY(qlimit(&dl_inp->rcvq_pkts) == 0);
	VERIFY(!dl_inp->net_affinity);
	VERIFY(ifp->if_inp == NULL);
	VERIFY(dl_inp->input_thr == THREAD_NULL);
	VERIFY(dl_inp->wloop_thr == THREAD_NULL);
	VERIFY(dl_inp->poll_thr == THREAD_NULL);
	VERIFY(dl_inp->tag == 0);
	VERIFY(dl_inp->mode == IFNET_MODEL_INPUT_POLL_OFF);
	bzero(&dl_inp->tstats, sizeof (dl_inp->tstats));
	bzero(&dl_inp->pstats, sizeof (dl_inp->pstats));
	bzero(&dl_inp->sstats, sizeof (dl_inp->sstats));
#if IFNET_INPUT_SANITY_CHK
	VERIFY(dl_inp->input_mbuf_cnt == 0);
#endif /* IFNET_INPUT_SANITY_CHK */

	/*
	 * A specific DLIL input thread is created per Ethernet/cellular
	 * interface or for an interface which supports opportunistic
	 * input polling.  Pseudo interfaces or other types of interfaces
	 * use the main input thread instead.
	 */
	if ((net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) ||
	    ifp->if_type == IFT_ETHER || ifp->if_type == IFT_CELLULAR) {
		ifp->if_inp = dl_inp;
		err = dlil_create_input_thread(ifp, ifp->if_inp);
		if (err != 0) {
			panic_plain("%s: ifp=%p couldn't get an input thread; "
			    "err=%d", __func__, ifp, err);
			/* NOTREACHED */
		}
	}

	/*
	 * If the driver supports the new transmit model, calculate flow hash
	 * and create a workloop starter thread to invoke the if_start callback
	 * where the packets may be dequeued and transmitted.
	 */
	if (ifp->if_eflags & IFEF_TXSTART) {
		ifp->if_flowhash = ifnet_calc_flowhash(ifp);
		VERIFY(ifp->if_flowhash != 0);

		VERIFY(ifp->if_start != NULL);
		VERIFY(ifp->if_start_thread == THREAD_NULL);

		ifnet_set_start_cycle(ifp, NULL);
		ifp->if_start_active = 0;
		ifp->if_start_req = 0;
		ifp->if_start_flags = 0;
		if ((err = kernel_thread_start(ifnet_start_thread_fn, ifp,
		    &ifp->if_start_thread)) != KERN_SUCCESS) {
			panic_plain("%s: ifp=%p couldn't get a start thread; "
			    "err=%d", __func__, ifp, err);
			/* NOTREACHED */
		}
		ml_thread_policy(ifp->if_start_thread, MACHINE_GROUP,
		    (MACHINE_NETWORK_GROUP|MACHINE_NETWORK_WORKLOOP));
	} else {
		ifp->if_flowhash = 0;
	}

	/*
	 * If the driver supports the new receive model, create a poller
	 * thread to invoke if_input_poll callback where the packets may
	 * be dequeued from the driver and processed for reception.
	 */
	if (ifp->if_eflags & IFEF_RXPOLL) {
		VERIFY(ifp->if_input_poll != NULL);
		VERIFY(ifp->if_input_ctl != NULL);
		VERIFY(ifp->if_poll_thread == THREAD_NULL);

		ifnet_set_poll_cycle(ifp, NULL);
		ifp->if_poll_update = 0;
		ifp->if_poll_active = 0;
		ifp->if_poll_req = 0;
		if ((err = kernel_thread_start(ifnet_poll_thread_fn, ifp,
		    &ifp->if_poll_thread)) != KERN_SUCCESS) {
			panic_plain("%s: ifp=%p couldn't get a poll thread; "
			    "err=%d", __func__, ifp, err);
			/* NOTREACHED */
		}
		ml_thread_policy(ifp->if_poll_thread, MACHINE_GROUP,
		    (MACHINE_NETWORK_GROUP|MACHINE_NETWORK_WORKLOOP));
	}
	VERIFY(ifp->if_desc.ifd_maxlen == IF_DESCSIZE);
	VERIFY(ifp->if_desc.ifd_len == 0);
	VERIFY(ifp->if_desc.ifd_desc != NULL);

	/* Record attach PC stacktrace */
	ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_attach);

	ifp->if_updatemcasts = 0;
	if (!LIST_EMPTY(&ifp->if_multiaddrs)) {
		struct ifmultiaddr *ifma;
		LIST_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
			IFMA_LOCK(ifma);
			if (ifma->ifma_addr->sa_family == AF_LINK ||
			    ifma->ifma_addr->sa_family == AF_UNSPEC)
				ifp->if_updatemcasts++;
			IFMA_UNLOCK(ifma);
		}

		printf("%s: attached with %d suspended link-layer multicast "
		    "membership(s)\n", if_name(ifp),
		    ifp->if_updatemcasts);
	}

	/* Clear logging parameters */
	bzero(&ifp->if_log, sizeof (ifp->if_log));
	ifp->if_fg_sendts = 0;

	VERIFY(ifp->if_delegated.ifp == NULL);
	VERIFY(ifp->if_delegated.type == 0);
	VERIFY(ifp->if_delegated.family == 0);
	VERIFY(ifp->if_delegated.subfamily == 0);
	VERIFY(ifp->if_delegated.expensive == 0);

	VERIFY(ifp->if_agentids == NULL);
	VERIFY(ifp->if_agentcount == 0);

	/* Reset interface state */
	bzero(&ifp->if_interface_state, sizeof(ifp->if_interface_state));
	ifp->if_interface_state.valid_bitmask |=
	    IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
	ifp->if_interface_state.interface_availability =
	    IF_INTERFACE_STATE_INTERFACE_AVAILABLE;

	/* Initialize Link Quality Metric (loopback [lo0] is always good) */
	if (ifp == lo_ifp) {
		ifp->if_interface_state.lqm_state = IFNET_LQM_THRESH_GOOD;
		ifp->if_interface_state.valid_bitmask |=
		    IF_INTERFACE_STATE_LQM_STATE_VALID;
	} else {
		ifp->if_interface_state.lqm_state = IFNET_LQM_THRESH_UNKNOWN;
	}

	/*
	 * Enable ECN capability on this interface depending on the
	 * value of ECN global setting
	 */
	if (tcp_ecn_outbound == 2 && !IFNET_IS_CELLULAR(ifp)) {
		ifp->if_eflags |= IFEF_ECN_ENABLE;
		ifp->if_eflags &= ~IFEF_ECN_DISABLE;
	}

	/*
	 * Built-in Cyclops always on policy for WiFi infra
	 */
	if (IFNET_IS_WIFI_INFRA(ifp) && net_qos_policy_wifi_enabled != 0) {
		errno_t error;

		error = if_set_qosmarking_mode(ifp,
		    IFRTYPE_QOSMARKING_FASTLANE);
		if (error != 0) {
			printf("%s if_set_qosmarking_mode(%s) error %d\n",
			    __func__, ifp->if_xname, error);
		} else {
			ifp->if_eflags |= IFEF_QOSMARKING_ENABLED;
#if (DEVELOPMENT || DEBUG)
			printf("%s fastlane enabled on %s\n",
			    __func__, ifp->if_xname);
#endif /* (DEVELOPMENT || DEBUG) */
		}
	}

	ifnet_lock_done(ifp);
	ifnet_head_done();

	lck_mtx_lock(&ifp->if_cached_route_lock);
	/* Enable forwarding cached route */
	ifp->if_fwd_cacheok = 1;
	/* Clean up any existing cached routes */
	ROUTE_RELEASE(&ifp->if_fwd_route);
	bzero(&ifp->if_fwd_route, sizeof (ifp->if_fwd_route));
	ROUTE_RELEASE(&ifp->if_src_route);
	bzero(&ifp->if_src_route, sizeof (ifp->if_src_route));
	ROUTE_RELEASE(&ifp->if_src_route6);
	bzero(&ifp->if_src_route6, sizeof (ifp->if_src_route6));
	lck_mtx_unlock(&ifp->if_cached_route_lock);

	ifnet_llreach_ifattach(ifp, (dl_if->dl_if_flags & DLIF_REUSE));

	/*
	 * Allocate and attach IGMPv3/MLDv2 interface specific variables
	 * and trees; do this before the ifnet is marked as attached.
	 * The ifnet keeps the reference to the info structures even after
	 * the ifnet is detached, since the network-layer records still
	 * refer to the info structures even after that.  This also
	 * makes it possible for them to still function after the ifnet
	 * is recycled or reattached.
	 */
#if INET
	if (IGMP_IFINFO(ifp) == NULL) {
		IGMP_IFINFO(ifp) = igmp_domifattach(ifp, M_WAITOK);
		VERIFY(IGMP_IFINFO(ifp) != NULL);
	} else {
		VERIFY(IGMP_IFINFO(ifp)->igi_ifp == ifp);
		igmp_domifreattach(IGMP_IFINFO(ifp));
	}
#endif /* INET */
#if INET6
	if (MLD_IFINFO(ifp) == NULL) {
		MLD_IFINFO(ifp) = mld_domifattach(ifp, M_WAITOK);
		VERIFY(MLD_IFINFO(ifp) != NULL);
	} else {
		VERIFY(MLD_IFINFO(ifp)->mli_ifp == ifp);
		mld_domifreattach(MLD_IFINFO(ifp));
	}
#endif /* INET6 */

	VERIFY(ifp->if_data_threshold == 0);

	/*
	 * Finally, mark this ifnet as attached.
	 */
	lck_mtx_lock(rnh_lock);
	ifnet_lock_exclusive(ifp);
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	ifp->if_refflags = IFRF_ATTACHED;
	lck_mtx_unlock(&ifp->if_ref_lock);
	if (net_rtref) {
		/* boot-args override; enable idle notification */
		(void) ifnet_set_idle_flags_locked(ifp, IFRF_IDLE_NOTIFY,
		    IFRF_IDLE_NOTIFY);
	} else {
		/* apply previous request(s) to set the idle flags, if any */
		(void) ifnet_set_idle_flags_locked(ifp, ifp->if_idle_new_flags,
		    ifp->if_idle_new_flags_mask);
	}
	ifnet_lock_done(ifp);
	lck_mtx_unlock(rnh_lock);
	dlil_if_unlock();

#if PF
	/*
	 * Attach packet filter to this interface, if enabled.
	 */
	pf_ifnet_hook(ifp, 1);
#endif /* PF */

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_ATTACHED, NULL, 0);

	if (dlil_verbose) {
		printf("%s: attached%s\n", if_name(ifp),
		    (dl_if->dl_if_flags & DLIF_REUSE) ? " (recycled)" : "");
	}

	return (0);
}
/*
 * Prepare the storage for the first/permanent link address, which must
 * have the same lifetime as the ifnet itself.  Although the link
 * address gets removed from if_addrhead and ifnet_addrs[] at detach time,
 * its location in memory must never change as it may still be referred
 * to by some parts of the system afterwards (unfortunate implementation
 * artifacts inherited from BSD.)
 *
 * Caller must hold ifnet lock as writer.
 */
static struct ifaddr *
dlil_alloc_lladdr(struct ifnet *ifp, const struct sockaddr_dl *ll_addr)
{
	struct ifaddr *ifa, *oifa;
	struct sockaddr_dl *asdl, *msdl;
	char workbuf[IFNAMSIZ*2];
	int namelen, masklen, socksize;
	struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;

	ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_EXCLUSIVE);
	VERIFY(ll_addr == NULL || ll_addr->sdl_alen == ifp->if_addrlen);

	namelen = snprintf(workbuf, sizeof (workbuf), "%s",
	    if_name(ifp));
	masklen = offsetof(struct sockaddr_dl, sdl_data[0])
	    + ((namelen > 0) ? namelen : 0);
	socksize = masklen + ifp->if_addrlen;
#define ROUNDUP(a) (1 + (((a) - 1) | (sizeof (u_int32_t) - 1)))
	if ((u_int32_t)socksize < sizeof (struct sockaddr_dl))
		socksize = sizeof(struct sockaddr_dl);
	socksize = ROUNDUP(socksize);
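	/*
	 * Worked arithmetic example for the sizing above (assuming the
	 * usual sockaddr_dl layout with an 8-byte fixed header and a
	 * 12-byte sdl_data): for "en0", namelen = 3, so masklen = 8 + 3
	 * = 11, and with a 6-byte Ethernet address socksize = 17.  That
	 * is below sizeof (struct sockaddr_dl) (20 under these
	 * assumptions), so it is first bumped to 20; ROUNDUP() would in
	 * any case raise 17 to the next multiple of sizeof (u_int32_t),
	 * which is also 20.
	 */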
	ifa = ifp->if_lladdr;
	if (socksize > DLIL_SDLMAXLEN ||
	    (ifa != NULL && ifa != &dl_if->dl_if_lladdr.ifa)) {
		/*
		 * Rare, but in the event that the link address requires
		 * more storage space than DLIL_SDLMAXLEN, allocate the
		 * largest possible storages for address and mask, such
		 * that we can reuse the same space when if_addrlen grows.
		 * This same space will be used when if_addrlen shrinks.
		 */
		if (ifa == NULL || ifa == &dl_if->dl_if_lladdr.ifa) {
			int ifasize = sizeof (*ifa) + 2 * SOCK_MAXADDRLEN;
			ifa = _MALLOC(ifasize, M_IFADDR, M_WAITOK | M_ZERO);
			if (ifa == NULL)
				return (NULL);
			ifa_lock_init(ifa);
			/* Don't set IFD_ALLOC, as this is permanent */
			ifa->ifa_debug = IFD_LINK;
		}
		IFA_LOCK(ifa);
		/* address and mask sockaddr_dl locations */
		asdl = (struct sockaddr_dl *)(ifa + 1);
		bzero(asdl, SOCK_MAXADDRLEN);
		msdl = (struct sockaddr_dl *)(void *)
		    ((char *)asdl + SOCK_MAXADDRLEN);
		bzero(msdl, SOCK_MAXADDRLEN);
	} else {
		VERIFY(ifa == NULL || ifa == &dl_if->dl_if_lladdr.ifa);
		/*
		 * Use the storage areas for address and mask within the
		 * dlil_ifnet structure.  This is the most common case.
		 */
		if (ifa == NULL) {
			ifa = &dl_if->dl_if_lladdr.ifa;
			ifa_lock_init(ifa);
			/* Don't set IFD_ALLOC, as this is permanent */
			ifa->ifa_debug = IFD_LINK;
		}
		IFA_LOCK(ifa);
		/* address and mask sockaddr_dl locations */
		asdl = (struct sockaddr_dl *)(void *)&dl_if->dl_if_lladdr.asdl;
		bzero(asdl, sizeof (dl_if->dl_if_lladdr.asdl));
		msdl = (struct sockaddr_dl *)(void *)&dl_if->dl_if_lladdr.msdl;
		bzero(msdl, sizeof (dl_if->dl_if_lladdr.msdl));
	}

	/* hold a permanent reference for the ifnet itself */
	IFA_ADDREF_LOCKED(ifa);
	oifa = ifp->if_lladdr;
	ifp->if_lladdr = ifa;

	VERIFY(ifa->ifa_debug == IFD_LINK);
	ifa->ifa_ifp = ifp;
	ifa->ifa_rtrequest = link_rtrequest;
	ifa->ifa_addr = (struct sockaddr *)asdl;
	asdl->sdl_len = socksize;
	asdl->sdl_family = AF_LINK;
	if (namelen > 0) {
		bcopy(workbuf, asdl->sdl_data, min(namelen,
		    sizeof (asdl->sdl_data)));
		asdl->sdl_nlen = namelen;
	} else {
		asdl->sdl_nlen = 0;
	}
	asdl->sdl_index = ifp->if_index;
	asdl->sdl_type = ifp->if_type;
	if (ll_addr != NULL) {
		asdl->sdl_alen = ll_addr->sdl_alen;
		bcopy(CONST_LLADDR(ll_addr), LLADDR(asdl), asdl->sdl_alen);
	} else {
		asdl->sdl_alen = 0;
	}
	ifa->ifa_netmask = (struct sockaddr *)msdl;
	msdl->sdl_len = masklen;
	while (namelen > 0)
		msdl->sdl_data[--namelen] = 0xff;
	IFA_UNLOCK(ifa);

	if (oifa != NULL)
		IFA_REMREF(oifa);

	return (ifa);
}
static void
if_purgeaddrs(struct ifnet *ifp)
{
#if INET
	in_purgeaddrs(ifp);
#endif /* INET */
#if INET6
	in6_purgeaddrs(ifp);
#endif /* INET6 */
}
errno_t
ifnet_detach(ifnet_t ifp)
{
	struct ifnet *delegated_ifp;
	struct nd_ifinfo *ndi = NULL;

	if (ifp == NULL)
		return (EINVAL);

	ndi = ND_IFINFO(ifp);
	if (NULL != ndi)
		ndi->cga_initialized = FALSE;

	lck_mtx_lock(rnh_lock);
	ifnet_head_lock_exclusive();
	ifnet_lock_exclusive(ifp);

	/*
	 * Check to see if this interface has previously triggered
	 * aggressive protocol draining; if so, decrement the global
	 * refcnt and clear PR_AGGDRAIN on the route domain if
	 * there are no more of such an interface around.
	 */
	(void) ifnet_set_idle_flags_locked(ifp, 0, ~0);

	lck_mtx_lock_spin(&ifp->if_ref_lock);
	if (!(ifp->if_refflags & IFRF_ATTACHED)) {
		lck_mtx_unlock(&ifp->if_ref_lock);
		ifnet_lock_done(ifp);
		ifnet_head_done();
		lck_mtx_unlock(rnh_lock);
		return (EINVAL);
	} else if (ifp->if_refflags & IFRF_DETACHING) {
		/* Interface has already been detached */
		lck_mtx_unlock(&ifp->if_ref_lock);
		ifnet_lock_done(ifp);
		ifnet_head_done();
		lck_mtx_unlock(rnh_lock);
		return (ENXIO);
	}
	/* Indicate this interface is being detached */
	ifp->if_refflags &= ~IFRF_ATTACHED;
	ifp->if_refflags |= IFRF_DETACHING;
	lck_mtx_unlock(&ifp->if_ref_lock);

	if (dlil_verbose)
		printf("%s: detaching\n", if_name(ifp));

	/* Reset ECN enable/disable flags */
	ifp->if_eflags &= ~IFEF_ECN_DISABLE;
	ifp->if_eflags &= ~IFEF_ECN_ENABLE;

	/*
	 * Remove ifnet from the ifnet_head, ifindex2ifnet[]; it will
	 * no longer be visible during lookups from this point.
	 */
	VERIFY(ifindex2ifnet[ifp->if_index] == ifp);
	TAILQ_REMOVE(&ifnet_head, ifp, if_link);
	ifp->if_link.tqe_next = NULL;
	ifp->if_link.tqe_prev = NULL;
	if (ifp->if_ordered_link.tqe_next != NULL ||
	    ifp->if_ordered_link.tqe_prev != NULL) {
		ifnet_remove_from_ordered_list(ifp);
	}
	ifindex2ifnet[ifp->if_index] = NULL;

	/* 18717626 - reset IFEF_IPV4_ROUTER and IFEF_IPV6_ROUTER */
	ifp->if_eflags &= ~(IFEF_IPV4_ROUTER | IFEF_IPV6_ROUTER);

	/* Record detach PC stacktrace */
	ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_detach);

	/* Clear logging parameters */
	bzero(&ifp->if_log, sizeof (ifp->if_log));

	/* Clear delegated interface info (reference released below) */
	delegated_ifp = ifp->if_delegated.ifp;
	bzero(&ifp->if_delegated, sizeof (ifp->if_delegated));

	/* Reset interface state */
	bzero(&ifp->if_interface_state, sizeof(ifp->if_interface_state));

	ifnet_lock_done(ifp);
	ifnet_head_done();
	lck_mtx_unlock(rnh_lock);

	/* Release reference held on the delegated interface */
	if (delegated_ifp != NULL)
		ifnet_release(delegated_ifp);

	/* Reset Link Quality Metric (unless loopback [lo0]) */
	if (ifp != lo_ifp)
		if_lqm_update(ifp, IFNET_LQM_THRESH_OFF, 0);

	/* Reset TCP local statistics */
	if (ifp->if_tcp_stat != NULL)
		bzero(ifp->if_tcp_stat, sizeof(*ifp->if_tcp_stat));

	/* Reset UDP local statistics */
	if (ifp->if_udp_stat != NULL)
		bzero(ifp->if_udp_stat, sizeof(*ifp->if_udp_stat));

	/* Reset ifnet IPv4 stats */
	if (ifp->if_ipv4_stat != NULL)
		bzero(ifp->if_ipv4_stat, sizeof(*ifp->if_ipv4_stat));

	/* Reset ifnet IPv6 stats */
	if (ifp->if_ipv6_stat != NULL)
		bzero(ifp->if_ipv6_stat, sizeof(*ifp->if_ipv6_stat));

	/* Release memory held for interface link status report */
	if (ifp->if_link_status != NULL) {
		FREE(ifp->if_link_status, M_TEMP);
		ifp->if_link_status = NULL;
	}

	/* Clear agent IDs */
	if (ifp->if_agentids != NULL) {
		FREE(ifp->if_agentids, M_NETAGENT);
		ifp->if_agentids = NULL;
	}
	ifp->if_agentcount = 0;

	/* Let BPF know we're detaching */
	bpfdetach(ifp);

	/* Mark the interface as DOWN */
	if_down(ifp);

	/* Disable forwarding cached route */
	lck_mtx_lock(&ifp->if_cached_route_lock);
	ifp->if_fwd_cacheok = 0;
	lck_mtx_unlock(&ifp->if_cached_route_lock);

	ifp->if_data_threshold = 0;
	/*
	 * Drain any deferred IGMPv3/MLDv2 query responses, but keep the
	 * references to the info structures and leave them attached to
	 * this ifnet.
	 */
#if INET
	igmp_domifdetach(ifp);
#endif /* INET */
#if INET6
	mld_domifdetach(ifp);
#endif /* INET6 */

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHING, NULL, 0);

	/* Let worker thread take care of the rest, to avoid reentrancy */
	dlil_if_lock();
	ifnet_detaching_enqueue(ifp);
	dlil_if_unlock();

	return (0);
}
static void
ifnet_detaching_enqueue(struct ifnet *ifp)
{
	dlil_if_lock_assert();

	++ifnet_detaching_cnt;
	VERIFY(ifnet_detaching_cnt != 0);
	TAILQ_INSERT_TAIL(&ifnet_detaching_head, ifp, if_detaching_link);
	wakeup((caddr_t)&ifnet_delayed_run);
}
static struct ifnet *
ifnet_detaching_dequeue(void)
{
	struct ifnet *ifp;

	dlil_if_lock_assert();

	ifp = TAILQ_FIRST(&ifnet_detaching_head);
	VERIFY(ifnet_detaching_cnt != 0 || ifp == NULL);
	if (ifp != NULL) {
		VERIFY(ifnet_detaching_cnt != 0);
		--ifnet_detaching_cnt;
		TAILQ_REMOVE(&ifnet_detaching_head, ifp, if_detaching_link);
		ifp->if_detaching_link.tqe_next = NULL;
		ifp->if_detaching_link.tqe_prev = NULL;
	}
	return (ifp);
}
static int
ifnet_detacher_thread_cont(int err)
{
#pragma unused(err)
	struct ifnet *ifp;

	for (;;) {
		dlil_if_lock_assert();
		while (ifnet_detaching_cnt == 0) {
			(void) msleep0(&ifnet_delayed_run, &dlil_ifnet_lock,
			    (PZERO - 1), "ifnet_detacher_cont", 0,
			    ifnet_detacher_thread_cont);
			/* NOTREACHED */
		}

		VERIFY(TAILQ_FIRST(&ifnet_detaching_head) != NULL);

		/* Take care of detaching ifnet */
		ifp = ifnet_detaching_dequeue();
		if (ifp != NULL) {
			dlil_if_unlock();
			ifnet_detach_final(ifp);
			dlil_if_lock();
		}
	}
}

static void
ifnet_detacher_thread_func(void *v, wait_result_t w)
{
#pragma unused(v, w)
	dlil_if_lock();
	(void) msleep0(&ifnet_delayed_run, &dlil_ifnet_lock,
	    (PZERO - 1), "ifnet_detacher", 0, ifnet_detacher_thread_cont);
	/*
	 * msleep0() shouldn't have returned as PCATCH was not set;
	 * therefore assert in this case.
	 */
	VERIFY(0);
	/* NOTREACHED */
}
*ifp
)
6078 struct ifnet_filter
*filter
, *filter_next
;
6079 struct ifnet_filter_head fhead
;
6080 struct dlil_threading_info
*inp
;
6082 ifnet_detached_func if_free
;
6085 lck_mtx_lock(&ifp
->if_ref_lock
);
6086 if (!(ifp
->if_refflags
& IFRF_DETACHING
)) {
6087 panic("%s: flags mismatch (detaching not set) ifp=%p",
6093 * Wait until the existing IO references get released
6094 * before we proceed with ifnet_detach. This is not a
6095 * common case, so block without using a continuation.
6097 while (ifp
->if_refio
> 0) {
6098 printf("%s: Waiting for IO references on %s interface "
6099 "to be released\n", __func__
, if_name(ifp
));
6100 (void) msleep(&(ifp
->if_refio
), &ifp
->if_ref_lock
,
6101 (PZERO
- 1), "ifnet_ioref_wait", NULL
);
6103 lck_mtx_unlock(&ifp
->if_ref_lock
);
6105 /* Drain and destroy send queue */
6106 ifclassq_teardown(ifp
);
6108 /* Detach interface filters */
6109 lck_mtx_lock(&ifp
->if_flt_lock
);
6110 if_flt_monitor_enter(ifp
);
6112 lck_mtx_assert(&ifp
->if_flt_lock
, LCK_MTX_ASSERT_OWNED
);
6113 fhead
= ifp
->if_flt_head
;
6114 TAILQ_INIT(&ifp
->if_flt_head
);
6116 for (filter
= TAILQ_FIRST(&fhead
); filter
; filter
= filter_next
) {
6117 filter_next
= TAILQ_NEXT(filter
, filt_next
);
6118 lck_mtx_unlock(&ifp
->if_flt_lock
);
6120 dlil_detach_filter_internal(filter
, 1);
6121 lck_mtx_lock(&ifp
->if_flt_lock
);
6123 if_flt_monitor_leave(ifp
);
6124 lck_mtx_unlock(&ifp
->if_flt_lock
);
6126 /* Tell upper layers to drop their network addresses */
6129 ifnet_lock_exclusive(ifp
);
6131 /* Uplumb all protocols */
6132 for (i
= 0; i
< PROTO_HASH_SLOTS
; i
++) {
6133 struct if_proto
*proto
;
6135 proto
= SLIST_FIRST(&ifp
->if_proto_hash
[i
]);
6136 while (proto
!= NULL
) {
6137 protocol_family_t family
= proto
->protocol_family
;
6138 ifnet_lock_done(ifp
);
6139 proto_unplumb(family
, ifp
);
6140 ifnet_lock_exclusive(ifp
);
6141 proto
= SLIST_FIRST(&ifp
->if_proto_hash
[i
]);
6143 /* There should not be any protocols left */
6144 VERIFY(SLIST_EMPTY(&ifp
->if_proto_hash
[i
]));
6146 zfree(dlif_phash_zone
, ifp
->if_proto_hash
);
6147 ifp
->if_proto_hash
= NULL
;
6149 /* Detach (permanent) link address from if_addrhead */
6150 ifa
= TAILQ_FIRST(&ifp
->if_addrhead
);
6151 VERIFY(ifnet_addrs
[ifp
->if_index
- 1] == ifa
);
6153 if_detach_link_ifa(ifp
, ifa
);
6156 /* Remove (permanent) link address from ifnet_addrs[] */
6158 ifnet_addrs
[ifp
->if_index
- 1] = NULL
;
6160 /* This interface should not be on {ifnet_head,detaching} */
6161 VERIFY(ifp
->if_link
.tqe_next
== NULL
);
6162 VERIFY(ifp
->if_link
.tqe_prev
== NULL
);
6163 VERIFY(ifp
->if_detaching_link
.tqe_next
== NULL
);
6164 VERIFY(ifp
->if_detaching_link
.tqe_prev
== NULL
);
6165 VERIFY(ifp
->if_ordered_link
.tqe_next
== NULL
);
6166 VERIFY(ifp
->if_ordered_link
.tqe_prev
== NULL
);
6168 /* The slot should have been emptied */
6169 VERIFY(ifindex2ifnet
[ifp
->if_index
] == NULL
);
6171 /* There should not be any addresses left */
6172 VERIFY(TAILQ_EMPTY(&ifp
->if_addrhead
));
6175 * Signal the starter thread to terminate itself.
6177 if (ifp
->if_start_thread
!= THREAD_NULL
) {
6178 lck_mtx_lock_spin(&ifp
->if_start_lock
);
6179 ifp
->if_start_flags
= 0;
6180 ifp
->if_start_thread
= THREAD_NULL
;
6181 wakeup_one((caddr_t
)&ifp
->if_start_thread
);
6182 lck_mtx_unlock(&ifp
->if_start_lock
);
6186 * Signal the poller thread to terminate itself.
6188 if (ifp
->if_poll_thread
!= THREAD_NULL
) {
6189 lck_mtx_lock_spin(&ifp
->if_poll_lock
);
6190 ifp
->if_poll_thread
= THREAD_NULL
;
6191 wakeup_one((caddr_t
)&ifp
->if_poll_thread
);
6192 lck_mtx_unlock(&ifp
->if_poll_lock
);
	/*
	 * If thread affinity was set for the workloop thread, we will need
	 * to tear down the affinity and release the extra reference count
	 * taken at attach time.  Does not apply to lo0 or other interfaces
	 * without dedicated input threads.
	 */
	if ((inp = ifp->if_inp) != NULL) {
		VERIFY(inp != dlil_main_input_thread);

		if (inp->net_affinity) {
			struct thread *tp, *wtp, *ptp;

			lck_mtx_lock_spin(&inp->input_lck);
			wtp = inp->wloop_thr;
			inp->wloop_thr = THREAD_NULL;
			ptp = inp->poll_thr;
			inp->poll_thr = THREAD_NULL;
			tp = inp->input_thr;	/* don't nullify now */
			inp->tag = 0;
			inp->net_affinity = FALSE;
			lck_mtx_unlock(&inp->input_lck);

			/* Tear down poll thread affinity */
			if (ptp != NULL) {
				VERIFY(ifp->if_eflags & IFEF_RXPOLL);
				(void) dlil_affinity_set(ptp,
				    THREAD_AFFINITY_TAG_NULL);
				thread_deallocate(ptp);
			}

			/* Tear down workloop thread affinity */
			if (wtp != NULL) {
				(void) dlil_affinity_set(wtp,
				    THREAD_AFFINITY_TAG_NULL);
				thread_deallocate(wtp);
			}

			/* Tear down DLIL input thread affinity */
			(void) dlil_affinity_set(tp, THREAD_AFFINITY_TAG_NULL);
			thread_deallocate(tp);
		}

		/* disassociate ifp DLIL input thread */
		ifp->if_inp = NULL;

		lck_mtx_lock_spin(&inp->input_lck);
		inp->input_waiting |= DLIL_INPUT_TERMINATE;
		if (!(inp->input_waiting & DLIL_INPUT_RUNNING)) {
			wakeup_one((caddr_t)&inp->input_waiting);
		}
		lck_mtx_unlock(&inp->input_lck);
	}
	/* The driver might unload, so point these to ourselves */
	if_free = ifp->if_free;
	ifp->if_output_handler = ifp_if_output;
	ifp->if_output = ifp_if_output;
	ifp->if_pre_enqueue = ifp_if_output;
	ifp->if_start = ifp_if_start;
	ifp->if_output_ctl = ifp_if_ctl;
	ifp->if_input_handler = ifp_if_input;
	ifp->if_input_poll = ifp_if_input_poll;
	ifp->if_input_ctl = ifp_if_ctl;
	ifp->if_ioctl = ifp_if_ioctl;
	ifp->if_set_bpf_tap = ifp_if_set_bpf_tap;
	ifp->if_free = ifp_if_free;
	ifp->if_demux = ifp_if_demux;
	ifp->if_event = ifp_if_event;
	ifp->if_framer_legacy = ifp_if_framer;
	ifp->if_framer = ifp_if_framer_extended;
	ifp->if_add_proto = ifp_if_add_proto;
	ifp->if_del_proto = ifp_if_del_proto;
	ifp->if_check_multi = ifp_if_check_multi;
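
	/*
	 * From this point on, any packets or control requests that race
	 * with the detach are absorbed by the ifp_if_* stubs below:
	 * outputs free the mbuf chain, demux/framer return EJUSTRETURN,
	 * and ioctls report EOPNOTSUPP.  This is what makes it safe for
	 * the driver's code to unload once if_free has been called.
	 */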
	/* wipe out interface description */
	VERIFY(ifp->if_desc.ifd_maxlen == IF_DESCSIZE);
	ifp->if_desc.ifd_len = 0;
	VERIFY(ifp->if_desc.ifd_desc != NULL);
	bzero(ifp->if_desc.ifd_desc, IF_DESCSIZE);

	/* there shouldn't be any delegation by now */
	VERIFY(ifp->if_delegated.ifp == NULL);
	VERIFY(ifp->if_delegated.type == 0);
	VERIFY(ifp->if_delegated.family == 0);
	VERIFY(ifp->if_delegated.subfamily == 0);
	VERIFY(ifp->if_delegated.expensive == 0);

	/* QoS marking gets cleared */
	ifp->if_eflags &= ~IFEF_QOSMARKING_ENABLED;
	if_set_qosmarking_mode(ifp, IFRTYPE_QOSMARKING_MODE_NONE);

	ifnet_lock_done(ifp);
#if PF
	/*
	 * Detach this interface from packet filter, if enabled.
	 */
	pf_ifnet_hook(ifp, 0);
#endif /* PF */

	/* Filter list should be empty */
	lck_mtx_lock_spin(&ifp->if_flt_lock);
	VERIFY(TAILQ_EMPTY(&ifp->if_flt_head));
	VERIFY(ifp->if_flt_busy == 0);
	VERIFY(ifp->if_flt_waiters == 0);
	lck_mtx_unlock(&ifp->if_flt_lock);
	/* Last chance to drain send queue */
	if_qflush(ifp, 0);

	/* Last chance to cleanup any cached route */
	lck_mtx_lock(&ifp->if_cached_route_lock);
	VERIFY(!ifp->if_fwd_cacheok);
	ROUTE_RELEASE(&ifp->if_fwd_route);
	bzero(&ifp->if_fwd_route, sizeof (ifp->if_fwd_route));
	ROUTE_RELEASE(&ifp->if_src_route);
	bzero(&ifp->if_src_route, sizeof (ifp->if_src_route));
	ROUTE_RELEASE(&ifp->if_src_route6);
	bzero(&ifp->if_src_route6, sizeof (ifp->if_src_route6));
	lck_mtx_unlock(&ifp->if_cached_route_lock);

	VERIFY(ifp->if_data_threshold == 0);

	ifnet_llreach_ifdetach(ifp);

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHED, NULL, 0);

	/*
	 * Finally, mark this ifnet as detached.
	 */
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	if (!(ifp->if_refflags & IFRF_DETACHING)) {
		panic("%s: flags mismatch (detaching not set) ifp=%p",
		    __func__, ifp);
		/* NOTREACHED */
	}
	ifp->if_refflags &= ~IFRF_DETACHING;
	lck_mtx_unlock(&ifp->if_ref_lock);
	if (if_free != NULL)
		if_free(ifp);

	if (dlil_verbose)
		printf("%s: detached\n", if_name(ifp));

	/* Release reference held during ifnet attach */
	ifnet_release(ifp);
}
static errno_t
ifp_if_output(struct ifnet *ifp, struct mbuf *m)
{
#pragma unused(ifp)
	m_freem_list(m);
	return (0);
}

static void
ifp_if_start(struct ifnet *ifp)
{
	ifnet_purge(ifp);
}

static void
ifp_if_input(struct ifnet *ifp, struct mbuf *m_head,
    struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
    boolean_t poll, struct thread *tp)
{
#pragma unused(ifp, m_tail, s, poll, tp)
	m_freem_list(m_head);
}

static void
ifp_if_input_poll(struct ifnet *ifp, u_int32_t flags, u_int32_t max_cnt,
    struct mbuf **m_head, struct mbuf **m_tail, u_int32_t *cnt, u_int32_t *len)
{
#pragma unused(ifp, flags, max_cnt)
	if (m_head != NULL)
		*m_head = NULL;
	if (m_tail != NULL)
		*m_tail = NULL;
	if (cnt != NULL)
		*cnt = 0;
	if (len != NULL)
		*len = 0;
}

static errno_t
ifp_if_ctl(struct ifnet *ifp, ifnet_ctl_cmd_t cmd, u_int32_t arglen, void *arg)
{
#pragma unused(ifp, cmd, arglen, arg)
	return (EOPNOTSUPP);
}

static errno_t
ifp_if_demux(struct ifnet *ifp, struct mbuf *m, char *fh, protocol_family_t *pf)
{
#pragma unused(ifp, fh, pf)
	m_freem(m);
	return (EJUSTRETURN);
}

static errno_t
ifp_if_add_proto(struct ifnet *ifp, protocol_family_t pf,
    const struct ifnet_demux_desc *da, u_int32_t dc)
{
#pragma unused(ifp, pf, da, dc)
	return (EINVAL);
}

static errno_t
ifp_if_del_proto(struct ifnet *ifp, protocol_family_t pf)
{
#pragma unused(ifp, pf)
	return (EINVAL);
}

static errno_t
ifp_if_check_multi(struct ifnet *ifp, const struct sockaddr *sa)
{
#pragma unused(ifp, sa)
	return (EOPNOTSUPP);
}

static errno_t
ifp_if_framer(struct ifnet *ifp, struct mbuf **m,
    const struct sockaddr *sa, const char *ll, const char *t)
{
#pragma unused(ifp, m, sa, ll, t)
	return (ifp_if_framer_extended(ifp, m, sa, ll, t, NULL, NULL));
}

static errno_t
ifp_if_framer_extended(struct ifnet *ifp, struct mbuf **m,
    const struct sockaddr *sa, const char *ll, const char *t,
    u_int32_t *pre, u_int32_t *post)
{
#pragma unused(ifp, sa, ll, t)
	m_freem(*m);
	*m = NULL;

	if (pre != NULL)
		*pre = 0;
	if (post != NULL)
		*post = 0;

	return (EJUSTRETURN);
}

static errno_t
ifp_if_ioctl(struct ifnet *ifp, unsigned long cmd, void *arg)
{
#pragma unused(ifp, cmd, arg)
	return (EOPNOTSUPP);
}

static errno_t
ifp_if_set_bpf_tap(struct ifnet *ifp, bpf_tap_mode tm, bpf_packet_func f)
{
#pragma unused(ifp, tm, f)
	/* XXX not sure what to do here */
	return (0);
}

static void
ifp_if_free(struct ifnet *ifp)
{
#pragma unused(ifp)
}

static void
ifp_if_event(struct ifnet *ifp, const struct kev_msg *e)
{
#pragma unused(ifp, e)
}
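
/*
 * Illustrative sketch (not compiled): the order of operations a
 * hypothetical driver would follow so that the detach path and the
 * stubs above come into play.  The mydrv_* names are placeholders,
 * not part of this file; ifnet_detach() and ifnet_release() are the
 * public KPI entry points.
 */
#if 0
static void
mydrv_stop(struct mydrv_softc *sc)
{
	/* triggers KEV_DL_IF_DETACHING and, eventually, ifnet_detach_final() */
	(void) ifnet_detach(sc->sc_ifp);

	/* drop the reference taken at ifnet_allocate() time */
	(void) ifnet_release(sc->sc_ifp);
	sc->sc_ifp = NULL;
}
#endif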
int dlil_if_acquire(u_int32_t family, const void *uniqueid,
    size_t uniqueid_len, struct ifnet **ifp)
{
	struct ifnet *ifp1 = NULL;
	struct dlil_ifnet *dlifp1 = NULL;
	void *buf, *base, **pbuf;
	int ret = 0;

	dlil_if_lock();
	TAILQ_FOREACH(dlifp1, &dlil_ifnet_head, dl_if_link) {
		ifp1 = (struct ifnet *)dlifp1;

		if (ifp1->if_family != family)
			continue;

		lck_mtx_lock(&dlifp1->dl_if_lock);
		/* same uniqueid and same len or no unique id specified */
		if ((uniqueid_len == dlifp1->dl_if_uniqueid_len) &&
		    bcmp(uniqueid, dlifp1->dl_if_uniqueid, uniqueid_len) == 0) {
			/* check for matching interface in use */
			if (dlifp1->dl_if_flags & DLIF_INUSE) {
				if (uniqueid_len) {
					ret = EBUSY;
					lck_mtx_unlock(&dlifp1->dl_if_lock);
					goto end;
				}
			} else {
				dlifp1->dl_if_flags |= (DLIF_INUSE|DLIF_REUSE);
				lck_mtx_unlock(&dlifp1->dl_if_lock);
				*ifp = ifp1;
				goto end;
			}
		}
		lck_mtx_unlock(&dlifp1->dl_if_lock);
	}

	/* no interface found, allocate a new one */
	buf = zalloc(dlif_zone);
	if (buf == NULL) {
		ret = ENOMEM;
		goto end;
	}
	bzero(buf, dlif_bufsize);

	/* Get the 64-bit aligned base address for this object */
	base = (void *)P2ROUNDUP((intptr_t)buf + sizeof (u_int64_t),
	    sizeof (u_int64_t));
	VERIFY(((intptr_t)base + dlif_size) <= ((intptr_t)buf + dlif_bufsize));

	/*
	 * Wind back a pointer size from the aligned base and
	 * save the original address so we can free it later.
	 */
	pbuf = (void **)((intptr_t)base - sizeof (void *));
	*pbuf = buf;
	dlifp1 = base;

	if (uniqueid_len) {
		MALLOC(dlifp1->dl_if_uniqueid, void *, uniqueid_len,
		    M_NKE, M_WAITOK);
		if (dlifp1->dl_if_uniqueid == NULL) {
			zfree(dlif_zone, dlifp1);
			ret = ENOMEM;
			goto end;
		}
		bcopy(uniqueid, dlifp1->dl_if_uniqueid, uniqueid_len);
		dlifp1->dl_if_uniqueid_len = uniqueid_len;
	}

	ifp1 = (struct ifnet *)dlifp1;
	dlifp1->dl_if_flags = DLIF_INUSE;
	if (ifnet_debug) {
		dlifp1->dl_if_flags |= DLIF_DEBUG;
		dlifp1->dl_if_trace = dlil_if_trace;
	}
	ifp1->if_name = dlifp1->dl_if_namestorage;
	ifp1->if_xname = dlifp1->dl_if_xnamestorage;

	/* initialize interface description */
	ifp1->if_desc.ifd_maxlen = IF_DESCSIZE;
	ifp1->if_desc.ifd_len = 0;
	ifp1->if_desc.ifd_desc = dlifp1->dl_if_descstorage;

#if CONFIG_MACF_NET
	mac_ifnet_label_init(ifp1);
#endif

	if ((ret = dlil_alloc_local_stats(ifp1)) != 0) {
		DLIL_PRINTF("%s: failed to allocate if local stats, "
		    "error: %d\n", __func__, ret);
		/* This probably shouldn't be fatal */
		ret = 0;
	}

	lck_mtx_init(&dlifp1->dl_if_lock, ifnet_lock_group, ifnet_lock_attr);
	lck_rw_init(&ifp1->if_lock, ifnet_lock_group, ifnet_lock_attr);
	lck_mtx_init(&ifp1->if_ref_lock, ifnet_lock_group, ifnet_lock_attr);
	lck_mtx_init(&ifp1->if_flt_lock, ifnet_lock_group, ifnet_lock_attr);
	lck_mtx_init(&ifp1->if_addrconfig_lock, ifnet_lock_group,
	    ifnet_lock_attr);
	lck_rw_init(&ifp1->if_llreach_lock, ifnet_lock_group, ifnet_lock_attr);
#if INET
	lck_rw_init(&ifp1->if_inetdata_lock, ifnet_lock_group,
	    ifnet_lock_attr);
	ifp1->if_inetdata = NULL;
#endif
#if INET6
	lck_rw_init(&ifp1->if_inet6data_lock, ifnet_lock_group,
	    ifnet_lock_attr);
	ifp1->if_inet6data = NULL;
#endif
	lck_rw_init(&ifp1->if_link_status_lock, ifnet_lock_group,
	    ifnet_lock_attr);
	ifp1->if_link_status = NULL;

	/* for send data paths */
	lck_mtx_init(&ifp1->if_start_lock, ifnet_snd_lock_group,
	    ifnet_lock_attr);
	lck_mtx_init(&ifp1->if_cached_route_lock, ifnet_snd_lock_group,
	    ifnet_lock_attr);
	lck_mtx_init(&ifp1->if_snd.ifcq_lock, ifnet_snd_lock_group,
	    ifnet_lock_attr);

	/* for receive data paths */
	lck_mtx_init(&ifp1->if_poll_lock, ifnet_rcv_lock_group,
	    ifnet_lock_attr);

	TAILQ_INSERT_TAIL(&dlil_ifnet_head, dlifp1, dl_if_link);

	*ifp = ifp1;

end:
	dlil_if_unlock();

	VERIFY(dlifp1 == NULL || (IS_P2ALIGNED(dlifp1, sizeof (u_int64_t)) &&
	    IS_P2ALIGNED(&ifp1->if_data, sizeof (u_int64_t))));

	return (ret);
}
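
/*
 * Illustrative sketch (not compiled): dlil_if_acquire() is normally
 * reached through the ifnet_allocate() KPI rather than called
 * directly.  The init fields shown are a minimal subset; the mydrv_*
 * names are placeholders, not part of this file.
 */
#if 0
	struct ifnet_init_params init;
	ifnet_t ifp;

	bzero(&init, sizeof (init));
	init.uniqueid = sc->sc_uniqueid;	/* feeds dl_if_uniqueid above */
	init.uniqueid_len = sc->sc_uniqueid_len;
	init.name = "mydrv";
	init.unit = 0;
	init.family = IFNET_FAMILY_ETHERNET;
	init.type = IFT_ETHER;
	init.output = mydrv_output;
	init.demux = mydrv_demux;
	init.add_proto = mydrv_add_proto;
	init.del_proto = mydrv_del_proto;

	if (ifnet_allocate(&init, &ifp) == 0)
		(void) ifnet_attach(ifp, NULL);
#endif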
__private_extern__ void
dlil_if_release(ifnet_t ifp)
{
	struct dlil_ifnet *dlifp = (struct dlil_ifnet *)ifp;

	ifnet_lock_exclusive(ifp);
	lck_mtx_lock(&dlifp->dl_if_lock);
	dlifp->dl_if_flags &= ~DLIF_INUSE;
	strlcpy(dlifp->dl_if_namestorage, ifp->if_name, IFNAMSIZ);
	ifp->if_name = dlifp->dl_if_namestorage;
	/* Reset external name (name + unit) */
	ifp->if_xname = dlifp->dl_if_xnamestorage;
	snprintf(__DECONST(char *, ifp->if_xname), IFXNAMSIZ,
	    "%s?", ifp->if_name);
	lck_mtx_unlock(&dlifp->dl_if_lock);
#if CONFIG_MACF_NET
	/*
	 * We can either recycle the MAC label here or in dlil_if_acquire().
	 * It seems logical to do it here but this means that anything that
	 * still has a handle on ifp will now see it as unlabeled.
	 * Since the interface is "dead" that may be OK.  Revisit later.
	 */
	mac_ifnet_label_recycle(ifp);
#endif
	ifnet_lock_done(ifp);
}
__private_extern__ void
dlil_if_lock(void)
{
	lck_mtx_lock(&dlil_ifnet_lock);
}

__private_extern__ void
dlil_if_unlock(void)
{
	lck_mtx_unlock(&dlil_ifnet_lock);
}

__private_extern__ void
dlil_if_lock_assert(void)
{
	lck_mtx_assert(&dlil_ifnet_lock, LCK_MTX_ASSERT_OWNED);
}

__private_extern__ void
dlil_proto_unplumb_all(struct ifnet *ifp)
{
	/*
	 * if_proto_hash[0-2] are for PF_INET, PF_INET6 and PF_VLAN, where
	 * each bucket contains exactly one entry; PF_VLAN does not need an
	 * explicit unplumb.
	 *
	 * if_proto_hash[3] is for other protocols; we expect anything
	 * in this bucket to respond to the DETACHING event (which would
	 * have happened by now) and do the unplumb then.
	 */
	(void) proto_unplumb(PF_INET, ifp);
#if INET6
	(void) proto_unplumb(PF_INET6, ifp);
#endif /* INET6 */
}
static void
ifp_src_route_copyout(struct ifnet *ifp, struct route *dst)
{
	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
	lck_mtx_convert_spin(&ifp->if_cached_route_lock);

	route_copyout(dst, &ifp->if_src_route, sizeof (*dst));

	lck_mtx_unlock(&ifp->if_cached_route_lock);
}

static void
ifp_src_route_copyin(struct ifnet *ifp, struct route *src)
{
	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
	lck_mtx_convert_spin(&ifp->if_cached_route_lock);

	if (ifp->if_fwd_cacheok) {
		route_copyin(src, &ifp->if_src_route, sizeof (*src));
	} else {
		ROUTE_RELEASE(src);
	}
	lck_mtx_unlock(&ifp->if_cached_route_lock);
}

#if INET6
static void
ifp_src_route6_copyout(struct ifnet *ifp, struct route_in6 *dst)
{
	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
	lck_mtx_convert_spin(&ifp->if_cached_route_lock);

	route_copyout((struct route *)dst, (struct route *)&ifp->if_src_route6,
	    sizeof (*dst));

	lck_mtx_unlock(&ifp->if_cached_route_lock);
}

static void
ifp_src_route6_copyin(struct ifnet *ifp, struct route_in6 *src)
{
	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
	lck_mtx_convert_spin(&ifp->if_cached_route_lock);

	if (ifp->if_fwd_cacheok) {
		route_copyin((struct route *)src,
		    (struct route *)&ifp->if_src_route6, sizeof (*src));
	} else {
		ROUTE_RELEASE(src);
	}
	lck_mtx_unlock(&ifp->if_cached_route_lock);
}
#endif /* INET6 */
struct rtentry *
ifnet_cached_rtlookup_inet(struct ifnet *ifp, struct in_addr src_ip)
{
	struct route src_rt;
	struct sockaddr_in *dst;

	dst = (struct sockaddr_in *)(void *)(&src_rt.ro_dst);

	ifp_src_route_copyout(ifp, &src_rt);

	if (ROUTE_UNUSABLE(&src_rt) || src_ip.s_addr != dst->sin_addr.s_addr) {
		ROUTE_RELEASE(&src_rt);
		if (dst->sin_family != AF_INET) {
			bzero(&src_rt.ro_dst, sizeof (src_rt.ro_dst));
			dst->sin_len = sizeof (src_rt.ro_dst);
			dst->sin_family = AF_INET;
		}
		dst->sin_addr = src_ip;

		if (src_rt.ro_rt == NULL) {
			src_rt.ro_rt = rtalloc1_scoped((struct sockaddr *)dst,
			    0, 0, ifp->if_index);

			if (src_rt.ro_rt != NULL) {
				/* retain a ref, copyin consumes one */
				struct rtentry *rte = src_rt.ro_rt;
				RT_ADDREF(rte);
				ifp_src_route_copyin(ifp, &src_rt);
				src_rt.ro_rt = rte;
			}
		}
	}

	return (src_rt.ro_rt);
}

#if INET6
struct rtentry *
ifnet_cached_rtlookup_inet6(struct ifnet *ifp, struct in6_addr *src_ip6)
{
	struct route_in6 src_rt;

	ifp_src_route6_copyout(ifp, &src_rt);

	if (ROUTE_UNUSABLE(&src_rt) ||
	    !IN6_ARE_ADDR_EQUAL(src_ip6, &src_rt.ro_dst.sin6_addr)) {
		ROUTE_RELEASE(&src_rt);
		if (src_rt.ro_dst.sin6_family != AF_INET6) {
			bzero(&src_rt.ro_dst, sizeof (src_rt.ro_dst));
			src_rt.ro_dst.sin6_len = sizeof (src_rt.ro_dst);
			src_rt.ro_dst.sin6_family = AF_INET6;
		}
		src_rt.ro_dst.sin6_scope_id = in6_addr2scopeid(ifp, src_ip6);
		bcopy(src_ip6, &src_rt.ro_dst.sin6_addr,
		    sizeof (src_rt.ro_dst.sin6_addr));

		if (src_rt.ro_rt == NULL) {
			src_rt.ro_rt = rtalloc1_scoped(
			    (struct sockaddr *)&src_rt.ro_dst, 0, 0,
			    ifp->if_index);

			if (src_rt.ro_rt != NULL) {
				/* retain a ref, copyin consumes one */
				struct rtentry *rte = src_rt.ro_rt;
				RT_ADDREF(rte);
				ifp_src_route6_copyin(ifp, &src_rt);
				src_rt.ro_rt = rte;
			}
		}
	}

	return (src_rt.ro_rt);
}
#endif /* INET6 */
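
/*
 * Note on the RT_ADDREF() dance in the two lookup routines above:
 * route_copyin() consumes one reference on src_rt.ro_rt when it
 * caches the route in the ifnet, so an extra reference is taken
 * first and src_rt.ro_rt is restored afterwards; the caller always
 * receives a route carrying a reference it must release.
 */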
void
if_lqm_update(struct ifnet *ifp, int lqm, int locked)
{
	struct kev_dl_link_quality_metric_data ev_lqm_data;

	VERIFY(lqm >= IFNET_LQM_MIN && lqm <= IFNET_LQM_MAX);

	/* Normalize to edge */
	if (lqm >= 0 && lqm <= IFNET_LQM_THRESH_BAD)
		lqm = IFNET_LQM_THRESH_BAD;
	else if (lqm > IFNET_LQM_THRESH_BAD && lqm <= IFNET_LQM_THRESH_POOR)
		lqm = IFNET_LQM_THRESH_POOR;
	else if (lqm > IFNET_LQM_THRESH_POOR && lqm <= IFNET_LQM_THRESH_GOOD)
		lqm = IFNET_LQM_THRESH_GOOD;

	/*
	 * Take the lock if needed
	 */
	if (!locked)
		ifnet_lock_exclusive(ifp);

	if (lqm == ifp->if_interface_state.lqm_state &&
	    (ifp->if_interface_state.valid_bitmask &
	    IF_INTERFACE_STATE_LQM_STATE_VALID)) {
		/*
		 * Release the lock if was not held by the caller
		 */
		if (!locked)
			ifnet_lock_done(ifp);
		return;		/* nothing to update */
	}
	ifp->if_interface_state.valid_bitmask |=
	    IF_INTERFACE_STATE_LQM_STATE_VALID;
	ifp->if_interface_state.lqm_state = lqm;

	/*
	 * Don't want to hold the lock when issuing kernel events
	 */
	ifnet_lock_done(ifp);

	bzero(&ev_lqm_data, sizeof (ev_lqm_data));
	ev_lqm_data.link_quality_metric = lqm;

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_LINK_QUALITY_METRIC_CHANGED,
	    (struct net_event_data *)&ev_lqm_data, sizeof (ev_lqm_data));

	/*
	 * Reacquire the lock for the caller
	 */
	if (locked)
		ifnet_lock_exclusive(ifp);
}
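
/*
 * Example of the normalization above: any raw metric that falls in
 * (IFNET_LQM_THRESH_POOR, IFNET_LQM_THRESH_GOOD] is reported as
 * IFNET_LQM_THRESH_GOOD, so consumers of the kernel event only ever
 * see the edge values, never intermediate readings.
 */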
static void
if_rrc_state_update(struct ifnet *ifp, unsigned int rrc_state)
{
	struct kev_dl_rrc_state kev;

	if (rrc_state == ifp->if_interface_state.rrc_state &&
	    (ifp->if_interface_state.valid_bitmask &
	    IF_INTERFACE_STATE_RRC_STATE_VALID))
		return;

	ifp->if_interface_state.valid_bitmask |=
	    IF_INTERFACE_STATE_RRC_STATE_VALID;

	ifp->if_interface_state.rrc_state = rrc_state;

	/*
	 * Don't want to hold the lock when issuing kernel events
	 */
	ifnet_lock_done(ifp);

	bzero(&kev, sizeof(struct kev_dl_rrc_state));
	kev.rrc_state = rrc_state;

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_RRC_STATE_CHANGED,
	    (struct net_event_data *)&kev, sizeof(struct kev_dl_rrc_state));

	ifnet_lock_exclusive(ifp);
}
errno_t
if_state_update(struct ifnet *ifp,
    struct if_interface_state *if_interface_state)
{
	u_short if_index_available = 0;

	ifnet_lock_exclusive(ifp);

	if ((ifp->if_type != IFT_CELLULAR) &&
	    (if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_RRC_STATE_VALID)) {
		ifnet_lock_done(ifp);
		return (ENOTSUP);
	}
	if ((if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_LQM_STATE_VALID) &&
	    (if_interface_state->lqm_state < IFNET_LQM_MIN ||
	    if_interface_state->lqm_state > IFNET_LQM_MAX)) {
		ifnet_lock_done(ifp);
		return (EINVAL);
	}
	if ((if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_RRC_STATE_VALID) &&
	    if_interface_state->rrc_state !=
	    IF_INTERFACE_STATE_RRC_STATE_IDLE &&
	    if_interface_state->rrc_state !=
	    IF_INTERFACE_STATE_RRC_STATE_CONNECTED) {
		ifnet_lock_done(ifp);
		return (EINVAL);
	}

	if (if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_LQM_STATE_VALID) {
		if_lqm_update(ifp, if_interface_state->lqm_state, 1);
	}
	if (if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_RRC_STATE_VALID) {
		if_rrc_state_update(ifp, if_interface_state->rrc_state);
	}
	if (if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID) {
		ifp->if_interface_state.valid_bitmask |=
		    IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
		ifp->if_interface_state.interface_availability =
		    if_interface_state->interface_availability;

		if (ifp->if_interface_state.interface_availability ==
		    IF_INTERFACE_STATE_INTERFACE_AVAILABLE) {
			if_index_available = ifp->if_index;
		}
	}
	ifnet_lock_done(ifp);

	/*
	 * Check if the TCP connections going on this interface should be
	 * forced to send probe packets instead of waiting for TCP timers
	 * to fire.  This will be done when there is an explicit
	 * notification that the interface became available.
	 */
	if (if_index_available > 0)
		tcp_interface_send_probe(if_index_available);

	return (0);
}
void
if_get_state(struct ifnet *ifp,
    struct if_interface_state *if_interface_state)
{
	ifnet_lock_shared(ifp);

	if_interface_state->valid_bitmask = 0;

	if (ifp->if_interface_state.valid_bitmask &
	    IF_INTERFACE_STATE_RRC_STATE_VALID) {
		if_interface_state->valid_bitmask |=
		    IF_INTERFACE_STATE_RRC_STATE_VALID;
		if_interface_state->rrc_state =
		    ifp->if_interface_state.rrc_state;
	}
	if (ifp->if_interface_state.valid_bitmask &
	    IF_INTERFACE_STATE_LQM_STATE_VALID) {
		if_interface_state->valid_bitmask |=
		    IF_INTERFACE_STATE_LQM_STATE_VALID;
		if_interface_state->lqm_state =
		    ifp->if_interface_state.lqm_state;
	}
	if (ifp->if_interface_state.valid_bitmask &
	    IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID) {
		if_interface_state->valid_bitmask |=
		    IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
		if_interface_state->interface_availability =
		    ifp->if_interface_state.interface_availability;
	}

	ifnet_lock_done(ifp);
}
errno_t
if_probe_connectivity(struct ifnet *ifp, u_int32_t conn_probe)
{
	ifnet_lock_exclusive(ifp);
	if (conn_probe > 1) {
		ifnet_lock_done(ifp);
		return (EINVAL);
	}
	if (conn_probe == 0)
		ifp->if_eflags &= ~IFEF_PROBE_CONNECTIVITY;
	else
		ifp->if_eflags |= IFEF_PROBE_CONNECTIVITY;
	ifnet_lock_done(ifp);

	tcp_probe_connectivity(ifp, conn_probe);
	return (0);
}
int
uuid_get_ethernet(u_int8_t *node)
{
	struct ifnet *ifp;
	struct sockaddr_dl *sdl;

	ifnet_head_lock_shared();
	TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
		ifnet_lock_shared(ifp);
		IFA_LOCK_SPIN(ifp->if_lladdr);
		sdl = (struct sockaddr_dl *)(void *)ifp->if_lladdr->ifa_addr;
		if (sdl->sdl_type == IFT_ETHER) {
			memcpy(node, LLADDR(sdl), ETHER_ADDR_LEN);
			IFA_UNLOCK(ifp->if_lladdr);
			ifnet_lock_done(ifp);
			ifnet_head_done();
			return (0);
		}
		IFA_UNLOCK(ifp->if_lladdr);
		ifnet_lock_done(ifp);
	}
	ifnet_head_done();

	return (-1);
}
static int
sysctl_rxpoll SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	uint32_t i;
	int err;

	i = if_rxpoll;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (net_rxpoll == 0)
		return (ENXIO);

	if_rxpoll = i;
	return (err);
}

static int
sysctl_rxpoll_mode_holdtime SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	uint64_t q;
	int err;

	q = if_rxpoll_mode_holdtime;

	err = sysctl_handle_quad(oidp, &q, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (q < IF_RXPOLL_MODE_HOLDTIME_MIN)
		q = IF_RXPOLL_MODE_HOLDTIME_MIN;

	if_rxpoll_mode_holdtime = q;

	return (err);
}

static int
sysctl_rxpoll_sample_holdtime SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	uint64_t q;
	int err;

	q = if_rxpoll_sample_holdtime;

	err = sysctl_handle_quad(oidp, &q, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (q < IF_RXPOLL_SAMPLETIME_MIN)
		q = IF_RXPOLL_SAMPLETIME_MIN;

	if_rxpoll_sample_holdtime = q;

	return (err);
}

static int
sysctl_rxpoll_interval_time SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	uint64_t q;
	int err;

	q = if_rxpoll_interval_time;

	err = sysctl_handle_quad(oidp, &q, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (q < IF_RXPOLL_INTERVALTIME_MIN)
		q = IF_RXPOLL_INTERVALTIME_MIN;

	if_rxpoll_interval_time = q;

	return (err);
}

static int
sysctl_rxpoll_wlowat SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	uint32_t i;
	int err;

	i = if_rxpoll_wlowat;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (i == 0 || i >= if_rxpoll_whiwat)
		return (EINVAL);

	if_rxpoll_wlowat = i;
	return (err);
}

static int
sysctl_rxpoll_whiwat SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	uint32_t i;
	int err;

	i = if_rxpoll_whiwat;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (i <= if_rxpoll_wlowat)
		return (EINVAL);

	if_rxpoll_whiwat = i;
	return (err);
}

static int
sysctl_sndq_maxlen SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	uint32_t i;
	int err;

	i = if_sndq_maxlen;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (i < IF_SNDQ_MINLEN)
		i = IF_SNDQ_MINLEN;

	if_sndq_maxlen = i;
	return (err);
}

static int
sysctl_rcvq_maxlen SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	uint32_t i;
	int err;

	i = if_rcvq_maxlen;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (i < IF_RCVQ_MINLEN)
		i = IF_RCVQ_MINLEN;

	if_rcvq_maxlen = i;
	return (err);
}
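
/*
 * The handlers above back tunables under the net.link.generic.system
 * sysctl node (the exact leaf names are given by the SYSCTL_PROC
 * declarations elsewhere in this file).  Out-of-range writes to the
 * holdtime, interval and queue-length knobs are clamped to their
 * *_MIN floors rather than rejected, while an inconsistent rxpoll
 * watermark pair (low >= high) returns EINVAL.
 */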
void
dlil_node_present(struct ifnet *ifp, struct sockaddr *sa,
    int32_t rssi, int lqm, int npm, u_int8_t srvinfo[48])
{
	struct kev_dl_node_presence kev;
	struct sockaddr_dl *sdl;
	struct sockaddr_in6 *sin6;

	VERIFY(ifp);
	VERIFY(sa);
	VERIFY(sa->sa_family == AF_LINK || sa->sa_family == AF_INET6);

	bzero(&kev, sizeof (kev));
	sin6 = &kev.sin6_node_address;
	sdl = &kev.sdl_node_address;
	nd6_alt_node_addr_decompose(ifp, sa, sdl, sin6);
	kev.rssi = rssi;
	kev.link_quality_metric = lqm;
	kev.node_proximity_metric = npm;
	bcopy(srvinfo, kev.node_service_info, sizeof (kev.node_service_info));

	nd6_alt_node_present(ifp, sin6, sdl, rssi, lqm, npm);
	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_PRESENCE,
	    &kev.link_data, sizeof (kev));
}

void
dlil_node_absent(struct ifnet *ifp, struct sockaddr *sa)
{
	struct kev_dl_node_absence kev;
	struct sockaddr_in6 *sin6;
	struct sockaddr_dl *sdl;

	VERIFY(ifp);
	VERIFY(sa);
	VERIFY(sa->sa_family == AF_LINK || sa->sa_family == AF_INET6);

	bzero(&kev, sizeof (kev));
	sin6 = &kev.sin6_node_address;
	sdl = &kev.sdl_node_address;
	nd6_alt_node_addr_decompose(ifp, sa, sdl, sin6);

	nd6_alt_node_absent(ifp, sin6);
	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_ABSENCE,
	    &kev.link_data, sizeof (kev));
}
static const void *
dlil_ifaddr_bytes(const struct sockaddr_dl *sdl, size_t *sizep,
    kauth_cred_t *credp)
{
	const u_int8_t *bytes;
	size_t size;

	bytes = CONST_LLADDR(sdl);
	size = sdl->sdl_alen;

#if CONFIG_MACF
	if (dlil_lladdr_ckreq) {
		switch (sdl->sdl_type) {
		case IFT_ETHER:
		case IFT_IEEE1394:
			break;
		default:
			credp = NULL;
			break;
		};

		if (credp && mac_system_check_info(*credp, "net.link.addr")) {
			static const u_int8_t unspec[FIREWIRE_EUI64_LEN] = {
				[0] = 2
			};

			switch (sdl->sdl_type) {
			case IFT_ETHER:
				VERIFY(size == ETHER_ADDR_LEN);
				bytes = unspec;
				break;
			case IFT_IEEE1394:
				VERIFY(size == FIREWIRE_EUI64_LEN);
				bytes = unspec;
				break;
			default:
				VERIFY(FALSE);
				break;
			};
		}
	}
#else
#pragma unused(credp)
#endif

	if (sizep != NULL) *sizep = size;
	return (bytes);
}
void
dlil_report_issues(struct ifnet *ifp, u_int8_t modid[DLIL_MODIDLEN],
    u_int8_t info[DLIL_MODARGLEN])
{
	struct kev_dl_issues kev;
	struct timeval tv;

	VERIFY(ifp != NULL);
	VERIFY(modid != NULL);
	_CASSERT(sizeof (kev.modid) == DLIL_MODIDLEN);
	_CASSERT(sizeof (kev.info) == DLIL_MODARGLEN);

	bzero(&kev, sizeof (kev));

	microtime(&tv);
	kev.timestamp = tv.tv_sec;
	bcopy(modid, &kev.modid, DLIL_MODIDLEN);
	if (info != NULL)
		bcopy(info, &kev.info, DLIL_MODARGLEN);

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_ISSUES,
	    &kev.link_data, sizeof (kev));
}
errno_t
ifnet_getset_opportunistic(ifnet_t ifp, u_long cmd, struct ifreq *ifr,
    struct proc *p)
{
	u_int32_t level = IFNET_THROTTLE_OFF;
	errno_t result = 0;

	VERIFY(cmd == SIOCSIFOPPORTUNISTIC || cmd == SIOCGIFOPPORTUNISTIC);

	if (cmd == SIOCSIFOPPORTUNISTIC) {
		/*
		 * XXX: Use priv_check_cred() instead of root check?
		 */
		if ((result = proc_suser(p)) != 0)
			return (result);

		if (ifr->ifr_opportunistic.ifo_flags ==
		    IFRIFOF_BLOCK_OPPORTUNISTIC)
			level = IFNET_THROTTLE_OPPORTUNISTIC;
		else if (ifr->ifr_opportunistic.ifo_flags == 0)
			level = IFNET_THROTTLE_OFF;
		else
			result = EINVAL;

		if (result == 0)
			result = ifnet_set_throttle(ifp, level);
	} else if ((result = ifnet_get_throttle(ifp, &level)) == 0) {
		ifr->ifr_opportunistic.ifo_flags = 0;
		if (level == IFNET_THROTTLE_OPPORTUNISTIC) {
			ifr->ifr_opportunistic.ifo_flags |=
			    IFRIFOF_BLOCK_OPPORTUNISTIC;
		}
	}

	/*
	 * Return the count of current opportunistic connections
	 * over the interface.
	 */
	if (result == 0) {
		uint32_t flags = 0;

		flags |= (cmd == SIOCSIFOPPORTUNISTIC) ?
		    INPCB_OPPORTUNISTIC_SETCMD : 0;
		flags |= (level == IFNET_THROTTLE_OPPORTUNISTIC) ?
		    INPCB_OPPORTUNISTIC_THROTTLEON : 0;
		ifr->ifr_opportunistic.ifo_inuse =
		    udp_count_opportunistic(ifp->if_index, flags) +
		    tcp_count_opportunistic(ifp->if_index, flags);
	}

	if (result == EALREADY)
		result = 0;

	return (result);
}
errno_t
ifnet_get_throttle(struct ifnet *ifp, u_int32_t *level)
{
	struct ifclassq *ifq;
	int err = 0;

	if (!(ifp->if_eflags & IFEF_TXSTART))
		return (ENXIO);

	*level = IFNET_THROTTLE_OFF;

	ifq = &ifp->if_snd;
	IFCQ_LOCK(ifq);
	/* Throttling works only for IFCQ, not ALTQ instances */
	if (IFCQ_IS_ENABLED(ifq))
		IFCQ_GET_THROTTLE(ifq, *level, err);
	IFCQ_UNLOCK(ifq);

	return (err);
}

errno_t
ifnet_set_throttle(struct ifnet *ifp, u_int32_t level)
{
	struct ifclassq *ifq;
	int err = 0;

	if (!(ifp->if_eflags & IFEF_TXSTART))
		return (ENXIO);

	ifq = &ifp->if_snd;

	switch (level) {
	case IFNET_THROTTLE_OFF:
	case IFNET_THROTTLE_OPPORTUNISTIC:
#if PF_ALTQ
		/* Throttling works only for IFCQ, not ALTQ instances */
		if (ALTQ_IS_ENABLED(IFCQ_ALTQ(ifq)))
			return (ENXIO);
#endif /* PF_ALTQ */
		break;
	default:
		return (EINVAL);
	}

	IFCQ_LOCK(ifq);
	if (IFCQ_IS_ENABLED(ifq))
		IFCQ_SET_THROTTLE(ifq, level, err);
	IFCQ_UNLOCK(ifq);

	if (err == 0) {
		printf("%s: throttling level set to %d\n", if_name(ifp),
		    level);
		if (level == IFNET_THROTTLE_OFF)
			ifnet_start(ifp);
	}

	return (err);
}
int
ifnet_getset_log(ifnet_t ifp, u_long cmd, struct ifreq *ifr,
    struct proc *p)
{
#pragma unused(p)
	int result = 0;
	uint32_t flags;
	int level, category, subcategory;

	VERIFY(cmd == SIOCSIFLOG || cmd == SIOCGIFLOG);

	if (cmd == SIOCSIFLOG) {
		if ((result = priv_check_cred(kauth_cred_get(),
		    PRIV_NET_INTERFACE_CONTROL, 0)) != 0)
			return (result);

		level = ifr->ifr_log.ifl_level;
		if (level < IFNET_LOG_MIN || level > IFNET_LOG_MAX)
			result = EINVAL;

		flags = ifr->ifr_log.ifl_flags;
		if ((flags &= IFNET_LOGF_MASK) == 0)
			result = EINVAL;

		category = ifr->ifr_log.ifl_category;
		subcategory = ifr->ifr_log.ifl_subcategory;

		if (result == 0)
			result = ifnet_set_log(ifp, level, flags,
			    category, subcategory);
	} else {
		result = ifnet_get_log(ifp, &level, &flags, &category,
		    &subcategory);
		if (result == 0) {
			ifr->ifr_log.ifl_level = level;
			ifr->ifr_log.ifl_flags = flags;
			ifr->ifr_log.ifl_category = category;
			ifr->ifr_log.ifl_subcategory = subcategory;
		}
	}

	return (result);
}
*ifp
, int32_t level
, uint32_t flags
,
7471 int32_t category
, int32_t subcategory
)
7475 VERIFY(level
>= IFNET_LOG_MIN
&& level
<= IFNET_LOG_MAX
);
7476 VERIFY(flags
& IFNET_LOGF_MASK
);
7479 * The logging level applies to all facilities; make sure to
7480 * update them all with the most current level.
7482 flags
|= ifp
->if_log
.flags
;
7484 if (ifp
->if_output_ctl
!= NULL
) {
7485 struct ifnet_log_params l
;
7487 bzero(&l
, sizeof (l
));
7490 l
.flags
&= ~IFNET_LOGF_DLIL
;
7491 l
.category
= category
;
7492 l
.subcategory
= subcategory
;
7494 /* Send this request to lower layers */
7496 err
= ifp
->if_output_ctl(ifp
, IFNET_CTL_SET_LOG
,
7499 } else if ((flags
& ~IFNET_LOGF_DLIL
) && ifp
->if_output_ctl
== NULL
) {
7501 * If targeted to the lower layers without an output
7502 * control callback registered on the interface, just
7503 * silently ignore facilities other than ours.
7505 flags
&= IFNET_LOGF_DLIL
;
7506 if (flags
== 0 && (!(ifp
->if_log
.flags
& IFNET_LOGF_DLIL
)))
7511 if ((ifp
->if_log
.level
= level
) == IFNET_LOG_DEFAULT
)
7512 ifp
->if_log
.flags
= 0;
7514 ifp
->if_log
.flags
|= flags
;
7516 log(LOG_INFO
, "%s: logging level set to %d flags=%b "
7517 "arg=%b, category=%d subcategory=%d\n", if_name(ifp
),
7518 ifp
->if_log
.level
, ifp
->if_log
.flags
,
7519 IFNET_LOGF_BITS
, flags
, IFNET_LOGF_BITS
,
7520 category
, subcategory
);
int
ifnet_get_log(struct ifnet *ifp, int32_t *level, uint32_t *flags,
    int32_t *category, int32_t *subcategory)
{
	if (level != NULL)
		*level = ifp->if_log.level;
	if (flags != NULL)
		*flags = ifp->if_log.flags;
	if (category != NULL)
		*category = ifp->if_log.category;
	if (subcategory != NULL)
		*subcategory = ifp->if_log.subcategory;

	return (0);
}
int
ifnet_notify_address(struct ifnet *ifp, int af)
{
	struct ifnet_notify_address_params na;

#if PF
	(void) pf_ifaddr_hook(ifp);
#endif /* PF */

	if (ifp->if_output_ctl == NULL)
		return (EOPNOTSUPP);

	bzero(&na, sizeof (na));
	na.address_family = af;

	return (ifp->if_output_ctl(ifp, IFNET_CTL_NOTIFY_ADDRESS,
	    sizeof (na), &na));
}
errno_t
ifnet_flowid(struct ifnet *ifp, uint32_t *flowid)
{
	if (ifp == NULL || flowid == NULL) {
		return (EINVAL);
	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    !(ifp->if_refflags & IFRF_ATTACHED)) {
		return (ENXIO);
	}

	*flowid = ifp->if_flowhash;

	return (0);
}
errno_t
ifnet_disable_output(struct ifnet *ifp)
{
	int err;

	if (ifp == NULL) {
		return (EINVAL);
	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    !(ifp->if_refflags & IFRF_ATTACHED)) {
		return (ENXIO);
	}

	if ((err = ifnet_fc_add(ifp)) == 0) {
		lck_mtx_lock_spin(&ifp->if_start_lock);
		ifp->if_start_flags |= IFSF_FLOW_CONTROLLED;
		lck_mtx_unlock(&ifp->if_start_lock);
	}
	return (err);
}

errno_t
ifnet_enable_output(struct ifnet *ifp)
{
	if (ifp == NULL) {
		return (EINVAL);
	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    !(ifp->if_refflags & IFRF_ATTACHED)) {
		return (ENXIO);
	}

	ifnet_start_common(ifp, 1);
	return (0);
}
void
ifnet_flowadv(uint32_t flowhash)
{
	struct ifnet_fc_entry *ifce;
	struct ifnet *ifp;

	ifce = ifnet_fc_get(flowhash);
	if (ifce == NULL)
		return;

	VERIFY(ifce->ifce_ifp != NULL);
	ifp = ifce->ifce_ifp;

	/* flow hash gets recalculated per attach, so check */
	if (ifnet_is_attached(ifp, 1)) {
		if (ifp->if_flowhash == flowhash)
			(void) ifnet_enable_output(ifp);
		ifnet_decr_iorefcnt(ifp);
	}
	ifnet_fc_entry_free(ifce);
}

/*
 * Function to compare ifnet_fc_entries in ifnet flow control tree
 */
static inline int
ifce_cmp(const struct ifnet_fc_entry *fc1, const struct ifnet_fc_entry *fc2)
{
	return (fc1->ifce_flowhash - fc2->ifce_flowhash);
}
static int
ifnet_fc_add(struct ifnet *ifp)
{
	struct ifnet_fc_entry keyfc, *ifce;
	uint32_t flowhash;

	VERIFY(ifp != NULL && (ifp->if_eflags & IFEF_TXSTART));
	VERIFY(ifp->if_flowhash != 0);
	flowhash = ifp->if_flowhash;

	bzero(&keyfc, sizeof (keyfc));
	keyfc.ifce_flowhash = flowhash;

	lck_mtx_lock_spin(&ifnet_fc_lock);
	ifce = RB_FIND(ifnet_fc_tree, &ifnet_fc_tree, &keyfc);
	if (ifce != NULL && ifce->ifce_ifp == ifp) {
		/* Entry is already in ifnet_fc_tree, return */
		lck_mtx_unlock(&ifnet_fc_lock);
		return (0);
	}

	if (ifce != NULL) {
		/*
		 * There is a different fc entry with the same flow hash
		 * but different ifp pointer.  There can be a collision
		 * on flow hash but the probability is low.  Let's just
		 * avoid adding a second one when there is a collision.
		 */
		lck_mtx_unlock(&ifnet_fc_lock);
		return (EAGAIN);
	}

	/* become regular mutex */
	lck_mtx_convert_spin(&ifnet_fc_lock);

	ifce = zalloc_noblock(ifnet_fc_zone);
	if (ifce == NULL) {
		/* memory allocation failed */
		lck_mtx_unlock(&ifnet_fc_lock);
		return (ENOMEM);
	}
	bzero(ifce, ifnet_fc_zone_size);

	ifce->ifce_flowhash = flowhash;
	ifce->ifce_ifp = ifp;

	RB_INSERT(ifnet_fc_tree, &ifnet_fc_tree, ifce);
	lck_mtx_unlock(&ifnet_fc_lock);
	return (0);
}
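
/*
 * Typical flow-control sequence using the entries managed here: a
 * driver whose transmit ring fills calls ifnet_disable_output(),
 * which records the ifnet's flow hash in the tree via ifnet_fc_add()
 * and marks the start thread IFSF_FLOW_CONTROLLED; when the hardware
 * drains, a flow advisory carrying the same hash reaches
 * ifnet_flowadv(), which looks the entry up, re-enables output and
 * frees the entry.
 */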
static struct ifnet_fc_entry *
ifnet_fc_get(uint32_t flowhash)
{
	struct ifnet_fc_entry keyfc, *ifce;
	struct ifnet *ifp;

	bzero(&keyfc, sizeof (keyfc));
	keyfc.ifce_flowhash = flowhash;

	lck_mtx_lock_spin(&ifnet_fc_lock);
	ifce = RB_FIND(ifnet_fc_tree, &ifnet_fc_tree, &keyfc);
	if (ifce == NULL) {
		/* Entry is not present in ifnet_fc_tree, return */
		lck_mtx_unlock(&ifnet_fc_lock);
		return (NULL);
	}

	RB_REMOVE(ifnet_fc_tree, &ifnet_fc_tree, ifce);

	VERIFY(ifce->ifce_ifp != NULL);
	ifp = ifce->ifce_ifp;

	/* become regular mutex */
	lck_mtx_convert_spin(&ifnet_fc_lock);

	if (!ifnet_is_attached(ifp, 0)) {
		/*
		 * This ifp is not attached or in the process of being
		 * detached; just don't process it.
		 */
		ifnet_fc_entry_free(ifce);
		ifce = NULL;
	}
	lck_mtx_unlock(&ifnet_fc_lock);

	return (ifce);
}

static void
ifnet_fc_entry_free(struct ifnet_fc_entry *ifce)
{
	zfree(ifnet_fc_zone, ifce);
}
static uint32_t
ifnet_calc_flowhash(struct ifnet *ifp)
{
	struct ifnet_flowhash_key fh __attribute__((aligned(8)));
	uint32_t flowhash = 0;

	if (ifnet_flowhash_seed == 0)
		ifnet_flowhash_seed = RandomULong();

	bzero(&fh, sizeof (fh));

	(void) snprintf(fh.ifk_name, sizeof (fh.ifk_name), "%s", ifp->if_name);
	fh.ifk_unit = ifp->if_unit;
	fh.ifk_flags = ifp->if_flags;
	fh.ifk_eflags = ifp->if_eflags;
	fh.ifk_capabilities = ifp->if_capabilities;
	fh.ifk_capenable = ifp->if_capenable;
	fh.ifk_output_sched_model = ifp->if_output_sched_model;
	fh.ifk_rand1 = RandomULong();
	fh.ifk_rand2 = RandomULong();

try_again:
	flowhash = net_flowhash(&fh, sizeof (fh), ifnet_flowhash_seed);
	if (flowhash == 0) {
		/* try to get a non-zero flowhash */
		ifnet_flowhash_seed = RandomULong();
		goto try_again;
	}

	return (flowhash);
}
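
/*
 * A zero hash is reserved to mean "no flow control entry", which is
 * why the loop above reseeds and retries until net_flowhash() yields
 * a non-zero value; the VERIFY(ifp->if_flowhash != 0) assertion in
 * ifnet_fc_add() relies on this invariant.
 */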
int
ifnet_set_netsignature(struct ifnet *ifp, uint8_t family, uint8_t len,
    uint16_t flags, uint8_t *data)
{
#pragma unused(flags)
	int error = 0;

	switch (family) {
	case AF_INET:
		if_inetdata_lock_exclusive(ifp);
		if (IN_IFEXTRA(ifp) != NULL) {
			if (len == 0) {
				/* Allow clearing the signature */
				IN_IFEXTRA(ifp)->netsig_len = 0;
				bzero(IN_IFEXTRA(ifp)->netsig,
				    sizeof (IN_IFEXTRA(ifp)->netsig));
				if_inetdata_lock_done(ifp);
				break;
			} else if (len > sizeof (IN_IFEXTRA(ifp)->netsig)) {
				error = EINVAL;
				if_inetdata_lock_done(ifp);
				break;
			}
			IN_IFEXTRA(ifp)->netsig_len = len;
			bcopy(data, IN_IFEXTRA(ifp)->netsig, len);
		} else {
			error = ENOMEM;
		}
		if_inetdata_lock_done(ifp);
		break;

	case AF_INET6:
		if_inet6data_lock_exclusive(ifp);
		if (IN6_IFEXTRA(ifp) != NULL) {
			if (len == 0) {
				/* Allow clearing the signature */
				IN6_IFEXTRA(ifp)->netsig_len = 0;
				bzero(IN6_IFEXTRA(ifp)->netsig,
				    sizeof (IN6_IFEXTRA(ifp)->netsig));
				if_inet6data_lock_done(ifp);
				break;
			} else if (len > sizeof (IN6_IFEXTRA(ifp)->netsig)) {
				error = EINVAL;
				if_inet6data_lock_done(ifp);
				break;
			}
			IN6_IFEXTRA(ifp)->netsig_len = len;
			bcopy(data, IN6_IFEXTRA(ifp)->netsig, len);
		} else {
			error = ENOMEM;
		}
		if_inet6data_lock_done(ifp);
		break;

	default:
		error = EINVAL;
		break;
	}

	return (error);
}
int
ifnet_get_netsignature(struct ifnet *ifp, uint8_t family, uint8_t *len,
    uint16_t *flags, uint8_t *data)
{
	int error = 0;

	if (ifp == NULL || len == NULL || flags == NULL || data == NULL)
		return (EINVAL);

	switch (family) {
	case AF_INET:
		if_inetdata_lock_shared(ifp);
		if (IN_IFEXTRA(ifp) != NULL) {
			if (*len == 0 || *len < IN_IFEXTRA(ifp)->netsig_len) {
				error = EINVAL;
				if_inetdata_lock_done(ifp);
				break;
			}
			if ((*len = IN_IFEXTRA(ifp)->netsig_len) > 0)
				bcopy(IN_IFEXTRA(ifp)->netsig, data, *len);
			else
				error = ENOENT;
		} else {
			error = ENOMEM;
		}
		if_inetdata_lock_done(ifp);
		break;

	case AF_INET6:
		if_inet6data_lock_shared(ifp);
		if (IN6_IFEXTRA(ifp) != NULL) {
			if (*len == 0 || *len < IN6_IFEXTRA(ifp)->netsig_len) {
				error = EINVAL;
				if_inet6data_lock_done(ifp);
				break;
			}
			if ((*len = IN6_IFEXTRA(ifp)->netsig_len) > 0)
				bcopy(IN6_IFEXTRA(ifp)->netsig, data, *len);
			else
				error = ENOENT;
		} else {
			error = ENOMEM;
		}
		if_inet6data_lock_done(ifp);
		break;

	default:
		error = EINVAL;
		break;
	}

	if (error == 0)
		*flags = 0;

	return (error);
}
static void
dlil_output_cksum_dbg(struct ifnet *ifp, struct mbuf *m, uint32_t hoff,
    protocol_family_t pf)
{
#pragma unused(ifp)
	uint32_t did_sw;

	if (!(hwcksum_dbg_mode & HWCKSUM_DBG_FINALIZE_FORCED) ||
	    (m->m_pkthdr.csum_flags & (CSUM_TSO_IPV4|CSUM_TSO_IPV6)))
		return;

	switch (pf) {
	case PF_INET:
		did_sw = in_finalize_cksum(m, hoff, m->m_pkthdr.csum_flags);
		if (did_sw & CSUM_DELAY_IP)
			hwcksum_dbg_finalized_hdr++;
		if (did_sw & CSUM_DELAY_DATA)
			hwcksum_dbg_finalized_data++;
		break;
#if INET6
	case PF_INET6:
		/*
		 * Checksum offload should not have been enabled when
		 * extension headers exist; that also means that we
		 * cannot force-finalize packets with extension headers.
		 * Indicate to the callee that it should skip such cases
		 * by setting optlen to -1.
		 */
		did_sw = in6_finalize_cksum(m, hoff, -1, -1,
		    m->m_pkthdr.csum_flags);
		if (did_sw & CSUM_DELAY_IPV6_DATA)
			hwcksum_dbg_finalized_data++;
		break;
#endif /* INET6 */
	default:
		return;
	}
}
static void
dlil_input_cksum_dbg(struct ifnet *ifp, struct mbuf *m, char *frame_header,
    protocol_family_t pf)
{
	uint16_t sum = 0;
	uint32_t hlen;

	if (frame_header == NULL ||
	    frame_header < (char *)mbuf_datastart(m) ||
	    frame_header > (char *)m->m_data) {
		printf("%s: frame header pointer 0x%llx out of range "
		    "[0x%llx,0x%llx] for mbuf 0x%llx\n", if_name(ifp),
		    (uint64_t)VM_KERNEL_ADDRPERM(frame_header),
		    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_datastart(m)),
		    (uint64_t)VM_KERNEL_ADDRPERM(m->m_data),
		    (uint64_t)VM_KERNEL_ADDRPERM(m));
		return;
	}
	hlen = (m->m_data - frame_header);

	switch (pf) {
	case PF_INET:
#if INET6
	case PF_INET6:
#endif /* INET6 */
		break;
	default:
		return;
	}

	/*
	 * Force partial checksum offload; useful to simulate cases
	 * where the hardware does not support partial checksum offload,
	 * in order to validate correctness throughout the layers above.
	 */
	if (hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED) {
		uint32_t foff = hwcksum_dbg_partial_rxoff_forced;

		if (foff > (uint32_t)m->m_pkthdr.len)
			return;

		m->m_pkthdr.csum_flags &= ~CSUM_RX_FLAGS;

		/* Compute 16-bit 1's complement sum from forced offset */
		sum = m_sum16(m, foff, (m->m_pkthdr.len - foff));

		m->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PARTIAL);
		m->m_pkthdr.csum_rx_val = sum;
		m->m_pkthdr.csum_rx_start = (foff + hlen);

		hwcksum_dbg_partial_forced++;
		hwcksum_dbg_partial_forced_bytes += m->m_pkthdr.len;
	}

	/*
	 * Partial checksum offload verification (and adjustment);
	 * useful to validate and test cases where the hardware
	 * supports partial checksum offload.
	 */
	if ((m->m_pkthdr.csum_flags &
	    (CSUM_DATA_VALID | CSUM_PARTIAL | CSUM_PSEUDO_HDR)) ==
	    (CSUM_DATA_VALID | CSUM_PARTIAL)) {
		uint32_t rxoff;

		/* Start offset must begin after frame header */
		rxoff = m->m_pkthdr.csum_rx_start;
		if (hlen > rxoff) {
			hwcksum_dbg_bad_rxoff++;
			if (dlil_verbose) {
				printf("%s: partial cksum start offset %d "
				    "is less than frame header length %d for "
				    "mbuf 0x%llx\n", if_name(ifp), rxoff, hlen,
				    (uint64_t)VM_KERNEL_ADDRPERM(m));
			}
			return;
		}
		rxoff -= hlen;

		if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED)) {
			/*
			 * Compute the expected 16-bit 1's complement sum;
			 * skip this if we've already computed it above
			 * when partial checksum offload is forced.
			 */
			sum = m_sum16(m, rxoff, (m->m_pkthdr.len - rxoff));

			/* Hardware or driver is buggy */
			if (sum != m->m_pkthdr.csum_rx_val) {
				hwcksum_dbg_bad_cksum++;
				if (dlil_verbose) {
					printf("%s: bad partial cksum value "
					    "0x%x (expected 0x%x) for mbuf "
					    "0x%llx [rx_start %d]\n",
					    if_name(ifp),
					    m->m_pkthdr.csum_rx_val, sum,
					    (uint64_t)VM_KERNEL_ADDRPERM(m),
					    m->m_pkthdr.csum_rx_start);
				}
				return;
			}
		}
		hwcksum_dbg_verified++;

		/*
		 * This code allows us to emulate various hardware that
		 * performs 16-bit 1's complement sums beginning at
		 * various start offset values.
		 */
		if (hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_RXOFF_ADJ) {
			uint32_t aoff = hwcksum_dbg_partial_rxoff_adj;

			if (aoff == rxoff || aoff > (uint32_t)m->m_pkthdr.len)
				return;

			sum = m_adj_sum16(m, rxoff, aoff, sum);

			m->m_pkthdr.csum_rx_val = sum;
			m->m_pkthdr.csum_rx_start = (aoff + hlen);

			hwcksum_dbg_adjusted++;
		}
	}
}
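
/*
 * Worked example for the adjustment above, assuming a new start
 * offset one byte past the original: the 16-bit 1's complement sum
 * over [rxoff, len) includes the skipped byte, while the sum over
 * [aoff, len) must exclude it, so m_adj_sum16() folds the bytes
 * between rxoff and aoff out of the running sum instead of
 * rescanning the whole mbuf chain.
 */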
static int
sysctl_hwcksum_dbg_mode SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	u_int32_t i;
	int err;

	i = hwcksum_dbg_mode;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (hwcksum_dbg == 0)
		return (ENODEV);

	if ((i & ~HWCKSUM_DBG_MASK) != 0)
		return (EINVAL);

	hwcksum_dbg_mode = (i & HWCKSUM_DBG_MASK);

	return (err);
}

static int
sysctl_hwcksum_dbg_partial_rxoff_forced SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	u_int32_t i;
	int err;

	i = hwcksum_dbg_partial_rxoff_forced;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED))
		return (ENODEV);

	hwcksum_dbg_partial_rxoff_forced = i;

	return (err);
}

static int
sysctl_hwcksum_dbg_partial_rxoff_adj SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	u_int32_t i;
	int err;

	i = hwcksum_dbg_partial_rxoff_adj;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_RXOFF_ADJ))
		return (ENODEV);

	hwcksum_dbg_partial_rxoff_adj = i;

	return (err);
}

static int
sysctl_tx_chain_len_stats SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
	int err;

	if (req->oldptr == USER_ADDR_NULL) {

	}
	if (req->newptr != USER_ADDR_NULL) {
		return (EPERM);
	}
	err = SYSCTL_OUT(req, &tx_chain_len_stats,
	    sizeof(struct chain_len_stats));

	return (err);
}
/* Blob for sum16 verification */
static uint8_t sumdata[] = {
	0x1f, 0x8b, 0x08, 0x08, 0x4c, 0xe5, 0x9a, 0x4f, 0x00, 0x03,
	0x5f, 0x00, 0x5d, 0x91, 0x41, 0x4e, 0xc4, 0x30, 0x0c, 0x45,
	0xf7, 0x9c, 0xc2, 0x07, 0x18, 0xf5, 0x0e, 0xb0, 0xe2, 0x00,
	0x48, 0x88, 0xa5, 0xdb, 0xba, 0x49, 0x34, 0x69, 0xdc, 0x71,
	0x92, 0xa9, 0xc2, 0x8a, 0x6b, 0x70, 0x3d, 0x4e, 0x82, 0x93,
	0xb4, 0x08, 0xd8, 0xc5, 0xb1, 0xfd, 0xff, 0xb3, 0xfd, 0x4c,
	0x42, 0x5f, 0x1f, 0x9f, 0x11, 0x12, 0x43, 0xb2, 0x04, 0x93,
	0xe0, 0x7b, 0x01, 0x0e, 0x14, 0x07, 0x78, 0xd1, 0x78, 0x75,
	0x71, 0x71, 0xe9, 0x08, 0x84, 0x46, 0xf2, 0xc7, 0x3b, 0x09,
	0xe7, 0xd1, 0xd3, 0x8a, 0x57, 0x92, 0x33, 0xcd, 0x39, 0xcc,
	0xb0, 0x91, 0x89, 0xe0, 0x42, 0x53, 0x8b, 0xb7, 0x8c, 0x42,
	0x60, 0xd9, 0x9f, 0x7a, 0x55, 0x19, 0x76, 0xcb, 0x10, 0x49,
	0x35, 0xac, 0x0b, 0x5a, 0x3c, 0xbb, 0x65, 0x51, 0x8c, 0x90,
	0x7c, 0x69, 0x45, 0x45, 0x81, 0xb4, 0x2b, 0x70, 0x82, 0x85,
	0x55, 0x91, 0x17, 0x90, 0xdc, 0x14, 0x1e, 0x35, 0x52, 0xdd,
	0x02, 0x16, 0xef, 0xb5, 0x40, 0x89, 0xe2, 0x46, 0x53, 0xad,
	0x93, 0x6e, 0x98, 0x30, 0xe5, 0x08, 0xb7, 0xcc, 0x03, 0xbc,
	0x71, 0x86, 0x09, 0x43, 0x0d, 0x52, 0xf5, 0xa2, 0xf5, 0xa2,
	0x56, 0x11, 0x8d, 0xa8, 0xf5, 0xee, 0x92, 0x3d, 0xfe, 0x8c,
	0x67, 0x71, 0x8b, 0x0e, 0x2d, 0x70, 0x77, 0xbe, 0xbe, 0xea,
	0xbf, 0x9a, 0x8d, 0x9c, 0x53, 0x53, 0xe5, 0xe0, 0x4b, 0x87,
	0x85, 0xd2, 0x45, 0x95, 0x30, 0xc1, 0xcc, 0xe0, 0x74, 0x54,
	0x13, 0x58, 0xe8, 0xe8, 0x79, 0xa2, 0x09, 0x73, 0xa4, 0x0e,
	0x39, 0x59, 0x0c, 0xe6, 0x9c, 0xb2, 0x4f, 0x06, 0x5b, 0x8e,
	0xcd, 0x17, 0x6c, 0x5e, 0x95, 0x4d, 0x70, 0xa2, 0x0a, 0xbf,
	0xa3, 0xcc, 0x03, 0xbc, 0x5a, 0xe7, 0x75, 0x06, 0x5e, 0x75,
	0xef, 0x58, 0x8e, 0x15, 0xd1, 0x0a, 0x18, 0xff, 0xdd, 0xe6,
	0x02, 0x3b, 0xb5, 0xb4, 0xa1, 0xe0, 0x72, 0xfc, 0xe3, 0xab,
	0x07, 0xe0, 0x4d, 0x65, 0xea, 0x92, 0xeb, 0xf2, 0x7b, 0x17,
	0x05, 0xce, 0xc6, 0xf6, 0x2b, 0xbb, 0x70, 0x3d, 0x00, 0x95,
	0xe0, 0x07, 0x52, 0x3b, 0x58, 0xfc, 0x7c, 0x69, 0x4d, 0xe9,
	0xf7, 0xa9, 0x66, 0x1e, 0x1e, 0xbe, 0x01, 0x69, 0x98, 0xfe,
	0xc8, 0x28, 0x02, 0x00, 0x00
};

/* Precomputed 16-bit 1's complement sums for various spans of the above data */

#define SUMTBL_MAX ((int)sizeof (sumtbl) / (int)sizeof (sumtbl[0]))
static void
dlil_verify_sum16(void)
{
	struct mbuf *m;
	uint8_t *buf;
	int n;

	/* Make sure test data plus extra room for alignment fits in cluster */
	_CASSERT((sizeof (sumdata) + (sizeof (uint64_t) * 2)) <= MCLBYTES);

	m = m_getcl(M_WAITOK, MT_DATA, M_PKTHDR);
	MH_ALIGN(m, sizeof (uint32_t));		/* 32-bit starting alignment */
	buf = mtod(m, uint8_t *);		/* base address */

	for (n = 0; n < SUMTBL_MAX; n++) {
		uint16_t len = sumtbl[n].len;
		int i;

		/* Verify for all possible alignments */
		for (i = 0; i < (int)sizeof (uint64_t); i++) {
			uint16_t sum;
			uint8_t *c = buf + i;

			/* Copy over test data to mbuf */
			VERIFY(len <= sizeof (sumdata));
			bcopy(sumdata, c, len);

			/* Zero-offset test (align by data pointer) */
			m->m_data = (caddr_t)c;
			m->m_len = len;
			sum = m_sum16(m, 0, len);

			/* Something is horribly broken; stop now */
			if (sum != sumtbl[n].sum) {
				panic("%s: broken m_sum16 for len=%d align=%d "
				    "sum=0x%04x [expected=0x%04x]\n", __func__,
				    len, i, sum, sumtbl[n].sum);
				/* NOTREACHED */
			}

			/* Alignment test by offset (fixed data pointer) */
			m->m_data = (caddr_t)buf;
			m->m_len = i + len;
			sum = m_sum16(m, i, len);

			/* Something is horribly broken; stop now */
			if (sum != sumtbl[n].sum) {
				panic("%s: broken m_sum16 for len=%d offset=%d "
				    "sum=0x%04x [expected=0x%04x]\n", __func__,
				    len, i, sum, sumtbl[n].sum);
				/* NOTREACHED */
			}

			/* Simple sum16 contiguous buffer test by alignment */
			sum = b_sum16(c, len);

			/* Something is horribly broken; stop now */
			if (sum != sumtbl[n].sum) {
				panic("%s: broken b_sum16 for len=%d align=%d "
				    "sum=0x%04x [expected=0x%04x]\n", __func__,
				    len, i, sum, sumtbl[n].sum);
				/* NOTREACHED */
			}
		}
	}
	m_freem(m);

	printf("DLIL: SUM16 self-tests PASSED\n");
}
#define CASE_STRINGIFY(x) case x: return #x

__private_extern__ const char *
dlil_kev_dl_code_str(u_int32_t event_code)
{
	switch (event_code) {
	CASE_STRINGIFY(KEV_DL_SIFFLAGS);
	CASE_STRINGIFY(KEV_DL_SIFMETRICS);
	CASE_STRINGIFY(KEV_DL_SIFMTU);
	CASE_STRINGIFY(KEV_DL_SIFPHYS);
	CASE_STRINGIFY(KEV_DL_SIFMEDIA);
	CASE_STRINGIFY(KEV_DL_SIFGENERIC);
	CASE_STRINGIFY(KEV_DL_ADDMULTI);
	CASE_STRINGIFY(KEV_DL_DELMULTI);
	CASE_STRINGIFY(KEV_DL_IF_ATTACHED);
	CASE_STRINGIFY(KEV_DL_IF_DETACHING);
	CASE_STRINGIFY(KEV_DL_IF_DETACHED);
	CASE_STRINGIFY(KEV_DL_LINK_OFF);
	CASE_STRINGIFY(KEV_DL_LINK_ON);
	CASE_STRINGIFY(KEV_DL_PROTO_ATTACHED);
	CASE_STRINGIFY(KEV_DL_PROTO_DETACHED);
	CASE_STRINGIFY(KEV_DL_LINK_ADDRESS_CHANGED);
	CASE_STRINGIFY(KEV_DL_WAKEFLAGS_CHANGED);
	CASE_STRINGIFY(KEV_DL_IF_IDLE_ROUTE_REFCNT);
	CASE_STRINGIFY(KEV_DL_IFCAP_CHANGED);
	CASE_STRINGIFY(KEV_DL_LINK_QUALITY_METRIC_CHANGED);
	CASE_STRINGIFY(KEV_DL_NODE_PRESENCE);
	CASE_STRINGIFY(KEV_DL_NODE_ABSENCE);
	CASE_STRINGIFY(KEV_DL_MASTER_ELECTED);
	CASE_STRINGIFY(KEV_DL_ISSUES);
	CASE_STRINGIFY(KEV_DL_IFDELEGATE_CHANGED);
	default:
		break;
	}
	return ("");
}
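
/*
 * Illustrative sketch (not compiled): dlil_kev_dl_code_str() is meant
 * for diagnostics, e.g. when logging a kernel event from within this
 * subsystem; `ev' here is a hypothetical pointer to a received
 * struct kev_msg.
 */
#if 0
	printf("dlil: event %s (0x%x)\n",
	    dlil_kev_dl_code_str(ev->event_code), ev->event_code);
#endif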
/*
 * Mirror the arguments of ifnet_get_local_ports_extended()
 *	ifindex
 *	protocol
 *	flags
 */
static int
sysctl_get_ports_used SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp)
	int *name = (int *)arg1;
	int namelen = arg2;
	int error = 0;
	int idx;
	protocol_family_t protocol;
	u_int32_t flags;
	ifnet_t ifp = NULL;
	u_int8_t *bitfield = NULL;

	if (req->newptr != USER_ADDR_NULL) {
		error = EPERM;
		goto done;
	}
	if (namelen != 3) {
		error = ENOENT;
		goto done;
	}

	if (req->oldptr == USER_ADDR_NULL) {
		req->oldidx = bitstr_size(65536);
		goto done;
	}
	if (req->oldlen < bitstr_size(65536)) {
		error = ENOMEM;
		goto done;
	}

	idx = name[0];
	protocol = name[1];
	flags = name[2];

	ifnet_head_lock_shared();
	if (idx > if_index) {
		ifnet_head_done();
		error = ENOENT;
		goto done;
	}
	ifp = ifindex2ifnet[idx];
	ifnet_head_done();

	bitfield = _MALLOC(bitstr_size(65536), M_TEMP, M_WAITOK);
	if (bitfield == NULL) {
		error = ENOMEM;
		goto done;
	}
	error = ifnet_get_local_ports_extended(ifp, protocol, flags, bitfield);
	if (error != 0) {
		printf("%s: ifnet_get_local_ports_extended() error %d\n",
		    __func__, error);
		goto done;
	}
	error = SYSCTL_OUT(req, bitfield, bitstr_size(65536));
done:
	if (bitfield != NULL)
		_FREE(bitfield, M_TEMP);
	return (error);
}
#if (DEVELOPMENT || DEBUG)
/*
 * The sysctl variable name contains the input parameters of
 * ifnet_get_keepalive_offload_frames()
 *	ifp (interface index): name[0]
 *	frames_array_count: name[1]
 *	frame_data_offset: name[2]
 * The return length gives used_frames_count
 */
static int
sysctl_get_kao_frames SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp)
	int *name = (int *)arg1;
	u_int namelen = arg2;
	int idx;
	ifnet_t ifp = NULL;
	u_int32_t frames_array_count;
	size_t frame_data_offset;
	u_int32_t used_frames_count;
	struct ifnet_keepalive_offload_frame *frames_array = NULL;
	int error = 0;
	u_int32_t i;

	/*
	 * Only root can look at other people's TCP frames
	 */
	error = proc_suser(current_proc());
	if (error != 0)
		goto done;
	/*
	 * Validate the input parameters
	 */
	if (req->newptr != USER_ADDR_NULL) {
		error = EPERM;
		goto done;
	}
	if (namelen != 3) {
		error = EINVAL;
		goto done;
	}
	if (req->oldptr == USER_ADDR_NULL) {
		error = EINVAL;
		goto done;
	}
	if (req->oldlen == 0) {
		error = EINVAL;
		goto done;
	}
	idx = name[0];
	frames_array_count = name[1];
	frame_data_offset = name[2];

	/* Make sure the passed buffer is large enough */
	if (frames_array_count * sizeof(struct ifnet_keepalive_offload_frame) >
	    req->oldlen) {
		error = ENOMEM;
		goto done;
	}

	ifnet_head_lock_shared();
	if (idx > if_index) {
		ifnet_head_done();
		error = ENOENT;
		goto done;
	}
	ifp = ifindex2ifnet[idx];
	ifnet_head_done();

	frames_array = _MALLOC(frames_array_count *
	    sizeof(struct ifnet_keepalive_offload_frame), M_TEMP, M_WAITOK);
	if (frames_array == NULL) {
		error = ENOMEM;
		goto done;
	}

	error = ifnet_get_keepalive_offload_frames(ifp, frames_array,
	    frames_array_count, frame_data_offset, &used_frames_count);
	if (error != 0) {
		printf("%s: ifnet_get_keepalive_offload_frames error %d\n",
		    __func__, error);
		goto done;
	}

	for (i = 0; i < used_frames_count; i++) {
		error = SYSCTL_OUT(req, frames_array + i,
		    sizeof(struct ifnet_keepalive_offload_frame));
		if (error != 0)
			goto done;
	}
done:
	if (frames_array != NULL)
		_FREE(frames_array, M_TEMP);
	return (error);
}
#endif /* DEVELOPMENT || DEBUG */