1 /*
2 * Copyright (c) 1999-2017 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
30 * support for mandatory and extensible security protections. This notice
31 * is included in support of clause 2.2 (b) of the Apple Public License,
32 * Version 2.0.
33 */
34 #include <stddef.h>
35
36 #include <sys/param.h>
37 #include <sys/systm.h>
38 #include <sys/kernel.h>
39 #include <sys/malloc.h>
40 #include <sys/mbuf.h>
41 #include <sys/socket.h>
42 #include <sys/domain.h>
43 #include <sys/user.h>
44 #include <sys/random.h>
45 #include <sys/socketvar.h>
46 #include <net/if_dl.h>
47 #include <net/if.h>
48 #include <net/route.h>
49 #include <net/if_var.h>
50 #include <net/dlil.h>
51 #include <net/if_arp.h>
52 #include <net/iptap.h>
53 #include <net/pktap.h>
54 #include <sys/kern_event.h>
55 #include <sys/kdebug.h>
56 #include <sys/mcache.h>
57 #include <sys/syslog.h>
58 #include <sys/protosw.h>
59 #include <sys/priv.h>
60
61 #include <kern/assert.h>
62 #include <kern/task.h>
63 #include <kern/thread.h>
64 #include <kern/sched_prim.h>
65 #include <kern/locks.h>
66 #include <kern/zalloc.h>
67
68 #include <net/kpi_protocol.h>
69 #include <net/if_types.h>
70 #include <net/if_llreach.h>
71 #include <net/kpi_interfacefilter.h>
72 #include <net/classq/classq.h>
73 #include <net/classq/classq_sfb.h>
74 #include <net/flowhash.h>
75 #include <net/ntstat.h>
76 #include <net/if_llatbl.h>
77 #include <net/net_api_stats.h>
78
79 #if INET
80 #include <netinet/in_var.h>
81 #include <netinet/igmp_var.h>
82 #include <netinet/ip_var.h>
83 #include <netinet/tcp.h>
84 #include <netinet/tcp_var.h>
85 #include <netinet/udp.h>
86 #include <netinet/udp_var.h>
87 #include <netinet/if_ether.h>
88 #include <netinet/in_pcb.h>
89 #include <netinet/in_tclass.h>
90 #endif /* INET */
91
92 #if INET6
93 #include <netinet6/in6_var.h>
94 #include <netinet6/nd6.h>
95 #include <netinet6/mld6_var.h>
96 #include <netinet6/scope6_var.h>
97 #endif /* INET6 */
98
99 #include <libkern/OSAtomic.h>
100 #include <libkern/tree.h>
101
102 #include <dev/random/randomdev.h>
103 #include <machine/machine_routines.h>
104
105 #include <mach/thread_act.h>
106 #include <mach/sdt.h>
107
108 #if CONFIG_MACF
109 #include <sys/kauth.h>
110 #include <security/mac_framework.h>
111 #include <net/ethernet.h>
112 #include <net/firewire.h>
113 #endif
114
115 #if PF
116 #include <net/pfvar.h>
117 #endif /* PF */
118 #include <net/pktsched/pktsched.h>
119
120 #if NECP
121 #include <net/necp.h>
122 #endif /* NECP */
123
124
125 #define DBG_LAYER_BEG DLILDBG_CODE(DBG_DLIL_STATIC, 0)
126 #define DBG_LAYER_END DLILDBG_CODE(DBG_DLIL_STATIC, 2)
127 #define DBG_FNC_DLIL_INPUT DLILDBG_CODE(DBG_DLIL_STATIC, (1 << 8))
128 #define DBG_FNC_DLIL_OUTPUT DLILDBG_CODE(DBG_DLIL_STATIC, (2 << 8))
129 #define DBG_FNC_DLIL_IFOUT DLILDBG_CODE(DBG_DLIL_STATIC, (3 << 8))
130
131 #define MAX_FRAME_TYPE_SIZE 4 /* LONGWORDS */
132 #define MAX_LINKADDR 4 /* LONGWORDS */
133 #define M_NKE M_IFADDR
134
135 #if 1
136 #define DLIL_PRINTF printf
137 #else
138 #define DLIL_PRINTF kprintf
139 #endif
140
141 #define IF_DATA_REQUIRE_ALIGNED_64(f) \
142 _CASSERT(!(offsetof(struct if_data_internal, f) % sizeof (u_int64_t)))
143
144 #define IFNET_IF_DATA_REQUIRE_ALIGNED_64(f) \
145 _CASSERT(!(offsetof(struct ifnet, if_data.f) % sizeof (u_int64_t)))
146
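/*
 * Illustrative example: the two macros above expand to compile-time
 * assertions of the form
 *
 *	_CASSERT(!(offsetof(struct if_data_internal, ifi_ipackets) % 8));
 *
 * so a hypothetical layout such as
 *
 *	struct if_data_internal { u_int32_t pad; u_int64_t ifi_ipackets; ... };
 *
 * (placing ifi_ipackets at offset 4) would fail the build instead of
 * risking torn 64-bit atomic updates at runtime.
 */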
147 enum {
148 kProtoKPI_v1 = 1,
149 kProtoKPI_v2 = 2
150 };
151
152 /*
153 * List of if_proto structures in if_proto_hash[] is protected by
154 * the ifnet lock. The rest of the fields are initialized at protocol
155 * attach time and never change, thus no lock required as long as
156 * a reference to it is valid, via if_proto_ref().
157 */
158 struct if_proto {
159 SLIST_ENTRY(if_proto) next_hash;
160 u_int32_t refcount;
161 u_int32_t detached;
162 struct ifnet *ifp;
163 protocol_family_t protocol_family;
164 int proto_kpi;
165 union {
166 struct {
167 proto_media_input input;
168 proto_media_preout pre_output;
169 proto_media_event event;
170 proto_media_ioctl ioctl;
171 proto_media_detached detached;
172 proto_media_resolve_multi resolve_multi;
173 proto_media_send_arp send_arp;
174 } v1;
175 struct {
176 proto_media_input_v2 input;
177 proto_media_preout pre_output;
178 proto_media_event event;
179 proto_media_ioctl ioctl;
180 proto_media_detached detached;
181 proto_media_resolve_multi resolve_multi;
182 proto_media_send_arp send_arp;
183 } v2;
184 } kpi;
185 };
186
187 SLIST_HEAD(proto_hash_entry, if_proto);
188
189 #define DLIL_SDLDATALEN \
190 (DLIL_SDLMAXLEN - offsetof(struct sockaddr_dl, sdl_data[0]))
191
192 struct dlil_ifnet {
193 struct ifnet dl_if; /* public ifnet */
194 /*
195 * DLIL private fields, protected by dl_if_lock
196 */
197 decl_lck_mtx_data(, dl_if_lock);
198 TAILQ_ENTRY(dlil_ifnet) dl_if_link; /* dlil_ifnet link */
199 u_int32_t dl_if_flags; /* flags (below) */
200 u_int32_t dl_if_refcnt; /* refcnt */
201 void (*dl_if_trace)(struct dlil_ifnet *, int); /* ref trace callback */
202 void *dl_if_uniqueid; /* unique interface id */
203 size_t dl_if_uniqueid_len; /* length of the unique id */
204 char dl_if_namestorage[IFNAMSIZ]; /* interface name storage */
205 char dl_if_xnamestorage[IFXNAMSIZ]; /* external name storage */
206 struct {
207 struct ifaddr ifa; /* lladdr ifa */
208 u_int8_t asdl[DLIL_SDLMAXLEN]; /* addr storage */
209 u_int8_t msdl[DLIL_SDLMAXLEN]; /* mask storage */
210 } dl_if_lladdr;
211 u_int8_t dl_if_descstorage[IF_DESCSIZE]; /* desc storage */
212 struct dlil_threading_info dl_if_inpstorage; /* input thread storage */
213 ctrace_t dl_if_attach; /* attach PC stacktrace */
214 ctrace_t dl_if_detach; /* detach PC stacktrace */
215 };
216
217 /* Values for dl_if_flags (private to DLIL) */
218 #define DLIF_INUSE 0x1 /* DLIL ifnet recycler, ifnet in use */
219 #define DLIF_REUSE 0x2 /* DLIL ifnet recycles, ifnet is not new */
220 #define DLIF_DEBUG 0x4 /* has debugging info */
221
222 #define IF_REF_TRACE_HIST_SIZE 8 /* size of ref trace history */
223
224 /* For gdb */
225 __private_extern__ unsigned int if_ref_trace_hist_size = IF_REF_TRACE_HIST_SIZE;
226
227 struct dlil_ifnet_dbg {
228 struct dlil_ifnet dldbg_dlif; /* dlil_ifnet */
229 u_int16_t dldbg_if_refhold_cnt; /* # ifnet references */
230 u_int16_t dldbg_if_refrele_cnt; /* # ifnet releases */
231 /*
232 * Circular lists of ifnet_{reference,release} callers.
233 */
234 ctrace_t dldbg_if_refhold[IF_REF_TRACE_HIST_SIZE];
235 ctrace_t dldbg_if_refrele[IF_REF_TRACE_HIST_SIZE];
236 };
237
238 #define DLIL_TO_IFP(s) (&s->dl_if)
239 #define IFP_TO_DLIL(s) ((struct dlil_ifnet *)s)
240
241 struct ifnet_filter {
242 TAILQ_ENTRY(ifnet_filter) filt_next;
243 u_int32_t filt_skip;
244 u_int32_t filt_flags;
245 ifnet_t filt_ifp;
246 const char *filt_name;
247 void *filt_cookie;
248 protocol_family_t filt_protocol;
249 iff_input_func filt_input;
250 iff_output_func filt_output;
251 iff_event_func filt_event;
252 iff_ioctl_func filt_ioctl;
253 iff_detached_func filt_detached;
254 };
255
256 struct proto_input_entry;
257
258 static TAILQ_HEAD(, dlil_ifnet) dlil_ifnet_head;
259 static lck_grp_t *dlil_lock_group;
260 lck_grp_t *ifnet_lock_group;
261 static lck_grp_t *ifnet_head_lock_group;
262 static lck_grp_t *ifnet_snd_lock_group;
263 static lck_grp_t *ifnet_rcv_lock_group;
264 lck_attr_t *ifnet_lock_attr;
265 decl_lck_rw_data(static, ifnet_head_lock);
266 decl_lck_mtx_data(static, dlil_ifnet_lock);
267 u_int32_t dlil_filter_disable_tso_count = 0;
268
269 #if DEBUG
270 static unsigned int ifnet_debug = 1; /* debugging (enabled) */
271 #else
272 static unsigned int ifnet_debug; /* debugging (disabled) */
273 #endif /* !DEBUG */
274 static unsigned int dlif_size; /* size of dlil_ifnet to allocate */
275 static unsigned int dlif_bufsize; /* size of dlif_size + headroom */
276 static struct zone *dlif_zone; /* zone for dlil_ifnet */
277
278 #define DLIF_ZONE_MAX 64 /* maximum elements in zone */
279 #define DLIF_ZONE_NAME "ifnet" /* zone name */
280
281 static unsigned int dlif_filt_size; /* size of ifnet_filter */
282 static struct zone *dlif_filt_zone; /* zone for ifnet_filter */
283
284 #define DLIF_FILT_ZONE_MAX 8 /* maximum elements in zone */
285 #define DLIF_FILT_ZONE_NAME "ifnet_filter" /* zone name */
286
287 static unsigned int dlif_phash_size; /* size of ifnet proto hash table */
288 static struct zone *dlif_phash_zone; /* zone for ifnet proto hash table */
289
290 #define DLIF_PHASH_ZONE_MAX DLIF_ZONE_MAX /* maximum elements in zone */
291 #define DLIF_PHASH_ZONE_NAME "ifnet_proto_hash" /* zone name */
292
293 static unsigned int dlif_proto_size; /* size of if_proto */
294 static struct zone *dlif_proto_zone; /* zone for if_proto */
295
296 #define DLIF_PROTO_ZONE_MAX (DLIF_ZONE_MAX*2) /* maximum elements in zone */
297 #define DLIF_PROTO_ZONE_NAME "ifnet_proto" /* zone name */
298
299 static unsigned int dlif_tcpstat_size; /* size of tcpstat_local to allocate */
300 static unsigned int dlif_tcpstat_bufsize; /* size of dlif_tcpstat_size + headroom */
301 static struct zone *dlif_tcpstat_zone; /* zone for tcpstat_local */
302
303 #define DLIF_TCPSTAT_ZONE_MAX 1 /* maximum elements in zone */
304 #define DLIF_TCPSTAT_ZONE_NAME "ifnet_tcpstat" /* zone name */
305
306 static unsigned int dlif_udpstat_size; /* size of udpstat_local to allocate */
307 static unsigned int dlif_udpstat_bufsize; /* size of dlif_udpstat_size + headroom */
308 static struct zone *dlif_udpstat_zone; /* zone for udpstat_local */
309
310 #define DLIF_UDPSTAT_ZONE_MAX 1 /* maximum elements in zone */
311 #define DLIF_UDPSTAT_ZONE_NAME "ifnet_udpstat" /* zone name */
312
313 static u_int32_t net_rtref;
314
315 static struct dlil_main_threading_info dlil_main_input_thread_info;
316 __private_extern__ struct dlil_threading_info *dlil_main_input_thread =
317 (struct dlil_threading_info *)&dlil_main_input_thread_info;
318
319 static int dlil_event_internal(struct ifnet *ifp, struct kev_msg *msg, bool update_generation);
320 static int dlil_detach_filter_internal(interface_filter_t filter, int detached);
321 static void dlil_if_trace(struct dlil_ifnet *, int);
322 static void if_proto_ref(struct if_proto *);
323 static void if_proto_free(struct if_proto *);
324 static struct if_proto *find_attached_proto(struct ifnet *, u_int32_t);
325 static int dlil_ifp_proto_count(struct ifnet *);
326 static void if_flt_monitor_busy(struct ifnet *);
327 static void if_flt_monitor_unbusy(struct ifnet *);
328 static void if_flt_monitor_enter(struct ifnet *);
329 static void if_flt_monitor_leave(struct ifnet *);
330 static int dlil_interface_filters_input(struct ifnet *, struct mbuf **,
331 char **, protocol_family_t);
332 static int dlil_interface_filters_output(struct ifnet *, struct mbuf **,
333 protocol_family_t);
334 static struct ifaddr *dlil_alloc_lladdr(struct ifnet *,
335 const struct sockaddr_dl *);
336 static int ifnet_lookup(struct ifnet *);
337 static void if_purgeaddrs(struct ifnet *);
338
339 static errno_t ifproto_media_input_v1(struct ifnet *, protocol_family_t,
340 struct mbuf *, char *);
341 static errno_t ifproto_media_input_v2(struct ifnet *, protocol_family_t,
342 struct mbuf *);
343 static errno_t ifproto_media_preout(struct ifnet *, protocol_family_t,
344 mbuf_t *, const struct sockaddr *, void *, char *, char *);
345 static void ifproto_media_event(struct ifnet *, protocol_family_t,
346 const struct kev_msg *);
347 static errno_t ifproto_media_ioctl(struct ifnet *, protocol_family_t,
348 unsigned long, void *);
349 static errno_t ifproto_media_resolve_multi(ifnet_t, const struct sockaddr *,
350 struct sockaddr_dl *, size_t);
351 static errno_t ifproto_media_send_arp(struct ifnet *, u_short,
352 const struct sockaddr_dl *, const struct sockaddr *,
353 const struct sockaddr_dl *, const struct sockaddr *);
354
355 static errno_t ifp_if_input(struct ifnet *ifp, struct mbuf *m_head,
356 struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
357 boolean_t poll, struct thread *tp);
358 static void ifp_if_input_poll(struct ifnet *, u_int32_t, u_int32_t,
359 struct mbuf **, struct mbuf **, u_int32_t *, u_int32_t *);
360 static errno_t ifp_if_ctl(struct ifnet *, ifnet_ctl_cmd_t, u_int32_t, void *);
361 static errno_t ifp_if_demux(struct ifnet *, struct mbuf *, char *,
362 protocol_family_t *);
363 static errno_t ifp_if_add_proto(struct ifnet *, protocol_family_t,
364 const struct ifnet_demux_desc *, u_int32_t);
365 static errno_t ifp_if_del_proto(struct ifnet *, protocol_family_t);
366 static errno_t ifp_if_check_multi(struct ifnet *, const struct sockaddr *);
367 #if CONFIG_EMBEDDED
368 static errno_t ifp_if_framer(struct ifnet *, struct mbuf **,
369 const struct sockaddr *, const char *, const char *,
370 u_int32_t *, u_int32_t *);
371 #else
372 static errno_t ifp_if_framer(struct ifnet *, struct mbuf **,
373 const struct sockaddr *, const char *, const char *);
374 #endif /* CONFIG_EMBEDDED */
375 static errno_t ifp_if_framer_extended(struct ifnet *, struct mbuf **,
376 const struct sockaddr *, const char *, const char *,
377 u_int32_t *, u_int32_t *);
378 static errno_t ifp_if_set_bpf_tap(struct ifnet *, bpf_tap_mode, bpf_packet_func);
379 static void ifp_if_free(struct ifnet *);
380 static void ifp_if_event(struct ifnet *, const struct kev_msg *);
381 static __inline void ifp_inc_traffic_class_in(struct ifnet *, struct mbuf *);
382 static __inline void ifp_inc_traffic_class_out(struct ifnet *, struct mbuf *);
383
384 static void dlil_main_input_thread_func(void *, wait_result_t);
385 static void dlil_input_thread_func(void *, wait_result_t);
386 static void dlil_rxpoll_input_thread_func(void *, wait_result_t);
387 static int dlil_create_input_thread(ifnet_t, struct dlil_threading_info *);
388 static void dlil_terminate_input_thread(struct dlil_threading_info *);
389 static void dlil_input_stats_add(const struct ifnet_stat_increment_param *,
390 struct dlil_threading_info *, boolean_t);
391 static void dlil_input_stats_sync(struct ifnet *, struct dlil_threading_info *);
392 static void dlil_input_packet_list_common(struct ifnet *, struct mbuf *,
393 u_int32_t, ifnet_model_t, boolean_t);
394 static errno_t ifnet_input_common(struct ifnet *, struct mbuf *, struct mbuf *,
395 const struct ifnet_stat_increment_param *, boolean_t, boolean_t);
396
397 #if DEBUG || DEVELOPMENT
398 static void dlil_verify_sum16(void);
399 #endif /* DEBUG || DEVELOPMENT */
400 static void dlil_output_cksum_dbg(struct ifnet *, struct mbuf *, uint32_t,
401 protocol_family_t);
402 static void dlil_input_cksum_dbg(struct ifnet *, struct mbuf *, char *,
403 protocol_family_t);
404
405 static void ifnet_detacher_thread_func(void *, wait_result_t);
406 static int ifnet_detacher_thread_cont(int);
407 static void ifnet_detach_final(struct ifnet *);
408 static void ifnet_detaching_enqueue(struct ifnet *);
409 static struct ifnet *ifnet_detaching_dequeue(void);
410
411 static void ifnet_start_thread_fn(void *, wait_result_t);
412 static void ifnet_poll_thread_fn(void *, wait_result_t);
413 static void ifnet_poll(struct ifnet *);
414 static errno_t ifnet_enqueue_common(struct ifnet *, void *,
415 classq_pkt_type_t, boolean_t, boolean_t *);
416
417 static void ifp_src_route_copyout(struct ifnet *, struct route *);
418 static void ifp_src_route_copyin(struct ifnet *, struct route *);
419 #if INET6
420 static void ifp_src_route6_copyout(struct ifnet *, struct route_in6 *);
421 static void ifp_src_route6_copyin(struct ifnet *, struct route_in6 *);
422 #endif /* INET6 */
423
424 static int sysctl_rxpoll SYSCTL_HANDLER_ARGS;
425 static int sysctl_rxpoll_mode_holdtime SYSCTL_HANDLER_ARGS;
426 static int sysctl_rxpoll_sample_holdtime SYSCTL_HANDLER_ARGS;
427 static int sysctl_rxpoll_interval_time SYSCTL_HANDLER_ARGS;
428 static int sysctl_rxpoll_wlowat SYSCTL_HANDLER_ARGS;
429 static int sysctl_rxpoll_whiwat SYSCTL_HANDLER_ARGS;
430 static int sysctl_sndq_maxlen SYSCTL_HANDLER_ARGS;
431 static int sysctl_rcvq_maxlen SYSCTL_HANDLER_ARGS;
432 static int sysctl_hwcksum_dbg_mode SYSCTL_HANDLER_ARGS;
433 static int sysctl_hwcksum_dbg_partial_rxoff_forced SYSCTL_HANDLER_ARGS;
434 static int sysctl_hwcksum_dbg_partial_rxoff_adj SYSCTL_HANDLER_ARGS;
435 static int sysctl_get_ports_used SYSCTL_HANDLER_ARGS;
436
437 struct chain_len_stats tx_chain_len_stats;
438 static int sysctl_tx_chain_len_stats SYSCTL_HANDLER_ARGS;
439
440 #if TEST_INPUT_THREAD_TERMINATION
441 static int sysctl_input_thread_termination_spin SYSCTL_HANDLER_ARGS;
442 #endif /* TEST_INPUT_THREAD_TERMINATION */
443
444 /* The following are protected by dlil_ifnet_lock */
445 static TAILQ_HEAD(, ifnet) ifnet_detaching_head;
446 static u_int32_t ifnet_detaching_cnt;
447 static void *ifnet_delayed_run; /* wait channel for detaching thread */
448
449 decl_lck_mtx_data(static, ifnet_fc_lock);
450
451 static uint32_t ifnet_flowhash_seed;
452
453 struct ifnet_flowhash_key {
454 char ifk_name[IFNAMSIZ];
455 uint32_t ifk_unit;
456 uint32_t ifk_flags;
457 uint32_t ifk_eflags;
458 uint32_t ifk_capabilities;
459 uint32_t ifk_capenable;
460 uint32_t ifk_output_sched_model;
461 uint32_t ifk_rand1;
462 uint32_t ifk_rand2;
463 };
464
465 /* Flow control entry per interface */
466 struct ifnet_fc_entry {
467 RB_ENTRY(ifnet_fc_entry) ifce_entry;
468 u_int32_t ifce_flowhash;
469 struct ifnet *ifce_ifp;
470 };
471
472 static uint32_t ifnet_calc_flowhash(struct ifnet *);
473 static int ifce_cmp(const struct ifnet_fc_entry *,
474 const struct ifnet_fc_entry *);
475 static int ifnet_fc_add(struct ifnet *);
476 static struct ifnet_fc_entry *ifnet_fc_get(u_int32_t);
477 static void ifnet_fc_entry_free(struct ifnet_fc_entry *);
478
479 /* protected by ifnet_fc_lock */
480 RB_HEAD(ifnet_fc_tree, ifnet_fc_entry) ifnet_fc_tree;
481 RB_PROTOTYPE(ifnet_fc_tree, ifnet_fc_entry, ifce_entry, ifce_cmp);
482 RB_GENERATE(ifnet_fc_tree, ifnet_fc_entry, ifce_entry, ifce_cmp);
483
484 static unsigned int ifnet_fc_zone_size; /* sizeof ifnet_fc_entry */
485 static struct zone *ifnet_fc_zone; /* ifnet_fc_entry zone */
486
487 #define IFNET_FC_ZONE_NAME "ifnet_fc_zone"
488 #define IFNET_FC_ZONE_MAX 32
489
490 extern void bpfdetach(struct ifnet *);
491 extern void proto_input_run(void);
492
493 extern uint32_t udp_count_opportunistic(unsigned int ifindex,
494 u_int32_t flags);
495 extern uint32_t tcp_count_opportunistic(unsigned int ifindex,
496 u_int32_t flags);
497
498 __private_extern__ void link_rtrequest(int, struct rtentry *, struct sockaddr *);
499
500 #if CONFIG_MACF
501 #ifdef CONFIG_EMBEDDED
502 int dlil_lladdr_ckreq = 1;
503 #else
504 int dlil_lladdr_ckreq = 0;
505 #endif
506 #endif
507
508 #if DEBUG
509 int dlil_verbose = 1;
510 #else
511 int dlil_verbose = 0;
512 #endif /* DEBUG */
513 #if IFNET_INPUT_SANITY_CHK
514 /* sanity checking of input packet lists received */
515 static u_int32_t dlil_input_sanity_check = 0;
516 #endif /* IFNET_INPUT_SANITY_CHK */
517 /* rate limit debug messages */
518 struct timespec dlil_dbgrate = { 1, 0 };
519
520 SYSCTL_DECL(_net_link_generic_system);
521
522 #if CONFIG_MACF
523 SYSCTL_INT(_net_link_generic_system, OID_AUTO, dlil_lladdr_ckreq,
524 CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_lladdr_ckreq, 0,
525 "Require MACF system info check to expose link-layer address");
526 #endif
527
528 SYSCTL_INT(_net_link_generic_system, OID_AUTO, dlil_verbose,
529 CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_verbose, 0, "Log DLIL error messages");
530
531 #define IF_SNDQ_MINLEN 32
532 u_int32_t if_sndq_maxlen = IFQ_MAXLEN;
533 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, sndq_maxlen,
534 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_sndq_maxlen, IFQ_MAXLEN,
535 sysctl_sndq_maxlen, "I", "Default transmit queue max length");
536
537 #define IF_RCVQ_MINLEN 32
538 #define IF_RCVQ_MAXLEN 256
539 u_int32_t if_rcvq_maxlen = IF_RCVQ_MAXLEN;
540 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rcvq_maxlen,
541 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rcvq_maxlen, IFQ_MAXLEN,
542 sysctl_rcvq_maxlen, "I", "Default receive queue max length");
543
544 #define IF_RXPOLL_DECAY 2 /* ilog2 of EWMA decay rate (4) */
545 static u_int32_t if_rxpoll_decay = IF_RXPOLL_DECAY;
546 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_decay,
547 CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_decay, IF_RXPOLL_DECAY,
548 "ilog2 of EWMA decay rate of avg inbound packets");
549
550 #define IF_RXPOLL_MODE_HOLDTIME_MIN (10ULL * 1000 * 1000) /* 10 ms */
551 #define IF_RXPOLL_MODE_HOLDTIME (1000ULL * 1000 * 1000) /* 1 sec */
552 static u_int64_t if_rxpoll_mode_holdtime = IF_RXPOLL_MODE_HOLDTIME;
553 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_freeze_time,
554 CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_mode_holdtime,
555 IF_RXPOLL_MODE_HOLDTIME, sysctl_rxpoll_mode_holdtime,
556 "Q", "input poll mode freeze time");
557
558 #define IF_RXPOLL_SAMPLETIME_MIN (1ULL * 1000 * 1000) /* 1 ms */
559 #define IF_RXPOLL_SAMPLETIME (10ULL * 1000 * 1000) /* 10 ms */
560 static u_int64_t if_rxpoll_sample_holdtime = IF_RXPOLL_SAMPLETIME;
561 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_sample_time,
562 CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_sample_holdtime,
563 IF_RXPOLL_SAMPLETIME, sysctl_rxpoll_sample_holdtime,
564 "Q", "input poll sampling time");
565
566 #define IF_RXPOLL_INTERVALTIME_MIN (1ULL * 1000) /* 1 us */
567 #define IF_RXPOLL_INTERVALTIME (1ULL * 1000 * 1000) /* 1 ms */
568 static u_int64_t if_rxpoll_interval_time = IF_RXPOLL_INTERVALTIME;
569 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_interval_time,
570 CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_interval_time,
571 IF_RXPOLL_INTERVALTIME, sysctl_rxpoll_interval_time,
572 "Q", "input poll interval (time)");
573
574 #define IF_RXPOLL_INTERVAL_PKTS 0 /* 0 (disabled) */
575 static u_int32_t if_rxpoll_interval_pkts = IF_RXPOLL_INTERVAL_PKTS;
576 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_interval_pkts,
577 CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_interval_pkts,
578 IF_RXPOLL_INTERVAL_PKTS, "input poll interval (packets)");
579
580 #define IF_RXPOLL_WLOWAT 10
581 static u_int32_t if_rxpoll_wlowat = IF_RXPOLL_WLOWAT;
582 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_wakeups_lowat,
583 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_wlowat,
584 IF_RXPOLL_WLOWAT, sysctl_rxpoll_wlowat,
585 "I", "input poll wakeup low watermark");
586
587 #define IF_RXPOLL_WHIWAT 100
588 static u_int32_t if_rxpoll_whiwat = IF_RXPOLL_WHIWAT;
589 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_wakeups_hiwat,
590 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_whiwat,
591 IF_RXPOLL_WHIWAT, sysctl_rxpoll_whiwat,
592 "I", "input poll wakeup high watermark");
593
594 static u_int32_t if_rxpoll_max = 0; /* 0 (automatic) */
595 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_max,
596 CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_max, 0,
597 "max packets per poll call");
598
599 static u_int32_t if_rxpoll = 1;
600 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll,
601 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll, 0,
602 sysctl_rxpoll, "I", "enable opportunistic input polling");
603
604 #if TEST_INPUT_THREAD_TERMINATION
605 static u_int32_t if_input_thread_termination_spin = 0;
606 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, input_thread_termination_spin,
607 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
608 &if_input_thread_termination_spin, 0,
609 sysctl_input_thread_termination_spin,
610 "I", "input thread termination spin limit");
611 #endif /* TEST_INPUT_THREAD_TERMINATION */
612
613 static u_int32_t cur_dlil_input_threads = 0;
614 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, dlil_input_threads,
615 CTLFLAG_RD | CTLFLAG_LOCKED, &cur_dlil_input_threads, 0,
616 "Current number of DLIL input threads");
617
618 #if IFNET_INPUT_SANITY_CHK
619 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, dlil_input_sanity_check,
620 CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_input_sanity_check, 0,
621 "Turn on sanity checking in DLIL input");
622 #endif /* IFNET_INPUT_SANITY_CHK */
623
624 static u_int32_t if_flowadv = 1;
625 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, flow_advisory,
626 CTLFLAG_RW | CTLFLAG_LOCKED, &if_flowadv, 1,
627 "enable flow-advisory mechanism");
628
629 static u_int32_t if_delaybased_queue = 1;
630 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, delaybased_queue,
631 CTLFLAG_RW | CTLFLAG_LOCKED, &if_delaybased_queue, 1,
632 "enable delay based dynamic queue sizing");
633
634 static uint64_t hwcksum_in_invalidated = 0;
635 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
636 hwcksum_in_invalidated, CTLFLAG_RD | CTLFLAG_LOCKED,
637 &hwcksum_in_invalidated, "inbound packets with invalidated hardware cksum");
638
639 uint32_t hwcksum_dbg = 0;
640 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_dbg,
641 CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg, 0,
642 "enable hardware cksum debugging");
643
644 u_int32_t ifnet_start_delayed = 0;
645 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, start_delayed,
646 CTLFLAG_RW | CTLFLAG_LOCKED, &ifnet_start_delayed, 0,
647 "number of times start was delayed");
648
649 u_int32_t ifnet_delay_start_disabled = 0;
650 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, start_delay_disabled,
651 CTLFLAG_RW | CTLFLAG_LOCKED, &ifnet_delay_start_disabled, 0,
652 "number of times start was delayed");
653
654 #define HWCKSUM_DBG_PARTIAL_FORCED 0x1 /* forced partial checksum */
655 #define HWCKSUM_DBG_PARTIAL_RXOFF_ADJ 0x2 /* adjust start offset */
656 #define HWCKSUM_DBG_FINALIZE_FORCED 0x10 /* forced finalize */
657 #define HWCKSUM_DBG_MASK \
658 (HWCKSUM_DBG_PARTIAL_FORCED | HWCKSUM_DBG_PARTIAL_RXOFF_ADJ | \
659 HWCKSUM_DBG_FINALIZE_FORCED)
660
661 static uint32_t hwcksum_dbg_mode = 0;
662 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, hwcksum_dbg_mode,
663 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg_mode,
664 0, sysctl_hwcksum_dbg_mode, "I", "hardware cksum debugging mode");
665
666 static uint64_t hwcksum_dbg_partial_forced = 0;
667 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
668 hwcksum_dbg_partial_forced, CTLFLAG_RD | CTLFLAG_LOCKED,
669 &hwcksum_dbg_partial_forced, "packets forced using partial cksum");
670
671 static uint64_t hwcksum_dbg_partial_forced_bytes = 0;
672 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
673 hwcksum_dbg_partial_forced_bytes, CTLFLAG_RD | CTLFLAG_LOCKED,
674 &hwcksum_dbg_partial_forced_bytes, "bytes forced using partial cksum");
675
676 static uint32_t hwcksum_dbg_partial_rxoff_forced = 0;
677 SYSCTL_PROC(_net_link_generic_system, OID_AUTO,
678 hwcksum_dbg_partial_rxoff_forced, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
679 &hwcksum_dbg_partial_rxoff_forced, 0,
680 sysctl_hwcksum_dbg_partial_rxoff_forced, "I",
681 "forced partial cksum rx offset");
682
683 static uint32_t hwcksum_dbg_partial_rxoff_adj = 0;
684 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, hwcksum_dbg_partial_rxoff_adj,
685 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg_partial_rxoff_adj,
686 0, sysctl_hwcksum_dbg_partial_rxoff_adj, "I",
687 "adjusted partial cksum rx offset");
688
689 static uint64_t hwcksum_dbg_verified = 0;
690 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
691 hwcksum_dbg_verified, CTLFLAG_RD | CTLFLAG_LOCKED,
692 &hwcksum_dbg_verified, "packets verified for having good checksum");
693
694 static uint64_t hwcksum_dbg_bad_cksum = 0;
695 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
696 hwcksum_dbg_bad_cksum, CTLFLAG_RD | CTLFLAG_LOCKED,
697 &hwcksum_dbg_bad_cksum, "packets with bad hardware calculated checksum");
698
699 static uint64_t hwcksum_dbg_bad_rxoff = 0;
700 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
701 hwcksum_dbg_bad_rxoff, CTLFLAG_RD | CTLFLAG_LOCKED,
702 &hwcksum_dbg_bad_rxoff, "packets with invalid rxoff");
703
704 static uint64_t hwcksum_dbg_adjusted = 0;
705 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
706 hwcksum_dbg_adjusted, CTLFLAG_RD | CTLFLAG_LOCKED,
707 &hwcksum_dbg_adjusted, "packets with rxoff adjusted");
708
709 static uint64_t hwcksum_dbg_finalized_hdr = 0;
710 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
711 hwcksum_dbg_finalized_hdr, CTLFLAG_RD | CTLFLAG_LOCKED,
712 &hwcksum_dbg_finalized_hdr, "finalized headers");
713
714 static uint64_t hwcksum_dbg_finalized_data = 0;
715 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
716 hwcksum_dbg_finalized_data, CTLFLAG_RD | CTLFLAG_LOCKED,
717 &hwcksum_dbg_finalized_data, "finalized payloads");
718
719 uint32_t hwcksum_tx = 1;
720 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_tx,
721 CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_tx, 0,
722 "enable transmit hardware checksum offload");
723
724 uint32_t hwcksum_rx = 1;
725 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_rx,
726 CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_rx, 0,
727 "enable receive hardware checksum offload");
728
729 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, tx_chain_len_stats,
730 CTLFLAG_RD | CTLFLAG_LOCKED, 0, 9,
731 sysctl_tx_chain_len_stats, "S", "");
732
733 uint32_t tx_chain_len_count = 0;
734 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, tx_chain_len_count,
735 CTLFLAG_RW | CTLFLAG_LOCKED, &tx_chain_len_count, 0, "");
736
737 SYSCTL_NODE(_net_link_generic_system, OID_AUTO, get_ports_used,
738 CTLFLAG_RD | CTLFLAG_LOCKED, sysctl_get_ports_used, "");
739
740 static uint32_t threshold_notify = 1; /* enable/disable */
741 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, threshold_notify,
742 CTLFLAG_RW | CTLFLAG_LOCKED, &threshold_notify, 0, "");
743
744 static uint32_t threshold_interval = 2; /* in seconds */
745 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, threshold_interval,
746 CTLFLAG_RW | CTLFLAG_LOCKED, &threshold_interval, 0, "");
747
748 #if (DEVELOPMENT || DEBUG)
749 static int sysctl_get_kao_frames SYSCTL_HANDLER_ARGS;
750 SYSCTL_NODE(_net_link_generic_system, OID_AUTO, get_kao_frames,
751 CTLFLAG_RD | CTLFLAG_LOCKED, sysctl_get_kao_frames, "");
752 #endif /* DEVELOPMENT || DEBUG */
753
754 struct net_api_stats net_api_stats;
755 SYSCTL_STRUCT(_net, OID_AUTO, api_stats, CTLFLAG_RD|CTLFLAG_LOCKED,
756 &net_api_stats, net_api_stats, "");
757
758
759 unsigned int net_rxpoll = 1;
760 unsigned int net_affinity = 1;
761 static kern_return_t dlil_affinity_set(struct thread *, u_int32_t);
762
763 extern u_int32_t inject_buckets;
764
765 static lck_grp_attr_t *dlil_grp_attributes = NULL;
766 static lck_attr_t *dlil_lck_attributes = NULL;
767
768 /* DLIL data threshold thread call */
769 static void dlil_dt_tcall_fn(thread_call_param_t, thread_call_param_t);
770
771 static void dlil_mit_tcall_fn(thread_call_param_t, thread_call_param_t);
772
773 uint32_t dlil_rcv_mit_pkts_min = 5;
774 uint32_t dlil_rcv_mit_pkts_max = 64;
775 uint32_t dlil_rcv_mit_interval = (500 * 1000);
776
777 #if (DEVELOPMENT || DEBUG)
778 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rcv_mit_pkts_min,
779 CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_rcv_mit_pkts_min, 0, "");
780 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rcv_mit_pkts_max,
781 CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_rcv_mit_pkts_max, 0, "");
782 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rcv_mit_interval,
783 CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_rcv_mit_interval, 0, "");
784 #endif /* DEVELOPMENT || DEBUG */
785
786
787 #define DLIL_INPUT_CHECK(m, ifp) { \
788 struct ifnet *_rcvif = mbuf_pkthdr_rcvif(m); \
789 if (_rcvif == NULL || (ifp != lo_ifp && _rcvif != ifp) || \
790 !(mbuf_flags(m) & MBUF_PKTHDR)) { \
791 panic_plain("%s: invalid mbuf %p\n", __func__, m); \
792 /* NOTREACHED */ \
793 } \
794 }
795
796 #define DLIL_EWMA(old, new, decay) do { \
797 u_int32_t _avg; \
798 if ((_avg = (old)) > 0) \
799 _avg = (((_avg << (decay)) - _avg) + (new)) >> (decay); \
800 else \
801 _avg = (new); \
802 (old) = _avg; \
803 } while (0)
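/*
 * Worked example (illustrative only): with decay = 2 the macro computes
 *
 *	avg' = ((avg << 2) - avg + new) >> 2 = (3 * avg + new) / 4
 *
 * i.e. an exponentially weighted moving average in which each new sample
 * contributes 1/(2^decay) of the result; avg = 100, new = 20 yields
 * avg' = (300 + 20) >> 2 = 80.  A zero history (old == 0) simply adopts
 * the new sample.
 */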
804
805 #define MBPS (1ULL * 1000 * 1000)
806 #define GBPS (MBPS * 1000)
807
808 struct rxpoll_time_tbl {
809 u_int64_t speed; /* downlink speed */
810 u_int32_t plowat; /* packets low watermark */
811 u_int32_t phiwat; /* packets high watermark */
812 u_int32_t blowat; /* bytes low watermark */
813 u_int32_t bhiwat; /* bytes high watermark */
814 };
815
816 static struct rxpoll_time_tbl rxpoll_tbl[] = {
817 { 10 * MBPS, 2, 8, (1 * 1024), (6 * 1024) },
818 { 100 * MBPS, 10, 40, (4 * 1024), (64 * 1024) },
819 { 1 * GBPS, 10, 40, (4 * 1024), (64 * 1024) },
820 { 10 * GBPS, 10, 40, (4 * 1024), (64 * 1024) },
821 { 100 * GBPS, 10, 40, (4 * 1024), (64 * 1024) },
822 { 0, 0, 0, 0, 0 }
823 };
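/*
 * Reading the table above: the 100 Mbps row, for example, pairs 10/40
 * outstanding-packet low/high watermarks with 4 KB/64 KB byte watermarks,
 * while the 10 Mbps row uses the smaller 2/8 and 1 KB/6 KB pairs; the
 * all-zero row terminates the table for the code that walks it.
 */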
824
825 int
826 proto_hash_value(u_int32_t protocol_family)
827 {
828 /*
829 * dlil_proto_unplumb_all() depends on the mapping between
830 * the hash bucket index and the protocol family defined
831 * here; future changes must be applied there as well.
832 */
833 switch (protocol_family) {
834 case PF_INET:
835 return (0);
836 case PF_INET6:
837 return (1);
838 case PF_VLAN:
839 return (2);
840 case PF_UNSPEC:
841 default:
842 return (3);
843 }
844 }
845
846 /*
847 * Caller must already be holding ifnet lock.
848 */
849 static struct if_proto *
850 find_attached_proto(struct ifnet *ifp, u_int32_t protocol_family)
851 {
852 struct if_proto *proto = NULL;
853 u_int32_t i = proto_hash_value(protocol_family);
854
855 ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);
856
857 if (ifp->if_proto_hash != NULL)
858 proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
859
860 while (proto != NULL && proto->protocol_family != protocol_family)
861 proto = SLIST_NEXT(proto, next_hash);
862
863 if (proto != NULL)
864 if_proto_ref(proto);
865
866 return (proto);
867 }
868
869 static void
870 if_proto_ref(struct if_proto *proto)
871 {
872 atomic_add_32(&proto->refcount, 1);
873 }
874
875 extern void if_rtproto_del(struct ifnet *ifp, int protocol);
876
877 static void
878 if_proto_free(struct if_proto *proto)
879 {
880 u_int32_t oldval;
881 struct ifnet *ifp = proto->ifp;
882 u_int32_t proto_family = proto->protocol_family;
883 struct kev_dl_proto_data ev_pr_data;
884
885 oldval = atomic_add_32_ov(&proto->refcount, -1);
886 if (oldval > 1)
887 return;
888
889 /* No more reference on this, protocol must have been detached */
890 VERIFY(proto->detached);
891
892 if (proto->proto_kpi == kProtoKPI_v1) {
893 if (proto->kpi.v1.detached)
894 proto->kpi.v1.detached(ifp, proto->protocol_family);
895 }
896 if (proto->proto_kpi == kProtoKPI_v2) {
897 if (proto->kpi.v2.detached)
898 proto->kpi.v2.detached(ifp, proto->protocol_family);
899 }
900
901 /*
902 * Cleanup routes that may still be in the routing table for that
903 * interface/protocol pair.
904 */
905 if_rtproto_del(ifp, proto_family);
906
907 /*
908 * The reserved field carries the number of protocols still attached
909 * (subject to change)
910 */
911 ifnet_lock_shared(ifp);
912 ev_pr_data.proto_family = proto_family;
913 ev_pr_data.proto_remaining_count = dlil_ifp_proto_count(ifp);
914 ifnet_lock_done(ifp);
915
916 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_DETACHED,
917 (struct net_event_data *)&ev_pr_data,
918 sizeof (struct kev_dl_proto_data));
919
920 zfree(dlif_proto_zone, proto);
921 }
922
923 __private_extern__ void
924 ifnet_lock_assert(struct ifnet *ifp, ifnet_lock_assert_t what)
925 {
926 #if !MACH_ASSERT
927 #pragma unused(ifp)
928 #endif
929 unsigned int type = 0;
930 int ass = 1;
931
932 switch (what) {
933 case IFNET_LCK_ASSERT_EXCLUSIVE:
934 type = LCK_RW_ASSERT_EXCLUSIVE;
935 break;
936
937 case IFNET_LCK_ASSERT_SHARED:
938 type = LCK_RW_ASSERT_SHARED;
939 break;
940
941 case IFNET_LCK_ASSERT_OWNED:
942 type = LCK_RW_ASSERT_HELD;
943 break;
944
945 case IFNET_LCK_ASSERT_NOTOWNED:
946 /* nothing to do here for RW lock; bypass assert */
947 ass = 0;
948 break;
949
950 default:
951 panic("bad ifnet assert type: %d", what);
952 /* NOTREACHED */
953 }
954 if (ass)
955 LCK_RW_ASSERT(&ifp->if_lock, type);
956 }
957
958 __private_extern__ void
959 ifnet_lock_shared(struct ifnet *ifp)
960 {
961 lck_rw_lock_shared(&ifp->if_lock);
962 }
963
964 __private_extern__ void
965 ifnet_lock_exclusive(struct ifnet *ifp)
966 {
967 lck_rw_lock_exclusive(&ifp->if_lock);
968 }
969
970 __private_extern__ void
971 ifnet_lock_done(struct ifnet *ifp)
972 {
973 lck_rw_done(&ifp->if_lock);
974 }
975
976 #if INET
977 __private_extern__ void
978 if_inetdata_lock_shared(struct ifnet *ifp)
979 {
980 lck_rw_lock_shared(&ifp->if_inetdata_lock);
981 }
982
983 __private_extern__ void
984 if_inetdata_lock_exclusive(struct ifnet *ifp)
985 {
986 lck_rw_lock_exclusive(&ifp->if_inetdata_lock);
987 }
988
989 __private_extern__ void
990 if_inetdata_lock_done(struct ifnet *ifp)
991 {
992 lck_rw_done(&ifp->if_inetdata_lock);
993 }
994 #endif
995
996 #if INET6
997 __private_extern__ void
998 if_inet6data_lock_shared(struct ifnet *ifp)
999 {
1000 lck_rw_lock_shared(&ifp->if_inet6data_lock);
1001 }
1002
1003 __private_extern__ void
1004 if_inet6data_lock_exclusive(struct ifnet *ifp)
1005 {
1006 lck_rw_lock_exclusive(&ifp->if_inet6data_lock);
1007 }
1008
1009 __private_extern__ void
1010 if_inet6data_lock_done(struct ifnet *ifp)
1011 {
1012 lck_rw_done(&ifp->if_inet6data_lock);
1013 }
1014 #endif
1015
1016 __private_extern__ void
1017 ifnet_head_lock_shared(void)
1018 {
1019 lck_rw_lock_shared(&ifnet_head_lock);
1020 }
1021
1022 __private_extern__ void
1023 ifnet_head_lock_exclusive(void)
1024 {
1025 lck_rw_lock_exclusive(&ifnet_head_lock);
1026 }
1027
1028 __private_extern__ void
1029 ifnet_head_done(void)
1030 {
1031 lck_rw_done(&ifnet_head_lock);
1032 }
1033
1034 __private_extern__ void
1035 ifnet_head_assert_exclusive(void)
1036 {
1037 LCK_RW_ASSERT(&ifnet_head_lock, LCK_RW_ASSERT_EXCLUSIVE);
1038 }
1039
1040 /*
1041 * Caller must already be holding ifnet lock.
1042 */
1043 static int
1044 dlil_ifp_proto_count(struct ifnet *ifp)
1045 {
1046 int i, count = 0;
1047
1048 ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);
1049
1050 if (ifp->if_proto_hash == NULL)
1051 goto done;
1052
1053 for (i = 0; i < PROTO_HASH_SLOTS; i++) {
1054 struct if_proto *proto;
1055 SLIST_FOREACH(proto, &ifp->if_proto_hash[i], next_hash) {
1056 count++;
1057 }
1058 }
1059 done:
1060 return (count);
1061 }
1062
1063 __private_extern__ void
1064 dlil_post_msg(struct ifnet *ifp, u_int32_t event_subclass,
1065 u_int32_t event_code, struct net_event_data *event_data,
1066 u_int32_t event_data_len)
1067 {
1068 struct net_event_data ev_data;
1069 struct kev_msg ev_msg;
1070
1071 bzero(&ev_msg, sizeof (ev_msg));
1072 bzero(&ev_data, sizeof (ev_data));
1073 /*
1074 * a net event always starts with a net_event_data structure
1075 * but the caller can generate a simple net event or
1076 * provide a longer event structure to post
1077 */
1078 ev_msg.vendor_code = KEV_VENDOR_APPLE;
1079 ev_msg.kev_class = KEV_NETWORK_CLASS;
1080 ev_msg.kev_subclass = event_subclass;
1081 ev_msg.event_code = event_code;
1082
1083 if (event_data == NULL) {
1084 event_data = &ev_data;
1085 event_data_len = sizeof (struct net_event_data);
1086 }
1087
1088 strlcpy(&event_data->if_name[0], ifp->if_name, IFNAMSIZ);
1089 event_data->if_family = ifp->if_family;
1090 event_data->if_unit = (u_int32_t)ifp->if_unit;
1091
1092 ev_msg.dv[0].data_length = event_data_len;
1093 ev_msg.dv[0].data_ptr = event_data;
1094 ev_msg.dv[1].data_length = 0;
1095
1096 /* Don't update interface generation for quality and RRC state changes */
1097 bool update_generation = (event_subclass != KEV_DL_SUBCLASS ||
1098 (event_code != KEV_DL_LINK_QUALITY_METRIC_CHANGED &&
1099 event_code != KEV_DL_RRC_STATE_CHANGED));
1100
1101 dlil_event_internal(ifp, &ev_msg, update_generation);
1102 }
1103
1104 __private_extern__ int
1105 dlil_alloc_local_stats(struct ifnet *ifp)
1106 {
1107 int ret = EINVAL;
1108 void *buf, *base, **pbuf;
1109
1110 if (ifp == NULL)
1111 goto end;
1112
1113 if (ifp->if_tcp_stat == NULL && ifp->if_udp_stat == NULL) {
1114 /* allocate tcpstat_local structure */
1115 buf = zalloc(dlif_tcpstat_zone);
1116 if (buf == NULL) {
1117 ret = ENOMEM;
1118 goto end;
1119 }
1120 bzero(buf, dlif_tcpstat_bufsize);
1121
1122 /* Get the 64-bit aligned base address for this object */
1123 base = (void *)P2ROUNDUP((intptr_t)buf + sizeof (u_int64_t),
1124 sizeof (u_int64_t));
1125 VERIFY(((intptr_t)base + dlif_tcpstat_size) <=
1126 ((intptr_t)buf + dlif_tcpstat_bufsize));
1127
1128 /*
1129 * Wind back a pointer size from the aligned base and
1130 * save the original address so we can free it later.
1131 */
1132 pbuf = (void **)((intptr_t)base - sizeof (void *));
1133 *pbuf = buf;
1134 ifp->if_tcp_stat = base;
1135
1136 /* allocate udpstat_local structure */
1137 buf = zalloc(dlif_udpstat_zone);
1138 if (buf == NULL) {
1139 ret = ENOMEM;
1140 goto end;
1141 }
1142 bzero(buf, dlif_udpstat_bufsize);
1143
1144 /* Get the 64-bit aligned base address for this object */
1145 base = (void *)P2ROUNDUP((intptr_t)buf + sizeof (u_int64_t),
1146 sizeof (u_int64_t));
1147 VERIFY(((intptr_t)base + dlif_udpstat_size) <=
1148 ((intptr_t)buf + dlif_udpstat_bufsize));
1149
1150 /*
1151 * Wind back a pointer size from the aligned base and
1152 * save the original address so we can free it later.
1153 */
1154 pbuf = (void **)((intptr_t)base - sizeof (void *));
1155 *pbuf = buf;
1156 ifp->if_udp_stat = base;
1157
1158 VERIFY(IS_P2ALIGNED(ifp->if_tcp_stat, sizeof (u_int64_t)) &&
1159 IS_P2ALIGNED(ifp->if_udp_stat, sizeof (u_int64_t)));
1160
1161 ret = 0;
1162 }
1163
1164 if (ifp->if_ipv4_stat == NULL) {
1165 MALLOC(ifp->if_ipv4_stat, struct if_tcp_ecn_stat *,
1166 sizeof (struct if_tcp_ecn_stat), M_TEMP, M_WAITOK|M_ZERO);
1167 if (ifp->if_ipv4_stat == NULL) {
1168 ret = ENOMEM;
1169 goto end;
1170 }
1171 }
1172
1173 if (ifp->if_ipv6_stat == NULL) {
1174 MALLOC(ifp->if_ipv6_stat, struct if_tcp_ecn_stat *,
1175 sizeof (struct if_tcp_ecn_stat), M_TEMP, M_WAITOK|M_ZERO);
1176 if (ifp->if_ipv6_stat == NULL) {
1177 ret = ENOMEM;
1178 goto end;
1179 }
1180 }
1181 end:
1182 if (ret != 0) {
1183 if (ifp->if_tcp_stat != NULL) {
1184 pbuf = (void **)
1185 ((intptr_t)ifp->if_tcp_stat - sizeof (void *));
1186 zfree(dlif_tcpstat_zone, *pbuf);
1187 ifp->if_tcp_stat = NULL;
1188 }
1189 if (ifp->if_udp_stat != NULL) {
1190 pbuf = (void **)
1191 ((intptr_t)ifp->if_udp_stat - sizeof (void *));
1192 zfree(dlif_udpstat_zone, *pbuf);
1193 ifp->if_udp_stat = NULL;
1194 }
1195 if (ifp->if_ipv4_stat != NULL) {
1196 FREE(ifp->if_ipv4_stat, M_TEMP);
1197 ifp->if_ipv4_stat = NULL;
1198 }
1199 if (ifp->if_ipv6_stat != NULL) {
1200 FREE(ifp->if_ipv6_stat, M_TEMP);
1201 ifp->if_ipv6_stat = NULL;
1202 }
1203 }
1204
1205 return (ret);
1206 }
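/*
 * Illustrative sketch of the aligned-allocation scheme used above
 * (field widths not to scale; 8 == sizeof (u_int64_t)):
 *
 *	buf (from zalloc)            base = P2ROUNDUP(buf + 8, 8)
 *	|                            |
 *	v                            v
 *	+---------+------------------+-------------------------------+
 *	|  slack  |  saved void *    |  64-bit aligned stats object  |
 *	+---------+------------------+-------------------------------+
 *	           ^
 *	           pbuf = base - sizeof (void *), *pbuf = buf
 *
 * The original zalloc() address is stashed just below the aligned base so
 * the error path above (and the matching free path) can recover it with a
 * single pointer read.
 */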
1207
1208 static int
1209 dlil_create_input_thread(ifnet_t ifp, struct dlil_threading_info *inp)
1210 {
1211 thread_continue_t func;
1212 u_int32_t limit;
1213 int error;
1214
1215 /* NULL ifp indicates the main input thread, called at dlil_init time */
1216 if (ifp == NULL) {
1217 func = dlil_main_input_thread_func;
1218 VERIFY(inp == dlil_main_input_thread);
1219 (void) strlcat(inp->input_name,
1220 "main_input", DLIL_THREADNAME_LEN);
1221 } else if (net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) {
1222 func = dlil_rxpoll_input_thread_func;
1223 VERIFY(inp != dlil_main_input_thread);
1224 (void) snprintf(inp->input_name, DLIL_THREADNAME_LEN,
1225 "%s_input_poll", if_name(ifp));
1226 } else {
1227 func = dlil_input_thread_func;
1228 VERIFY(inp != dlil_main_input_thread);
1229 (void) snprintf(inp->input_name, DLIL_THREADNAME_LEN,
1230 "%s_input", if_name(ifp));
1231 }
1232 VERIFY(inp->input_thr == THREAD_NULL);
1233
1234 inp->lck_grp = lck_grp_alloc_init(inp->input_name, dlil_grp_attributes);
1235 lck_mtx_init(&inp->input_lck, inp->lck_grp, dlil_lck_attributes);
1236
1237 inp->mode = IFNET_MODEL_INPUT_POLL_OFF;
1238 inp->ifp = ifp; /* NULL for main input thread */
1239
1240 net_timerclear(&inp->mode_holdtime);
1241 net_timerclear(&inp->mode_lasttime);
1242 net_timerclear(&inp->sample_holdtime);
1243 net_timerclear(&inp->sample_lasttime);
1244 net_timerclear(&inp->dbg_lasttime);
1245
1246 /*
1247 * For interfaces that support opportunistic polling, set the
1248 * low and high watermarks for outstanding inbound packets/bytes.
1249 * Also define freeze times for transitioning between modes
1250 * and updating the average.
1251 */
1252 if (ifp != NULL && net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) {
1253 limit = MAX(if_rcvq_maxlen, IF_RCVQ_MINLEN);
1254 (void) dlil_rxpoll_set_params(ifp, NULL, FALSE);
1255 } else {
1256 limit = (u_int32_t)-1;
1257 }
1258
1259 _qinit(&inp->rcvq_pkts, Q_DROPTAIL, limit, QP_MBUF);
1260 if (inp == dlil_main_input_thread) {
1261 struct dlil_main_threading_info *inpm =
1262 (struct dlil_main_threading_info *)inp;
1263 _qinit(&inpm->lo_rcvq_pkts, Q_DROPTAIL, limit, QP_MBUF);
1264 }
1265
1266 error = kernel_thread_start(func, inp, &inp->input_thr);
1267 if (error == KERN_SUCCESS) {
1268 ml_thread_policy(inp->input_thr, MACHINE_GROUP,
1269 (MACHINE_NETWORK_GROUP|MACHINE_NETWORK_NETISR));
1270 /*
1271 * We create an affinity set so that the matching workloop
1272 * thread or the starter thread (for loopback) can be
1273 * scheduled on the same processor set as the input thread.
1274 */
1275 if (net_affinity) {
1276 struct thread *tp = inp->input_thr;
1277 u_int32_t tag;
1278 /*
1279 * Randomize to reduce the probability
1280 * of affinity tag namespace collision.
1281 */
1282 read_frandom(&tag, sizeof (tag));
1283 if (dlil_affinity_set(tp, tag) == KERN_SUCCESS) {
1284 thread_reference(tp);
1285 inp->tag = tag;
1286 inp->net_affinity = TRUE;
1287 }
1288 }
1289 } else if (inp == dlil_main_input_thread) {
1290 panic_plain("%s: couldn't create main input thread", __func__);
1291 /* NOTREACHED */
1292 } else {
1293 panic_plain("%s: couldn't create %s input thread", __func__,
1294 if_name(ifp));
1295 /* NOTREACHED */
1296 }
1297 OSAddAtomic(1, &cur_dlil_input_threads);
1298
1299 return (error);
1300 }
1301
1302 #if TEST_INPUT_THREAD_TERMINATION
1303 static int
1304 sysctl_input_thread_termination_spin SYSCTL_HANDLER_ARGS
1305 {
1306 #pragma unused(arg1, arg2)
1307 uint32_t i;
1308 int err;
1309
1310 i = if_input_thread_termination_spin;
1311
1312 err = sysctl_handle_int(oidp, &i, 0, req);
1313 if (err != 0 || req->newptr == USER_ADDR_NULL)
1314 return (err);
1315
1316 if (net_rxpoll == 0)
1317 return (ENXIO);
1318
1319 if_input_thread_termination_spin = i;
1320 return (err);
1321 }
1322 #endif /* TEST_INPUT_THREAD_TERMINATION */
1323
1324 static void
1325 dlil_clean_threading_info(struct dlil_threading_info *inp)
1326 {
1327 lck_mtx_destroy(&inp->input_lck, inp->lck_grp);
1328 lck_grp_free(inp->lck_grp);
1329
1330 inp->input_waiting = 0;
1331 inp->wtot = 0;
1332 bzero(inp->input_name, sizeof (inp->input_name));
1333 inp->ifp = NULL;
1334 VERIFY(qhead(&inp->rcvq_pkts) == NULL && qempty(&inp->rcvq_pkts));
1335 qlimit(&inp->rcvq_pkts) = 0;
1336 bzero(&inp->stats, sizeof (inp->stats));
1337
1338 VERIFY(!inp->net_affinity);
1339 inp->input_thr = THREAD_NULL;
1340 VERIFY(inp->wloop_thr == THREAD_NULL);
1341 VERIFY(inp->poll_thr == THREAD_NULL);
1342 VERIFY(inp->tag == 0);
1343
1344 inp->mode = IFNET_MODEL_INPUT_POLL_OFF;
1345 bzero(&inp->tstats, sizeof (inp->tstats));
1346 bzero(&inp->pstats, sizeof (inp->pstats));
1347 bzero(&inp->sstats, sizeof (inp->sstats));
1348
1349 net_timerclear(&inp->mode_holdtime);
1350 net_timerclear(&inp->mode_lasttime);
1351 net_timerclear(&inp->sample_holdtime);
1352 net_timerclear(&inp->sample_lasttime);
1353 net_timerclear(&inp->dbg_lasttime);
1354
1355 #if IFNET_INPUT_SANITY_CHK
1356 inp->input_mbuf_cnt = 0;
1357 #endif /* IFNET_INPUT_SANITY_CHK */
1358 }
1359
1360 static void
1361 dlil_terminate_input_thread(struct dlil_threading_info *inp)
1362 {
1363 struct ifnet *ifp = inp->ifp;
1364
1365 VERIFY(current_thread() == inp->input_thr);
1366 VERIFY(inp != dlil_main_input_thread);
1367
1368 OSAddAtomic(-1, &cur_dlil_input_threads);
1369
1370 #if TEST_INPUT_THREAD_TERMINATION
1371 { /* do something useless that won't get optimized away */
1372 uint32_t v = 1;
1373 for (uint32_t i = 0;
1374 i < if_input_thread_termination_spin;
1375 i++) {
1376 v = (i + 1) * v;
1377 }
1378 printf("the value is %d\n", v);
1379 }
1380 #endif /* TEST_INPUT_THREAD_TERMINATION */
1381
1382 lck_mtx_lock_spin(&inp->input_lck);
1383 VERIFY((inp->input_waiting & DLIL_INPUT_TERMINATE) != 0);
1384 inp->input_waiting |= DLIL_INPUT_TERMINATE_COMPLETE;
1385 wakeup_one((caddr_t)&inp->input_waiting);
1386 lck_mtx_unlock(&inp->input_lck);
1387
1388 /* for the extra refcnt from kernel_thread_start() */
1389 thread_deallocate(current_thread());
1390
1391 if (dlil_verbose) {
1392 printf("%s: input thread terminated\n",
1393 if_name(ifp));
1394 }
1395
1396 /* this is the end */
1397 thread_terminate(current_thread());
1398 /* NOTREACHED */
1399 }
1400
1401 static kern_return_t
1402 dlil_affinity_set(struct thread *tp, u_int32_t tag)
1403 {
1404 thread_affinity_policy_data_t policy;
1405
1406 bzero(&policy, sizeof (policy));
1407 policy.affinity_tag = tag;
1408 return (thread_policy_set(tp, THREAD_AFFINITY_POLICY,
1409 (thread_policy_t)&policy, THREAD_AFFINITY_POLICY_COUNT));
1410 }
1411
1412 void
1413 dlil_init(void)
1414 {
1415 thread_t thread = THREAD_NULL;
1416
1417 /*
1418 * The following fields must be 64-bit aligned for atomic operations.
1419 */
1420 IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets);
1421 IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors);
1422 IF_DATA_REQUIRE_ALIGNED_64(ifi_opackets);
1423 IF_DATA_REQUIRE_ALIGNED_64(ifi_oerrors);
1424 IF_DATA_REQUIRE_ALIGNED_64(ifi_collisions);
1425 IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes);
1426 IF_DATA_REQUIRE_ALIGNED_64(ifi_obytes);
1427 IF_DATA_REQUIRE_ALIGNED_64(ifi_imcasts);
1428 IF_DATA_REQUIRE_ALIGNED_64(ifi_omcasts);
1429 IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops);
1430 IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto);
1431 IF_DATA_REQUIRE_ALIGNED_64(ifi_alignerrs);
1432 IF_DATA_REQUIRE_ALIGNED_64(ifi_dt_bytes);
1433 IF_DATA_REQUIRE_ALIGNED_64(ifi_fpackets);
1434 IF_DATA_REQUIRE_ALIGNED_64(ifi_fbytes);
1435
1436 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets);
1437 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors);
1438 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_opackets);
1439 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_oerrors);
1440 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_collisions);
1441 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes);
1442 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_obytes);
1443 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_imcasts);
1444 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_omcasts);
1445 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops);
1446 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto);
1447 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_alignerrs);
1448 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_dt_bytes);
1449 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_fpackets);
1450 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_fbytes);
1451
1452 /*
1453 * These IF_HWASSIST_ flags must be equal to their IFNET_* counterparts.
1454 */
1455 _CASSERT(IF_HWASSIST_CSUM_IP == IFNET_CSUM_IP);
1456 _CASSERT(IF_HWASSIST_CSUM_TCP == IFNET_CSUM_TCP);
1457 _CASSERT(IF_HWASSIST_CSUM_UDP == IFNET_CSUM_UDP);
1458 _CASSERT(IF_HWASSIST_CSUM_IP_FRAGS == IFNET_CSUM_FRAGMENT);
1459 _CASSERT(IF_HWASSIST_CSUM_FRAGMENT == IFNET_IP_FRAGMENT);
1460 _CASSERT(IF_HWASSIST_CSUM_TCPIPV6 == IFNET_CSUM_TCPIPV6);
1461 _CASSERT(IF_HWASSIST_CSUM_UDPIPV6 == IFNET_CSUM_UDPIPV6);
1462 _CASSERT(IF_HWASSIST_CSUM_FRAGMENT_IPV6 == IFNET_IPV6_FRAGMENT);
1463 _CASSERT(IF_HWASSIST_CSUM_PARTIAL == IFNET_CSUM_PARTIAL);
1464 _CASSERT(IF_HWASSIST_CSUM_ZERO_INVERT == IFNET_CSUM_ZERO_INVERT);
1465 _CASSERT(IF_HWASSIST_VLAN_TAGGING == IFNET_VLAN_TAGGING);
1466 _CASSERT(IF_HWASSIST_VLAN_MTU == IFNET_VLAN_MTU);
1467 _CASSERT(IF_HWASSIST_TSO_V4 == IFNET_TSO_IPV4);
1468 _CASSERT(IF_HWASSIST_TSO_V6 == IFNET_TSO_IPV6);
1469
1470 /*
1471 * ... as well as the mbuf checksum flags counterparts.
1472 */
1473 _CASSERT(CSUM_IP == IF_HWASSIST_CSUM_IP);
1474 _CASSERT(CSUM_TCP == IF_HWASSIST_CSUM_TCP);
1475 _CASSERT(CSUM_UDP == IF_HWASSIST_CSUM_UDP);
1476 _CASSERT(CSUM_IP_FRAGS == IF_HWASSIST_CSUM_IP_FRAGS);
1477 _CASSERT(CSUM_FRAGMENT == IF_HWASSIST_CSUM_FRAGMENT);
1478 _CASSERT(CSUM_TCPIPV6 == IF_HWASSIST_CSUM_TCPIPV6);
1479 _CASSERT(CSUM_UDPIPV6 == IF_HWASSIST_CSUM_UDPIPV6);
1480 _CASSERT(CSUM_FRAGMENT_IPV6 == IF_HWASSIST_CSUM_FRAGMENT_IPV6);
1481 _CASSERT(CSUM_PARTIAL == IF_HWASSIST_CSUM_PARTIAL);
1482 _CASSERT(CSUM_ZERO_INVERT == IF_HWASSIST_CSUM_ZERO_INVERT);
1483 _CASSERT(CSUM_VLAN_TAG_VALID == IF_HWASSIST_VLAN_TAGGING);
1484
1485 /*
1486 * Make sure we have at least IF_LLREACH_MAXLEN in the llreach info.
1487 */
1488 _CASSERT(IF_LLREACH_MAXLEN <= IF_LLREACHINFO_ADDRLEN);
1489 _CASSERT(IFNET_LLREACHINFO_ADDRLEN == IF_LLREACHINFO_ADDRLEN);
1490
1491 _CASSERT(IFRLOGF_DLIL == IFNET_LOGF_DLIL);
1492 _CASSERT(IFRLOGF_FAMILY == IFNET_LOGF_FAMILY);
1493 _CASSERT(IFRLOGF_DRIVER == IFNET_LOGF_DRIVER);
1494 _CASSERT(IFRLOGF_FIRMWARE == IFNET_LOGF_FIRMWARE);
1495
1496 _CASSERT(IFRLOGCAT_CONNECTIVITY == IFNET_LOGCAT_CONNECTIVITY);
1497 _CASSERT(IFRLOGCAT_QUALITY == IFNET_LOGCAT_QUALITY);
1498 _CASSERT(IFRLOGCAT_PERFORMANCE == IFNET_LOGCAT_PERFORMANCE);
1499
1500 _CASSERT(IFRTYPE_FAMILY_ANY == IFNET_FAMILY_ANY);
1501 _CASSERT(IFRTYPE_FAMILY_LOOPBACK == IFNET_FAMILY_LOOPBACK);
1502 _CASSERT(IFRTYPE_FAMILY_ETHERNET == IFNET_FAMILY_ETHERNET);
1503 _CASSERT(IFRTYPE_FAMILY_SLIP == IFNET_FAMILY_SLIP);
1504 _CASSERT(IFRTYPE_FAMILY_TUN == IFNET_FAMILY_TUN);
1505 _CASSERT(IFRTYPE_FAMILY_VLAN == IFNET_FAMILY_VLAN);
1506 _CASSERT(IFRTYPE_FAMILY_PPP == IFNET_FAMILY_PPP);
1507 _CASSERT(IFRTYPE_FAMILY_PVC == IFNET_FAMILY_PVC);
1508 _CASSERT(IFRTYPE_FAMILY_DISC == IFNET_FAMILY_DISC);
1509 _CASSERT(IFRTYPE_FAMILY_MDECAP == IFNET_FAMILY_MDECAP);
1510 _CASSERT(IFRTYPE_FAMILY_GIF == IFNET_FAMILY_GIF);
1511 _CASSERT(IFRTYPE_FAMILY_FAITH == IFNET_FAMILY_FAITH);
1512 _CASSERT(IFRTYPE_FAMILY_STF == IFNET_FAMILY_STF);
1513 _CASSERT(IFRTYPE_FAMILY_FIREWIRE == IFNET_FAMILY_FIREWIRE);
1514 _CASSERT(IFRTYPE_FAMILY_BOND == IFNET_FAMILY_BOND);
1515 _CASSERT(IFRTYPE_FAMILY_CELLULAR == IFNET_FAMILY_CELLULAR);
1516
1517 _CASSERT(IFRTYPE_SUBFAMILY_ANY == IFNET_SUBFAMILY_ANY);
1518 _CASSERT(IFRTYPE_SUBFAMILY_USB == IFNET_SUBFAMILY_USB);
1519 _CASSERT(IFRTYPE_SUBFAMILY_BLUETOOTH == IFNET_SUBFAMILY_BLUETOOTH);
1520 _CASSERT(IFRTYPE_SUBFAMILY_WIFI == IFNET_SUBFAMILY_WIFI);
1521 _CASSERT(IFRTYPE_SUBFAMILY_THUNDERBOLT == IFNET_SUBFAMILY_THUNDERBOLT);
1522 _CASSERT(IFRTYPE_SUBFAMILY_RESERVED == IFNET_SUBFAMILY_RESERVED);
1523 _CASSERT(IFRTYPE_SUBFAMILY_INTCOPROC == IFNET_SUBFAMILY_INTCOPROC);
1524
1525 _CASSERT(DLIL_MODIDLEN == IFNET_MODIDLEN);
1526 _CASSERT(DLIL_MODARGLEN == IFNET_MODARGLEN);
1527
1528 PE_parse_boot_argn("net_affinity", &net_affinity,
1529 sizeof (net_affinity));
1530
1531 PE_parse_boot_argn("net_rxpoll", &net_rxpoll, sizeof (net_rxpoll));
1532
1533 PE_parse_boot_argn("net_rtref", &net_rtref, sizeof (net_rtref));
1534
1535 PE_parse_boot_argn("ifnet_debug", &ifnet_debug, sizeof (ifnet_debug));
1536
1537 dlif_size = (ifnet_debug == 0) ? sizeof (struct dlil_ifnet) :
1538 sizeof (struct dlil_ifnet_dbg);
1539 /* Enforce 64-bit alignment for dlil_ifnet structure */
1540 dlif_bufsize = dlif_size + sizeof (void *) + sizeof (u_int64_t);
1541 dlif_bufsize = P2ROUNDUP(dlif_bufsize, sizeof (u_int64_t));
1542 dlif_zone = zinit(dlif_bufsize, DLIF_ZONE_MAX * dlif_bufsize,
1543 0, DLIF_ZONE_NAME);
1544 if (dlif_zone == NULL) {
1545 panic_plain("%s: failed allocating %s", __func__,
1546 DLIF_ZONE_NAME);
1547 /* NOTREACHED */
1548 }
1549 zone_change(dlif_zone, Z_EXPAND, TRUE);
1550 zone_change(dlif_zone, Z_CALLERACCT, FALSE);
1551
1552 dlif_filt_size = sizeof (struct ifnet_filter);
1553 dlif_filt_zone = zinit(dlif_filt_size,
1554 DLIF_FILT_ZONE_MAX * dlif_filt_size, 0, DLIF_FILT_ZONE_NAME);
1555 if (dlif_filt_zone == NULL) {
1556 panic_plain("%s: failed allocating %s", __func__,
1557 DLIF_FILT_ZONE_NAME);
1558 /* NOTREACHED */
1559 }
1560 zone_change(dlif_filt_zone, Z_EXPAND, TRUE);
1561 zone_change(dlif_filt_zone, Z_CALLERACCT, FALSE);
1562
1563 dlif_phash_size = sizeof (struct proto_hash_entry) * PROTO_HASH_SLOTS;
1564 dlif_phash_zone = zinit(dlif_phash_size,
1565 DLIF_PHASH_ZONE_MAX * dlif_phash_size, 0, DLIF_PHASH_ZONE_NAME);
1566 if (dlif_phash_zone == NULL) {
1567 panic_plain("%s: failed allocating %s", __func__,
1568 DLIF_PHASH_ZONE_NAME);
1569 /* NOTREACHED */
1570 }
1571 zone_change(dlif_phash_zone, Z_EXPAND, TRUE);
1572 zone_change(dlif_phash_zone, Z_CALLERACCT, FALSE);
1573
1574 dlif_proto_size = sizeof (struct if_proto);
1575 dlif_proto_zone = zinit(dlif_proto_size,
1576 DLIF_PROTO_ZONE_MAX * dlif_proto_size, 0, DLIF_PROTO_ZONE_NAME);
1577 if (dlif_proto_zone == NULL) {
1578 panic_plain("%s: failed allocating %s", __func__,
1579 DLIF_PROTO_ZONE_NAME);
1580 /* NOTREACHED */
1581 }
1582 zone_change(dlif_proto_zone, Z_EXPAND, TRUE);
1583 zone_change(dlif_proto_zone, Z_CALLERACCT, FALSE);
1584
1585 dlif_tcpstat_size = sizeof (struct tcpstat_local);
1586 /* Enforce 64-bit alignment for tcpstat_local structure */
1587 dlif_tcpstat_bufsize =
1588 dlif_tcpstat_size + sizeof (void *) + sizeof (u_int64_t);
1589 dlif_tcpstat_bufsize =
1590 P2ROUNDUP(dlif_tcpstat_bufsize, sizeof (u_int64_t));
1591 dlif_tcpstat_zone = zinit(dlif_tcpstat_bufsize,
1592 DLIF_TCPSTAT_ZONE_MAX * dlif_tcpstat_bufsize, 0,
1593 DLIF_TCPSTAT_ZONE_NAME);
1594 if (dlif_tcpstat_zone == NULL) {
1595 panic_plain("%s: failed allocating %s", __func__,
1596 DLIF_TCPSTAT_ZONE_NAME);
1597 /* NOTREACHED */
1598 }
1599 zone_change(dlif_tcpstat_zone, Z_EXPAND, TRUE);
1600 zone_change(dlif_tcpstat_zone, Z_CALLERACCT, FALSE);
1601
1602 dlif_udpstat_size = sizeof (struct udpstat_local);
1603 /* Enforce 64-bit alignment for udpstat_local structure */
1604 dlif_udpstat_bufsize =
1605 dlif_udpstat_size + sizeof (void *) + sizeof (u_int64_t);
1606 dlif_udpstat_bufsize =
1607 P2ROUNDUP(dlif_udpstat_bufsize, sizeof (u_int64_t));
1608 dlif_udpstat_zone = zinit(dlif_udpstat_bufsize,
1609 DLIF_UDPSTAT_ZONE_MAX * dlif_udpstat_bufsize, 0,
1610 DLIF_UDPSTAT_ZONE_NAME);
1611 if (dlif_udpstat_zone == NULL) {
1612 panic_plain("%s: failed allocating %s", __func__,
1613 DLIF_UDPSTAT_ZONE_NAME);
1614 /* NOTREACHED */
1615 }
1616 zone_change(dlif_udpstat_zone, Z_EXPAND, TRUE);
1617 zone_change(dlif_udpstat_zone, Z_CALLERACCT, FALSE);
1618
1619 ifnet_llreach_init();
1620 eventhandler_lists_ctxt_init(&ifnet_evhdlr_ctxt);
1621
1622 TAILQ_INIT(&dlil_ifnet_head);
1623 TAILQ_INIT(&ifnet_head);
1624 TAILQ_INIT(&ifnet_detaching_head);
1625 TAILQ_INIT(&ifnet_ordered_head);
1626
1627 /* Setup the lock groups we will use */
1628 dlil_grp_attributes = lck_grp_attr_alloc_init();
1629
1630 dlil_lock_group = lck_grp_alloc_init("DLIL internal locks",
1631 dlil_grp_attributes);
1632 ifnet_lock_group = lck_grp_alloc_init("ifnet locks",
1633 dlil_grp_attributes);
1634 ifnet_head_lock_group = lck_grp_alloc_init("ifnet head lock",
1635 dlil_grp_attributes);
1636 ifnet_rcv_lock_group = lck_grp_alloc_init("ifnet rcv locks",
1637 dlil_grp_attributes);
1638 ifnet_snd_lock_group = lck_grp_alloc_init("ifnet snd locks",
1639 dlil_grp_attributes);
1640
1641 /* Setup the lock attributes we will use */
1642 dlil_lck_attributes = lck_attr_alloc_init();
1643
1644 ifnet_lock_attr = lck_attr_alloc_init();
1645
1646 lck_rw_init(&ifnet_head_lock, ifnet_head_lock_group,
1647 dlil_lck_attributes);
1648 lck_mtx_init(&dlil_ifnet_lock, dlil_lock_group, dlil_lck_attributes);
1649
1650 /* Setup interface flow control related items */
1651 lck_mtx_init(&ifnet_fc_lock, dlil_lock_group, dlil_lck_attributes);
1652
1653 ifnet_fc_zone_size = sizeof (struct ifnet_fc_entry);
1654 ifnet_fc_zone = zinit(ifnet_fc_zone_size,
1655 IFNET_FC_ZONE_MAX * ifnet_fc_zone_size, 0, IFNET_FC_ZONE_NAME);
1656 if (ifnet_fc_zone == NULL) {
1657 panic_plain("%s: failed allocating %s", __func__,
1658 IFNET_FC_ZONE_NAME);
1659 /* NOTREACHED */
1660 }
1661 zone_change(ifnet_fc_zone, Z_EXPAND, TRUE);
1662 zone_change(ifnet_fc_zone, Z_CALLERACCT, FALSE);
1663
1664 /* Initialize interface address subsystem */
1665 ifa_init();
1666
1667 #if PF
1668 /* Initialize the packet filter */
1669 pfinit();
1670 #endif /* PF */
1671
1672 /* Initialize queue algorithms */
1673 classq_init();
1674
1675 /* Initialize packet schedulers */
1676 pktsched_init();
1677
1678 /* Initialize flow advisory subsystem */
1679 flowadv_init();
1680
1681 /* Initialize the pktap virtual interface */
1682 pktap_init();
1683
1684 /* Initialize the service class to dscp map */
1685 net_qos_map_init();
1686
1687 #if DEBUG || DEVELOPMENT
1688 /* Run self-tests */
1689 dlil_verify_sum16();
1690 #endif /* DEBUG || DEVELOPMENT */
1691
1692 /* Initialize link layer table */
1693 lltable_glbl_init();
1694
1695 /*
1696 * Create and start up the main DLIL input thread and the interface
1697 * detacher threads once everything is initialized.
1698 */
1699 dlil_create_input_thread(NULL, dlil_main_input_thread);
1700
1701 if (kernel_thread_start(ifnet_detacher_thread_func,
1702 NULL, &thread) != KERN_SUCCESS) {
1703 panic_plain("%s: couldn't create detacher thread", __func__);
1704 /* NOTREACHED */
1705 }
1706 thread_deallocate(thread);
1707
1708 }
1709
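/*
 * Descriptive note (added): filter list monitor.  if_flt_monitor_enter()
 * waits for ifp->if_flt_head to become idle and then marks it busy;
 * if_flt_monitor_busy() marks it busy without waiting; if_flt_monitor_leave()
 * (and its alias if_flt_monitor_unbusy()) drops the busy count and wakes any
 * waiters.  All of these expect the caller to hold if_flt_lock, which may be
 * dropped and reacquired between enter and leave.
 */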
1710 static void
1711 if_flt_monitor_busy(struct ifnet *ifp)
1712 {
1713 LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
1714
1715 ++ifp->if_flt_busy;
1716 VERIFY(ifp->if_flt_busy != 0);
1717 }
1718
1719 static void
1720 if_flt_monitor_unbusy(struct ifnet *ifp)
1721 {
1722 if_flt_monitor_leave(ifp);
1723 }
1724
1725 static void
1726 if_flt_monitor_enter(struct ifnet *ifp)
1727 {
1728 LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
1729
1730 while (ifp->if_flt_busy) {
1731 ++ifp->if_flt_waiters;
1732 (void) msleep(&ifp->if_flt_head, &ifp->if_flt_lock,
1733 (PZERO - 1), "if_flt_monitor", NULL);
1734 }
1735 if_flt_monitor_busy(ifp);
1736 }
1737
1738 static void
1739 if_flt_monitor_leave(struct ifnet *ifp)
1740 {
1741 LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
1742
1743 VERIFY(ifp->if_flt_busy != 0);
1744 --ifp->if_flt_busy;
1745
1746 if (ifp->if_flt_busy == 0 && ifp->if_flt_waiters > 0) {
1747 ifp->if_flt_waiters = 0;
1748 wakeup(&ifp->if_flt_head);
1749 }
1750 }
1751
1752 __private_extern__ int
1753 dlil_attach_filter(struct ifnet *ifp, const struct iff_filter *if_filter,
1754 interface_filter_t *filter_ref, u_int32_t flags)
1755 {
1756 int retval = 0;
1757 struct ifnet_filter *filter = NULL;
1758
1759 ifnet_head_lock_shared();
1760 /* Check that the interface is in the global list */
1761 if (!ifnet_lookup(ifp)) {
1762 retval = ENXIO;
1763 goto done;
1764 }
1765
1766 filter = zalloc(dlif_filt_zone);
1767 if (filter == NULL) {
1768 retval = ENOMEM;
1769 goto done;
1770 }
1771 bzero(filter, dlif_filt_size);
1772
1773 /* refcnt held above during lookup */
1774 filter->filt_flags = flags;
1775 filter->filt_ifp = ifp;
1776 filter->filt_cookie = if_filter->iff_cookie;
1777 filter->filt_name = if_filter->iff_name;
1778 filter->filt_protocol = if_filter->iff_protocol;
1779 /*
1780 * Do not install filter callbacks for internal coproc interface
1781 */
1782 if (!IFNET_IS_INTCOPROC(ifp)) {
1783 filter->filt_input = if_filter->iff_input;
1784 filter->filt_output = if_filter->iff_output;
1785 filter->filt_event = if_filter->iff_event;
1786 filter->filt_ioctl = if_filter->iff_ioctl;
1787 }
1788 filter->filt_detached = if_filter->iff_detached;
1789
1790 lck_mtx_lock(&ifp->if_flt_lock);
1791 if_flt_monitor_enter(ifp);
1792
1793 LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
1794 TAILQ_INSERT_TAIL(&ifp->if_flt_head, filter, filt_next);
1795
1796 if_flt_monitor_leave(ifp);
1797 lck_mtx_unlock(&ifp->if_flt_lock);
1798
1799 *filter_ref = filter;
1800
1801 /*
1802 * Bump filter count and route_generation ID to let TCP
1803 * know it shouldn't do TSO on this connection
1804 */
1805 if ((filter->filt_flags & DLIL_IFF_TSO) == 0) {
1806 OSAddAtomic(1, &dlil_filter_disable_tso_count);
1807 routegenid_update();
1808 }
1809 OSIncrementAtomic64(&net_api_stats.nas_iflt_attach_count);
1810 INC_ATOMIC_INT64_LIM(net_api_stats.nas_iflt_attach_total);
1811 if ((filter->filt_flags & DLIL_IFF_INTERNAL)) {
1812 INC_ATOMIC_INT64_LIM(net_api_stats.nas_iflt_attach_os_total);
1813 }
1814 if (dlil_verbose) {
1815 printf("%s: %s filter attached\n", if_name(ifp),
1816 if_filter->iff_name);
1817 }
1818 done:
1819 ifnet_head_done();
1820 if (retval != 0 && ifp != NULL) {
1821 DLIL_PRINTF("%s: failed to attach %s (err=%d)\n",
1822 if_name(ifp), if_filter->iff_name, retval);
1823 }
1824 if (retval != 0 && filter != NULL)
1825 zfree(dlif_filt_zone, filter);
1826
1827 return (retval);
1828 }
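/*
 * Illustrative sketch (not part of the original source): how a caller might
 * attach and later detach a filter through this path.  The callback names
 * and the cookie value below are hypothetical.
 *
 *	static errno_t my_filt_input(void *cookie, ifnet_t ifp,
 *	    protocol_family_t proto, mbuf_t *data, char **frame_ptr);
 *	static void my_filt_detached(void *cookie, ifnet_t ifp);
 *
 *	struct iff_filter flt = {
 *		.iff_cookie	= my_cookie,		// hypothetical
 *		.iff_name	= "com.example.myfilter",
 *		.iff_protocol	= 0,			// all protocols
 *		.iff_input	= my_filt_input,
 *		.iff_detached	= my_filt_detached,
 *	};
 *	interface_filter_t ref;
 *	errno_t err = dlil_attach_filter(ifp, &flt, &ref, DLIL_IFF_TSO);
 *	...
 *	dlil_detach_filter(ref);	// eventually invokes my_filt_detached
 */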
1829
1830 static int
1831 dlil_detach_filter_internal(interface_filter_t filter, int detached)
1832 {
1833 int retval = 0;
1834
1835 if (detached == 0) {
1836 ifnet_t ifp = NULL;
1837
1838 ifnet_head_lock_shared();
1839 TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
1840 interface_filter_t entry = NULL;
1841
1842 lck_mtx_lock(&ifp->if_flt_lock);
1843 TAILQ_FOREACH(entry, &ifp->if_flt_head, filt_next) {
1844 if (entry != filter || entry->filt_skip)
1845 continue;
1846 /*
1847 * We've found a match; since it's possible
1848 * that the thread gets blocked in the monitor,
1849 * we do the lock dance. Interface should
1850 * not be detached since we still have a use
1851 * count held during filter attach.
1852 */
1853 entry->filt_skip = 1; /* skip input/output */
1854 lck_mtx_unlock(&ifp->if_flt_lock);
1855 ifnet_head_done();
1856
1857 lck_mtx_lock(&ifp->if_flt_lock);
1858 if_flt_monitor_enter(ifp);
1859 LCK_MTX_ASSERT(&ifp->if_flt_lock,
1860 LCK_MTX_ASSERT_OWNED);
1861
1862 /* Remove the filter from the list */
1863 TAILQ_REMOVE(&ifp->if_flt_head, filter,
1864 filt_next);
1865
1866 if_flt_monitor_leave(ifp);
1867 lck_mtx_unlock(&ifp->if_flt_lock);
1868 if (dlil_verbose) {
1869 printf("%s: %s filter detached\n",
1870 if_name(ifp), filter->filt_name);
1871 }
1872 goto destroy;
1873 }
1874 lck_mtx_unlock(&ifp->if_flt_lock);
1875 }
1876 ifnet_head_done();
1877
1878 /* filter parameter is not a valid filter ref */
1879 retval = EINVAL;
1880 goto done;
1881 }
1882
1883 if (dlil_verbose)
1884 printf("%s filter detached\n", filter->filt_name);
1885
1886 destroy:
1887
1888 /* Call the detached function if there is one */
1889 if (filter->filt_detached)
1890 filter->filt_detached(filter->filt_cookie, filter->filt_ifp);
1891
1892 /*
1893 * Decrease filter count and bump route_generation ID to let TCP
1894 * know it should reevaluate doing TSO or not
1895 */
1896 if ((filter->filt_flags & DLIL_IFF_TSO) == 0) {
1897 OSAddAtomic(-1, &dlil_filter_disable_tso_count);
1898 routegenid_update();
1899 }
1900
1901 VERIFY(OSDecrementAtomic64(&net_api_stats.nas_iflt_attach_count) > 0);
1902
1903 /* Free the filter */
1904 zfree(dlif_filt_zone, filter);
1905 filter = NULL;
1906 done:
1907 if (retval != 0 && filter != NULL) {
1908 DLIL_PRINTF("failed to detach %s filter (err=%d)\n",
1909 filter->filt_name, retval);
1910 }
1911
1912 return (retval);
1913 }
1914
1915 __private_extern__ void
1916 dlil_detach_filter(interface_filter_t filter)
1917 {
1918 if (filter == NULL)
1919 return;
1920 dlil_detach_filter_internal(filter, 0);
1921 }
1922
1923 /*
1924 * Main input thread:
1925 *
1926 * a) handles all inbound packets for lo0
1927 * b) handles all inbound packets for interfaces with no dedicated
1928 * input thread (e.g. anything but Ethernet/PDP or those that support
1929 * opportunistic polling.)
1930 * c) protocol registrations
1931 * d) packet injections
1932 */
1933 __attribute__((noreturn))
1934 static void
1935 dlil_main_input_thread_func(void *v, wait_result_t w)
1936 {
1937 #pragma unused(w)
1938 struct dlil_main_threading_info *inpm = v;
1939 struct dlil_threading_info *inp = v;
1940
1941 VERIFY(inp == dlil_main_input_thread);
1942 VERIFY(inp->ifp == NULL);
1943 VERIFY(inp->mode == IFNET_MODEL_INPUT_POLL_OFF);
1944
1945 while (1) {
1946 struct mbuf *m = NULL, *m_loop = NULL;
1947 u_int32_t m_cnt, m_cnt_loop;
1948 boolean_t proto_req;
1949
1950 lck_mtx_lock_spin(&inp->input_lck);
1951
1952 /* Wait until there is work to be done */
1953 while (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) {
1954 inp->input_waiting &= ~DLIL_INPUT_RUNNING;
1955 (void) msleep(&inp->input_waiting, &inp->input_lck,
1956 (PZERO - 1) | PSPIN, inp->input_name, NULL);
1957 }
1958
1959 inp->input_waiting |= DLIL_INPUT_RUNNING;
1960 inp->input_waiting &= ~DLIL_INPUT_WAITING;
1961
1962 /* Main input thread cannot be terminated */
1963 VERIFY(!(inp->input_waiting & DLIL_INPUT_TERMINATE));
1964
1965 proto_req = (inp->input_waiting &
1966 (DLIL_PROTO_WAITING | DLIL_PROTO_REGISTER));
1967
1968 /* Packets for non-dedicated interfaces other than lo0 */
1969 m_cnt = qlen(&inp->rcvq_pkts);
1970 m = _getq_all(&inp->rcvq_pkts, NULL, NULL, NULL);
1971
1972 /* Packets exclusive to lo0 */
1973 m_cnt_loop = qlen(&inpm->lo_rcvq_pkts);
1974 m_loop = _getq_all(&inpm->lo_rcvq_pkts, NULL, NULL, NULL);
1975
1976 inp->wtot = 0;
1977
1978 lck_mtx_unlock(&inp->input_lck);
1979
1980 /*
1981 * NOTE warning %%% attention !!!!
1982 * We should think about putting some thread starvation
1983 * safeguards if we deal with long chains of packets.
1984 */
1985 if (m_loop != NULL)
1986 dlil_input_packet_list_extended(lo_ifp, m_loop,
1987 m_cnt_loop, inp->mode);
1988
1989 if (m != NULL)
1990 dlil_input_packet_list_extended(NULL, m,
1991 m_cnt, inp->mode);
1992
1993 if (proto_req)
1994 proto_input_run();
1995 }
1996
1997 /* NOTREACHED */
1998 VERIFY(0); /* we should never get here */
1999 }
2000
2001 /*
2002 * Input thread for interfaces with legacy input model.
2003 */
2004 static void
2005 dlil_input_thread_func(void *v, wait_result_t w)
2006 {
2007 #pragma unused(w)
2008 char thread_name[MAXTHREADNAMESIZE];
2009 struct dlil_threading_info *inp = v;
2010 struct ifnet *ifp = inp->ifp;
2011
2012 /* Construct the name for this thread, and then apply it. */
2013 bzero(thread_name, sizeof(thread_name));
2014 snprintf(thread_name, sizeof(thread_name), "dlil_input_%s", ifp->if_xname);
2015 thread_set_thread_name(inp->input_thr, thread_name);
2016
2017 VERIFY(inp != dlil_main_input_thread);
2018 VERIFY(ifp != NULL);
2019 VERIFY(!(ifp->if_eflags & IFEF_RXPOLL) || !net_rxpoll);
2020 VERIFY(inp->mode == IFNET_MODEL_INPUT_POLL_OFF);
2021
2022 while (1) {
2023 struct mbuf *m = NULL;
2024 u_int32_t m_cnt;
2025
2026 lck_mtx_lock_spin(&inp->input_lck);
2027
2028 /* Wait until there is work to be done */
2029 while (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) {
2030 inp->input_waiting &= ~DLIL_INPUT_RUNNING;
2031 (void) msleep(&inp->input_waiting, &inp->input_lck,
2032 (PZERO - 1) | PSPIN, inp->input_name, NULL);
2033 }
2034
2035 inp->input_waiting |= DLIL_INPUT_RUNNING;
2036 inp->input_waiting &= ~DLIL_INPUT_WAITING;
2037
2038 /*
2039 * Protocol registration and injection must always use
2040 * the main input thread; in theory the latter can utilize
2041 * the corresponding input thread where the packet arrived
2042 * on, but that requires our knowing the interface in advance
2043 * (and the benefits might not be worth the trouble.)
2044 */
2045 VERIFY(!(inp->input_waiting &
2046 (DLIL_PROTO_WAITING|DLIL_PROTO_REGISTER)));
2047
2048 /* Packets for this interface */
2049 m_cnt = qlen(&inp->rcvq_pkts);
2050 m = _getq_all(&inp->rcvq_pkts, NULL, NULL, NULL);
2051
2052 if (inp->input_waiting & DLIL_INPUT_TERMINATE) {
2053 lck_mtx_unlock(&inp->input_lck);
2054
2055 /* Free up pending packets */
2056 if (m != NULL)
2057 mbuf_freem_list(m);
2058
2059 dlil_terminate_input_thread(inp);
2060 /* NOTREACHED */
2061 return;
2062 }
2063
2064 inp->wtot = 0;
2065
2066 dlil_input_stats_sync(ifp, inp);
2067
2068 lck_mtx_unlock(&inp->input_lck);
2069
2070 /*
2071 * NOTE warning %%% attention !!!!
2072 * We should think about putting some thread starvation
2073 * safeguards if we deal with long chains of packets.
2074 */
2075 if (m != NULL)
2076 dlil_input_packet_list_extended(NULL, m,
2077 m_cnt, inp->mode);
2078 }
2079
2080 /* NOTREACHED */
2081 VERIFY(0); /* we should never get here */
2082 }
2083
2084 /*
2085 * Input thread for interfaces with opportunistic polling input model.
2086 */
2087 static void
2088 dlil_rxpoll_input_thread_func(void *v, wait_result_t w)
2089 {
2090 #pragma unused(w)
2091 struct dlil_threading_info *inp = v;
2092 struct ifnet *ifp = inp->ifp;
2093 struct timespec ts;
2094
2095 VERIFY(inp != dlil_main_input_thread);
2096 VERIFY(ifp != NULL && (ifp->if_eflags & IFEF_RXPOLL));
2097
2098 while (1) {
2099 struct mbuf *m = NULL;
2100 u_int32_t m_cnt, m_size, poll_req = 0;
2101 ifnet_model_t mode;
2102 struct timespec now, delta;
2103 u_int64_t ival;
2104
2105 lck_mtx_lock_spin(&inp->input_lck);
2106
2107 if ((ival = inp->rxpoll_ival) < IF_RXPOLL_INTERVALTIME_MIN)
2108 ival = IF_RXPOLL_INTERVALTIME_MIN;
2109
2110 /* Link parameters changed? */
2111 if (ifp->if_poll_update != 0) {
2112 ifp->if_poll_update = 0;
2113 (void) dlil_rxpoll_set_params(ifp, NULL, TRUE);
2114 }
2115
2116 /* Current operating mode */
2117 mode = inp->mode;
2118
2119 /* Wait until there is work to be done */
2120 while (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) {
2121 inp->input_waiting &= ~DLIL_INPUT_RUNNING;
2122 (void) msleep(&inp->input_waiting, &inp->input_lck,
2123 (PZERO - 1) | PSPIN, inp->input_name, NULL);
2124 }
2125
2126 inp->input_waiting |= DLIL_INPUT_RUNNING;
2127 inp->input_waiting &= ~DLIL_INPUT_WAITING;
2128
2129 /*
2130 * Protocol registration and injection must always use
2131 * the main input thread; in theory the latter can utilize
2132 * the corresponding input thread where the packet arrived
2133 * on, but that requires our knowing the interface in advance
2134 * (and the benefits might not be worth the trouble.)
2135 */
2136 VERIFY(!(inp->input_waiting &
2137 (DLIL_PROTO_WAITING|DLIL_PROTO_REGISTER)));
2138
2139 if (inp->input_waiting & DLIL_INPUT_TERMINATE) {
2140 /* Free up pending packets */
2141 lck_mtx_convert_spin(&inp->input_lck);
2142 _flushq(&inp->rcvq_pkts);
2143 if (inp->input_mit_tcall != NULL) {
2144 if (thread_call_isactive(inp->input_mit_tcall))
2145 thread_call_cancel(inp->input_mit_tcall);
2146 }
2147 lck_mtx_unlock(&inp->input_lck);
2148
2149 dlil_terminate_input_thread(inp);
2150 /* NOTREACHED */
2151 return;
2152 }
2153
2154 /* Total count of all packets */
2155 m_cnt = qlen(&inp->rcvq_pkts);
2156
2157 /* Total bytes of all packets */
2158 m_size = qsize(&inp->rcvq_pkts);
2159
2160 /* Packets for this interface */
2161 m = _getq_all(&inp->rcvq_pkts, NULL, NULL, NULL);
2162 VERIFY(m != NULL || m_cnt == 0);
2163
2164 nanouptime(&now);
2165 if (!net_timerisset(&inp->sample_lasttime))
2166 *(&inp->sample_lasttime) = *(&now);
2167
2168 net_timersub(&now, &inp->sample_lasttime, &delta);
2169 if (if_rxpoll && net_timerisset(&inp->sample_holdtime)) {
2170 u_int32_t ptot, btot;
2171
2172 /* Accumulate statistics for current sampling */
2173 PKTCNTR_ADD(&inp->sstats, m_cnt, m_size);
2174
2175 if (net_timercmp(&delta, &inp->sample_holdtime, <))
2176 goto skip;
2177
2178 *(&inp->sample_lasttime) = *(&now);
2179
2180 /* Calculate min/max of inbound bytes */
2181 btot = (u_int32_t)inp->sstats.bytes;
2182 if (inp->rxpoll_bmin == 0 || inp->rxpoll_bmin > btot)
2183 inp->rxpoll_bmin = btot;
2184 if (btot > inp->rxpoll_bmax)
2185 inp->rxpoll_bmax = btot;
2186
2187 /* Calculate EWMA of inbound bytes */
2188 DLIL_EWMA(inp->rxpoll_bavg, btot, if_rxpoll_decay);
2189
2190 /* Calculate min/max of inbound packets */
2191 ptot = (u_int32_t)inp->sstats.packets;
2192 if (inp->rxpoll_pmin == 0 || inp->rxpoll_pmin > ptot)
2193 inp->rxpoll_pmin = ptot;
2194 if (ptot > inp->rxpoll_pmax)
2195 inp->rxpoll_pmax = ptot;
2196
2197 /* Calculate EWMA of inbound packets */
2198 DLIL_EWMA(inp->rxpoll_pavg, ptot, if_rxpoll_decay);
2199
2200 /* Reset sampling statistics */
2201 PKTCNTR_CLEAR(&inp->sstats);
2202
2203 /* Calculate EWMA of wakeup requests */
2204 DLIL_EWMA(inp->rxpoll_wavg, inp->wtot, if_rxpoll_decay);
2205 inp->wtot = 0;
2206
2207 if (dlil_verbose) {
2208 if (!net_timerisset(&inp->dbg_lasttime))
2209 *(&inp->dbg_lasttime) = *(&now);
2210 net_timersub(&now, &inp->dbg_lasttime, &delta);
2211 if (net_timercmp(&delta, &dlil_dbgrate, >=)) {
2212 *(&inp->dbg_lasttime) = *(&now);
2213 printf("%s: [%s] pkts avg %d max %d "
2214 "limits [%d/%d], wreq avg %d "
2215 "limits [%d/%d], bytes avg %d "
2216 "limits [%d/%d]\n", if_name(ifp),
2217 (inp->mode ==
2218 IFNET_MODEL_INPUT_POLL_ON) ?
2219 "ON" : "OFF", inp->rxpoll_pavg,
2220 inp->rxpoll_pmax,
2221 inp->rxpoll_plowat,
2222 inp->rxpoll_phiwat,
2223 inp->rxpoll_wavg,
2224 inp->rxpoll_wlowat,
2225 inp->rxpoll_whiwat,
2226 inp->rxpoll_bavg,
2227 inp->rxpoll_blowat,
2228 inp->rxpoll_bhiwat);
2229 }
2230 }
2231
2232 /* Perform mode transition, if necessary */
2233 if (!net_timerisset(&inp->mode_lasttime))
2234 *(&inp->mode_lasttime) = *(&now);
2235
2236 net_timersub(&now, &inp->mode_lasttime, &delta);
2237 if (net_timercmp(&delta, &inp->mode_holdtime, <))
2238 goto skip;
2239
2240 if (inp->rxpoll_pavg <= inp->rxpoll_plowat &&
2241 inp->rxpoll_bavg <= inp->rxpoll_blowat &&
2242 inp->mode != IFNET_MODEL_INPUT_POLL_OFF) {
2243 mode = IFNET_MODEL_INPUT_POLL_OFF;
2244 } else if (inp->rxpoll_pavg >= inp->rxpoll_phiwat &&
2245 (inp->rxpoll_bavg >= inp->rxpoll_bhiwat ||
2246 inp->rxpoll_wavg >= inp->rxpoll_whiwat) &&
2247 inp->mode != IFNET_MODEL_INPUT_POLL_ON) {
2248 mode = IFNET_MODEL_INPUT_POLL_ON;
2249 }
2250
2251 if (mode != inp->mode) {
2252 inp->mode = mode;
2253 *(&inp->mode_lasttime) = *(&now);
2254 poll_req++;
2255 }
2256 }
2257 skip:
2258 dlil_input_stats_sync(ifp, inp);
2259
2260 lck_mtx_unlock(&inp->input_lck);
2261
2262 /*
2263 * If there's a mode change and interface is still attached,
2264 * perform a downcall to the driver for the new mode. Also
2265 * hold an IO refcnt on the interface to prevent it from
2266 * being detached (will be released below.)
2267 */
2268 if (poll_req != 0 && ifnet_is_attached(ifp, 1)) {
2269 struct ifnet_model_params p = { mode, { 0 } };
2270 errno_t err;
2271
2272 if (dlil_verbose) {
2273 printf("%s: polling is now %s, "
2274 "pkts avg %d max %d limits [%d/%d], "
2275 "wreq avg %d limits [%d/%d], "
2276 "bytes avg %d limits [%d/%d]\n",
2277 if_name(ifp),
2278 (mode == IFNET_MODEL_INPUT_POLL_ON) ?
2279 "ON" : "OFF", inp->rxpoll_pavg,
2280 inp->rxpoll_pmax, inp->rxpoll_plowat,
2281 inp->rxpoll_phiwat, inp->rxpoll_wavg,
2282 inp->rxpoll_wlowat, inp->rxpoll_whiwat,
2283 inp->rxpoll_bavg, inp->rxpoll_blowat,
2284 inp->rxpoll_bhiwat);
2285 }
2286
2287 if ((err = ((*ifp->if_input_ctl)(ifp,
2288 IFNET_CTL_SET_INPUT_MODEL, sizeof (p), &p))) != 0) {
2289 printf("%s: error setting polling mode "
2290 "to %s (%d)\n", if_name(ifp),
2291 (mode == IFNET_MODEL_INPUT_POLL_ON) ?
2292 "ON" : "OFF", err);
2293 }
2294
2295 switch (mode) {
2296 case IFNET_MODEL_INPUT_POLL_OFF:
2297 ifnet_set_poll_cycle(ifp, NULL);
2298 inp->rxpoll_offreq++;
2299 if (err != 0)
2300 inp->rxpoll_offerr++;
2301 break;
2302
2303 case IFNET_MODEL_INPUT_POLL_ON:
2304 net_nsectimer(&ival, &ts);
2305 ifnet_set_poll_cycle(ifp, &ts);
2306 ifnet_poll(ifp);
2307 inp->rxpoll_onreq++;
2308 if (err != 0)
2309 inp->rxpoll_onerr++;
2310 break;
2311
2312 default:
2313 VERIFY(0);
2314 /* NOTREACHED */
2315 }
2316
2317 /* Release the IO refcnt */
2318 ifnet_decr_iorefcnt(ifp);
2319 }
2320
2321 /*
2322 * NOTE warning %%% attention !!!!
2323 * We should think about putting some thread starvation
2324 * safeguards if we deal with long chains of packets.
2325 */
2326 if (m != NULL)
2327 dlil_input_packet_list_extended(NULL, m, m_cnt, mode);
2328 }
2329
2330 /* NOTREACHED */
2331 VERIFY(0); /* we should never get here */
2332 }
2333
2334 /*
2335 * Must be called on an attached ifnet (caller is expected to check.)
2336 * Caller may pass NULL for poll parameters to indicate "auto-tuning."
2337 */
2338 errno_t
2339 dlil_rxpoll_set_params(struct ifnet *ifp, struct ifnet_poll_params *p,
2340 boolean_t locked)
2341 {
2342 struct dlil_threading_info *inp;
2343 u_int64_t sample_holdtime, inbw;
2344
2345 VERIFY(ifp != NULL);
2346 if (!(ifp->if_eflags & IFEF_RXPOLL) || (inp = ifp->if_inp) == NULL)
2347 return (ENXIO);
2348
2349 if (p != NULL) {
2350 if ((p->packets_lowat == 0 && p->packets_hiwat != 0) ||
2351 (p->packets_lowat != 0 && p->packets_hiwat == 0))
2352 return (EINVAL);
2353 if (p->packets_lowat != 0 && /* hiwat must be non-zero */
2354 p->packets_lowat >= p->packets_hiwat)
2355 return (EINVAL);
2356 if ((p->bytes_lowat == 0 && p->bytes_hiwat != 0) ||
2357 (p->bytes_lowat != 0 && p->bytes_hiwat == 0))
2358 return (EINVAL);
2359 if (p->bytes_lowat != 0 && /* hiwat must be non-zero */
2360 p->bytes_lowat >= p->bytes_hiwat)
2361 return (EINVAL);
2362 if (p->interval_time != 0 &&
2363 p->interval_time < IF_RXPOLL_INTERVALTIME_MIN)
2364 p->interval_time = IF_RXPOLL_INTERVALTIME_MIN;
2365 }
2366
2367 if (!locked)
2368 lck_mtx_lock(&inp->input_lck);
2369
2370 LCK_MTX_ASSERT(&inp->input_lck, LCK_MTX_ASSERT_OWNED);
2371
2372 /*
2373 * Normally, we'd reset the parameters to the auto-tuned values
2374 * if the input thread detects a change in link rate. If the
2375 * driver provides its own parameters right after a link rate
2376 * changes, but before the input thread gets to run, we want to
2377 * make sure to keep the driver's values. Clearing if_poll_update
2378 * will achieve that.
2379 */
2380 if (p != NULL && !locked && ifp->if_poll_update != 0)
2381 ifp->if_poll_update = 0;
2382
2383 if ((inbw = ifnet_input_linkrate(ifp)) == 0 && p == NULL) {
2384 sample_holdtime = 0; /* polling is disabled */
2385 inp->rxpoll_wlowat = inp->rxpoll_plowat =
2386 inp->rxpoll_blowat = 0;
2387 inp->rxpoll_whiwat = inp->rxpoll_phiwat =
2388 inp->rxpoll_bhiwat = (u_int32_t)-1;
2389 inp->rxpoll_plim = 0;
2390 inp->rxpoll_ival = IF_RXPOLL_INTERVALTIME_MIN;
2391 } else {
2392 u_int32_t plowat, phiwat, blowat, bhiwat, plim;
2393 u_int64_t ival;
2394 unsigned int n, i;
2395
2396 for (n = 0, i = 0; rxpoll_tbl[i].speed != 0; i++) {
2397 if (inbw < rxpoll_tbl[i].speed)
2398 break;
2399 n = i;
2400 }
2401 /* auto-tune if caller didn't specify a value */
2402 plowat = ((p == NULL || p->packets_lowat == 0) ?
2403 rxpoll_tbl[n].plowat : p->packets_lowat);
2404 phiwat = ((p == NULL || p->packets_hiwat == 0) ?
2405 rxpoll_tbl[n].phiwat : p->packets_hiwat);
2406 blowat = ((p == NULL || p->bytes_lowat == 0) ?
2407 rxpoll_tbl[n].blowat : p->bytes_lowat);
2408 bhiwat = ((p == NULL || p->bytes_hiwat == 0) ?
2409 rxpoll_tbl[n].bhiwat : p->bytes_hiwat);
2410 plim = ((p == NULL || p->packets_limit == 0) ?
2411 if_rxpoll_max : p->packets_limit);
2412 ival = ((p == NULL || p->interval_time == 0) ?
2413 if_rxpoll_interval_time : p->interval_time);
2414
2415 VERIFY(plowat != 0 && phiwat != 0);
2416 VERIFY(blowat != 0 && bhiwat != 0);
2417 VERIFY(ival >= IF_RXPOLL_INTERVALTIME_MIN);
2418
2419 sample_holdtime = if_rxpoll_sample_holdtime;
2420 inp->rxpoll_wlowat = if_rxpoll_wlowat;
2421 inp->rxpoll_whiwat = if_rxpoll_whiwat;
2422 inp->rxpoll_plowat = plowat;
2423 inp->rxpoll_phiwat = phiwat;
2424 inp->rxpoll_blowat = blowat;
2425 inp->rxpoll_bhiwat = bhiwat;
2426 inp->rxpoll_plim = plim;
2427 inp->rxpoll_ival = ival;
2428 }
2429
2430 net_nsectimer(&if_rxpoll_mode_holdtime, &inp->mode_holdtime);
2431 net_nsectimer(&sample_holdtime, &inp->sample_holdtime);
2432
2433 if (dlil_verbose) {
2434 printf("%s: speed %llu bps, sample per %llu nsec, "
2435 "poll interval %llu nsec, pkts per poll %u, "
2436 "pkt limits [%u/%u], wreq limits [%u/%u], "
2437 "bytes limits [%u/%u]\n", if_name(ifp),
2438 inbw, sample_holdtime, inp->rxpoll_ival, inp->rxpoll_plim,
2439 inp->rxpoll_plowat, inp->rxpoll_phiwat, inp->rxpoll_wlowat,
2440 inp->rxpoll_whiwat, inp->rxpoll_blowat, inp->rxpoll_bhiwat);
2441 }
2442
2443 if (!locked)
2444 lck_mtx_unlock(&inp->input_lck);
2445
2446 return (0);
2447 }
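/*
 * Illustrative sketch (not in the original source): a driver that wants
 * explicit thresholds instead of auto-tuning might call, with hypothetical
 * values:
 *
 *	struct ifnet_poll_params p = {
 *		.packets_lowat	= 8,
 *		.packets_hiwat	= 64,
 *		.bytes_lowat	= 2 * 1024,
 *		.bytes_hiwat	= 64 * 1024,
 *		.packets_limit	= 0,	// 0 means use if_rxpoll_max
 *		.interval_time	= 0,	// 0 means use the global default
 *	};
 *	errno_t err = dlil_rxpoll_set_params(ifp, &p, FALSE);
 *
 * Passing p == NULL reverts to the auto-tuned values derived from
 * rxpoll_tbl[] and the current input link rate (or disables polling when
 * the link rate is unknown).
 */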
2448
2449 /*
2450 * Must be called on an attached ifnet (caller is expected to check.)
2451 */
2452 errno_t
2453 dlil_rxpoll_get_params(struct ifnet *ifp, struct ifnet_poll_params *p)
2454 {
2455 struct dlil_threading_info *inp;
2456
2457 VERIFY(ifp != NULL && p != NULL);
2458 if (!(ifp->if_eflags & IFEF_RXPOLL) || (inp = ifp->if_inp) == NULL)
2459 return (ENXIO);
2460
2461 bzero(p, sizeof (*p));
2462
2463 lck_mtx_lock(&inp->input_lck);
2464 p->packets_limit = inp->rxpoll_plim;
2465 p->packets_lowat = inp->rxpoll_plowat;
2466 p->packets_hiwat = inp->rxpoll_phiwat;
2467 p->bytes_lowat = inp->rxpoll_blowat;
2468 p->bytes_hiwat = inp->rxpoll_bhiwat;
2469 p->interval_time = inp->rxpoll_ival;
2470 lck_mtx_unlock(&inp->input_lck);
2471
2472 return (0);
2473 }
2474
2475 errno_t
2476 ifnet_input(struct ifnet *ifp, struct mbuf *m_head,
2477 const struct ifnet_stat_increment_param *s)
2478 {
2479 return (ifnet_input_common(ifp, m_head, NULL, s, FALSE, FALSE));
2480 }
2481
2482 errno_t
2483 ifnet_input_extended(struct ifnet *ifp, struct mbuf *m_head,
2484 struct mbuf *m_tail, const struct ifnet_stat_increment_param *s)
2485 {
2486 return (ifnet_input_common(ifp, m_head, m_tail, s, TRUE, FALSE));
2487 }
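/*
 * Illustrative sketch (not in the original source): a typical driver receive
 * path hands a chain of packets up the stack with per-call statistics.  The
 * names rx_head, rx_tail, rx_cnt and rx_bytes are hypothetical driver-side
 * variables.
 *
 *	struct ifnet_stat_increment_param s;
 *	bzero(&s, sizeof (s));
 *	s.packets_in = rx_cnt;
 *	s.bytes_in = rx_bytes;
 *	(void) ifnet_input_extended(ifp, rx_head, rx_tail, &s);
 *
 * Drivers that don't track the tail pointer or the counts can call
 * ifnet_input(ifp, rx_head, NULL) instead and let ifnet_input_common()
 * walk the chain to compute them.
 */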
2488
2489 static errno_t
2490 ifnet_input_common(struct ifnet *ifp, struct mbuf *m_head, struct mbuf *m_tail,
2491 const struct ifnet_stat_increment_param *s, boolean_t ext, boolean_t poll)
2492 {
2493 dlil_input_func input_func;
2494 struct ifnet_stat_increment_param _s;
2495 u_int32_t m_cnt = 0, m_size = 0;
2496 struct mbuf *last;
2497 errno_t err = 0;
2498
2499 if ((m_head == NULL && !poll) || (s == NULL && ext)) {
2500 if (m_head != NULL)
2501 mbuf_freem_list(m_head);
2502 return (EINVAL);
2503 }
2504
2505 VERIFY(m_head != NULL || (s == NULL && m_tail == NULL && !ext && poll));
2506 VERIFY(m_tail == NULL || ext);
2507 VERIFY(s != NULL || !ext);
2508
2509 /*
2510 * Drop the packet(s) if the parameters are invalid, or if the
2511 * interface is no longer attached; else hold an IO refcnt to
2512 * prevent it from being detached (will be released below.)
2513 */
2514 if (ifp == NULL || (ifp != lo_ifp && !ifnet_is_attached(ifp, 1))) {
2515 if (m_head != NULL)
2516 mbuf_freem_list(m_head);
2517 return (EINVAL);
2518 }
2519
2520 input_func = ifp->if_input_dlil;
2521 VERIFY(input_func != NULL);
2522
2523 if (m_tail == NULL) {
2524 last = m_head;
2525 while (m_head != NULL) {
2526 #if IFNET_INPUT_SANITY_CHK
2527 if (dlil_input_sanity_check != 0)
2528 DLIL_INPUT_CHECK(last, ifp);
2529 #endif /* IFNET_INPUT_SANITY_CHK */
2530 m_cnt++;
2531 m_size += m_length(last);
2532 if (mbuf_nextpkt(last) == NULL)
2533 break;
2534 last = mbuf_nextpkt(last);
2535 }
2536 m_tail = last;
2537 } else {
2538 #if IFNET_INPUT_SANITY_CHK
2539 if (dlil_input_sanity_check != 0) {
2540 last = m_head;
2541 while (1) {
2542 DLIL_INPUT_CHECK(last, ifp);
2543 m_cnt++;
2544 m_size += m_length(last);
2545 if (mbuf_nextpkt(last) == NULL)
2546 break;
2547 last = mbuf_nextpkt(last);
2548 }
2549 } else {
2550 m_cnt = s->packets_in;
2551 m_size = s->bytes_in;
2552 last = m_tail;
2553 }
2554 #else
2555 m_cnt = s->packets_in;
2556 m_size = s->bytes_in;
2557 last = m_tail;
2558 #endif /* IFNET_INPUT_SANITY_CHK */
2559 }
2560
2561 if (last != m_tail) {
2562 panic_plain("%s: invalid input packet chain for %s, "
2563 "tail mbuf %p instead of %p\n", __func__, if_name(ifp),
2564 m_tail, last);
2565 }
2566
2567 /*
2568 * Assert packet count only for the extended variant, for backwards
2569 * compatibility, since this came directly from the device driver.
2570 * Relax this assertion for input bytes, as the driver may have
2571 * included the link-layer headers in the computation; hence
2572 * m_size is just an approximation.
2573 */
2574 if (ext && s->packets_in != m_cnt) {
2575 panic_plain("%s: input packet count mismatch for %s, "
2576 "%d instead of %d\n", __func__, if_name(ifp),
2577 s->packets_in, m_cnt);
2578 }
2579
2580 if (s == NULL) {
2581 bzero(&_s, sizeof (_s));
2582 s = &_s;
2583 } else {
2584 _s = *s;
2585 }
2586 _s.packets_in = m_cnt;
2587 _s.bytes_in = m_size;
2588
2589 err = (*input_func)(ifp, m_head, m_tail, s, poll, current_thread());
2590
2591 if (ifp != lo_ifp) {
2592 /* Release the IO refcnt */
2593 ifnet_decr_iorefcnt(ifp);
2594 }
2595
2596 return (err);
2597 }
2598
2599
2600 errno_t
2601 dlil_output_handler(struct ifnet *ifp, struct mbuf *m)
2602 {
2603 return (ifp->if_output(ifp, m));
2604 }
2605
2606 errno_t
2607 dlil_input_handler(struct ifnet *ifp, struct mbuf *m_head,
2608 struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
2609 boolean_t poll, struct thread *tp)
2610 {
2611 struct dlil_threading_info *inp;
2612 u_int32_t m_cnt = s->packets_in;
2613 u_int32_t m_size = s->bytes_in;
2614
2615 if ((inp = ifp->if_inp) == NULL)
2616 inp = dlil_main_input_thread;
2617
2618 /*
2619 * If there is a matching DLIL input thread associated with an
2620 * affinity set, associate this thread with the same set. We
2621 * will only do this once.
2622 */
2623 lck_mtx_lock_spin(&inp->input_lck);
2624 if (inp != dlil_main_input_thread && inp->net_affinity && tp != NULL &&
2625 ((!poll && inp->wloop_thr == THREAD_NULL) ||
2626 (poll && inp->poll_thr == THREAD_NULL))) {
2627 u_int32_t tag = inp->tag;
2628
2629 if (poll) {
2630 VERIFY(inp->poll_thr == THREAD_NULL);
2631 inp->poll_thr = tp;
2632 } else {
2633 VERIFY(inp->wloop_thr == THREAD_NULL);
2634 inp->wloop_thr = tp;
2635 }
2636 lck_mtx_unlock(&inp->input_lck);
2637
2638 /* Associate the current thread with the new affinity tag */
2639 (void) dlil_affinity_set(tp, tag);
2640
2641 /*
2642 * Take a reference on the current thread; during detach,
2643 * we will need to refer to it in order to tear down its
2644 * affinity.
2645 */
2646 thread_reference(tp);
2647 lck_mtx_lock_spin(&inp->input_lck);
2648 }
2649
2650 VERIFY(m_head != NULL || (m_tail == NULL && m_cnt == 0));
2651
2652 /*
2653 * Because of loopbacked multicast we cannot stuff the ifp in
2654 * the rcvif of the packet header: loopback (lo0) packets use a
2655 * dedicated list so that we can later associate them with lo_ifp
2656 * on their way up the stack. Packets for other interfaces without
2657 * dedicated input threads go to the regular list.
2658 */
2659 if (m_head != NULL) {
2660 if (inp == dlil_main_input_thread && ifp == lo_ifp) {
2661 struct dlil_main_threading_info *inpm =
2662 (struct dlil_main_threading_info *)inp;
2663 _addq_multi(&inpm->lo_rcvq_pkts, m_head, m_tail,
2664 m_cnt, m_size);
2665 } else {
2666 _addq_multi(&inp->rcvq_pkts, m_head, m_tail,
2667 m_cnt, m_size);
2668 }
2669 }
2670
2671 #if IFNET_INPUT_SANITY_CHK
2672 if (dlil_input_sanity_check != 0) {
2673 u_int32_t count;
2674 struct mbuf *m0;
2675
2676 for (m0 = m_head, count = 0; m0; m0 = mbuf_nextpkt(m0))
2677 count++;
2678
2679 if (count != m_cnt) {
2680 panic_plain("%s: invalid packet count %d "
2681 "(expected %d)\n", if_name(ifp),
2682 count, m_cnt);
2683 /* NOTREACHED */
2684 }
2685
2686 inp->input_mbuf_cnt += m_cnt;
2687 }
2688 #endif /* IFNET_INPUT_SANITY_CHK */
2689
2690 dlil_input_stats_add(s, inp, poll);
2691 /*
2692 * If we're using the main input thread, synchronize the
2693 * stats now since we have the interface context. All
2694 * other cases involving dedicated input threads will
2695 * have their stats synchronized there.
2696 */
2697 if (inp == dlil_main_input_thread)
2698 dlil_input_stats_sync(ifp, inp);
2699
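/*
 * Receive mitigation (descriptive note, added): if the backlog for an
 * Ethernet or cellular interface is within
 * [dlil_rcv_mit_pkts_min, dlil_rcv_mit_pkts_max), defer waking the input
 * thread via a delayed thread call so more packets can accumulate;
 * otherwise wake it up right away.
 */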
2700 if (qlen(&inp->rcvq_pkts) >= dlil_rcv_mit_pkts_min &&
2701 qlen(&inp->rcvq_pkts) < dlil_rcv_mit_pkts_max &&
2702 (ifp->if_family == IFNET_FAMILY_ETHERNET ||
2703 ifp->if_type == IFT_CELLULAR)
2704 ) {
2705 if (!thread_call_isactive(inp->input_mit_tcall)) {
2706 uint64_t deadline;
2707 clock_interval_to_deadline(dlil_rcv_mit_interval,
2708 1, &deadline);
2709 (void) thread_call_enter_delayed(
2710 inp->input_mit_tcall, deadline);
2711 }
2712 } else {
2713 inp->input_waiting |= DLIL_INPUT_WAITING;
2714 if (!(inp->input_waiting & DLIL_INPUT_RUNNING)) {
2715 inp->wtot++;
2716 wakeup_one((caddr_t)&inp->input_waiting);
2717 }
2718 }
2719 lck_mtx_unlock(&inp->input_lck);
2720
2721 return (0);
2722 }
2723
2724
2725 static void
2726 ifnet_start_common(struct ifnet *ifp, int resetfc)
2727 {
2728 if (!(ifp->if_eflags & IFEF_TXSTART))
2729 return;
2730 /*
2731 * If the starter thread is inactive, signal it to do work,
2732 * unless the interface is being flow controlled from below,
2733 * e.g. a virtual interface being flow controlled by a real
2734 * network interface beneath it.
2735 */
2736 lck_mtx_lock_spin(&ifp->if_start_lock);
2737 if (resetfc) {
2738 ifp->if_start_flags &= ~IFSF_FLOW_CONTROLLED;
2739 } else if (ifp->if_start_flags & IFSF_FLOW_CONTROLLED) {
2740 lck_mtx_unlock(&ifp->if_start_lock);
2741 return;
2742 }
2743 ifp->if_start_req++;
2744 if (!ifp->if_start_active && ifp->if_start_thread != THREAD_NULL &&
2745 (resetfc || !(ifp->if_eflags & IFEF_ENQUEUE_MULTI) ||
2746 IFCQ_LEN(&ifp->if_snd) >= ifp->if_start_delay_qlen ||
2747 ifp->if_start_delayed == 0)) {
2748 (void) thread_wakeup_thread((caddr_t)&ifp->if_start_thread,
2749 ifp->if_start_thread);
2750 }
2751 lck_mtx_unlock(&ifp->if_start_lock);
2752 }
2753
2754 void
2755 ifnet_start(struct ifnet *ifp)
2756 {
2757 ifnet_start_common(ifp, 0);
2758 }
2759
2760 static void
2761 ifnet_start_thread_fn(void *v, wait_result_t w)
2762 {
2763 #pragma unused(w)
2764 struct ifnet *ifp = v;
2765 char ifname[IFNAMSIZ + 1];
2766 char thread_name[MAXTHREADNAMESIZE];
2767 struct timespec *ts = NULL;
2768 struct ifclassq *ifq = &ifp->if_snd;
2769 struct timespec delay_start_ts;
2770
2771 /* Construct the name for this thread, and then apply it. */
2772 bzero(thread_name, sizeof(thread_name));
2773 (void) snprintf(thread_name, sizeof (thread_name),
2774 "ifnet_start_%s", ifp->if_xname);
2775 thread_set_thread_name(ifp->if_start_thread, thread_name);
2776
2777 /*
2778 * Treat the dedicated starter thread for lo0 as equivalent to
2779 * the driver workloop thread; if net_affinity is enabled for
2780 * the main input thread, associate this starter thread with it
2781 * by binding them with the same affinity tag. This is done
2782 * only once (as we only have one lo_ifp which never goes away.)
2783 */
2784 if (ifp == lo_ifp) {
2785 struct dlil_threading_info *inp = dlil_main_input_thread;
2786 struct thread *tp = current_thread();
2787
2788 lck_mtx_lock(&inp->input_lck);
2789 if (inp->net_affinity) {
2790 u_int32_t tag = inp->tag;
2791
2792 VERIFY(inp->wloop_thr == THREAD_NULL);
2793 VERIFY(inp->poll_thr == THREAD_NULL);
2794 inp->wloop_thr = tp;
2795 lck_mtx_unlock(&inp->input_lck);
2796
2797 /* Associate this thread with the affinity tag */
2798 (void) dlil_affinity_set(tp, tag);
2799 } else {
2800 lck_mtx_unlock(&inp->input_lck);
2801 }
2802 }
2803
2804 (void) snprintf(ifname, sizeof (ifname), "%s_starter", if_name(ifp));
2805
2806 lck_mtx_lock_spin(&ifp->if_start_lock);
2807
2808 for (;;) {
2809 if (ifp->if_start_thread != NULL) {
2810 (void) msleep(&ifp->if_start_thread,
2811 &ifp->if_start_lock,
2812 (PZERO - 1) | PSPIN, ifname, ts);
2813 }
2814 /* interface is detached? */
2815 if (ifp->if_start_thread == THREAD_NULL) {
2816 ifnet_set_start_cycle(ifp, NULL);
2817 lck_mtx_unlock(&ifp->if_start_lock);
2818 ifnet_purge(ifp);
2819
2820 if (dlil_verbose) {
2821 printf("%s: starter thread terminated\n",
2822 if_name(ifp));
2823 }
2824
2825 /* for the extra refcnt from kernel_thread_start() */
2826 thread_deallocate(current_thread());
2827 /* this is the end */
2828 thread_terminate(current_thread());
2829 /* NOTREACHED */
2830 return;
2831 }
2832
2833 ifp->if_start_active = 1;
2834
2835 for (;;) {
2836 u_int32_t req = ifp->if_start_req;
2837 if (!IFCQ_IS_EMPTY(ifq) &&
2838 (ifp->if_eflags & IFEF_ENQUEUE_MULTI) &&
2839 ifp->if_start_delayed == 0 &&
2840 IFCQ_LEN(ifq) < ifp->if_start_delay_qlen &&
2841 (ifp->if_eflags & IFEF_DELAY_START)) {
2842 ifp->if_start_delayed = 1;
2843 ifnet_start_delayed++;
2844 break;
2845 } else {
2846 ifp->if_start_delayed = 0;
2847 }
2848 lck_mtx_unlock(&ifp->if_start_lock);
2849
2850 /*
2851 * If no longer attached, don't call start because ifp
2852 * is being destroyed; else hold an IO refcnt to
2853 * prevent the interface from being detached (will be
2854 * released below.)
2855 */
2856 if (!ifnet_is_attached(ifp, 1)) {
2857 lck_mtx_lock_spin(&ifp->if_start_lock);
2858 break;
2859 }
2860
2861 /* invoke the driver's start routine */
2862 ((*ifp->if_start)(ifp));
2863
2864 /*
2865 * Release the io ref count taken by ifnet_is_attached.
2866 */
2867 ifnet_decr_iorefcnt(ifp);
2868
2869 lck_mtx_lock_spin(&ifp->if_start_lock);
2870
2871 /* if there's no pending request, we're done */
2872 if (req == ifp->if_start_req)
2873 break;
2874 }
2875
2876 ifp->if_start_req = 0;
2877 ifp->if_start_active = 0;
2878
2879 /*
2880 * Wakeup N ns from now if rate-controlled by TBR, and if
2881 * there are still packets in the send queue which haven't
2882 * been dequeued so far; else sleep indefinitely (ts = NULL)
2883 * until ifnet_start() is called again.
2884 */
2885 ts = ((IFCQ_TBR_IS_ENABLED(ifq) && !IFCQ_IS_EMPTY(ifq)) ?
2886 &ifp->if_start_cycle : NULL);
2887
2888 if (ts == NULL && ifp->if_start_delayed == 1) {
2889 delay_start_ts.tv_sec = 0;
2890 delay_start_ts.tv_nsec = ifp->if_start_delay_timeout;
2891 ts = &delay_start_ts;
2892 }
2893
2894 if (ts != NULL && ts->tv_sec == 0 && ts->tv_nsec == 0)
2895 ts = NULL;
2896 }
2897
2898 /* NOTREACHED */
2899 }
2900
2901 void
2902 ifnet_set_start_cycle(struct ifnet *ifp, struct timespec *ts)
2903 {
2904 if (ts == NULL)
2905 bzero(&ifp->if_start_cycle, sizeof (ifp->if_start_cycle));
2906 else
2907 *(&ifp->if_start_cycle) = *ts;
2908
2909 if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose)
2910 printf("%s: restart interval set to %lu nsec\n",
2911 if_name(ifp), ts->tv_nsec);
2912 }
2913
2914 static void
2915 ifnet_poll(struct ifnet *ifp)
2916 {
2917 /*
2918 * If the poller thread is inactive, signal it to do work.
2919 */
2920 lck_mtx_lock_spin(&ifp->if_poll_lock);
2921 ifp->if_poll_req++;
2922 if (!ifp->if_poll_active && ifp->if_poll_thread != THREAD_NULL) {
2923 wakeup_one((caddr_t)&ifp->if_poll_thread);
2924 }
2925 lck_mtx_unlock(&ifp->if_poll_lock);
2926 }
2927
2928 static void
2929 ifnet_poll_thread_fn(void *v, wait_result_t w)
2930 {
2931 #pragma unused(w)
2932 struct dlil_threading_info *inp;
2933 struct ifnet *ifp = v;
2934 char ifname[IFNAMSIZ + 1];
2935 struct timespec *ts = NULL;
2936 struct ifnet_stat_increment_param s;
2937
2938 snprintf(ifname, sizeof (ifname), "%s_poller",
2939 if_name(ifp));
2940 bzero(&s, sizeof (s));
2941
2942 lck_mtx_lock_spin(&ifp->if_poll_lock);
2943
2944 inp = ifp->if_inp;
2945 VERIFY(inp != NULL);
2946
2947 for (;;) {
2948 if (ifp->if_poll_thread != THREAD_NULL) {
2949 (void) msleep(&ifp->if_poll_thread, &ifp->if_poll_lock,
2950 (PZERO - 1) | PSPIN, ifname, ts);
2951 }
2952
2953 /* interface is detached (maybe while asleep)? */
2954 if (ifp->if_poll_thread == THREAD_NULL) {
2955 ifnet_set_poll_cycle(ifp, NULL);
2956 lck_mtx_unlock(&ifp->if_poll_lock);
2957
2958 if (dlil_verbose) {
2959 printf("%s: poller thread terminated\n",
2960 if_name(ifp));
2961 }
2962
2963 /* for the extra refcnt from kernel_thread_start() */
2964 thread_deallocate(current_thread());
2965 /* this is the end */
2966 thread_terminate(current_thread());
2967 /* NOTREACHED */
2968 return;
2969 }
2970
2971 ifp->if_poll_active = 1;
2972 for (;;) {
2973 struct mbuf *m_head, *m_tail;
2974 u_int32_t m_lim, m_cnt, m_totlen;
2975 u_int16_t req = ifp->if_poll_req;
2976
2977 lck_mtx_unlock(&ifp->if_poll_lock);
2978
2979 /*
2980 * If no longer attached, there's nothing to do;
2981 * else hold an IO refcnt to prevent the interface
2982 * from being detached (will be released below.)
2983 */
2984 if (!ifnet_is_attached(ifp, 1)) {
2985 lck_mtx_lock_spin(&ifp->if_poll_lock);
2986 break;
2987 }
2988
2989 m_lim = (inp->rxpoll_plim != 0) ? inp->rxpoll_plim :
2990 MAX((qlimit(&inp->rcvq_pkts)),
2991 (inp->rxpoll_phiwat << 2));
2992
2993 if (dlil_verbose > 1) {
2994 printf("%s: polling up to %d pkts, "
2995 "pkts avg %d max %d, wreq avg %d, "
2996 "bytes avg %d\n",
2997 if_name(ifp), m_lim,
2998 inp->rxpoll_pavg, inp->rxpoll_pmax,
2999 inp->rxpoll_wavg, inp->rxpoll_bavg);
3000 }
3001
3002 /* invoke the driver's input poll routine */
3003 ((*ifp->if_input_poll)(ifp, 0, m_lim, &m_head, &m_tail,
3004 &m_cnt, &m_totlen));
3005
3006 if (m_head != NULL) {
3007 VERIFY(m_tail != NULL && m_cnt > 0);
3008
3009 if (dlil_verbose > 1) {
3010 printf("%s: polled %d pkts, "
3011 "pkts avg %d max %d, wreq avg %d, "
3012 "bytes avg %d\n",
3013 if_name(ifp), m_cnt,
3014 inp->rxpoll_pavg, inp->rxpoll_pmax,
3015 inp->rxpoll_wavg, inp->rxpoll_bavg);
3016 }
3017
3018 /* stats are required for extended variant */
3019 s.packets_in = m_cnt;
3020 s.bytes_in = m_totlen;
3021
3022 (void) ifnet_input_common(ifp, m_head, m_tail,
3023 &s, TRUE, TRUE);
3024 } else {
3025 if (dlil_verbose > 1) {
3026 printf("%s: no packets, "
3027 "pkts avg %d max %d, wreq avg %d, "
3028 "bytes avg %d\n",
3029 if_name(ifp), inp->rxpoll_pavg,
3030 inp->rxpoll_pmax, inp->rxpoll_wavg,
3031 inp->rxpoll_bavg);
3032 }
3033
3034 (void) ifnet_input_common(ifp, NULL, NULL,
3035 NULL, FALSE, TRUE);
3036 }
3037
3038 /* Release the io ref count */
3039 ifnet_decr_iorefcnt(ifp);
3040
3041 lck_mtx_lock_spin(&ifp->if_poll_lock);
3042
3043 /* if there's no pending request, we're done */
3044 if (req == ifp->if_poll_req)
3045 break;
3046 }
3047 ifp->if_poll_req = 0;
3048 ifp->if_poll_active = 0;
3049
3050 /*
3051 * Wakeup N ns from now, else sleep indefinitely (ts = NULL)
3052 * until ifnet_poll() is called again.
3053 */
3054 ts = &ifp->if_poll_cycle;
3055 if (ts->tv_sec == 0 && ts->tv_nsec == 0)
3056 ts = NULL;
3057 }
3058
3059 /* NOTREACHED */
3060 }
3061
3062 void
3063 ifnet_set_poll_cycle(struct ifnet *ifp, struct timespec *ts)
3064 {
3065 if (ts == NULL)
3066 bzero(&ifp->if_poll_cycle, sizeof (ifp->if_poll_cycle));
3067 else
3068 *(&ifp->if_poll_cycle) = *ts;
3069
3070 if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose)
3071 printf("%s: poll interval set to %lu nsec\n",
3072 if_name(ifp), ts->tv_nsec);
3073 }
3074
3075 void
3076 ifnet_purge(struct ifnet *ifp)
3077 {
3078 if (ifp != NULL && (ifp->if_eflags & IFEF_TXSTART))
3079 if_qflush(ifp, 0);
3080 }
3081
3082 void
3083 ifnet_update_sndq(struct ifclassq *ifq, cqev_t ev)
3084 {
3085 IFCQ_LOCK_ASSERT_HELD(ifq);
3086
3087 if (!(IFCQ_IS_READY(ifq)))
3088 return;
3089
3090 if (IFCQ_TBR_IS_ENABLED(ifq)) {
3091 struct tb_profile tb = { ifq->ifcq_tbr.tbr_rate_raw,
3092 ifq->ifcq_tbr.tbr_percent, 0 };
3093 (void) ifclassq_tbr_set(ifq, &tb, FALSE);
3094 }
3095
3096 ifclassq_update(ifq, ev);
3097 }
3098
3099 void
3100 ifnet_update_rcv(struct ifnet *ifp, cqev_t ev)
3101 {
3102 switch (ev) {
3103 case CLASSQ_EV_LINK_BANDWIDTH:
3104 if (net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL))
3105 ifp->if_poll_update++;
3106 break;
3107
3108 default:
3109 break;
3110 }
3111 }
3112
3113 errno_t
3114 ifnet_set_output_sched_model(struct ifnet *ifp, u_int32_t model)
3115 {
3116 struct ifclassq *ifq;
3117 u_int32_t omodel;
3118 errno_t err;
3119
3120 if (ifp == NULL || model >= IFNET_SCHED_MODEL_MAX)
3121 return (EINVAL);
3122 else if (!(ifp->if_eflags & IFEF_TXSTART))
3123 return (ENXIO);
3124
3125 ifq = &ifp->if_snd;
3126 IFCQ_LOCK(ifq);
3127 omodel = ifp->if_output_sched_model;
3128 ifp->if_output_sched_model = model;
3129 if ((err = ifclassq_pktsched_setup(ifq)) != 0)
3130 ifp->if_output_sched_model = omodel;
3131 IFCQ_UNLOCK(ifq);
3132
3133 return (err);
3134 }
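/*
 * Illustrative sketch (not in the original source): a driver that manages
 * its own transmit scheduling might opt out of the default model right
 * after attach.  The model constant is assumed to come from the ifnet KPI.
 *
 *	(void) ifnet_set_output_sched_model(ifp,
 *	    IFNET_SCHED_MODEL_DRIVER_MANAGED);
 */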
3135
3136 errno_t
3137 ifnet_set_sndq_maxlen(struct ifnet *ifp, u_int32_t maxqlen)
3138 {
3139 if (ifp == NULL)
3140 return (EINVAL);
3141 else if (!(ifp->if_eflags & IFEF_TXSTART))
3142 return (ENXIO);
3143
3144 ifclassq_set_maxlen(&ifp->if_snd, maxqlen);
3145
3146 return (0);
3147 }
3148
3149 errno_t
3150 ifnet_get_sndq_maxlen(struct ifnet *ifp, u_int32_t *maxqlen)
3151 {
3152 if (ifp == NULL || maxqlen == NULL)
3153 return (EINVAL);
3154 else if (!(ifp->if_eflags & IFEF_TXSTART))
3155 return (ENXIO);
3156
3157 *maxqlen = ifclassq_get_maxlen(&ifp->if_snd);
3158
3159 return (0);
3160 }
3161
3162 errno_t
3163 ifnet_get_sndq_len(struct ifnet *ifp, u_int32_t *pkts)
3164 {
3165 errno_t err;
3166
3167 if (ifp == NULL || pkts == NULL)
3168 err = EINVAL;
3169 else if (!(ifp->if_eflags & IFEF_TXSTART))
3170 err = ENXIO;
3171 else
3172 err = ifclassq_get_len(&ifp->if_snd, MBUF_SC_UNSPEC,
3173 pkts, NULL);
3174
3175 return (err);
3176 }
3177
3178 errno_t
3179 ifnet_get_service_class_sndq_len(struct ifnet *ifp, mbuf_svc_class_t sc,
3180 u_int32_t *pkts, u_int32_t *bytes)
3181 {
3182 errno_t err;
3183
3184 if (ifp == NULL || !MBUF_VALID_SC(sc) ||
3185 (pkts == NULL && bytes == NULL))
3186 err = EINVAL;
3187 else if (!(ifp->if_eflags & IFEF_TXSTART))
3188 err = ENXIO;
3189 else
3190 err = ifclassq_get_len(&ifp->if_snd, sc, pkts, bytes);
3191
3192 return (err);
3193 }
3194
3195 errno_t
3196 ifnet_set_rcvq_maxlen(struct ifnet *ifp, u_int32_t maxqlen)
3197 {
3198 struct dlil_threading_info *inp;
3199
3200 if (ifp == NULL)
3201 return (EINVAL);
3202 else if (!(ifp->if_eflags & IFEF_RXPOLL) || ifp->if_inp == NULL)
3203 return (ENXIO);
3204
3205 if (maxqlen == 0)
3206 maxqlen = if_rcvq_maxlen;
3207 else if (maxqlen < IF_RCVQ_MINLEN)
3208 maxqlen = IF_RCVQ_MINLEN;
3209
3210 inp = ifp->if_inp;
3211 lck_mtx_lock(&inp->input_lck);
3212 qlimit(&inp->rcvq_pkts) = maxqlen;
3213 lck_mtx_unlock(&inp->input_lck);
3214
3215 return (0);
3216 }
3217
3218 errno_t
3219 ifnet_get_rcvq_maxlen(struct ifnet *ifp, u_int32_t *maxqlen)
3220 {
3221 struct dlil_threading_info *inp;
3222
3223 if (ifp == NULL || maxqlen == NULL)
3224 return (EINVAL);
3225 else if (!(ifp->if_eflags & IFEF_RXPOLL) || ifp->if_inp == NULL)
3226 return (ENXIO);
3227
3228 inp = ifp->if_inp;
3229 lck_mtx_lock(&inp->input_lck);
3230 *maxqlen = qlimit(&inp->rcvq_pkts);
3231 lck_mtx_unlock(&inp->input_lck);
3232 return (0);
3233 }
3234
3235 void
3236 ifnet_enqueue_multi_setup(struct ifnet *ifp, uint16_t delay_qlen,
3237 uint16_t delay_timeout)
3238 {
3239 if (delay_qlen > 0 && delay_timeout > 0) {
3240 ifp->if_eflags |= IFEF_ENQUEUE_MULTI;
3241 ifp->if_start_delay_qlen = min(100, delay_qlen);
3242 ifp->if_start_delay_timeout = min(20000, delay_timeout);
3243 /* convert timeout to nanoseconds */
3244 ifp->if_start_delay_timeout *= 1000;
3245 kprintf("%s: forced IFEF_ENQUEUE_MULTI qlen %u timeout %u\n",
3246 ifp->if_xname, (uint32_t)delay_qlen,
3247 (uint32_t)delay_timeout);
3248 } else {
3249 ifp->if_eflags &= ~IFEF_ENQUEUE_MULTI;
3250 }
3251 }
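/*
 * Illustrative example (hypothetical values, not in the original source):
 * ifnet_enqueue_multi_setup(ifp, 150, 3000) leaves if_start_delay_qlen = 100
 * (clamped) and if_start_delay_timeout = 3000 * 1000 = 3,000,000 ns, with
 * IFEF_ENQUEUE_MULTI set.  Passing either argument as 0 clears
 * IFEF_ENQUEUE_MULTI instead.
 */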
3252
3253 static inline errno_t
3254 ifnet_enqueue_common(struct ifnet *ifp, void *p, classq_pkt_type_t ptype,
3255 boolean_t flush, boolean_t *pdrop)
3256 {
3257 volatile uint64_t *fg_ts = NULL;
3258 volatile uint64_t *rt_ts = NULL;
3259 struct mbuf *m = p;
3260 struct timespec now;
3261 u_int64_t now_nsec = 0;
3262 int error = 0;
3263
3264 ASSERT(ifp->if_eflags & IFEF_TXSTART);
3265
3266 /*
3267 * If packet already carries a timestamp, either from dlil_output()
3268 * or from flowswitch, use it here. Otherwise, record timestamp.
3269 * PKTF_TS_VALID is always cleared prior to entering classq, i.e.
3270 * the timestamp value is used internally there.
3271 */
3272 switch (ptype) {
3273 case QP_MBUF:
3274 ASSERT(m->m_flags & M_PKTHDR);
3275 ASSERT(m->m_nextpkt == NULL);
3276
3277 if (!(m->m_pkthdr.pkt_flags & PKTF_TS_VALID) ||
3278 m->m_pkthdr.pkt_timestamp == 0) {
3279 nanouptime(&now);
3280 net_timernsec(&now, &now_nsec);
3281 m->m_pkthdr.pkt_timestamp = now_nsec;
3282 }
3283 m->m_pkthdr.pkt_flags &= ~PKTF_TS_VALID;
3284 /*
3285 * If the packet service class is not background,
3286 * update the timestamp to indicate recent activity
3287 * on a foreground socket.
3288 */
3289 if ((m->m_pkthdr.pkt_flags & PKTF_FLOW_ID) &&
3290 m->m_pkthdr.pkt_flowsrc == FLOWSRC_INPCB) {
3291 if (!(m->m_pkthdr.pkt_flags & PKTF_SO_BACKGROUND)) {
3292 ifp->if_fg_sendts = _net_uptime;
3293 if (fg_ts != NULL)
3294 *fg_ts = _net_uptime;
3295 }
3296 if (m->m_pkthdr.pkt_flags & PKTF_SO_REALTIME) {
3297 ifp->if_rt_sendts = _net_uptime;
3298 if (rt_ts != NULL)
3299 *rt_ts = _net_uptime;
3300 }
3301 }
3302 break;
3303
3304
3305 default:
3306 VERIFY(0);
3307 /* NOTREACHED */
3308 }
3309
3310 if (ifp->if_eflags & IFEF_ENQUEUE_MULTI) {
3311 if (now_nsec == 0) {
3312 nanouptime(&now);
3313 net_timernsec(&now, &now_nsec);
3314 }
3315 /*
3316 * If the driver chose to delay start callback for
3317 * coalescing multiple packets, then use the following
3318 * heuristics to make sure that start callback will
3319 * be delayed only when bulk data transfer is detected.
3320 * 1. number of packets enqueued in (delay_win * 2) is
3321 * greater than or equal to the delay qlen.
3322 * 2. If delay_start is enabled it will stay enabled for
3323 * another 10 idle windows. This is to take into account
3324 * variable RTT and burst traffic.
3325 * 3. If the time elapsed since last enqueue is more
3326 * than 200ms we disable delaying start callback. This is
3327 * to take idle time into account (see the sketch below).
3328 */
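/*
 * Sketch of the rules above with hypothetical numbers (added, not in the
 * original source): with if_start_delay_qlen = 10 and
 * if_start_delay_timeout = 1ms, the window dwin is 2ms.  Enqueues landing
 * within 2ms of the window start only bump if_start_delay_cnt; the first
 * enqueue after the window closes either sets IFEF_DELAY_START (the count
 * reached 10) or counts an idle window (ten consecutive idle windows clear
 * IFEF_DELAY_START), and a gap of 200ms or more clears it immediately and
 * restarts the window.
 */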
3329 u_int64_t dwin = (ifp->if_start_delay_timeout << 1);
3330 if (ifp->if_start_delay_swin > 0) {
3331 if ((ifp->if_start_delay_swin + dwin) > now_nsec) {
3332 ifp->if_start_delay_cnt++;
3333 } else if ((now_nsec - ifp->if_start_delay_swin)
3334 >= (200 * 1000 * 1000)) {
3335 ifp->if_start_delay_swin = now_nsec;
3336 ifp->if_start_delay_cnt = 1;
3337 ifp->if_start_delay_idle = 0;
3338 if (ifp->if_eflags & IFEF_DELAY_START) {
3339 ifp->if_eflags &=
3340 ~(IFEF_DELAY_START);
3341 ifnet_delay_start_disabled++;
3342 }
3343 } else {
3344 if (ifp->if_start_delay_cnt >=
3345 ifp->if_start_delay_qlen) {
3346 ifp->if_eflags |= IFEF_DELAY_START;
3347 ifp->if_start_delay_idle = 0;
3348 } else {
3349 if (ifp->if_start_delay_idle >= 10) {
3350 ifp->if_eflags &= ~(IFEF_DELAY_START);
3351 ifnet_delay_start_disabled++;
3352 } else {
3353 ifp->if_start_delay_idle++;
3354 }
3355 }
3356 ifp->if_start_delay_swin = now_nsec;
3357 ifp->if_start_delay_cnt = 1;
3358 }
3359 } else {
3360 ifp->if_start_delay_swin = now_nsec;
3361 ifp->if_start_delay_cnt = 1;
3362 ifp->if_start_delay_idle = 0;
3363 ifp->if_eflags &= ~(IFEF_DELAY_START);
3364 }
3365 } else {
3366 ifp->if_eflags &= ~(IFEF_DELAY_START);
3367 }
3368
3369 switch (ptype) {
3370 case QP_MBUF:
3371 /* enqueue the packet (caller consumes object) */
3372 error = ifclassq_enqueue(&ifp->if_snd, m, QP_MBUF, pdrop);
3373 m = NULL;
3374 break;
3375
3376
3377 default:
3378 break;
3379 }
3380
3381 /*
3382 * Tell the driver to start dequeueing; do this even when the queue
3383 * for the packet is suspended (EQSUSPENDED), as the driver could still
3384 * be dequeueing from other unsuspended queues.
3385 */
3386 if (!(ifp->if_eflags & IFEF_ENQUEUE_MULTI) &&
3387 ((error == 0 && flush) || error == EQFULL || error == EQSUSPENDED))
3388 ifnet_start(ifp);
3389
3390 return (error);
3391 }
3392
3393 errno_t
3394 ifnet_enqueue(struct ifnet *ifp, struct mbuf *m)
3395 {
3396 boolean_t pdrop;
3397 return (ifnet_enqueue_mbuf(ifp, m, TRUE, &pdrop));
3398 }
3399
3400 errno_t
3401 ifnet_enqueue_mbuf(struct ifnet *ifp, struct mbuf *m, boolean_t flush,
3402 boolean_t *pdrop)
3403 {
3404 if (ifp == NULL || m == NULL || !(m->m_flags & M_PKTHDR) ||
3405 m->m_nextpkt != NULL) {
3406 if (m != NULL) {
3407 m_freem_list(m);
3408 *pdrop = TRUE;
3409 }
3410 return (EINVAL);
3411 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3412 !IF_FULLY_ATTACHED(ifp)) {
3413 /* flag tested without lock for performance */
3414 m_freem(m);
3415 *pdrop = TRUE;
3416 return (ENXIO);
3417 } else if (!(ifp->if_flags & IFF_UP)) {
3418 m_freem(m);
3419 *pdrop = TRUE;
3420 return (ENETDOWN);
3421 }
3422
3423 return (ifnet_enqueue_common(ifp, m, QP_MBUF, flush, pdrop));
3424 }
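/*
 * Illustrative sketch (not in the original source): a caller on the transmit
 * path enqueues a single packet and learns whether it was dropped.  The mbuf
 * "m" is assumed to have a valid pkthdr and no chained packets; the counter
 * is hypothetical.
 *
 *	boolean_t dropped;
 *	errno_t err = ifnet_enqueue_mbuf(ifp, m, TRUE, &dropped);
 *	if (dropped) {
 *		// "m" was freed by the enqueue path; do not touch it
 *		my_tx_drop_stat++;	// hypothetical counter
 *	}
 */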
3425
3426
3427 errno_t
3428 ifnet_dequeue(struct ifnet *ifp, struct mbuf **mp)
3429 {
3430 errno_t rc;
3431 classq_pkt_type_t ptype;
3432 if (ifp == NULL || mp == NULL)
3433 return (EINVAL);
3434 else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3435 ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX)
3436 return (ENXIO);
3437 if (!ifnet_is_attached(ifp, 1))
3438 return (ENXIO);
3439
3440 rc = ifclassq_dequeue(&ifp->if_snd, 1, CLASSQ_DEQUEUE_MAX_BYTE_LIMIT,
3441 (void **)mp, NULL, NULL, NULL, &ptype);
3442 VERIFY((*mp == NULL) || (ptype == QP_MBUF));
3443 ifnet_decr_iorefcnt(ifp);
3444
3445 return (rc);
3446 }
3447
3448 errno_t
3449 ifnet_dequeue_service_class(struct ifnet *ifp, mbuf_svc_class_t sc,
3450 struct mbuf **mp)
3451 {
3452 errno_t rc;
3453 classq_pkt_type_t ptype;
3454 if (ifp == NULL || mp == NULL || !MBUF_VALID_SC(sc))
3455 return (EINVAL);
3456 else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3457 ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX)
3458 return (ENXIO);
3459 if (!ifnet_is_attached(ifp, 1))
3460 return (ENXIO);
3461
3462 rc = ifclassq_dequeue_sc(&ifp->if_snd, sc, 1,
3463 CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, (void **)mp, NULL, NULL,
3464 NULL, &ptype);
3465 VERIFY((*mp == NULL) || (ptype == QP_MBUF));
3466 ifnet_decr_iorefcnt(ifp);
3467 return (rc);
3468 }
3469
3470 errno_t
3471 ifnet_dequeue_multi(struct ifnet *ifp, u_int32_t pkt_limit,
3472 struct mbuf **head, struct mbuf **tail, u_int32_t *cnt, u_int32_t *len)
3473 {
3474 errno_t rc;
3475 classq_pkt_type_t ptype;
3476 if (ifp == NULL || head == NULL || pkt_limit < 1)
3477 return (EINVAL);
3478 else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3479 ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX)
3480 return (ENXIO);
3481 if (!ifnet_is_attached(ifp, 1))
3482 return (ENXIO);
3483
3484 rc = ifclassq_dequeue(&ifp->if_snd, pkt_limit,
3485 CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, (void **)head, (void **)tail, cnt,
3486 len, &ptype);
3487 VERIFY((*head == NULL) || (ptype == QP_MBUF));
3488 ifnet_decr_iorefcnt(ifp);
3489 return (rc);
3490 }
3491
3492 errno_t
3493 ifnet_dequeue_multi_bytes(struct ifnet *ifp, u_int32_t byte_limit,
3494 struct mbuf **head, struct mbuf **tail, u_int32_t *cnt, u_int32_t *len)
3495 {
3496 errno_t rc;
3497 classq_pkt_type_t ptype;
3498 if (ifp == NULL || head == NULL || byte_limit < 1)
3499 return (EINVAL);
3500 else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3501 ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX)
3502 return (ENXIO);
3503 if (!ifnet_is_attached(ifp, 1))
3504 return (ENXIO);
3505
3506 rc = ifclassq_dequeue(&ifp->if_snd, CLASSQ_DEQUEUE_MAX_PKT_LIMIT,
3507 byte_limit, (void **)head, (void **)tail, cnt, len, &ptype);
3508 VERIFY((*head == NULL) || (ptype == QP_MBUF));
3509 ifnet_decr_iorefcnt(ifp);
3510 return (rc);
3511 }
3512
3513 errno_t
3514 ifnet_dequeue_service_class_multi(struct ifnet *ifp, mbuf_svc_class_t sc,
3515 u_int32_t pkt_limit, struct mbuf **head, struct mbuf **tail, u_int32_t *cnt,
3516 u_int32_t *len)
3517 {
3518 errno_t rc;
3519 classq_pkt_type_t ptype;
3520 if (ifp == NULL || head == NULL || pkt_limit < 1 ||
3521 !MBUF_VALID_SC(sc))
3522 return (EINVAL);
3523 else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3524 ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX)
3525 return (ENXIO);
3526 if (!ifnet_is_attached(ifp, 1))
3527 return (ENXIO);
3528
3529 rc = ifclassq_dequeue_sc(&ifp->if_snd, sc, pkt_limit,
3530 CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, (void **)head,
3531 (void **)tail, cnt, len, &ptype);
3532 VERIFY((*head == NULL) || (ptype == QP_MBUF));
3533 ifnet_decr_iorefcnt(ifp);
3534 return (rc);
3535 }
3536
3537 #if !CONFIG_EMBEDDED
3538 errno_t
3539 ifnet_framer_stub(struct ifnet *ifp, struct mbuf **m,
3540 const struct sockaddr *dest, const char *dest_linkaddr,
3541 const char *frame_type, u_int32_t *pre, u_int32_t *post)
3542 {
3543 if (pre != NULL)
3544 *pre = 0;
3545 if (post != NULL)
3546 *post = 0;
3547
3548 return (ifp->if_framer_legacy(ifp, m, dest, dest_linkaddr, frame_type));
3549 }
3550 #endif /* !CONFIG_EMBEDDED */
3551
3552 static int
3553 dlil_interface_filters_input(struct ifnet *ifp, struct mbuf **m_p,
3554 char **frame_header_p, protocol_family_t protocol_family)
3555 {
3556 struct ifnet_filter *filter;
3557
3558 /*
3559 * Pass the inbound packet to the interface filters
3560 */
3561 lck_mtx_lock_spin(&ifp->if_flt_lock);
3562 /* prevent filter list from changing in case we drop the lock */
3563 if_flt_monitor_busy(ifp);
3564 TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
3565 int result;
3566
3567 if (!filter->filt_skip && filter->filt_input != NULL &&
3568 (filter->filt_protocol == 0 ||
3569 filter->filt_protocol == protocol_family)) {
3570 lck_mtx_unlock(&ifp->if_flt_lock);
3571
3572 result = (*filter->filt_input)(filter->filt_cookie,
3573 ifp, protocol_family, m_p, frame_header_p);
3574
3575 lck_mtx_lock_spin(&ifp->if_flt_lock);
3576 if (result != 0) {
3577 /* we're done with the filter list */
3578 if_flt_monitor_unbusy(ifp);
3579 lck_mtx_unlock(&ifp->if_flt_lock);
3580 return (result);
3581 }
3582 }
3583 }
3584 /* we're done with the filter list */
3585 if_flt_monitor_unbusy(ifp);
3586 lck_mtx_unlock(&ifp->if_flt_lock);
3587
3588 /*
3589 * Strip away the M_PROTO1 bit prior to sending the packet up the
3590 * stack; it is meant to be local to a subsystem (e.g. if_bridge).
3591 */
3592 if (*m_p != NULL)
3593 (*m_p)->m_flags &= ~M_PROTO1;
3594
3595 return (0);
3596 }
3597
3598 static int
3599 dlil_interface_filters_output(struct ifnet *ifp, struct mbuf **m_p,
3600 protocol_family_t protocol_family)
3601 {
3602 struct ifnet_filter *filter;
3603
3604 /*
3605 * Pass the outbound packet to the interface filters
3606 */
3607 lck_mtx_lock_spin(&ifp->if_flt_lock);
3608 /* prevent filter list from changing in case we drop the lock */
3609 if_flt_monitor_busy(ifp);
3610 TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
3611 int result;
3612
3613 if (!filter->filt_skip && filter->filt_output != NULL &&
3614 (filter->filt_protocol == 0 ||
3615 filter->filt_protocol == protocol_family)) {
3616 lck_mtx_unlock(&ifp->if_flt_lock);
3617
3618 result = filter->filt_output(filter->filt_cookie, ifp,
3619 protocol_family, m_p);
3620
3621 lck_mtx_lock_spin(&ifp->if_flt_lock);
3622 if (result != 0) {
3623 /* we're done with the filter list */
3624 if_flt_monitor_unbusy(ifp);
3625 lck_mtx_unlock(&ifp->if_flt_lock);
3626 return (result);
3627 }
3628 }
3629 }
3630 /* we're done with the filter list */
3631 if_flt_monitor_unbusy(ifp);
3632 lck_mtx_unlock(&ifp->if_flt_lock);
3633
3634 return (0);
3635 }
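
/*
 * Illustrative sketch (editorial, not part of the original source): a
 * minimal interface filter of the kind walked by the two loops above,
 * assuming the public kpi_interfacefilter.h KPI (struct iff_filter and
 * iflt_attach).  The filter name is a placeholder; returning 0 from the
 * output handler lets the packet continue, while any other value except
 * EJUSTRETURN makes dlil free the packet and stop the walk.
 */
static errno_t
my_filter_output(void *cookie, ifnet_t ifp, protocol_family_t protocol,
    mbuf_t *data)
{
#pragma unused(cookie, ifp, protocol, data)
	/* observe or rewrite *data here; 0 lets the packet continue */
	return (0);
}

static interface_filter_t my_filter_ref;

static errno_t
my_filter_attach(ifnet_t ifp)
{
	struct iff_filter flt = {
		.iff_name = "com.example.myfilter",	/* placeholder */
		.iff_protocol = 0,			/* 0 matches all */
		.iff_output = my_filter_output,
	};

	return (iflt_attach(ifp, &flt, &my_filter_ref));
}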
3636
3637 static void
3638 dlil_ifproto_input(struct if_proto * ifproto, mbuf_t m)
3639 {
3640 int error;
3641
3642 if (ifproto->proto_kpi == kProtoKPI_v1) {
3643 /* Version 1 protocols get one packet at a time */
3644 while (m != NULL) {
3645 char * frame_header;
3646 mbuf_t next_packet;
3647
3648 next_packet = m->m_nextpkt;
3649 m->m_nextpkt = NULL;
3650 frame_header = m->m_pkthdr.pkt_hdr;
3651 m->m_pkthdr.pkt_hdr = NULL;
3652 error = (*ifproto->kpi.v1.input)(ifproto->ifp,
3653 ifproto->protocol_family, m, frame_header);
3654 if (error != 0 && error != EJUSTRETURN)
3655 m_freem(m);
3656 m = next_packet;
3657 }
3658 } else if (ifproto->proto_kpi == kProtoKPI_v2) {
3659 /* Version 2 protocols support packet lists */
3660 error = (*ifproto->kpi.v2.input)(ifproto->ifp,
3661 ifproto->protocol_family, m);
3662 if (error != 0 && error != EJUSTRETURN)
3663 m_freem_list(m);
3664 }
3665 }
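
/*
 * Illustrative sketch (editorial, not part of the original source): a
 * hypothetical v2 protocol input handler.  Unlike the v1 KPI above, which
 * receives one packet and its frame header per call, a v2 handler is
 * handed the whole m_nextpkt-linked list and walks (or bulk-processes)
 * it itself; my_proto_deliver() is an assumed per-packet consumer.
 */
static errno_t
my_proto_input_v2(ifnet_t ifp, protocol_family_t protocol, mbuf_t packet_list)
{
#pragma unused(ifp, protocol)
	mbuf_t m, next;

	for (m = packet_list; m != NULL; m = next) {
		next = m->m_nextpkt;
		m->m_nextpkt = NULL;
		my_proto_deliver(m);	/* assumed; takes ownership of m */
	}
	return (0);
}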
3666
3667 static void
3668 dlil_input_stats_add(const struct ifnet_stat_increment_param *s,
3669 struct dlil_threading_info *inp, boolean_t poll)
3670 {
3671 struct ifnet_stat_increment_param *d = &inp->stats;
3672
3673 if (s->packets_in != 0)
3674 d->packets_in += s->packets_in;
3675 if (s->bytes_in != 0)
3676 d->bytes_in += s->bytes_in;
3677 if (s->errors_in != 0)
3678 d->errors_in += s->errors_in;
3679
3680 if (s->packets_out != 0)
3681 d->packets_out += s->packets_out;
3682 if (s->bytes_out != 0)
3683 d->bytes_out += s->bytes_out;
3684 if (s->errors_out != 0)
3685 d->errors_out += s->errors_out;
3686
3687 if (s->collisions != 0)
3688 d->collisions += s->collisions;
3689 if (s->dropped != 0)
3690 d->dropped += s->dropped;
3691
3692 if (poll)
3693 PKTCNTR_ADD(&inp->tstats, s->packets_in, s->bytes_in);
3694 }
3695
3696 static void
3697 dlil_input_stats_sync(struct ifnet *ifp, struct dlil_threading_info *inp)
3698 {
3699 struct ifnet_stat_increment_param *s = &inp->stats;
3700
3701 /*
3702 * Use of atomic operations is unavoidable here because
3703 * these stats may also be incremented elsewhere via KPIs.
3704 */
3705 if (s->packets_in != 0) {
3706 atomic_add_64(&ifp->if_data.ifi_ipackets, s->packets_in);
3707 s->packets_in = 0;
3708 }
3709 if (s->bytes_in != 0) {
3710 atomic_add_64(&ifp->if_data.ifi_ibytes, s->bytes_in);
3711 s->bytes_in = 0;
3712 }
3713 if (s->errors_in != 0) {
3714 atomic_add_64(&ifp->if_data.ifi_ierrors, s->errors_in);
3715 s->errors_in = 0;
3716 }
3717
3718 if (s->packets_out != 0) {
3719 atomic_add_64(&ifp->if_data.ifi_opackets, s->packets_out);
3720 s->packets_out = 0;
3721 }
3722 if (s->bytes_out != 0) {
3723 atomic_add_64(&ifp->if_data.ifi_obytes, s->bytes_out);
3724 s->bytes_out = 0;
3725 }
3726 if (s->errors_out != 0) {
3727 atomic_add_64(&ifp->if_data.ifi_oerrors, s->errors_out);
3728 s->errors_out = 0;
3729 }
3730
3731 if (s->collisions != 0) {
3732 atomic_add_64(&ifp->if_data.ifi_collisions, s->collisions);
3733 s->collisions = 0;
3734 }
3735 if (s->dropped != 0) {
3736 atomic_add_64(&ifp->if_data.ifi_iqdrops, s->dropped);
3737 s->dropped = 0;
3738 }
3739
3740 if (ifp->if_data_threshold != 0) {
3741 lck_mtx_convert_spin(&inp->input_lck);
3742 ifnet_notify_data_threshold(ifp);
3743 }
3744
3745 /*
3746 * No need for atomic operations, as these counters are modified
3747 * only from within the DLIL input thread context.
3748 */
3749 if (inp->tstats.packets != 0) {
3750 inp->pstats.ifi_poll_packets += inp->tstats.packets;
3751 inp->tstats.packets = 0;
3752 }
3753 if (inp->tstats.bytes != 0) {
3754 inp->pstats.ifi_poll_bytes += inp->tstats.bytes;
3755 inp->tstats.bytes = 0;
3756 }
3757 }
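
/*
 * Illustrative sketch (editorial, not part of the original source): how a
 * hypothetical driver feeds the per-thread counters that
 * dlil_input_stats_add()/dlil_input_stats_sync() above accumulate and
 * flush.  It assumes the public ifnet_input() KPI; pkt_chain, pkt_count
 * and byte_count stand in for whatever the driver collected from its
 * receive ring.
 */
static errno_t
hypothetical_rx_deliver(ifnet_t ifp, mbuf_t pkt_chain, u_int32_t pkt_count,
    u_int32_t byte_count)
{
	struct ifnet_stat_increment_param stats;

	bzero(&stats, sizeof (stats));
	stats.packets_in = pkt_count;
	stats.bytes_in = byte_count;

	/* dlil folds these into inp->stats and syncs them to the ifnet */
	return (ifnet_input(ifp, pkt_chain, &stats));
}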
3758
3759 __private_extern__ void
3760 dlil_input_packet_list(struct ifnet *ifp, struct mbuf *m)
3761 {
3762 return (dlil_input_packet_list_common(ifp, m, 0,
3763 IFNET_MODEL_INPUT_POLL_OFF, FALSE));
3764 }
3765
3766 __private_extern__ void
3767 dlil_input_packet_list_extended(struct ifnet *ifp, struct mbuf *m,
3768 u_int32_t cnt, ifnet_model_t mode)
3769 {
3770 return (dlil_input_packet_list_common(ifp, m, cnt, mode, TRUE));
3771 }
3772
3773 static void
3774 dlil_input_packet_list_common(struct ifnet *ifp_param, struct mbuf *m,
3775 u_int32_t cnt, ifnet_model_t mode, boolean_t ext)
3776 {
3777 int error = 0;
3778 protocol_family_t protocol_family;
3779 mbuf_t next_packet;
3780 ifnet_t ifp = ifp_param;
3781 char * frame_header;
3782 struct if_proto * last_ifproto = NULL;
3783 mbuf_t pkt_first = NULL;
3784 mbuf_t * pkt_next = NULL;
3785 u_int32_t poll_thresh = 0, poll_ival = 0;
3786
3787 KERNEL_DEBUG(DBG_FNC_DLIL_INPUT | DBG_FUNC_START, 0, 0, 0, 0, 0);
3788
3789 if (ext && mode == IFNET_MODEL_INPUT_POLL_ON && cnt > 1 &&
3790 (poll_ival = if_rxpoll_interval_pkts) > 0)
3791 poll_thresh = cnt;
3792
3793 while (m != NULL) {
3794 struct if_proto *ifproto = NULL;
3795 int iorefcnt = 0;
3796 uint32_t pktf_mask; /* pkt flags to preserve */
3797
3798 if (ifp_param == NULL)
3799 ifp = m->m_pkthdr.rcvif;
3800
3801 if ((ifp->if_eflags & IFEF_RXPOLL) && poll_thresh != 0 &&
3802 poll_ival > 0 && (--poll_thresh % poll_ival) == 0)
3803 ifnet_poll(ifp);
3804
3805 /* Check if this mbuf looks valid */
3806 MBUF_INPUT_CHECK(m, ifp);
3807
3808 next_packet = m->m_nextpkt;
3809 m->m_nextpkt = NULL;
3810 frame_header = m->m_pkthdr.pkt_hdr;
3811 m->m_pkthdr.pkt_hdr = NULL;
3812
3813 /*
3814 * Get an IO reference count if the interface is not
3815 * loopback (lo0) and it is attached; lo0 never goes
3816 * away, so optimize for that.
3817 */
3818 if (ifp != lo_ifp) {
3819 if (!ifnet_is_attached(ifp, 1)) {
3820 m_freem(m);
3821 goto next;
3822 }
3823 iorefcnt = 1;
3824 /*
3825 * Preserve the time stamp if it was set.
3826 */
3827 pktf_mask = PKTF_TS_VALID;
3828 } else {
3829 /*
3830 * If this arrived on lo0, preserve interface addr
3831 * info to allow for connectivity between loopback
3832 * and local interface addresses.
3833 */
3834 pktf_mask = (PKTF_LOOP|PKTF_IFAINFO);
3835 }
3836
3837 /* make sure packet comes in clean */
3838 m_classifier_init(m, pktf_mask);
3839
3840 ifp_inc_traffic_class_in(ifp, m);
3841
3842 /* find which protocol family this packet is for */
3843 ifnet_lock_shared(ifp);
3844 error = (*ifp->if_demux)(ifp, m, frame_header,
3845 &protocol_family);
3846 ifnet_lock_done(ifp);
3847 if (error != 0) {
3848 if (error == EJUSTRETURN)
3849 goto next;
3850 protocol_family = 0;
3851 }
3852
3853 if (hwcksum_dbg != 0 && !(ifp->if_flags & IFF_LOOPBACK) &&
3854 !(m->m_pkthdr.pkt_flags & PKTF_LOOP))
3855 dlil_input_cksum_dbg(ifp, m, frame_header,
3856 protocol_family);
3857
3858 /*
3859 * For partial checksum offload, we expect the driver to
3860 * set the start offset indicating the start of the span
3861 * that is covered by the hardware-computed checksum;
3862 * adjust this start offset accordingly because the data
3863 * pointer has been advanced beyond the link-layer header.
3864 *
3865 * Don't adjust if the interface is a bridge member, as
3866 * the adjustment will occur from the context of the
3867 * bridge interface during input.
3868 */
3869 if (ifp->if_bridge == NULL && (m->m_pkthdr.csum_flags &
3870 (CSUM_DATA_VALID | CSUM_PARTIAL)) ==
3871 (CSUM_DATA_VALID | CSUM_PARTIAL)) {
3872 int adj;
3873
3874 if (frame_header == NULL ||
3875 frame_header < (char *)mbuf_datastart(m) ||
3876 frame_header > (char *)m->m_data ||
3877 (adj = (m->m_data - frame_header)) >
3878 m->m_pkthdr.csum_rx_start) {
3879 m->m_pkthdr.csum_data = 0;
3880 m->m_pkthdr.csum_flags &= ~CSUM_DATA_VALID;
3881 hwcksum_in_invalidated++;
3882 } else {
3883 m->m_pkthdr.csum_rx_start -= adj;
3884 }
3885 }
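		/*
		 * Editorial worked example (not part of the original source):
		 * for a plain Ethernet frame the driver leaves frame_header
		 * pointing at the 14-byte link-layer header while m->m_data
		 * has been advanced past it, so adj = m->m_data - frame_header
		 * = 14 and csum_rx_start is reduced by 14 to stay relative to
		 * m->m_data.  If the offsets look inconsistent (e.g. adj
		 * exceeds csum_rx_start), the partial checksum is invalidated
		 * above instead.
		 */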
3886
3887 pktap_input(ifp, protocol_family, m, frame_header);
3888
3889 if (m->m_flags & (M_BCAST|M_MCAST))
3890 atomic_add_64(&ifp->if_imcasts, 1);
3891
3892 /* run interface filters, exclude VLAN packets PR-3586856 */
3893 if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) == 0) {
3894 error = dlil_interface_filters_input(ifp, &m,
3895 &frame_header, protocol_family);
3896 if (error != 0) {
3897 if (error != EJUSTRETURN)
3898 m_freem(m);
3899 goto next;
3900 }
3901 }
3902 if (error != 0 || ((m->m_flags & M_PROMISC) != 0)) {
3903 m_freem(m);
3904 goto next;
3905 }
3906
3907 /* Lookup the protocol attachment to this interface */
3908 if (protocol_family == 0) {
3909 ifproto = NULL;
3910 } else if (last_ifproto != NULL && last_ifproto->ifp == ifp &&
3911 (last_ifproto->protocol_family == protocol_family)) {
3912 VERIFY(ifproto == NULL);
3913 ifproto = last_ifproto;
3914 if_proto_ref(last_ifproto);
3915 } else {
3916 VERIFY(ifproto == NULL);
3917 ifnet_lock_shared(ifp);
3918 /* callee holds a proto refcnt upon success */
3919 ifproto = find_attached_proto(ifp, protocol_family);
3920 ifnet_lock_done(ifp);
3921 }
3922 if (ifproto == NULL) {
3923 /* no protocol for this packet, discard */
3924 m_freem(m);
3925 goto next;
3926 }
3927 if (ifproto != last_ifproto) {
3928 if (last_ifproto != NULL) {
3929 /* pass up the list for the previous protocol */
3930 dlil_ifproto_input(last_ifproto, pkt_first);
3931 pkt_first = NULL;
3932 if_proto_free(last_ifproto);
3933 }
3934 last_ifproto = ifproto;
3935 if_proto_ref(ifproto);
3936 }
3937 /* extend the list */
3938 m->m_pkthdr.pkt_hdr = frame_header;
3939 if (pkt_first == NULL) {
3940 pkt_first = m;
3941 } else {
3942 *pkt_next = m;
3943 }
3944 pkt_next = &m->m_nextpkt;
3945
3946 next:
3947 if (next_packet == NULL && last_ifproto != NULL) {
3948 /* pass up the last list of packets */
3949 dlil_ifproto_input(last_ifproto, pkt_first);
3950 if_proto_free(last_ifproto);
3951 last_ifproto = NULL;
3952 }
3953 if (ifproto != NULL) {
3954 if_proto_free(ifproto);
3955 ifproto = NULL;
3956 }
3957
3958 m = next_packet;
3959
3960 /* update the driver's multicast filter, if needed */
3961 if (ifp->if_updatemcasts > 0 && if_mcasts_update(ifp) == 0)
3962 ifp->if_updatemcasts = 0;
3963 if (iorefcnt == 1)
3964 ifnet_decr_iorefcnt(ifp);
3965 }
3966
3967 KERNEL_DEBUG(DBG_FNC_DLIL_INPUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
3968 }
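
/*
 * Illustrative sketch (editorial, not part of the original source): a
 * hypothetical if_demux implementation of the shape invoked above via
 * (*ifp->if_demux)(ifp, m, frame_header, &protocol_family).  The Ethernet
 * header interpretation is an assumption for illustration; EJUSTRETURN
 * would tell the caller the packet was consumed, while any other error
 * makes the caller fall back to protocol family 0.
 */
static errno_t
hypothetical_demux(ifnet_t ifp, mbuf_t m, char *frame_header,
    protocol_family_t *protocol_family)
{
#pragma unused(ifp, m)
	struct ether_header *eh = (struct ether_header *)(void *)frame_header;

	switch (ntohs(eh->ether_type)) {
	case ETHERTYPE_IP:
		*protocol_family = PF_INET;
		return (0);
	case ETHERTYPE_IPV6:
		*protocol_family = PF_INET6;
		return (0);
	default:
		return (ENOENT);	/* unknown ethertype */
	}
}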
3969
3970 errno_t
3971 if_mcasts_update(struct ifnet *ifp)
3972 {
3973 errno_t err;
3974
3975 err = ifnet_ioctl(ifp, 0, SIOCADDMULTI, NULL);
3976 if (err == EAFNOSUPPORT)
3977 err = 0;
3978 printf("%s: %s %d suspended link-layer multicast membership(s) "
3979 "(err=%d)\n", if_name(ifp),
3980 (err == 0 ? "successfully restored" : "failed to restore"),
3981 ifp->if_updatemcasts, err);
3982
3983 /* just return success */
3984 return (0);
3985 }
3986
3987 /* If ifp is set, we will increment the generation for the interface */
3988 int
3989 dlil_post_complete_msg(struct ifnet *ifp, struct kev_msg *event)
3990 {
3991 if (ifp != NULL) {
3992 ifnet_increment_generation(ifp);
3993 }
3994
3995 #if NECP
3996 necp_update_all_clients();
3997 #endif /* NECP */
3998
3999 return (kev_post_msg(event));
4000 }
4001
4002 #define TMP_IF_PROTO_ARR_SIZE 10
4003 static int
4004 dlil_event_internal(struct ifnet *ifp, struct kev_msg *event, bool update_generation)
4005 {
4006 struct ifnet_filter *filter = NULL;
4007 struct if_proto *proto = NULL;
4008 int if_proto_count = 0;
4009 struct if_proto **tmp_ifproto_arr = NULL;
4010 struct if_proto *tmp_ifproto_stack_arr[TMP_IF_PROTO_ARR_SIZE] = {NULL};
4011 int tmp_ifproto_arr_idx = 0;
4012 bool tmp_malloc = false;
4013
4014 /*
4015 * Pass the event to the interface filters
4016 */
4017 lck_mtx_lock_spin(&ifp->if_flt_lock);
4018 /* prevent filter list from changing in case we drop the lock */
4019 if_flt_monitor_busy(ifp);
4020 TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
4021 if (filter->filt_event != NULL) {
4022 lck_mtx_unlock(&ifp->if_flt_lock);
4023
4024 filter->filt_event(filter->filt_cookie, ifp,
4025 filter->filt_protocol, event);
4026
4027 lck_mtx_lock_spin(&ifp->if_flt_lock);
4028 }
4029 }
4030 /* we're done with the filter list */
4031 if_flt_monitor_unbusy(ifp);
4032 lck_mtx_unlock(&ifp->if_flt_lock);
4033
4034 /* Get an io ref count if the interface is attached */
4035 if (!ifnet_is_attached(ifp, 1))
4036 goto done;
4037
4038 /*
4039 * An embedded tmp_list_entry in if_proto may still get overwritten
4040 * by another thread after we give up the ifnet lock; therefore we
4041 * avoid embedded pointers here.
4042 */
4043 ifnet_lock_shared(ifp);
4044 if_proto_count = dlil_ifp_proto_count(ifp);
4045 if (if_proto_count) {
4046 int i;
4047 VERIFY(ifp->if_proto_hash != NULL);
4048 if (if_proto_count <= TMP_IF_PROTO_ARR_SIZE) {
4049 tmp_ifproto_arr = tmp_ifproto_stack_arr;
4050 } else {
4051 MALLOC(tmp_ifproto_arr, struct if_proto **,
4052 sizeof (*tmp_ifproto_arr) * if_proto_count,
4053 M_TEMP, M_ZERO);
4054 if (tmp_ifproto_arr == NULL) {
4055 ifnet_lock_done(ifp);
4056 goto cleanup;
4057 }
4058 tmp_malloc = true;
4059 }
4060
4061 for (i = 0; i < PROTO_HASH_SLOTS; i++) {
4062 SLIST_FOREACH(proto, &ifp->if_proto_hash[i],
4063 next_hash) {
4064 if_proto_ref(proto);
4065 tmp_ifproto_arr[tmp_ifproto_arr_idx] = proto;
4066 tmp_ifproto_arr_idx++;
4067 }
4068 }
4069 VERIFY(if_proto_count == tmp_ifproto_arr_idx);
4070 }
4071 ifnet_lock_done(ifp);
4072
4073 for (tmp_ifproto_arr_idx = 0; tmp_ifproto_arr_idx < if_proto_count;
4074 tmp_ifproto_arr_idx++) {
4075 proto = tmp_ifproto_arr[tmp_ifproto_arr_idx];
4076 VERIFY(proto != NULL);
4077 proto_media_event eventp =
4078 (proto->proto_kpi == kProtoKPI_v1 ?
4079 proto->kpi.v1.event :
4080 proto->kpi.v2.event);
4081
4082 if (eventp != NULL) {
4083 eventp(ifp, proto->protocol_family,
4084 event);
4085 }
4086 if_proto_free(proto);
4087 }
4088
4089 cleanup:
4090 if (tmp_malloc) {
4091 FREE(tmp_ifproto_arr, M_TEMP);
4092 }
4093
4094 /* Pass the event to the interface */
4095 if (ifp->if_event != NULL)
4096 ifp->if_event(ifp, event);
4097
4098 /* Release the io ref count */
4099 ifnet_decr_iorefcnt(ifp);
4100 done:
4101 return (dlil_post_complete_msg(update_generation ? ifp : NULL, event));
4102 }
4103
4104 errno_t
4105 ifnet_event(ifnet_t ifp, struct kern_event_msg *event)
4106 {
4107 struct kev_msg kev_msg;
4108 int result = 0;
4109
4110 if (ifp == NULL || event == NULL)
4111 return (EINVAL);
4112
4113 bzero(&kev_msg, sizeof (kev_msg));
4114 kev_msg.vendor_code = event->vendor_code;
4115 kev_msg.kev_class = event->kev_class;
4116 kev_msg.kev_subclass = event->kev_subclass;
4117 kev_msg.event_code = event->event_code;
4118 kev_msg.dv[0].data_ptr = &event->event_data[0];
4119 kev_msg.dv[0].data_length = event->total_size - KEV_MSG_HEADER_SIZE;
4120 kev_msg.dv[1].data_length = 0;
4121
4122 result = dlil_event_internal(ifp, &kev_msg, TRUE);
4123
4124 return (result);
4125 }
4126
4127 #if CONFIG_MACF_NET
4128 #include <netinet/ip6.h>
4129 #include <netinet/ip.h>
4130 static int
4131 dlil_get_socket_type(struct mbuf **mp, int family, int raw)
4132 {
4133 struct mbuf *m;
4134 struct ip *ip;
4135 struct ip6_hdr *ip6;
4136 int type = SOCK_RAW;
4137
4138 if (!raw) {
4139 switch (family) {
4140 case PF_INET:
4141 m = m_pullup(*mp, sizeof(struct ip));
4142 if (m == NULL)
4143 break;
4144 *mp = m;
4145 ip = mtod(m, struct ip *);
4146 if (ip->ip_p == IPPROTO_TCP)
4147 type = SOCK_STREAM;
4148 else if (ip->ip_p == IPPROTO_UDP)
4149 type = SOCK_DGRAM;
4150 break;
4151 case PF_INET6:
4152 m = m_pullup(*mp, sizeof(struct ip6_hdr));
4153 if (m == NULL)
4154 break;
4155 *mp = m;
4156 ip6 = mtod(m, struct ip6_hdr *);
4157 if (ip6->ip6_nxt == IPPROTO_TCP)
4158 type = SOCK_STREAM;
4159 else if (ip6->ip6_nxt == IPPROTO_UDP)
4160 type = SOCK_DGRAM;
4161 break;
4162 }
4163 }
4164
4165 return (type);
4166 }
4167 #endif
4168
4169 static void
4170 dlil_count_chain_len(mbuf_t m, struct chain_len_stats *cls)
4171 {
4172 mbuf_t n = m;
4173 int chainlen = 0;
4174
4175 while (n != NULL) {
4176 chainlen++;
4177 n = n->m_next;
4178 }
4179 switch (chainlen) {
4180 case 0:
4181 break;
4182 case 1:
4183 atomic_add_64(&cls->cls_one, 1);
4184 break;
4185 case 2:
4186 atomic_add_64(&cls->cls_two, 1);
4187 break;
4188 case 3:
4189 atomic_add_64(&cls->cls_three, 1);
4190 break;
4191 case 4:
4192 atomic_add_64(&cls->cls_four, 1);
4193 break;
4194 case 5:
4195 default:
4196 atomic_add_64(&cls->cls_five_or_more, 1);
4197 break;
4198 }
4199 }
4200
4201 /*
4202 * dlil_output
4203 *
4204 * Caller should have a lock on the protocol domain if the protocol
4205 * doesn't support finer grained locking. In most cases, the lock
4206 * will be held from the socket layer and won't be released until
4207 * we return back to the socket layer.
4208 *
4209 * This does mean that we must take a protocol lock before we take
4210 * an interface lock if we're going to take both. This makes sense
4211 * because a protocol is likely to interact with an ifp while it
4212 * is under the protocol lock.
4213 *
4214 * An advisory code will be returned if adv is not null. This can be
4215 * used to provide feedback about interface queues to the application;
4216 * see the illustrative caller-side sketch following this function.
4217 */
4218 errno_t
4219 dlil_output(ifnet_t ifp, protocol_family_t proto_family, mbuf_t packetlist,
4220 void *route, const struct sockaddr *dest, int raw, struct flowadv *adv)
4221 {
4222 char *frame_type = NULL;
4223 char *dst_linkaddr = NULL;
4224 int retval = 0;
4225 char frame_type_buffer[MAX_FRAME_TYPE_SIZE * 4];
4226 char dst_linkaddr_buffer[MAX_LINKADDR * 4];
4227 struct if_proto *proto = NULL;
4228 mbuf_t m;
4229 mbuf_t send_head = NULL;
4230 mbuf_t *send_tail = &send_head;
4231 int iorefcnt = 0;
4232 u_int32_t pre = 0, post = 0;
4233 u_int32_t fpkts = 0, fbytes = 0;
4234 int32_t flen = 0;
4235 struct timespec now;
4236 u_int64_t now_nsec;
4237
4238 KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_START, 0, 0, 0, 0, 0);
4239
4240 /*
4241 * Get an io refcnt if the interface is attached to prevent ifnet_detach
4242 * from happening while this operation is in progress
4243 */
4244 if (!ifnet_is_attached(ifp, 1)) {
4245 retval = ENXIO;
4246 goto cleanup;
4247 }
4248 iorefcnt = 1;
4249
4250 VERIFY(ifp->if_output_dlil != NULL);
4251
4252 /* update the driver's multicast filter, if needed */
4253 if (ifp->if_updatemcasts > 0 && if_mcasts_update(ifp) == 0)
4254 ifp->if_updatemcasts = 0;
4255
4256 frame_type = frame_type_buffer;
4257 dst_linkaddr = dst_linkaddr_buffer;
4258
4259 if (raw == 0) {
4260 ifnet_lock_shared(ifp);
4261 /* callee holds a proto refcnt upon success */
4262 proto = find_attached_proto(ifp, proto_family);
4263 if (proto == NULL) {
4264 ifnet_lock_done(ifp);
4265 retval = ENXIO;
4266 goto cleanup;
4267 }
4268 ifnet_lock_done(ifp);
4269 }
4270
4271 preout_again:
4272 if (packetlist == NULL)
4273 goto cleanup;
4274
4275 m = packetlist;
4276 packetlist = packetlist->m_nextpkt;
4277 m->m_nextpkt = NULL;
4278
4279 if (raw == 0) {
4280 proto_media_preout preoutp = (proto->proto_kpi == kProtoKPI_v1 ?
4281 proto->kpi.v1.pre_output : proto->kpi.v2.pre_output);
4282 retval = 0;
4283 if (preoutp != NULL) {
4284 retval = preoutp(ifp, proto_family, &m, dest, route,
4285 frame_type, dst_linkaddr);
4286
4287 if (retval != 0) {
4288 if (retval == EJUSTRETURN)
4289 goto preout_again;
4290 m_freem(m);
4291 goto cleanup;
4292 }
4293 }
4294 }
4295
4296 #if CONFIG_MACF_NET
4297 retval = mac_ifnet_check_transmit(ifp, m, proto_family,
4298 dlil_get_socket_type(&m, proto_family, raw));
4299 if (retval != 0) {
4300 m_freem(m);
4301 goto cleanup;
4302 }
4303 #endif
4304
4305 do {
4306 #if CONFIG_DTRACE
4307 if (!raw && proto_family == PF_INET) {
4308 struct ip *ip = mtod(m, struct ip *);
4309 DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL,
4310 struct ip *, ip, struct ifnet *, ifp,
4311 struct ip *, ip, struct ip6_hdr *, NULL);
4312
4313 } else if (!raw && proto_family == PF_INET6) {
4314 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
4315 DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL,
4316 struct ip6_hdr *, ip6, struct ifnet *, ifp,
4317 struct ip *, NULL, struct ip6_hdr *, ip6);
4318 }
4319 #endif /* CONFIG_DTRACE */
4320
4321 if (raw == 0 && ifp->if_framer != NULL) {
4322 int rcvif_set = 0;
4323
4324 /*
4325 * If this is a broadcast packet that needs to be
4326 * looped back into the system, set the inbound ifp
4327 * to that of the outbound ifp. This will allow
4328 * us to determine that it is a legitimate packet
4329 * for the system. Only set the ifp if it's not
4330 * already set, just to be safe.
4331 */
4332 if ((m->m_flags & (M_BCAST | M_LOOP)) &&
4333 m->m_pkthdr.rcvif == NULL) {
4334 m->m_pkthdr.rcvif = ifp;
4335 rcvif_set = 1;
4336 }
4337
4338 retval = ifp->if_framer(ifp, &m, dest, dst_linkaddr,
4339 frame_type, &pre, &post);
4340 if (retval != 0) {
4341 if (retval != EJUSTRETURN)
4342 m_freem(m);
4343 goto next;
4344 }
4345
4346 /*
4347 * For partial checksum offload, adjust the start
4348 * and stuff offsets based on the prepended header.
4349 */
4350 if ((m->m_pkthdr.csum_flags &
4351 (CSUM_DATA_VALID | CSUM_PARTIAL)) ==
4352 (CSUM_DATA_VALID | CSUM_PARTIAL)) {
4353 m->m_pkthdr.csum_tx_stuff += pre;
4354 m->m_pkthdr.csum_tx_start += pre;
4355 }
4356
4357 if (hwcksum_dbg != 0 && !(ifp->if_flags & IFF_LOOPBACK))
4358 dlil_output_cksum_dbg(ifp, m, pre,
4359 proto_family);
4360
4361 /*
4362 * Clear the ifp if it was set above, and to be
4363 * safe, only if it is still the same as the
4364 * outbound ifp we have in context. If it was
4365 * looped back, then a copy of it was sent to the
4366 * loopback interface with the rcvif set, and we
4367 * are clearing the one that will go down to the
4368 * layer below.
4369 */
4370 if (rcvif_set && m->m_pkthdr.rcvif == ifp)
4371 m->m_pkthdr.rcvif = NULL;
4372 }
4373
4374 /*
4375 * Let interface filters (if any) do their thing ...
4376 */
4377 /* Do not pass VLAN tagged packets to filters PR-3586856 */
4378 if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) == 0) {
4379 retval = dlil_interface_filters_output(ifp,
4380 &m, proto_family);
4381 if (retval != 0) {
4382 if (retval != EJUSTRETURN)
4383 m_freem(m);
4384 goto next;
4385 }
4386 }
4387 /*
4388 * Strip away the M_PROTO1 bit prior to sending the packet
4389 * to the driver, as this flag may be used by the driver.
4390 */
4391 m->m_flags &= ~M_PROTO1;
4392
4393 /*
4394 * If the underlying interface is not capable of handling a
4395 * packet whose data portion spans across physically disjoint
4396 * pages, we need to "normalize" the packet so that we pass
4397 * down a chain of mbufs where each mbuf points to a span that
4398 * resides within a single system page. If the packet does
4399 * not cross page(s), the following is a no-op.
4400 */
4401 if (!(ifp->if_hwassist & IFNET_MULTIPAGES)) {
4402 if ((m = m_normalize(m)) == NULL)
4403 goto next;
4404 }
4405
4406 /*
4407 * If this is a TSO packet, make sure the interface still
4408 * advertises TSO capability.
4409 */
4410 if (TSO_IPV4_NOTOK(ifp, m) || TSO_IPV6_NOTOK(ifp, m)) {
4411 retval = EMSGSIZE;
4412 m_freem(m);
4413 goto cleanup;
4414 }
4415
4416 ifp_inc_traffic_class_out(ifp, m);
4417 pktap_output(ifp, proto_family, m, pre, post);
4418
4419 /*
4420 * Count the number of elements in the mbuf chain
4421 */
4422 if (tx_chain_len_count) {
4423 dlil_count_chain_len(m, &tx_chain_len_stats);
4424 }
4425
4426 /*
4427 * Record timestamp; ifnet_enqueue() will use this info
4428 * rather than redoing the work. An optimization could
4429 * involve doing this just once at the top, if there are
4430 * no interface filters attached, but that's probably
4431 * not a big deal.
4432 */
4433 nanouptime(&now);
4434 net_timernsec(&now, &now_nsec);
4435 (void) mbuf_set_timestamp(m, now_nsec, TRUE);
4436
4437 /*
4438 * Discard partial sum information if this packet originated
4439 * from another interface; the packet would already have the
4440 * final checksum and we shouldn't recompute it.
4441 */
4442 if ((m->m_pkthdr.pkt_flags & PKTF_FORWARDED) &&
4443 (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID|CSUM_PARTIAL)) ==
4444 (CSUM_DATA_VALID|CSUM_PARTIAL)) {
4445 m->m_pkthdr.csum_flags &= ~CSUM_TX_FLAGS;
4446 m->m_pkthdr.csum_data = 0;
4447 }
4448
4449 /*
4450 * Finally, call the driver.
4451 */
4452 if (ifp->if_eflags & (IFEF_SENDLIST | IFEF_ENQUEUE_MULTI)) {
4453 if (m->m_pkthdr.pkt_flags & PKTF_FORWARDED) {
4454 flen += (m_pktlen(m) - (pre + post));
4455 m->m_pkthdr.pkt_flags &= ~PKTF_FORWARDED;
4456 }
4457 *send_tail = m;
4458 send_tail = &m->m_nextpkt;
4459 } else {
4460 if (m->m_pkthdr.pkt_flags & PKTF_FORWARDED) {
4461 flen = (m_pktlen(m) - (pre + post));
4462 m->m_pkthdr.pkt_flags &= ~PKTF_FORWARDED;
4463 } else {
4464 flen = 0;
4465 }
4466 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START,
4467 0, 0, 0, 0, 0);
4468 retval = (*ifp->if_output_dlil)(ifp, m);
4469 if (retval == EQFULL || retval == EQSUSPENDED) {
4470 if (adv != NULL && adv->code == FADV_SUCCESS) {
4471 adv->code = (retval == EQFULL ?
4472 FADV_FLOW_CONTROLLED :
4473 FADV_SUSPENDED);
4474 }
4475 retval = 0;
4476 }
4477 if (retval == 0 && flen > 0) {
4478 fbytes += flen;
4479 fpkts++;
4480 }
4481 if (retval != 0 && dlil_verbose) {
4482 printf("%s: output error on %s retval = %d\n",
4483 __func__, if_name(ifp),
4484 retval);
4485 }
4486 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END,
4487 0, 0, 0, 0, 0);
4488 }
4489 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
4490
4491 next:
4492 m = packetlist;
4493 if (m != NULL) {
4494 packetlist = packetlist->m_nextpkt;
4495 m->m_nextpkt = NULL;
4496 }
4497 } while (m != NULL);
4498
4499 if (send_head != NULL) {
4500 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START,
4501 0, 0, 0, 0, 0);
4502 if (ifp->if_eflags & IFEF_SENDLIST) {
4503 retval = (*ifp->if_output_dlil)(ifp, send_head);
4504 if (retval == EQFULL || retval == EQSUSPENDED) {
4505 if (adv != NULL) {
4506 adv->code = (retval == EQFULL ?
4507 FADV_FLOW_CONTROLLED :
4508 FADV_SUSPENDED);
4509 }
4510 retval = 0;
4511 }
4512 if (retval == 0 && flen > 0) {
4513 fbytes += flen;
4514 fpkts++;
4515 }
4516 if (retval != 0 && dlil_verbose) {
4517 printf("%s: output error on %s retval = %d\n",
4518 __func__, if_name(ifp), retval);
4519 }
4520 } else {
4521 struct mbuf *send_m;
4522 int enq_cnt = 0;
4523 VERIFY(ifp->if_eflags & IFEF_ENQUEUE_MULTI);
4524 while (send_head != NULL) {
4525 send_m = send_head;
4526 send_head = send_m->m_nextpkt;
4527 send_m->m_nextpkt = NULL;
4528 retval = (*ifp->if_output_dlil)(ifp, send_m);
4529 if (retval == EQFULL || retval == EQSUSPENDED) {
4530 if (adv != NULL) {
4531 adv->code = (retval == EQFULL ?
4532 FADV_FLOW_CONTROLLED :
4533 FADV_SUSPENDED);
4534 }
4535 retval = 0;
4536 }
4537 if (retval == 0) {
4538 enq_cnt++;
4539 if (flen > 0)
4540 fpkts++;
4541 }
4542 if (retval != 0 && dlil_verbose) {
4543 printf("%s: output error on %s "
4544 "retval = %d\n",
4545 __func__, if_name(ifp), retval);
4546 }
4547 }
4548 if (enq_cnt > 0) {
4549 fbytes += flen;
4550 ifnet_start(ifp);
4551 }
4552 }
4553 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
4554 }
4555
4556 KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
4557
4558 cleanup:
4559 if (fbytes > 0)
4560 ifp->if_fbytes += fbytes;
4561 if (fpkts > 0)
4562 ifp->if_fpackets += fpkts;
4563 if (proto != NULL)
4564 if_proto_free(proto);
4565 if (packetlist) /* if any packets are left, clean up */
4566 mbuf_freem_list(packetlist);
4567 if (retval == EJUSTRETURN)
4568 retval = 0;
4569 if (iorefcnt == 1)
4570 ifnet_decr_iorefcnt(ifp);
4571
4572 return (retval);
4573 }
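
/*
 * Illustrative caller-side sketch for dlil_output() above (editorial, not
 * part of the original source).  It assumes struct flowadv exposes the
 * 'code' field checked inside dlil_output() and that FADV_SUCCESS is its
 * neutral value; handle_flow_control() is an assumed throttling hook.
 */
static errno_t
hypothetical_proto_send(ifnet_t ifp, mbuf_t chain, const struct sockaddr *dest)
{
	struct flowadv adv;
	errno_t err;

	adv.code = FADV_SUCCESS;
	err = dlil_output(ifp, PF_INET, chain, NULL, dest, 0, &adv);

	/*
	 * A zero return with an advisory other than FADV_SUCCESS means the
	 * packets were accepted but the interface queue is filling up (or
	 * suspended), so the caller may throttle the flow.
	 */
	if (err == 0 && adv.code != FADV_SUCCESS)
		handle_flow_control(adv.code);	/* assumed */

	return (err);
}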
4574
4575 errno_t
4576 ifnet_ioctl(ifnet_t ifp, protocol_family_t proto_fam, u_long ioctl_code,
4577 void *ioctl_arg)
4578 {
4579 struct ifnet_filter *filter;
4580 int retval = EOPNOTSUPP;
4581 int result = 0;
4582
4583 if (ifp == NULL || ioctl_code == 0)
4584 return (EINVAL);
4585
4586 /* Get an io ref count if the interface is attached */
4587 if (!ifnet_is_attached(ifp, 1))
4588 return (EOPNOTSUPP);
4589
4590 /*
4591 * Run the interface filters first.
4592 * We want to run all filters before calling the protocol,
4593 * interface family, or interface.
4594 */
4595 lck_mtx_lock_spin(&ifp->if_flt_lock);
4596 /* prevent filter list from changing in case we drop the lock */
4597 if_flt_monitor_busy(ifp);
4598 TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
4599 if (filter->filt_ioctl != NULL && (filter->filt_protocol == 0 ||
4600 filter->filt_protocol == proto_fam)) {
4601 lck_mtx_unlock(&ifp->if_flt_lock);
4602
4603 result = filter->filt_ioctl(filter->filt_cookie, ifp,
4604 proto_fam, ioctl_code, ioctl_arg);
4605
4606 lck_mtx_lock_spin(&ifp->if_flt_lock);
4607
4608 /* Only update retval if no one has handled the ioctl */
4609 if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
4610 if (result == ENOTSUP)
4611 result = EOPNOTSUPP;
4612 retval = result;
4613 if (retval != 0 && retval != EOPNOTSUPP) {
4614 /* we're done with the filter list */
4615 if_flt_monitor_unbusy(ifp);
4616 lck_mtx_unlock(&ifp->if_flt_lock);
4617 goto cleanup;
4618 }
4619 }
4620 }
4621 }
4622 /* we're done with the filter list */
4623 if_flt_monitor_unbusy(ifp);
4624 lck_mtx_unlock(&ifp->if_flt_lock);
4625
4626 /* Allow the protocol to handle the ioctl */
4627 if (proto_fam != 0) {
4628 struct if_proto *proto;
4629
4630 /* callee holds a proto refcnt upon success */
4631 ifnet_lock_shared(ifp);
4632 proto = find_attached_proto(ifp, proto_fam);
4633 ifnet_lock_done(ifp);
4634 if (proto != NULL) {
4635 proto_media_ioctl ioctlp =
4636 (proto->proto_kpi == kProtoKPI_v1 ?
4637 proto->kpi.v1.ioctl : proto->kpi.v2.ioctl);
4638 result = EOPNOTSUPP;
4639 if (ioctlp != NULL)
4640 result = ioctlp(ifp, proto_fam, ioctl_code,
4641 ioctl_arg);
4642 if_proto_free(proto);
4643
4644 /* Only update retval if no one has handled the ioctl */
4645 if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
4646 if (result == ENOTSUP)
4647 result = EOPNOTSUPP;
4648 retval = result;
4649 if (retval && retval != EOPNOTSUPP)
4650 goto cleanup;
4651 }
4652 }
4653 }
4654
4655 /* retval is either 0 or EOPNOTSUPP */
4656
4657 /*
4658 * Let the interface handle this ioctl.
4659 * If it returns EOPNOTSUPP, ignore it; we may have
4660 * already handled this in the protocol or family.
4661 */
4662 if (ifp->if_ioctl)
4663 result = (*ifp->if_ioctl)(ifp, ioctl_code, ioctl_arg);
4664
4665 /* Only update retval if no one has handled the ioctl */
4666 if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
4667 if (result == ENOTSUP)
4668 result = EOPNOTSUPP;
4669 retval = result;
4670 if (retval && retval != EOPNOTSUPP) {
4671 goto cleanup;
4672 }
4673 }
4674
4675 cleanup:
4676 if (retval == EJUSTRETURN)
4677 retval = 0;
4678
4679 ifnet_decr_iorefcnt(ifp);
4680
4681 return (retval);
4682 }
4683
4684 __private_extern__ errno_t
4685 dlil_set_bpf_tap(ifnet_t ifp, bpf_tap_mode mode, bpf_packet_func callback)
4686 {
4687 errno_t error = 0;
4688
4689
4690 if (ifp->if_set_bpf_tap) {
4691 /* Get an io reference on the interface if it is attached */
4692 if (!ifnet_is_attached(ifp, 1))
4693 return (ENXIO);
4694 error = ifp->if_set_bpf_tap(ifp, mode, callback);
4695 ifnet_decr_iorefcnt(ifp);
4696 }
4697 return (error);
4698 }
4699
4700 errno_t
4701 dlil_resolve_multi(struct ifnet *ifp, const struct sockaddr *proto_addr,
4702 struct sockaddr *ll_addr, size_t ll_len)
4703 {
4704 errno_t result = EOPNOTSUPP;
4705 struct if_proto *proto;
4706 const struct sockaddr *verify;
4707 proto_media_resolve_multi resolvep;
4708
4709 if (!ifnet_is_attached(ifp, 1))
4710 return (result);
4711
4712 bzero(ll_addr, ll_len);
4713
4714 /* Call the protocol first; callee holds a proto refcnt upon success */
4715 ifnet_lock_shared(ifp);
4716 proto = find_attached_proto(ifp, proto_addr->sa_family);
4717 ifnet_lock_done(ifp);
4718 if (proto != NULL) {
4719 resolvep = (proto->proto_kpi == kProtoKPI_v1 ?
4720 proto->kpi.v1.resolve_multi : proto->kpi.v2.resolve_multi);
4721 if (resolvep != NULL)
4722 result = resolvep(ifp, proto_addr,
4723 (struct sockaddr_dl *)(void *)ll_addr, ll_len);
4724 if_proto_free(proto);
4725 }
4726
4727 /* Let the interface verify the multicast address */
4728 if ((result == EOPNOTSUPP || result == 0) && ifp->if_check_multi) {
4729 if (result == 0)
4730 verify = ll_addr;
4731 else
4732 verify = proto_addr;
4733 result = ifp->if_check_multi(ifp, verify);
4734 }
4735
4736 ifnet_decr_iorefcnt(ifp);
4737 return (result);
4738 }
4739
4740 __private_extern__ errno_t
4741 dlil_send_arp_internal(ifnet_t ifp, u_short arpop,
4742 const struct sockaddr_dl *sender_hw, const struct sockaddr *sender_proto,
4743 const struct sockaddr_dl *target_hw, const struct sockaddr *target_proto)
4744 {
4745 struct if_proto *proto;
4746 errno_t result = 0;
4747
4748 /* callee holds a proto refcnt upon success */
4749 ifnet_lock_shared(ifp);
4750 proto = find_attached_proto(ifp, target_proto->sa_family);
4751 ifnet_lock_done(ifp);
4752 if (proto == NULL) {
4753 result = ENOTSUP;
4754 } else {
4755 proto_media_send_arp arpp;
4756 arpp = (proto->proto_kpi == kProtoKPI_v1 ?
4757 proto->kpi.v1.send_arp : proto->kpi.v2.send_arp);
4758 if (arpp == NULL) {
4759 result = ENOTSUP;
4760 } else {
4761 switch (arpop) {
4762 case ARPOP_REQUEST:
4763 arpstat.txrequests++;
4764 if (target_hw != NULL)
4765 arpstat.txurequests++;
4766 break;
4767 case ARPOP_REPLY:
4768 arpstat.txreplies++;
4769 break;
4770 }
4771 result = arpp(ifp, arpop, sender_hw, sender_proto,
4772 target_hw, target_proto);
4773 }
4774 if_proto_free(proto);
4775 }
4776
4777 return (result);
4778 }
4779
4780 struct net_thread_marks { };
4781 static const struct net_thread_marks net_thread_marks_base = { };
4782
4783 __private_extern__ const net_thread_marks_t net_thread_marks_none =
4784 &net_thread_marks_base;
4785
4786 __private_extern__ net_thread_marks_t
4787 net_thread_marks_push(u_int32_t push)
4788 {
4789 static const char *const base = (const void*)&net_thread_marks_base;
4790 u_int32_t pop = 0;
4791
4792 if (push != 0) {
4793 struct uthread *uth = get_bsdthread_info(current_thread());
4794
4795 pop = push & ~uth->uu_network_marks;
4796 if (pop != 0)
4797 uth->uu_network_marks |= pop;
4798 }
4799
4800 return ((net_thread_marks_t)&base[pop]);
4801 }
4802
4803 __private_extern__ net_thread_marks_t
4804 net_thread_unmarks_push(u_int32_t unpush)
4805 {
4806 static const char *const base = (const void*)&net_thread_marks_base;
4807 u_int32_t unpop = 0;
4808
4809 if (unpush != 0) {
4810 struct uthread *uth = get_bsdthread_info(current_thread());
4811
4812 unpop = unpush & uth->uu_network_marks;
4813 if (unpop != 0)
4814 uth->uu_network_marks &= ~unpop;
4815 }
4816
4817 return ((net_thread_marks_t)&base[unpop]);
4818 }
4819
4820 __private_extern__ void
4821 net_thread_marks_pop(net_thread_marks_t popx)
4822 {
4823 static const char *const base = (const void*)&net_thread_marks_base;
4824 const ptrdiff_t pop = (const char *)popx - (const char *)base;
4825
4826 if (pop != 0) {
4827 static const ptrdiff_t ones = (ptrdiff_t)(u_int32_t)~0U;
4828 struct uthread *uth = get_bsdthread_info(current_thread());
4829
4830 VERIFY((pop & ones) == pop);
4831 VERIFY((ptrdiff_t)(uth->uu_network_marks & pop) == pop);
4832 uth->uu_network_marks &= ~pop;
4833 }
4834 }
4835
4836 __private_extern__ void
4837 net_thread_unmarks_pop(net_thread_marks_t unpopx)
4838 {
4839 static const char *const base = (const void*)&net_thread_marks_base;
4840 ptrdiff_t unpop = (const char *)unpopx - (const char *)base;
4841
4842 if (unpop != 0) {
4843 static const ptrdiff_t ones = (ptrdiff_t)(u_int32_t)~0U;
4844 struct uthread *uth = get_bsdthread_info(current_thread());
4845
4846 VERIFY((unpop & ones) == unpop);
4847 VERIFY((ptrdiff_t)(uth->uu_network_marks & unpop) == 0);
4848 uth->uu_network_marks |= unpop;
4849 }
4850 }
4851
4852 __private_extern__ u_int32_t
4853 net_thread_is_marked(u_int32_t check)
4854 {
4855 if (check != 0) {
4856 struct uthread *uth = get_bsdthread_info(current_thread());
4857 return (uth->uu_network_marks & check);
4858 }
4859 else
4860 return (0);
4861 }
4862
4863 __private_extern__ u_int32_t
4864 net_thread_is_unmarked(u_int32_t check)
4865 {
4866 if (check != 0) {
4867 struct uthread *uth = get_bsdthread_info(current_thread());
4868 return (~uth->uu_network_marks & check);
4869 }
4870 else
4871 return (0);
4872 }
4873
4874 static __inline__ int
4875 _is_announcement(const struct sockaddr_in * sender_sin,
4876 const struct sockaddr_in * target_sin)
4877 {
4878 if (sender_sin == NULL) {
4879 return (FALSE);
4880 }
4881 return (sender_sin->sin_addr.s_addr == target_sin->sin_addr.s_addr);
4882 }
4883
4884 __private_extern__ errno_t
4885 dlil_send_arp(ifnet_t ifp, u_short arpop, const struct sockaddr_dl *sender_hw,
4886 const struct sockaddr *sender_proto, const struct sockaddr_dl *target_hw,
4887 const struct sockaddr *target_proto0, u_int32_t rtflags)
4888 {
4889 errno_t result = 0;
4890 const struct sockaddr_in * sender_sin;
4891 const struct sockaddr_in * target_sin;
4892 struct sockaddr_inarp target_proto_sinarp;
4893 struct sockaddr *target_proto = (void *)(uintptr_t)target_proto0;
4894
4895 if (target_proto == NULL || (sender_proto != NULL &&
4896 sender_proto->sa_family != target_proto->sa_family))
4897 return (EINVAL);
4898
4899 /*
4900 * If the target is a (default) router, provide that
4901 * information to the send_arp callback routine.
4902 */
4903 if (rtflags & RTF_ROUTER) {
4904 bcopy(target_proto, &target_proto_sinarp,
4905 sizeof (struct sockaddr_in));
4906 target_proto_sinarp.sin_other |= SIN_ROUTER;
4907 target_proto = (struct sockaddr *)&target_proto_sinarp;
4908 }
4909
4910 /*
4911 * If this is an ARP request and the target IP is IPv4LL,
4912 * send the request on all interfaces. The exception is
4913 * an announcement, which must only appear on the specific
4914 * interface.
4915 */
4916 sender_sin = (struct sockaddr_in *)(void *)(uintptr_t)sender_proto;
4917 target_sin = (struct sockaddr_in *)(void *)(uintptr_t)target_proto;
4918 if (target_proto->sa_family == AF_INET &&
4919 IN_LINKLOCAL(ntohl(target_sin->sin_addr.s_addr)) &&
4920 ipv4_ll_arp_aware != 0 && arpop == ARPOP_REQUEST &&
4921 !_is_announcement(target_sin, sender_sin)) {
4922 ifnet_t *ifp_list;
4923 u_int32_t count;
4924 u_int32_t ifp_on;
4925
4926 result = ENOTSUP;
4927
4928 if (ifnet_list_get(IFNET_FAMILY_ANY, &ifp_list, &count) == 0) {
4929 for (ifp_on = 0; ifp_on < count; ifp_on++) {
4930 errno_t new_result;
4931 ifaddr_t source_hw = NULL;
4932 ifaddr_t source_ip = NULL;
4933 struct sockaddr_in source_ip_copy;
4934 struct ifnet *cur_ifp = ifp_list[ifp_on];
4935
4936 /*
4937 * Only arp on interfaces marked for IPv4LL
4938 * ARPing. This may mean that we don't ARP on
4939 * the interface the subnet route points to.
4940 */
4941 if (!(cur_ifp->if_eflags & IFEF_ARPLL))
4942 continue;
4943
4944 /* Find the source IP address */
4945 ifnet_lock_shared(cur_ifp);
4946 source_hw = cur_ifp->if_lladdr;
4947 TAILQ_FOREACH(source_ip, &cur_ifp->if_addrhead,
4948 ifa_link) {
4949 IFA_LOCK(source_ip);
4950 if (source_ip->ifa_addr != NULL &&
4951 source_ip->ifa_addr->sa_family ==
4952 AF_INET) {
4953 /* Copy the source IP address */
4954 source_ip_copy =
4955 *(struct sockaddr_in *)
4956 (void *)source_ip->ifa_addr;
4957 IFA_UNLOCK(source_ip);
4958 break;
4959 }
4960 IFA_UNLOCK(source_ip);
4961 }
4962
4963 /* No IP Source, don't arp */
4964 if (source_ip == NULL) {
4965 ifnet_lock_done(cur_ifp);
4966 continue;
4967 }
4968
4969 IFA_ADDREF(source_hw);
4970 ifnet_lock_done(cur_ifp);
4971
4972 /* Send the ARP */
4973 new_result = dlil_send_arp_internal(cur_ifp,
4974 arpop, (struct sockaddr_dl *)(void *)
4975 source_hw->ifa_addr,
4976 (struct sockaddr *)&source_ip_copy, NULL,
4977 target_proto);
4978
4979 IFA_REMREF(source_hw);
4980 if (result == ENOTSUP) {
4981 result = new_result;
4982 }
4983 }
4984 ifnet_list_free(ifp_list);
4985 }
4986 } else {
4987 result = dlil_send_arp_internal(ifp, arpop, sender_hw,
4988 sender_proto, target_hw, target_proto);
4989 }
4990
4991 return (result);
4992 }
4993
4994 /*
4995 * Caller must hold ifnet head lock.
4996 */
4997 static int
4998 ifnet_lookup(struct ifnet *ifp)
4999 {
5000 struct ifnet *_ifp;
5001
5002 LCK_RW_ASSERT(&ifnet_head_lock, LCK_RW_ASSERT_HELD);
5003 TAILQ_FOREACH(_ifp, &ifnet_head, if_link) {
5004 if (_ifp == ifp)
5005 break;
5006 }
5007 return (_ifp != NULL);
5008 }
5009
5010 /*
5011 * Caller has to pass a non-zero refio argument to get an
5012 * IO reference count. This will prevent ifnet_detach from
5013 * being called while there are outstanding IO reference counts.
5014 */
5015 int
5016 ifnet_is_attached(struct ifnet *ifp, int refio)
5017 {
5018 int ret;
5019
5020 lck_mtx_lock_spin(&ifp->if_ref_lock);
5021 if ((ret = IF_FULLY_ATTACHED(ifp))) {
5022 if (refio > 0)
5023 ifp->if_refio++;
5024 }
5025 lck_mtx_unlock(&ifp->if_ref_lock);
5026
5027 return (ret);
5028 }
5029
5030 /*
5031 * Caller must ensure the interface is attached; the assumption is that
5032 * there is at least an outstanding IO reference count held already.
5033 * Most callers would call ifnet_is_attached() instead.
5034 */
5035 void
5036 ifnet_incr_iorefcnt(struct ifnet *ifp)
5037 {
5038 lck_mtx_lock_spin(&ifp->if_ref_lock);
5039 VERIFY(IF_FULLY_ATTACHED(ifp));
5040 VERIFY(ifp->if_refio > 0);
5041 ifp->if_refio++;
5042 lck_mtx_unlock(&ifp->if_ref_lock);
5043 }
5044
5045 void
5046 ifnet_decr_iorefcnt(struct ifnet *ifp)
5047 {
5048 lck_mtx_lock_spin(&ifp->if_ref_lock);
5049 VERIFY(ifp->if_refio > 0);
5050 VERIFY(ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING));
5051 ifp->if_refio--;
5052
5053 /*
5054 * If there are no more outstanding IO references, wake up the
5055 * ifnet_detach thread if the detaching flag is set.
5056 */
5057 if (ifp->if_refio == 0 && (ifp->if_refflags & IFRF_DETACHING))
5058 wakeup(&(ifp->if_refio));
5059
5060 lck_mtx_unlock(&ifp->if_ref_lock);
5061 }
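
/*
 * Illustrative sketch (editorial, not part of the original source): the
 * IO-reference pattern used throughout this file.  Passing a non-zero
 * second argument to ifnet_is_attached() takes an IO reference that holds
 * off ifnet_detach() until the matching ifnet_decr_iorefcnt(); the work
 * in the middle is a placeholder.
 */
static errno_t
hypothetical_ifp_operation(struct ifnet *ifp)
{
	errno_t err = 0;

	if (!ifnet_is_attached(ifp, 1))
		return (ENXIO);	/* detached or detaching; nothing to do */

	/* ... operate on ifp safely here ... */

	ifnet_decr_iorefcnt(ifp);
	return (err);
}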
5062
5063 static void
5064 dlil_if_trace(struct dlil_ifnet *dl_if, int refhold)
5065 {
5066 struct dlil_ifnet_dbg *dl_if_dbg = (struct dlil_ifnet_dbg *)dl_if;
5067 ctrace_t *tr;
5068 u_int32_t idx;
5069 u_int16_t *cnt;
5070
5071 if (!(dl_if->dl_if_flags & DLIF_DEBUG)) {
5072 panic("%s: dl_if %p has no debug structure", __func__, dl_if);
5073 /* NOTREACHED */
5074 }
5075
5076 if (refhold) {
5077 cnt = &dl_if_dbg->dldbg_if_refhold_cnt;
5078 tr = dl_if_dbg->dldbg_if_refhold;
5079 } else {
5080 cnt = &dl_if_dbg->dldbg_if_refrele_cnt;
5081 tr = dl_if_dbg->dldbg_if_refrele;
5082 }
5083
5084 idx = atomic_add_16_ov(cnt, 1) % IF_REF_TRACE_HIST_SIZE;
5085 ctrace_record(&tr[idx]);
5086 }
5087
5088 errno_t
5089 dlil_if_ref(struct ifnet *ifp)
5090 {
5091 struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
5092
5093 if (dl_if == NULL)
5094 return (EINVAL);
5095
5096 lck_mtx_lock_spin(&dl_if->dl_if_lock);
5097 ++dl_if->dl_if_refcnt;
5098 if (dl_if->dl_if_refcnt == 0) {
5099 panic("%s: wraparound refcnt for ifp=%p", __func__, ifp);
5100 /* NOTREACHED */
5101 }
5102 if (dl_if->dl_if_trace != NULL)
5103 (*dl_if->dl_if_trace)(dl_if, TRUE);
5104 lck_mtx_unlock(&dl_if->dl_if_lock);
5105
5106 return (0);
5107 }
5108
5109 errno_t
5110 dlil_if_free(struct ifnet *ifp)
5111 {
5112 struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
5113 bool need_release = FALSE;
5114
5115 if (dl_if == NULL)
5116 return (EINVAL);
5117
5118 lck_mtx_lock_spin(&dl_if->dl_if_lock);
5119 switch (dl_if->dl_if_refcnt) {
5120 case 0:
5121 panic("%s: negative refcnt for ifp=%p", __func__, ifp);
5122 /* NOTREACHED */
5123 break;
5124 case 1:
5125 if ((ifp->if_refflags & IFRF_EMBRYONIC) != 0) {
5126 need_release = TRUE;
5127 }
5128 break;
5129 default:
5130 break;
5131 }
5132 --dl_if->dl_if_refcnt;
5133 if (dl_if->dl_if_trace != NULL)
5134 (*dl_if->dl_if_trace)(dl_if, FALSE);
5135 lck_mtx_unlock(&dl_if->dl_if_lock);
5136 if (need_release) {
5137 dlil_if_release(ifp);
5138 }
5139 return (0);
5140 }
5141
5142 static errno_t
5143 dlil_attach_protocol_internal(struct if_proto *proto,
5144 const struct ifnet_demux_desc *demux_list, u_int32_t demux_count,
5145 uint32_t * proto_count)
5146 {
5147 struct kev_dl_proto_data ev_pr_data;
5148 struct ifnet *ifp = proto->ifp;
5149 int retval = 0;
5150 u_int32_t hash_value = proto_hash_value(proto->protocol_family);
5151 struct if_proto *prev_proto;
5152 struct if_proto *_proto;
5153
5154 /* callee holds a proto refcnt upon success */
5155 ifnet_lock_exclusive(ifp);
5156 _proto = find_attached_proto(ifp, proto->protocol_family);
5157 if (_proto != NULL) {
5158 ifnet_lock_done(ifp);
5159 if_proto_free(_proto);
5160 return (EEXIST);
5161 }
5162
5163 /*
5164 * Call family module add_proto routine so it can refine the
5165 * demux descriptors as it wishes.
5166 */
5167 retval = ifp->if_add_proto(ifp, proto->protocol_family, demux_list,
5168 demux_count);
5169 if (retval) {
5170 ifnet_lock_done(ifp);
5171 return (retval);
5172 }
5173
5174 /*
5175 * Insert the protocol in the hash
5176 */
5177 prev_proto = SLIST_FIRST(&ifp->if_proto_hash[hash_value]);
5178 while (prev_proto != NULL && SLIST_NEXT(prev_proto, next_hash) != NULL)
5179 prev_proto = SLIST_NEXT(prev_proto, next_hash);
5180 if (prev_proto)
5181 SLIST_INSERT_AFTER(prev_proto, proto, next_hash);
5182 else
5183 SLIST_INSERT_HEAD(&ifp->if_proto_hash[hash_value],
5184 proto, next_hash);
5185
5186 /* hold a proto refcnt for attach */
5187 if_proto_ref(proto);
5188
5189 /*
5190 * The reserved field carries the number of protocols still attached
5191 * (subject to change)
5192 */
5193 ev_pr_data.proto_family = proto->protocol_family;
5194 ev_pr_data.proto_remaining_count = dlil_ifp_proto_count(ifp);
5195 ifnet_lock_done(ifp);
5196
5197 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_ATTACHED,
5198 (struct net_event_data *)&ev_pr_data,
5199 sizeof (struct kev_dl_proto_data));
5200 if (proto_count != NULL) {
5201 *proto_count = ev_pr_data.proto_remaining_count;
5202 }
5203 return (retval);
5204 }
5205
5206 errno_t
5207 ifnet_attach_protocol(ifnet_t ifp, protocol_family_t protocol,
5208 const struct ifnet_attach_proto_param *proto_details)
5209 {
5210 int retval = 0;
5211 struct if_proto *ifproto = NULL;
5212 uint32_t proto_count = 0;
5213
5214 ifnet_head_lock_shared();
5215 if (ifp == NULL || protocol == 0 || proto_details == NULL) {
5216 retval = EINVAL;
5217 goto end;
5218 }
5219 /* Check that the interface is in the global list */
5220 if (!ifnet_lookup(ifp)) {
5221 retval = ENXIO;
5222 goto end;
5223 }
5224
5225 ifproto = zalloc(dlif_proto_zone);
5226 if (ifproto == NULL) {
5227 retval = ENOMEM;
5228 goto end;
5229 }
5230 bzero(ifproto, dlif_proto_size);
5231
5232 /* refcnt held above during lookup */
5233 ifproto->ifp = ifp;
5234 ifproto->protocol_family = protocol;
5235 ifproto->proto_kpi = kProtoKPI_v1;
5236 ifproto->kpi.v1.input = proto_details->input;
5237 ifproto->kpi.v1.pre_output = proto_details->pre_output;
5238 ifproto->kpi.v1.event = proto_details->event;
5239 ifproto->kpi.v1.ioctl = proto_details->ioctl;
5240 ifproto->kpi.v1.detached = proto_details->detached;
5241 ifproto->kpi.v1.resolve_multi = proto_details->resolve;
5242 ifproto->kpi.v1.send_arp = proto_details->send_arp;
5243
5244 retval = dlil_attach_protocol_internal(ifproto,
5245 proto_details->demux_list, proto_details->demux_count,
5246 &proto_count);
5247
5248 end:
5249 if (retval != 0 && retval != EEXIST && ifp != NULL) {
5250 DLIL_PRINTF("%s: failed to attach v1 protocol %d (err=%d)\n",
5251 if_name(ifp), protocol, retval);
5252 } else {
5253 if (dlil_verbose) {
5254 printf("%s: attached v1 protocol %d (count = %d)\n",
5255 if_name(ifp),
5256 protocol, proto_count);
5257 }
5258 }
5259 ifnet_head_done();
5260 if (retval == 0) {
5261 } else if (ifproto != NULL) {
5262 zfree(dlif_proto_zone, ifproto);
5263 }
5264 return (retval);
5265 }
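
/*
 * Illustrative sketch (editorial, not part of the original source): a
 * hypothetical v1 protocol attach using the KPI above.  The demux
 * descriptor layout (type/data/datalen) and DLIL_DESC_ETYPE2 are assumed
 * from the public KPI headers; 0x88B5 is an arbitrary example ethertype
 * and my_family is whatever protocol_family_t value the caller owns.
 */
static errno_t
my_proto_input(ifnet_t ifp, protocol_family_t protocol, mbuf_t m,
    char *frame_header)
{
#pragma unused(ifp, protocol, frame_header)
	m_freem(m);		/* placeholder: just swallow the packet */
	return (EJUSTRETURN);	/* we consumed it */
}

static errno_t
hypothetical_attach_my_proto(ifnet_t ifp, protocol_family_t my_family)
{
	u_int16_t etype = htons(0x88B5);	/* example ethertype */
	struct ifnet_demux_desc demux = {
		.type = DLIL_DESC_ETYPE2,	/* assumed descriptor type */
		.data = &etype,
		.datalen = sizeof (etype),
	};
	struct ifnet_attach_proto_param param;

	bzero(&param, sizeof (param));
	param.demux_list = &demux;
	param.demux_count = 1;
	param.input = my_proto_input;

	return (ifnet_attach_protocol(ifp, my_family, &param));
}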
5266
5267 errno_t
5268 ifnet_attach_protocol_v2(ifnet_t ifp, protocol_family_t protocol,
5269 const struct ifnet_attach_proto_param_v2 *proto_details)
5270 {
5271 int retval = 0;
5272 struct if_proto *ifproto = NULL;
5273 uint32_t proto_count = 0;
5274
5275 ifnet_head_lock_shared();
5276 if (ifp == NULL || protocol == 0 || proto_details == NULL) {
5277 retval = EINVAL;
5278 goto end;
5279 }
5280 /* Check that the interface is in the global list */
5281 if (!ifnet_lookup(ifp)) {
5282 retval = ENXIO;
5283 goto end;
5284 }
5285
5286 ifproto = zalloc(dlif_proto_zone);
5287 if (ifproto == NULL) {
5288 retval = ENOMEM;
5289 goto end;
5290 }
5291 bzero(ifproto, sizeof(*ifproto));
5292
5293 /* refcnt held above during lookup */
5294 ifproto->ifp = ifp;
5295 ifproto->protocol_family = protocol;
5296 ifproto->proto_kpi = kProtoKPI_v2;
5297 ifproto->kpi.v2.input = proto_details->input;
5298 ifproto->kpi.v2.pre_output = proto_details->pre_output;
5299 ifproto->kpi.v2.event = proto_details->event;
5300 ifproto->kpi.v2.ioctl = proto_details->ioctl;
5301 ifproto->kpi.v2.detached = proto_details->detached;
5302 ifproto->kpi.v2.resolve_multi = proto_details->resolve;
5303 ifproto->kpi.v2.send_arp = proto_details->send_arp;
5304
5305 retval = dlil_attach_protocol_internal(ifproto,
5306 proto_details->demux_list, proto_details->demux_count,
5307 &proto_count);
5308
5309 end:
5310 if (retval != 0 && retval != EEXIST && ifp != NULL) {
5311 DLIL_PRINTF("%s: failed to attach v2 protocol %d (err=%d)\n",
5312 if_name(ifp), protocol, retval);
5313 } else {
5314 if (dlil_verbose) {
5315 printf("%s: attached v2 protocol %d (count = %d)\n",
5316 if_name(ifp),
5317 protocol, proto_count);
5318 }
5319 }
5320 ifnet_head_done();
5321 if (retval == 0) {
5322 } else if (ifproto != NULL) {
5323 zfree(dlif_proto_zone, ifproto);
5324 }
5325 return (retval);
5326 }
5327
5328 errno_t
5329 ifnet_detach_protocol(ifnet_t ifp, protocol_family_t proto_family)
5330 {
5331 struct if_proto *proto = NULL;
5332 int retval = 0;
5333
5334 if (ifp == NULL || proto_family == 0) {
5335 retval = EINVAL;
5336 goto end;
5337 }
5338
5339 ifnet_lock_exclusive(ifp);
5340 /* callee holds a proto refcnt upon success */
5341 proto = find_attached_proto(ifp, proto_family);
5342 if (proto == NULL) {
5343 retval = ENXIO;
5344 ifnet_lock_done(ifp);
5345 goto end;
5346 }
5347
5348 /* call family module del_proto */
5349 if (ifp->if_del_proto)
5350 ifp->if_del_proto(ifp, proto->protocol_family);
5351
5352 SLIST_REMOVE(&ifp->if_proto_hash[proto_hash_value(proto_family)],
5353 proto, if_proto, next_hash);
5354
5355 if (proto->proto_kpi == kProtoKPI_v1) {
5356 proto->kpi.v1.input = ifproto_media_input_v1;
5357 proto->kpi.v1.pre_output = ifproto_media_preout;
5358 proto->kpi.v1.event = ifproto_media_event;
5359 proto->kpi.v1.ioctl = ifproto_media_ioctl;
5360 proto->kpi.v1.resolve_multi = ifproto_media_resolve_multi;
5361 proto->kpi.v1.send_arp = ifproto_media_send_arp;
5362 } else {
5363 proto->kpi.v2.input = ifproto_media_input_v2;
5364 proto->kpi.v2.pre_output = ifproto_media_preout;
5365 proto->kpi.v2.event = ifproto_media_event;
5366 proto->kpi.v2.ioctl = ifproto_media_ioctl;
5367 proto->kpi.v2.resolve_multi = ifproto_media_resolve_multi;
5368 proto->kpi.v2.send_arp = ifproto_media_send_arp;
5369 }
5370 proto->detached = 1;
5371 ifnet_lock_done(ifp);
5372
5373 if (dlil_verbose) {
5374 printf("%s: detached %s protocol %d\n", if_name(ifp),
5375 (proto->proto_kpi == kProtoKPI_v1) ?
5376 "v1" : "v2", proto_family);
5377 }
5378
5379 /* release proto refcnt held during protocol attach */
5380 if_proto_free(proto);
5381
5382 /*
5383 * Release proto refcnt held during lookup; the rest of
5384 * protocol detach steps will happen when the last proto
5385 * reference is released.
5386 */
5387 if_proto_free(proto);
5388
5389 end:
5390 return (retval);
5391 }
5392
5393
5394 static errno_t
5395 ifproto_media_input_v1(struct ifnet *ifp, protocol_family_t protocol,
5396 struct mbuf *packet, char *header)
5397 {
5398 #pragma unused(ifp, protocol, packet, header)
5399 return (ENXIO);
5400 }
5401
5402 static errno_t
5403 ifproto_media_input_v2(struct ifnet *ifp, protocol_family_t protocol,
5404 struct mbuf *packet)
5405 {
5406 #pragma unused(ifp, protocol, packet)
5407 return (ENXIO);
5408
5409 }
5410
5411 static errno_t
5412 ifproto_media_preout(struct ifnet *ifp, protocol_family_t protocol,
5413 mbuf_t *packet, const struct sockaddr *dest, void *route, char *frame_type,
5414 char *link_layer_dest)
5415 {
5416 #pragma unused(ifp, protocol, packet, dest, route, frame_type, link_layer_dest)
5417 return (ENXIO);
5418
5419 }
5420
5421 static void
5422 ifproto_media_event(struct ifnet *ifp, protocol_family_t protocol,
5423 const struct kev_msg *event)
5424 {
5425 #pragma unused(ifp, protocol, event)
5426 }
5427
5428 static errno_t
5429 ifproto_media_ioctl(struct ifnet *ifp, protocol_family_t protocol,
5430 unsigned long command, void *argument)
5431 {
5432 #pragma unused(ifp, protocol, command, argument)
5433 return (ENXIO);
5434 }
5435
5436 static errno_t
5437 ifproto_media_resolve_multi(ifnet_t ifp, const struct sockaddr *proto_addr,
5438 struct sockaddr_dl *out_ll, size_t ll_len)
5439 {
5440 #pragma unused(ifp, proto_addr, out_ll, ll_len)
5441 return (ENXIO);
5442 }
5443
5444 static errno_t
5445 ifproto_media_send_arp(struct ifnet *ifp, u_short arpop,
5446 const struct sockaddr_dl *sender_hw, const struct sockaddr *sender_proto,
5447 const struct sockaddr_dl *target_hw, const struct sockaddr *target_proto)
5448 {
5449 #pragma unused(ifp, arpop, sender_hw, sender_proto, target_hw, target_proto)
5450 return (ENXIO);
5451 }
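
/*
 * The ifproto_media_* stubs above are installed by ifnet_detach_protocol()
 * in place of a detached protocol's callbacks, so that anyone still holding
 * a reference to the if_proto and racing with the detach gets a harmless
 * ENXIO instead of calling through pointers that may no longer be valid.
 */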
5452
5453 extern int if_next_index(void);
5454 extern int tcp_ecn_outbound;
5455
5456 errno_t
5457 ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr)
5458 {
5459 struct ifnet *tmp_if;
5460 struct ifaddr *ifa;
5461 struct if_data_internal if_data_saved;
5462 struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
5463 struct dlil_threading_info *dl_inp;
5464 u_int32_t sflags = 0;
5465 int err;
5466
5467 if (ifp == NULL)
5468 return (EINVAL);
5469
5470 /*
5471 * Serialize ifnet attach using dlil_ifnet_lock, in order to
5472 * prevent the interface from being configured while it is
5473 * embryonic, as ifnet_head_lock is dropped and reacquired
5474 * below prior to marking the ifnet with IFRF_ATTACHED.
5475 */
5476 dlil_if_lock();
5477 ifnet_head_lock_exclusive();
5478 /* Verify we aren't already on the list */
5479 TAILQ_FOREACH(tmp_if, &ifnet_head, if_link) {
5480 if (tmp_if == ifp) {
5481 ifnet_head_done();
5482 dlil_if_unlock();
5483 return (EEXIST);
5484 }
5485 }
5486
5487 lck_mtx_lock_spin(&ifp->if_ref_lock);
5488 if (!(ifp->if_refflags & IFRF_EMBRYONIC)) {
5489 panic_plain("%s: flags mismatch (embryonic not set) ifp=%p",
5490 __func__, ifp);
5491 /* NOTREACHED */
5492 }
5493 lck_mtx_unlock(&ifp->if_ref_lock);
5494
5495 ifnet_lock_exclusive(ifp);
5496
5497 /* Sanity check */
5498 VERIFY(ifp->if_detaching_link.tqe_next == NULL);
5499 VERIFY(ifp->if_detaching_link.tqe_prev == NULL);
5500
5501 if (ll_addr != NULL) {
5502 if (ifp->if_addrlen == 0) {
5503 ifp->if_addrlen = ll_addr->sdl_alen;
5504 } else if (ll_addr->sdl_alen != ifp->if_addrlen) {
5505 ifnet_lock_done(ifp);
5506 ifnet_head_done();
5507 dlil_if_unlock();
5508 return (EINVAL);
5509 }
5510 }
5511
5512 /*
5513 * Allow interfaces without protocol families to attach
5514 * only if they have the necessary fields filled out.
5515 */
5516 if (ifp->if_add_proto == NULL || ifp->if_del_proto == NULL) {
5517 DLIL_PRINTF("%s: Attempt to attach interface without "
5518 "family module - %d\n", __func__, ifp->if_family);
5519 ifnet_lock_done(ifp);
5520 ifnet_head_done();
5521 dlil_if_unlock();
5522 return (ENODEV);
5523 }
5524
5525 /* Allocate protocol hash table */
5526 VERIFY(ifp->if_proto_hash == NULL);
5527 ifp->if_proto_hash = zalloc(dlif_phash_zone);
5528 if (ifp->if_proto_hash == NULL) {
5529 ifnet_lock_done(ifp);
5530 ifnet_head_done();
5531 dlil_if_unlock();
5532 return (ENOBUFS);
5533 }
5534 bzero(ifp->if_proto_hash, dlif_phash_size);
5535
5536 lck_mtx_lock_spin(&ifp->if_flt_lock);
5537 VERIFY(TAILQ_EMPTY(&ifp->if_flt_head));
5538 TAILQ_INIT(&ifp->if_flt_head);
5539 VERIFY(ifp->if_flt_busy == 0);
5540 VERIFY(ifp->if_flt_waiters == 0);
5541 lck_mtx_unlock(&ifp->if_flt_lock);
5542
5543 if (!(dl_if->dl_if_flags & DLIF_REUSE)) {
5544 VERIFY(LIST_EMPTY(&ifp->if_multiaddrs));
5545 LIST_INIT(&ifp->if_multiaddrs);
5546 }
5547
5548 VERIFY(ifp->if_allhostsinm == NULL);
5549 VERIFY(TAILQ_EMPTY(&ifp->if_addrhead));
5550 TAILQ_INIT(&ifp->if_addrhead);
5551
5552 if (ifp->if_index == 0) {
5553 int idx = if_next_index();
5554
5555 if (idx == -1) {
5556 ifp->if_index = 0;
5557 ifnet_lock_done(ifp);
5558 ifnet_head_done();
5559 dlil_if_unlock();
5560 return (ENOBUFS);
5561 }
5562 ifp->if_index = idx;
5563 }
5564 /* There should not be anything occupying this slot */
5565 VERIFY(ifindex2ifnet[ifp->if_index] == NULL);
5566
5567 /* allocate (if needed) and initialize a link address */
5568 ifa = dlil_alloc_lladdr(ifp, ll_addr);
5569 if (ifa == NULL) {
5570 ifnet_lock_done(ifp);
5571 ifnet_head_done();
5572 dlil_if_unlock();
5573 return (ENOBUFS);
5574 }
5575
5576 VERIFY(ifnet_addrs[ifp->if_index - 1] == NULL);
5577 ifnet_addrs[ifp->if_index - 1] = ifa;
5578
5579 /* make this address the first on the list */
5580 IFA_LOCK(ifa);
5581 /* hold a reference for ifnet_addrs[] */
5582 IFA_ADDREF_LOCKED(ifa);
5583 /* if_attach_link_ifa() holds a reference for ifa_link */
5584 if_attach_link_ifa(ifp, ifa);
5585 IFA_UNLOCK(ifa);
5586
5587 #if CONFIG_MACF_NET
5588 mac_ifnet_label_associate(ifp);
5589 #endif
5590
5591 TAILQ_INSERT_TAIL(&ifnet_head, ifp, if_link);
5592 ifindex2ifnet[ifp->if_index] = ifp;
5593
5594 /* Hold a reference to the underlying dlil_ifnet */
5595 ifnet_reference(ifp);
5596
5597 /* Clear stats (save and restore other fields that we care about) */
5598 if_data_saved = ifp->if_data;
5599 bzero(&ifp->if_data, sizeof (ifp->if_data));
5600 ifp->if_data.ifi_type = if_data_saved.ifi_type;
5601 ifp->if_data.ifi_typelen = if_data_saved.ifi_typelen;
5602 ifp->if_data.ifi_physical = if_data_saved.ifi_physical;
5603 ifp->if_data.ifi_addrlen = if_data_saved.ifi_addrlen;
5604 ifp->if_data.ifi_hdrlen = if_data_saved.ifi_hdrlen;
5605 ifp->if_data.ifi_mtu = if_data_saved.ifi_mtu;
5606 ifp->if_data.ifi_baudrate = if_data_saved.ifi_baudrate;
5607 ifp->if_data.ifi_hwassist = if_data_saved.ifi_hwassist;
5608 ifp->if_data.ifi_tso_v4_mtu = if_data_saved.ifi_tso_v4_mtu;
5609 ifp->if_data.ifi_tso_v6_mtu = if_data_saved.ifi_tso_v6_mtu;
5610 ifnet_touch_lastchange(ifp);
5611
5612 VERIFY(ifp->if_output_sched_model == IFNET_SCHED_MODEL_NORMAL ||
5613 ifp->if_output_sched_model == IFNET_SCHED_MODEL_DRIVER_MANAGED ||
5614 ifp->if_output_sched_model == IFNET_SCHED_MODEL_FQ_CODEL);
5615
5616 /* By default, use SFB and enable flow advisory */
5617 sflags = PKTSCHEDF_QALG_SFB;
5618 if (if_flowadv)
5619 sflags |= PKTSCHEDF_QALG_FLOWCTL;
5620
5621 if (if_delaybased_queue)
5622 sflags |= PKTSCHEDF_QALG_DELAYBASED;
5623
5624 if (ifp->if_output_sched_model ==
5625 IFNET_SCHED_MODEL_DRIVER_MANAGED)
5626 sflags |= PKTSCHEDF_QALG_DRIVER_MANAGED;
5627
5628 /* Initialize transmit queue(s) */
5629 err = ifclassq_setup(ifp, sflags, (dl_if->dl_if_flags & DLIF_REUSE));
5630 if (err != 0) {
5631 panic_plain("%s: ifp=%p couldn't initialize transmit queue; "
5632 "err=%d", __func__, ifp, err);
5633 /* NOTREACHED */
5634 }
5635
5636 /* Sanity checks on the input thread storage */
5637 dl_inp = &dl_if->dl_if_inpstorage;
5638 bzero(&dl_inp->stats, sizeof (dl_inp->stats));
5639 VERIFY(dl_inp->input_waiting == 0);
5640 VERIFY(dl_inp->wtot == 0);
5641 VERIFY(dl_inp->ifp == NULL);
5642 VERIFY(qhead(&dl_inp->rcvq_pkts) == NULL && qempty(&dl_inp->rcvq_pkts));
5643 VERIFY(qlimit(&dl_inp->rcvq_pkts) == 0);
5644 VERIFY(!dl_inp->net_affinity);
5645 VERIFY(ifp->if_inp == NULL);
5646 VERIFY(dl_inp->input_thr == THREAD_NULL);
5647 VERIFY(dl_inp->wloop_thr == THREAD_NULL);
5648 VERIFY(dl_inp->poll_thr == THREAD_NULL);
5649 VERIFY(dl_inp->tag == 0);
5650 VERIFY(dl_inp->mode == IFNET_MODEL_INPUT_POLL_OFF);
5651 bzero(&dl_inp->tstats, sizeof (dl_inp->tstats));
5652 bzero(&dl_inp->pstats, sizeof (dl_inp->pstats));
5653 bzero(&dl_inp->sstats, sizeof (dl_inp->sstats));
5654 #if IFNET_INPUT_SANITY_CHK
5655 VERIFY(dl_inp->input_mbuf_cnt == 0);
5656 #endif /* IFNET_INPUT_SANITY_CHK */
5657
5658 /*
5659 * A dedicated DLIL input thread is created for each Ethernet/cellular
5660 * interface and for any interface that supports opportunistic
5661 * input polling. Pseudo interfaces or other types of interfaces
5662 * use the main input thread instead.
5663 */
5664 if ((net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) ||
5665 ifp->if_type == IFT_ETHER || ifp->if_type == IFT_CELLULAR) {
5666 ifp->if_inp = dl_inp;
5667 err = dlil_create_input_thread(ifp, ifp->if_inp);
5668 if (err != 0) {
5669 panic_plain("%s: ifp=%p couldn't get an input thread; "
5670 "err=%d", __func__, ifp, err);
5671 /* NOTREACHED */
5672 }
5673 }
5674
5675 if (ifp->if_inp != NULL && ifp->if_inp->input_mit_tcall == NULL) {
5676 ifp->if_inp->input_mit_tcall =
5677 thread_call_allocate_with_priority(dlil_mit_tcall_fn,
5678 ifp, THREAD_CALL_PRIORITY_KERNEL);
5679 }
5680
5681 /*
5682 * If the driver supports the new transmit model, calculate flow hash
5683 * and create a workloop starter thread to invoke the if_start callback
5684 * where the packets may be dequeued and transmitted.
5685 */
5686 if (ifp->if_eflags & IFEF_TXSTART) {
5687 ifp->if_flowhash = ifnet_calc_flowhash(ifp);
5688 VERIFY(ifp->if_flowhash != 0);
5689 VERIFY(ifp->if_start_thread == THREAD_NULL);
5690
5691 ifnet_set_start_cycle(ifp, NULL);
5692 ifp->if_start_active = 0;
5693 ifp->if_start_req = 0;
5694 ifp->if_start_flags = 0;
5695 VERIFY(ifp->if_start != NULL);
5696 if ((err = kernel_thread_start(ifnet_start_thread_fn,
5697 ifp, &ifp->if_start_thread)) != KERN_SUCCESS) {
5698 panic_plain("%s: "
5699 "ifp=%p couldn't get a start thread; "
5700 "err=%d", __func__, ifp, err);
5701 /* NOTREACHED */
5702 }
5703 ml_thread_policy(ifp->if_start_thread, MACHINE_GROUP,
5704 (MACHINE_NETWORK_GROUP|MACHINE_NETWORK_WORKLOOP));
5705 } else {
5706 ifp->if_flowhash = 0;
5707 }
5708
5709 /*
5710 * If the driver supports the new receive model, create a poller
5711 * thread to invoke if_input_poll callback where the packets may
5712 * be dequeued from the driver and processed for reception.
5713 */
5714 if (ifp->if_eflags & IFEF_RXPOLL) {
5715 VERIFY(ifp->if_input_poll != NULL);
5716 VERIFY(ifp->if_input_ctl != NULL);
5717 VERIFY(ifp->if_poll_thread == THREAD_NULL);
5718
5719 ifnet_set_poll_cycle(ifp, NULL);
5720 ifp->if_poll_update = 0;
5721 ifp->if_poll_active = 0;
5722 ifp->if_poll_req = 0;
5723 if ((err = kernel_thread_start(ifnet_poll_thread_fn, ifp,
5724 &ifp->if_poll_thread)) != KERN_SUCCESS) {
5725 panic_plain("%s: ifp=%p couldn't get a poll thread; "
5726 "err=%d", __func__, ifp, err);
5727 /* NOTREACHED */
5728 }
5729 ml_thread_policy(ifp->if_poll_thread, MACHINE_GROUP,
5730 (MACHINE_NETWORK_GROUP|MACHINE_NETWORK_WORKLOOP));
5731 }
5732
5733 VERIFY(ifp->if_desc.ifd_maxlen == IF_DESCSIZE);
5734 VERIFY(ifp->if_desc.ifd_len == 0);
5735 VERIFY(ifp->if_desc.ifd_desc != NULL);
5736
5737 /* Record attach PC stacktrace */
5738 ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_attach);
5739
5740 ifp->if_updatemcasts = 0;
5741 if (!LIST_EMPTY(&ifp->if_multiaddrs)) {
5742 struct ifmultiaddr *ifma;
5743 LIST_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
5744 IFMA_LOCK(ifma);
5745 if (ifma->ifma_addr->sa_family == AF_LINK ||
5746 ifma->ifma_addr->sa_family == AF_UNSPEC)
5747 ifp->if_updatemcasts++;
5748 IFMA_UNLOCK(ifma);
5749 }
5750
5751 printf("%s: attached with %d suspended link-layer multicast "
5752 "membership(s)\n", if_name(ifp),
5753 ifp->if_updatemcasts);
5754 }
5755
5756 /* Clear logging parameters */
5757 bzero(&ifp->if_log, sizeof (ifp->if_log));
5758
5759 /* Clear foreground/realtime activity timestamps */
5760 ifp->if_fg_sendts = 0;
5761 ifp->if_rt_sendts = 0;
5762
5763 VERIFY(ifp->if_delegated.ifp == NULL);
5764 VERIFY(ifp->if_delegated.type == 0);
5765 VERIFY(ifp->if_delegated.family == 0);
5766 VERIFY(ifp->if_delegated.subfamily == 0);
5767 VERIFY(ifp->if_delegated.expensive == 0);
5768
5769 VERIFY(ifp->if_agentids == NULL);
5770 VERIFY(ifp->if_agentcount == 0);
5771
5772 /* Reset interface state */
5773 bzero(&ifp->if_interface_state, sizeof(ifp->if_interface_state));
5774 ifp->if_interface_state.valid_bitmask |=
5775 IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
5776 ifp->if_interface_state.interface_availability =
5777 IF_INTERFACE_STATE_INTERFACE_AVAILABLE;
5778
5779 /* Initialize Link Quality Metric (loopback [lo0] is always good) */
5780 if (ifp == lo_ifp) {
5781 ifp->if_interface_state.lqm_state = IFNET_LQM_THRESH_GOOD;
5782 ifp->if_interface_state.valid_bitmask |=
5783 IF_INTERFACE_STATE_LQM_STATE_VALID;
5784 } else {
5785 ifp->if_interface_state.lqm_state = IFNET_LQM_THRESH_UNKNOWN;
5786 }
5787
5788 /*
5789 * Enable ECN capability on this interface when the global ECN
5790 * setting requests it on all (non-cellular) interfaces
5791 */
5792 if (tcp_ecn_outbound == 2 && !IFNET_IS_CELLULAR(ifp)) {
5793 ifp->if_eflags |= IFEF_ECN_ENABLE;
5794 ifp->if_eflags &= ~IFEF_ECN_DISABLE;
5795 }
5796
5797 /*
5798 * Built-in Cyclops always on policy for WiFi infra
5799 */
5800 if (IFNET_IS_WIFI_INFRA(ifp) && net_qos_policy_wifi_enabled != 0) {
5801 errno_t error;
5802
5803 error = if_set_qosmarking_mode(ifp,
5804 IFRTYPE_QOSMARKING_FASTLANE);
5805 if (error != 0) {
5806 printf("%s if_set_qosmarking_mode(%s) error %d\n",
5807 __func__, ifp->if_xname, error);
5808 } else {
5809 ifp->if_eflags |= IFEF_QOSMARKING_ENABLED;
5810 #if (DEVELOPMENT || DEBUG)
5811 printf("%s fastlane enabled on %s\n",
5812 __func__, ifp->if_xname);
5813 #endif /* (DEVELOPMENT || DEBUG) */
5814 }
5815 }
5816
5817 ifnet_lock_done(ifp);
5818 ifnet_head_done();
5819
5820
5821 lck_mtx_lock(&ifp->if_cached_route_lock);
5822 /* Enable forwarding cached route */
5823 ifp->if_fwd_cacheok = 1;
5824 /* Clean up any existing cached routes */
5825 ROUTE_RELEASE(&ifp->if_fwd_route);
5826 bzero(&ifp->if_fwd_route, sizeof (ifp->if_fwd_route));
5827 ROUTE_RELEASE(&ifp->if_src_route);
5828 bzero(&ifp->if_src_route, sizeof (ifp->if_src_route));
5829 ROUTE_RELEASE(&ifp->if_src_route6);
5830 bzero(&ifp->if_src_route6, sizeof (ifp->if_src_route6));
5831 lck_mtx_unlock(&ifp->if_cached_route_lock);
5832
5833 ifnet_llreach_ifattach(ifp, (dl_if->dl_if_flags & DLIF_REUSE));
5834
5835 /*
5836 * Allocate and attach IGMPv3/MLDv2 interface specific variables
5837 * and trees; do this before the ifnet is marked as attached.
5838 * The ifnet keeps the reference to the info structures even after
5839 * the ifnet is detached, since the network-layer records still
5840 * refer to the info structures even after that. This also
5841 * makes it possible for them to still function after the ifnet
5842 * is recycled or reattached.
5843 */
5844 #if INET
5845 if (IGMP_IFINFO(ifp) == NULL) {
5846 IGMP_IFINFO(ifp) = igmp_domifattach(ifp, M_WAITOK);
5847 VERIFY(IGMP_IFINFO(ifp) != NULL);
5848 } else {
5849 VERIFY(IGMP_IFINFO(ifp)->igi_ifp == ifp);
5850 igmp_domifreattach(IGMP_IFINFO(ifp));
5851 }
5852 #endif /* INET */
5853 #if INET6
5854 if (MLD_IFINFO(ifp) == NULL) {
5855 MLD_IFINFO(ifp) = mld_domifattach(ifp, M_WAITOK);
5856 VERIFY(MLD_IFINFO(ifp) != NULL);
5857 } else {
5858 VERIFY(MLD_IFINFO(ifp)->mli_ifp == ifp);
5859 mld_domifreattach(MLD_IFINFO(ifp));
5860 }
5861 #endif /* INET6 */
5862
5863 VERIFY(ifp->if_data_threshold == 0);
5864 VERIFY(ifp->if_dt_tcall != NULL);
5865
5866 /*
5867 * Finally, mark this ifnet as attached.
5868 */
5869 lck_mtx_lock(rnh_lock);
5870 ifnet_lock_exclusive(ifp);
5871 lck_mtx_lock_spin(&ifp->if_ref_lock);
5872 ifp->if_refflags = IFRF_ATTACHED; /* clears embryonic */
5873 lck_mtx_unlock(&ifp->if_ref_lock);
5874 if (net_rtref) {
5875 /* boot-args override; enable idle notification */
5876 (void) ifnet_set_idle_flags_locked(ifp, IFRF_IDLE_NOTIFY,
5877 IFRF_IDLE_NOTIFY);
5878 } else {
5879 /* apply previous request(s) to set the idle flags, if any */
5880 (void) ifnet_set_idle_flags_locked(ifp, ifp->if_idle_new_flags,
5881 ifp->if_idle_new_flags_mask);
5882
5883 }
5884 ifnet_lock_done(ifp);
5885 lck_mtx_unlock(rnh_lock);
5886 dlil_if_unlock();
5887
5888 #if PF
5889 /*
5890 * Attach packet filter to this interface, if enabled.
5891 */
5892 pf_ifnet_hook(ifp, 1);
5893 #endif /* PF */
5894
5895 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_ATTACHED, NULL, 0);
5896
5897 if (dlil_verbose) {
5898 printf("%s: attached%s\n", if_name(ifp),
5899 (dl_if->dl_if_flags & DLIF_REUSE) ? " (recycled)" : "");
5900 }
5901
5902 return (0);
5903 }
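
#if 0	/* Illustrative, hypothetical sketch; not part of dlil.c */
/*
 * A minimal example of how a driver-side attach typically reaches
 * ifnet_attach() above through the ifnet KPI.  Only ifnet_allocate() and
 * ifnet_attach() are real entry points here; the "exmp" name and the
 * example_* callbacks are assumptions made purely for illustration.
 */
static errno_t
example_driver_attach(void *softc, const u_int8_t mac[ETHER_ADDR_LEN])
{
	struct ifnet_init_params init;
	struct sockaddr_dl sdl;
	ifnet_t ifp;
	errno_t err;

	bzero(&init, sizeof (init));
	init.name = "exmp";			/* hypothetical name */
	init.unit = 0;
	init.family = IFNET_FAMILY_ETHERNET;
	init.type = IFT_ETHER;
	init.output = example_output;		/* hypothetical callbacks */
	init.demux = example_demux;
	init.add_proto = example_add_proto;
	init.del_proto = example_del_proto;
	init.softc = softc;

	err = ifnet_allocate(&init, &ifp);
	if (err != 0)
		return (err);

	/* describe the permanent link-layer address for ifnet_attach() */
	bzero(&sdl, sizeof (sdl));
	sdl.sdl_len = sizeof (sdl);
	sdl.sdl_family = AF_LINK;
	sdl.sdl_type = IFT_ETHER;
	sdl.sdl_alen = ETHER_ADDR_LEN;
	bcopy(mac, LLADDR(&sdl), ETHER_ADDR_LEN);

	return (ifnet_attach(ifp, &sdl));
}
#endif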
5904
5905 /*
5906 * Prepare the storage for the first/permanent link address, which must
5907 * have the same lifetime as the ifnet itself. Although the link
5908 * address gets removed from if_addrhead and ifnet_addrs[] at detach time,
5909 * its location in memory must never change as it may still be referred
5910 * to by some parts of the system afterwards (unfortunate implementation
5911 * artifacts inherited from BSD.)
5912 *
5913 * Caller must hold ifnet lock as writer.
5914 */
5915 static struct ifaddr *
5916 dlil_alloc_lladdr(struct ifnet *ifp, const struct sockaddr_dl *ll_addr)
5917 {
5918 struct ifaddr *ifa, *oifa;
5919 struct sockaddr_dl *asdl, *msdl;
5920 char workbuf[IFNAMSIZ*2];
5921 int namelen, masklen, socksize;
5922 struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
5923
5924 ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_EXCLUSIVE);
5925 VERIFY(ll_addr == NULL || ll_addr->sdl_alen == ifp->if_addrlen);
5926
5927 namelen = snprintf(workbuf, sizeof (workbuf), "%s",
5928 if_name(ifp));
5929 masklen = offsetof(struct sockaddr_dl, sdl_data[0])
5930 + ((namelen > 0) ? namelen : 0);
5931 socksize = masklen + ifp->if_addrlen;
5932 #define ROUNDUP(a) (1 + (((a) - 1) | (sizeof (u_int32_t) - 1)))
5933 if ((u_int32_t)socksize < sizeof (struct sockaddr_dl))
5934 socksize = sizeof(struct sockaddr_dl);
5935 socksize = ROUNDUP(socksize);
5936 #undef ROUNDUP
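/*
 * Worked example (illustrative only, assuming the usual Darwin layout where
 * offsetof(struct sockaddr_dl, sdl_data[0]) == 8 and sizeof (struct
 * sockaddr_dl) == 20): for "en0" (namelen 3) on Ethernet (if_addrlen 6),
 * masklen = 8 + 3 = 11 and socksize = 11 + 6 = 17, which is first bumped
 * up to sizeof (struct sockaddr_dl) and then rounded to 20 bytes.
 */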
5937
5938 ifa = ifp->if_lladdr;
5939 if (socksize > DLIL_SDLMAXLEN ||
5940 (ifa != NULL && ifa != &dl_if->dl_if_lladdr.ifa)) {
5941 /*
5942 * Rare, but in the event that the link address requires
5943 * more storage space than DLIL_SDLMAXLEN, allocate the
5944 * largest possible storage for address and mask, such
5945 * that we can reuse the same space when if_addrlen grows.
5946 * This same space will be used when if_addrlen shrinks.
5947 */
5948 if (ifa == NULL || ifa == &dl_if->dl_if_lladdr.ifa) {
5949 int ifasize = sizeof (*ifa) + 2 * SOCK_MAXADDRLEN;
5950 ifa = _MALLOC(ifasize, M_IFADDR, M_WAITOK | M_ZERO);
5951 if (ifa == NULL)
5952 return (NULL);
5953 ifa_lock_init(ifa);
5954 /* Don't set IFD_ALLOC, as this is permanent */
5955 ifa->ifa_debug = IFD_LINK;
5956 }
5957 IFA_LOCK(ifa);
5958 /* address and mask sockaddr_dl locations */
5959 asdl = (struct sockaddr_dl *)(ifa + 1);
5960 bzero(asdl, SOCK_MAXADDRLEN);
5961 msdl = (struct sockaddr_dl *)(void *)
5962 ((char *)asdl + SOCK_MAXADDRLEN);
5963 bzero(msdl, SOCK_MAXADDRLEN);
5964 } else {
5965 VERIFY(ifa == NULL || ifa == &dl_if->dl_if_lladdr.ifa);
5966 /*
5967 * Use the storage areas for address and mask within the
5968 * dlil_ifnet structure. This is the most common case.
5969 */
5970 if (ifa == NULL) {
5971 ifa = &dl_if->dl_if_lladdr.ifa;
5972 ifa_lock_init(ifa);
5973 /* Don't set IFD_ALLOC, as this is permanent */
5974 ifa->ifa_debug = IFD_LINK;
5975 }
5976 IFA_LOCK(ifa);
5977 /* address and mask sockaddr_dl locations */
5978 asdl = (struct sockaddr_dl *)(void *)&dl_if->dl_if_lladdr.asdl;
5979 bzero(asdl, sizeof (dl_if->dl_if_lladdr.asdl));
5980 msdl = (struct sockaddr_dl *)(void *)&dl_if->dl_if_lladdr.msdl;
5981 bzero(msdl, sizeof (dl_if->dl_if_lladdr.msdl));
5982 }
5983
5984 /* hold a permanent reference for the ifnet itself */
5985 IFA_ADDREF_LOCKED(ifa);
5986 oifa = ifp->if_lladdr;
5987 ifp->if_lladdr = ifa;
5988
5989 VERIFY(ifa->ifa_debug == IFD_LINK);
5990 ifa->ifa_ifp = ifp;
5991 ifa->ifa_rtrequest = link_rtrequest;
5992 ifa->ifa_addr = (struct sockaddr *)asdl;
5993 asdl->sdl_len = socksize;
5994 asdl->sdl_family = AF_LINK;
5995 if (namelen > 0) {
5996 bcopy(workbuf, asdl->sdl_data, min(namelen,
5997 sizeof (asdl->sdl_data)));
5998 asdl->sdl_nlen = namelen;
5999 } else {
6000 asdl->sdl_nlen = 0;
6001 }
6002 asdl->sdl_index = ifp->if_index;
6003 asdl->sdl_type = ifp->if_type;
6004 if (ll_addr != NULL) {
6005 asdl->sdl_alen = ll_addr->sdl_alen;
6006 bcopy(CONST_LLADDR(ll_addr), LLADDR(asdl), asdl->sdl_alen);
6007 } else {
6008 asdl->sdl_alen = 0;
6009 }
6010 ifa->ifa_netmask = (struct sockaddr *)msdl;
6011 msdl->sdl_len = masklen;
6012 while (namelen > 0)
6013 msdl->sdl_data[--namelen] = 0xff;
6014 IFA_UNLOCK(ifa);
6015
6016 if (oifa != NULL)
6017 IFA_REMREF(oifa);
6018
6019 return (ifa);
6020 }
6021
6022 static void
6023 if_purgeaddrs(struct ifnet *ifp)
6024 {
6025 #if INET
6026 in_purgeaddrs(ifp);
6027 #endif /* INET */
6028 #if INET6
6029 in6_purgeaddrs(ifp);
6030 #endif /* INET6 */
6031 }
6032
6033 errno_t
6034 ifnet_detach(ifnet_t ifp)
6035 {
6036 struct ifnet *delegated_ifp;
6037 struct nd_ifinfo *ndi = NULL;
6038
6039 if (ifp == NULL)
6040 return (EINVAL);
6041
6042 ndi = ND_IFINFO(ifp);
6043 if (NULL != ndi)
6044 ndi->cga_initialized = FALSE;
6045
6046 lck_mtx_lock(rnh_lock);
6047 ifnet_head_lock_exclusive();
6048 ifnet_lock_exclusive(ifp);
6049
6050 /*
6051 * Check to see if this interface has previously triggered
6052 * aggressive protocol draining; if so, decrement the global
6053 * refcnt and clear PR_AGGDRAIN on the route domain if
6054 * no such interfaces remain.
6055 */
6056 (void) ifnet_set_idle_flags_locked(ifp, 0, ~0);
6057
6058 lck_mtx_lock_spin(&ifp->if_ref_lock);
6059 if (!(ifp->if_refflags & IFRF_ATTACHED)) {
6060 lck_mtx_unlock(&ifp->if_ref_lock);
6061 ifnet_lock_done(ifp);
6062 ifnet_head_done();
6063 lck_mtx_unlock(rnh_lock);
6064 return (EINVAL);
6065 } else if (ifp->if_refflags & IFRF_DETACHING) {
6066 /* Interface has already been detached */
6067 lck_mtx_unlock(&ifp->if_ref_lock);
6068 ifnet_lock_done(ifp);
6069 ifnet_head_done();
6070 lck_mtx_unlock(rnh_lock);
6071 return (ENXIO);
6072 }
6073 VERIFY(!(ifp->if_refflags & IFRF_EMBRYONIC));
6074 /* Indicate this interface is being detached */
6075 ifp->if_refflags &= ~IFRF_ATTACHED;
6076 ifp->if_refflags |= IFRF_DETACHING;
6077 lck_mtx_unlock(&ifp->if_ref_lock);
6078
6079 if (dlil_verbose)
6080 printf("%s: detaching\n", if_name(ifp));
6081
6082 /* Reset ECN enable/disable flags */
6083 ifp->if_eflags &= ~IFEF_ECN_DISABLE;
6084 ifp->if_eflags &= ~IFEF_ECN_ENABLE;
6085
6086 /*
6087 * Remove ifnet from ifnet_head and ifindex2ifnet[]; it will
6088 * no longer be visible during lookups from this point.
6089 */
6090 VERIFY(ifindex2ifnet[ifp->if_index] == ifp);
6091 TAILQ_REMOVE(&ifnet_head, ifp, if_link);
6092 ifp->if_link.tqe_next = NULL;
6093 ifp->if_link.tqe_prev = NULL;
6094 if (ifp->if_ordered_link.tqe_next != NULL ||
6095 ifp->if_ordered_link.tqe_prev != NULL) {
6096 ifnet_remove_from_ordered_list(ifp);
6097 }
6098 ifindex2ifnet[ifp->if_index] = NULL;
6099
6100 /* 18717626 - reset IFEF_IPV4_ROUTER and IFEF_IPV6_ROUTER */
6101 ifp->if_eflags &= ~(IFEF_IPV4_ROUTER | IFEF_IPV6_ROUTER);
6102
6103 /* Record detach PC stacktrace */
6104 ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_detach);
6105
6106 /* Clear logging parameters */
6107 bzero(&ifp->if_log, sizeof (ifp->if_log));
6108
6109 /* Clear delegated interface info (reference released below) */
6110 delegated_ifp = ifp->if_delegated.ifp;
6111 bzero(&ifp->if_delegated, sizeof (ifp->if_delegated));
6112
6113 /* Reset interface state */
6114 bzero(&ifp->if_interface_state, sizeof(ifp->if_interface_state));
6115
6116 ifnet_lock_done(ifp);
6117 ifnet_head_done();
6118 lck_mtx_unlock(rnh_lock);
6119
6120
6121 /* Release reference held on the delegated interface */
6122 if (delegated_ifp != NULL)
6123 ifnet_release(delegated_ifp);
6124
6125 /* Reset Link Quality Metric (unless loopback [lo0]) */
6126 if (ifp != lo_ifp)
6127 if_lqm_update(ifp, IFNET_LQM_THRESH_OFF, 0);
6128
6129 /* Reset TCP local statistics */
6130 if (ifp->if_tcp_stat != NULL)
6131 bzero(ifp->if_tcp_stat, sizeof(*ifp->if_tcp_stat));
6132
6133 /* Reset UDP local statistics */
6134 if (ifp->if_udp_stat != NULL)
6135 bzero(ifp->if_udp_stat, sizeof(*ifp->if_udp_stat));
6136
6137 /* Reset ifnet IPv4 stats */
6138 if (ifp->if_ipv4_stat != NULL)
6139 bzero(ifp->if_ipv4_stat, sizeof(*ifp->if_ipv4_stat));
6140
6141 /* Reset ifnet IPv6 stats */
6142 if (ifp->if_ipv6_stat != NULL)
6143 bzero(ifp->if_ipv6_stat, sizeof(*ifp->if_ipv6_stat));
6144
6145 /* Release memory held for interface link status report */
6146 if (ifp->if_link_status != NULL) {
6147 FREE(ifp->if_link_status, M_TEMP);
6148 ifp->if_link_status = NULL;
6149 }
6150
6151 /* Clear agent IDs */
6152 if (ifp->if_agentids != NULL) {
6153 FREE(ifp->if_agentids, M_NETAGENT);
6154 ifp->if_agentids = NULL;
6155 }
6156 ifp->if_agentcount = 0;
6157
6158
6159 /* Let BPF know we're detaching */
6160 bpfdetach(ifp);
6161
6162 /* Mark the interface as DOWN */
6163 if_down(ifp);
6164
6165 /* Disable forwarding cached route */
6166 lck_mtx_lock(&ifp->if_cached_route_lock);
6167 ifp->if_fwd_cacheok = 0;
6168 lck_mtx_unlock(&ifp->if_cached_route_lock);
6169
6170 /* Disable data threshold and wait for any pending event posting */
6171 ifp->if_data_threshold = 0;
6172 VERIFY(ifp->if_dt_tcall != NULL);
6173 (void) thread_call_cancel_wait(ifp->if_dt_tcall);
6174
6175 /*
6176 * Drain any deferred IGMPv3/MLDv2 query responses, but keep the
6177 * references to the info structures and leave them attached to
6178 * this ifnet.
6179 */
6180 #if INET
6181 igmp_domifdetach(ifp);
6182 #endif /* INET */
6183 #if INET6
6184 mld_domifdetach(ifp);
6185 #endif /* INET6 */
6186
6187 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHING, NULL, 0);
6188
6189 /* Let worker thread take care of the rest, to avoid reentrancy */
6190 dlil_if_lock();
6191 ifnet_detaching_enqueue(ifp);
6192 dlil_if_unlock();
6193
6194 return (0);
6195 }
6196
6197 static void
6198 ifnet_detaching_enqueue(struct ifnet *ifp)
6199 {
6200 dlil_if_lock_assert();
6201
6202 ++ifnet_detaching_cnt;
6203 VERIFY(ifnet_detaching_cnt != 0);
6204 TAILQ_INSERT_TAIL(&ifnet_detaching_head, ifp, if_detaching_link);
6205 wakeup((caddr_t)&ifnet_delayed_run);
6206 }
6207
6208 static struct ifnet *
6209 ifnet_detaching_dequeue(void)
6210 {
6211 struct ifnet *ifp;
6212
6213 dlil_if_lock_assert();
6214
6215 ifp = TAILQ_FIRST(&ifnet_detaching_head);
6216 VERIFY(ifnet_detaching_cnt != 0 || ifp == NULL);
6217 if (ifp != NULL) {
6218 VERIFY(ifnet_detaching_cnt != 0);
6219 --ifnet_detaching_cnt;
6220 TAILQ_REMOVE(&ifnet_detaching_head, ifp, if_detaching_link);
6221 ifp->if_detaching_link.tqe_next = NULL;
6222 ifp->if_detaching_link.tqe_prev = NULL;
6223 }
6224 return (ifp);
6225 }
6226
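/*
 * Note on the continuation-based wait used below: msleep0() is passed
 * ifnet_detacher_thread_cont itself as a continuation, so when the thread
 * blocks its stack is not preserved; on wakeup the scheduler re-enters the
 * continuation from the top instead of returning from msleep0(), which is
 * why those call sites are annotated NOTREACHED.
 */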
6227 static int
6228 ifnet_detacher_thread_cont(int err)
6229 {
6230 #pragma unused(err)
6231 struct ifnet *ifp;
6232
6233 for (;;) {
6234 dlil_if_lock_assert();
6235 while (ifnet_detaching_cnt == 0) {
6236 (void) msleep0(&ifnet_delayed_run, &dlil_ifnet_lock,
6237 (PZERO - 1), "ifnet_detacher_cont", 0,
6238 ifnet_detacher_thread_cont);
6239 /* NOTREACHED */
6240 }
6241
6242 VERIFY(TAILQ_FIRST(&ifnet_detaching_head) != NULL);
6243
6244 /* Take care of detaching ifnet */
6245 ifp = ifnet_detaching_dequeue();
6246 if (ifp != NULL) {
6247 dlil_if_unlock();
6248 ifnet_detach_final(ifp);
6249 dlil_if_lock();
6250 }
6251 }
6252 }
6253
6254 static void
6255 ifnet_detacher_thread_func(void *v, wait_result_t w)
6256 {
6257 #pragma unused(v, w)
6258 dlil_if_lock();
6259 (void) msleep0(&ifnet_delayed_run, &dlil_ifnet_lock,
6260 (PZERO - 1), "ifnet_detacher", 0, ifnet_detacher_thread_cont);
6261 /*
6262 * msleep0() shouldn't have returned as PCATCH was not set;
6263 * therefore assert in this case.
6264 */
6265 dlil_if_unlock();
6266 VERIFY(0);
6267 }
6268
6269 static void
6270 ifnet_detach_final(struct ifnet *ifp)
6271 {
6272 struct ifnet_filter *filter, *filter_next;
6273 struct ifnet_filter_head fhead;
6274 struct dlil_threading_info *inp;
6275 struct ifaddr *ifa;
6276 ifnet_detached_func if_free;
6277 int i;
6278
6279 lck_mtx_lock(&ifp->if_ref_lock);
6280 if (!(ifp->if_refflags & IFRF_DETACHING)) {
6281 panic("%s: flags mismatch (detaching not set) ifp=%p",
6282 __func__, ifp);
6283 /* NOTREACHED */
6284 }
6285
6286 /*
6287 * Wait until the existing IO references get released
6288 * before we proceed with ifnet_detach. This is not a
6289 * common case, so block without using a continuation.
6290 */
6291 while (ifp->if_refio > 0) {
6292 printf("%s: Waiting for IO references on %s interface "
6293 "to be released\n", __func__, if_name(ifp));
6294 (void) msleep(&(ifp->if_refio), &ifp->if_ref_lock,
6295 (PZERO - 1), "ifnet_ioref_wait", NULL);
6296 }
6297 lck_mtx_unlock(&ifp->if_ref_lock);
6298
6299 /* Drain and destroy send queue */
6300 ifclassq_teardown(ifp);
6301
6302 /* Detach interface filters */
6303 lck_mtx_lock(&ifp->if_flt_lock);
6304 if_flt_monitor_enter(ifp);
6305
6306 LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
6307 fhead = ifp->if_flt_head;
6308 TAILQ_INIT(&ifp->if_flt_head);
6309
6310 for (filter = TAILQ_FIRST(&fhead); filter; filter = filter_next) {
6311 filter_next = TAILQ_NEXT(filter, filt_next);
6312 lck_mtx_unlock(&ifp->if_flt_lock);
6313
6314 dlil_detach_filter_internal(filter, 1);
6315 lck_mtx_lock(&ifp->if_flt_lock);
6316 }
6317 if_flt_monitor_leave(ifp);
6318 lck_mtx_unlock(&ifp->if_flt_lock);
6319
6320 /* Tell upper layers to drop their network addresses */
6321 if_purgeaddrs(ifp);
6322
6323 ifnet_lock_exclusive(ifp);
6324
6325 /* Unplumb all protocols */
6326 for (i = 0; i < PROTO_HASH_SLOTS; i++) {
6327 struct if_proto *proto;
6328
6329 proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
6330 while (proto != NULL) {
6331 protocol_family_t family = proto->protocol_family;
6332 ifnet_lock_done(ifp);
6333 proto_unplumb(family, ifp);
6334 ifnet_lock_exclusive(ifp);
6335 proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
6336 }
6337 /* There should not be any protocols left */
6338 VERIFY(SLIST_EMPTY(&ifp->if_proto_hash[i]));
6339 }
6340 zfree(dlif_phash_zone, ifp->if_proto_hash);
6341 ifp->if_proto_hash = NULL;
6342
6343 /* Detach (permanent) link address from if_addrhead */
6344 ifa = TAILQ_FIRST(&ifp->if_addrhead);
6345 VERIFY(ifnet_addrs[ifp->if_index - 1] == ifa);
6346 IFA_LOCK(ifa);
6347 if_detach_link_ifa(ifp, ifa);
6348 IFA_UNLOCK(ifa);
6349
6350 /* Remove (permanent) link address from ifnet_addrs[] */
6351 IFA_REMREF(ifa);
6352 ifnet_addrs[ifp->if_index - 1] = NULL;
6353
6354 /* This interface should not be on {ifnet_head,detaching} */
6355 VERIFY(ifp->if_link.tqe_next == NULL);
6356 VERIFY(ifp->if_link.tqe_prev == NULL);
6357 VERIFY(ifp->if_detaching_link.tqe_next == NULL);
6358 VERIFY(ifp->if_detaching_link.tqe_prev == NULL);
6359 VERIFY(ifp->if_ordered_link.tqe_next == NULL);
6360 VERIFY(ifp->if_ordered_link.tqe_prev == NULL);
6361
6362 /* The slot should have been emptied */
6363 VERIFY(ifindex2ifnet[ifp->if_index] == NULL);
6364
6365 /* There should not be any addresses left */
6366 VERIFY(TAILQ_EMPTY(&ifp->if_addrhead));
6367
6368 /*
6369 * Signal the starter thread to terminate itself.
6370 */
6371 if (ifp->if_start_thread != THREAD_NULL) {
6372 lck_mtx_lock_spin(&ifp->if_start_lock);
6373 ifp->if_start_flags = 0;
6374 ifp->if_start_thread = THREAD_NULL;
6375 wakeup_one((caddr_t)&ifp->if_start_thread);
6376 lck_mtx_unlock(&ifp->if_start_lock);
6377 }
6378
6379 /*
6380 * Signal the poller thread to terminate itself.
6381 */
6382 if (ifp->if_poll_thread != THREAD_NULL) {
6383 lck_mtx_lock_spin(&ifp->if_poll_lock);
6384 ifp->if_poll_thread = THREAD_NULL;
6385 wakeup_one((caddr_t)&ifp->if_poll_thread);
6386 lck_mtx_unlock(&ifp->if_poll_lock);
6387 }
6388
6389 /*
6390 * If thread affinity was set for the workloop thread, we will need
6391 * to tear down the affinity and release the extra reference count
6392 * taken at attach time. Does not apply to lo0 or other interfaces
6393 * without dedicated input threads.
6394 */
6395 if ((inp = ifp->if_inp) != NULL) {
6396 VERIFY(inp != dlil_main_input_thread);
6397
6398 if (inp->net_affinity) {
6399 struct thread *tp, *wtp, *ptp;
6400
6401 lck_mtx_lock_spin(&inp->input_lck);
6402 wtp = inp->wloop_thr;
6403 inp->wloop_thr = THREAD_NULL;
6404 ptp = inp->poll_thr;
6405 inp->poll_thr = THREAD_NULL;
6406 tp = inp->input_thr; /* don't nullify now */
6407 inp->tag = 0;
6408 inp->net_affinity = FALSE;
6409 lck_mtx_unlock(&inp->input_lck);
6410
6411 /* Tear down poll thread affinity */
6412 if (ptp != NULL) {
6413 VERIFY(ifp->if_eflags & IFEF_RXPOLL);
6414 (void) dlil_affinity_set(ptp,
6415 THREAD_AFFINITY_TAG_NULL);
6416 thread_deallocate(ptp);
6417 }
6418
6419 /* Tear down workloop thread affinity */
6420 if (wtp != NULL) {
6421 (void) dlil_affinity_set(wtp,
6422 THREAD_AFFINITY_TAG_NULL);
6423 thread_deallocate(wtp);
6424 }
6425
6426 /* Tear down DLIL input thread affinity */
6427 (void) dlil_affinity_set(tp, THREAD_AFFINITY_TAG_NULL);
6428 thread_deallocate(tp);
6429 }
6430
6431 /* disassociate ifp DLIL input thread */
6432 ifp->if_inp = NULL;
6433
6434 /* tell the input thread to terminate */
6435 lck_mtx_lock_spin(&inp->input_lck);
6436 inp->input_waiting |= DLIL_INPUT_TERMINATE;
6437 if (!(inp->input_waiting & DLIL_INPUT_RUNNING)) {
6438 wakeup_one((caddr_t)&inp->input_waiting);
6439 }
6440 lck_mtx_unlock(&inp->input_lck);
6441
6442 /* wait for the input thread to terminate */
6443 lck_mtx_lock_spin(&inp->input_lck);
6444 while ((inp->input_waiting & DLIL_INPUT_TERMINATE_COMPLETE)
6445 == 0) {
6446 (void) msleep(&inp->input_waiting, &inp->input_lck,
6447 (PZERO - 1) | PSPIN, inp->input_name, NULL);
6448 }
6449 lck_mtx_unlock(&inp->input_lck);
6450
6451 /* clean-up input thread state */
6452 dlil_clean_threading_info(inp);
6453
6454 }
6455
6456 /* The driver might unload, so point these to ourselves */
6457 if_free = ifp->if_free;
6458 ifp->if_output_dlil = ifp_if_output;
6459 ifp->if_output = ifp_if_output;
6460 ifp->if_pre_enqueue = ifp_if_output;
6461 ifp->if_start = ifp_if_start;
6462 ifp->if_output_ctl = ifp_if_ctl;
6463 ifp->if_input_dlil = ifp_if_input;
6464 ifp->if_input_poll = ifp_if_input_poll;
6465 ifp->if_input_ctl = ifp_if_ctl;
6466 ifp->if_ioctl = ifp_if_ioctl;
6467 ifp->if_set_bpf_tap = ifp_if_set_bpf_tap;
6468 ifp->if_free = ifp_if_free;
6469 ifp->if_demux = ifp_if_demux;
6470 ifp->if_event = ifp_if_event;
6471 ifp->if_framer_legacy = ifp_if_framer;
6472 ifp->if_framer = ifp_if_framer_extended;
6473 ifp->if_add_proto = ifp_if_add_proto;
6474 ifp->if_del_proto = ifp_if_del_proto;
6475 ifp->if_check_multi = ifp_if_check_multi;
6476
6477 /* wipe out interface description */
6478 VERIFY(ifp->if_desc.ifd_maxlen == IF_DESCSIZE);
6479 ifp->if_desc.ifd_len = 0;
6480 VERIFY(ifp->if_desc.ifd_desc != NULL);
6481 bzero(ifp->if_desc.ifd_desc, IF_DESCSIZE);
6482
6483 /* there shouldn't be any delegation by now */
6484 VERIFY(ifp->if_delegated.ifp == NULL);
6485 VERIFY(ifp->if_delegated.type == 0);
6486 VERIFY(ifp->if_delegated.family == 0);
6487 VERIFY(ifp->if_delegated.subfamily == 0);
6488 VERIFY(ifp->if_delegated.expensive == 0);
6489
6490 /* QoS marking gets cleared */
6491 ifp->if_eflags &= ~IFEF_QOSMARKING_ENABLED;
6492 if_set_qosmarking_mode(ifp, IFRTYPE_QOSMARKING_MODE_NONE);
6493
6494
6495 ifnet_lock_done(ifp);
6496
6497 #if PF
6498 /*
6499 * Detach this interface from packet filter, if enabled.
6500 */
6501 pf_ifnet_hook(ifp, 0);
6502 #endif /* PF */
6503
6504 /* Filter list should be empty */
6505 lck_mtx_lock_spin(&ifp->if_flt_lock);
6506 VERIFY(TAILQ_EMPTY(&ifp->if_flt_head));
6507 VERIFY(ifp->if_flt_busy == 0);
6508 VERIFY(ifp->if_flt_waiters == 0);
6509 lck_mtx_unlock(&ifp->if_flt_lock);
6510
6511 /* Last chance to drain send queue */
6512 if_qflush(ifp, 0);
6513
6514 /* Last chance to cleanup any cached route */
6515 lck_mtx_lock(&ifp->if_cached_route_lock);
6516 VERIFY(!ifp->if_fwd_cacheok);
6517 ROUTE_RELEASE(&ifp->if_fwd_route);
6518 bzero(&ifp->if_fwd_route, sizeof (ifp->if_fwd_route));
6519 ROUTE_RELEASE(&ifp->if_src_route);
6520 bzero(&ifp->if_src_route, sizeof (ifp->if_src_route));
6521 ROUTE_RELEASE(&ifp->if_src_route6);
6522 bzero(&ifp->if_src_route6, sizeof (ifp->if_src_route6));
6523 lck_mtx_unlock(&ifp->if_cached_route_lock);
6524
6525 VERIFY(ifp->if_data_threshold == 0);
6526 VERIFY(ifp->if_dt_tcall != NULL);
6527 VERIFY(!thread_call_isactive(ifp->if_dt_tcall));
6528
6529 ifnet_llreach_ifdetach(ifp);
6530
6531 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHED, NULL, 0);
6532
6533 /*
6534 * Finally, mark this ifnet as detached.
6535 */
6536 lck_mtx_lock_spin(&ifp->if_ref_lock);
6537 if (!(ifp->if_refflags & IFRF_DETACHING)) {
6538 panic("%s: flags mismatch (detaching not set) ifp=%p",
6539 __func__, ifp);
6540 /* NOTREACHED */
6541 }
6542 ifp->if_refflags &= ~IFRF_DETACHING;
6543 lck_mtx_unlock(&ifp->if_ref_lock);
6544 if (if_free != NULL)
6545 if_free(ifp);
6546
6547 if (dlil_verbose)
6548 printf("%s: detached\n", if_name(ifp));
6549
6550 /* Release reference held during ifnet attach */
6551 ifnet_release(ifp);
6552 }
6553
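/*
 * The ifp_if_* stubs below are installed by ifnet_detach_final() above in
 * place of the driver's callbacks, so that any straggling call on a detached
 * ifnet simply drops its packets or returns an error rather than entering
 * driver code that may already have been unloaded.
 */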
6554 errno_t
6555 ifp_if_output(struct ifnet *ifp, struct mbuf *m)
6556 {
6557 #pragma unused(ifp)
6558 m_freem_list(m);
6559 return (0);
6560 }
6561
6562 void
6563 ifp_if_start(struct ifnet *ifp)
6564 {
6565 ifnet_purge(ifp);
6566 }
6567
6568 static errno_t
6569 ifp_if_input(struct ifnet *ifp, struct mbuf *m_head,
6570 struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
6571 boolean_t poll, struct thread *tp)
6572 {
6573 #pragma unused(ifp, m_tail, s, poll, tp)
6574 m_freem_list(m_head);
6575 return (ENXIO);
6576 }
6577
6578 static void
6579 ifp_if_input_poll(struct ifnet *ifp, u_int32_t flags, u_int32_t max_cnt,
6580 struct mbuf **m_head, struct mbuf **m_tail, u_int32_t *cnt, u_int32_t *len)
6581 {
6582 #pragma unused(ifp, flags, max_cnt)
6583 if (m_head != NULL)
6584 *m_head = NULL;
6585 if (m_tail != NULL)
6586 *m_tail = NULL;
6587 if (cnt != NULL)
6588 *cnt = 0;
6589 if (len != NULL)
6590 *len = 0;
6591 }
6592
6593 static errno_t
6594 ifp_if_ctl(struct ifnet *ifp, ifnet_ctl_cmd_t cmd, u_int32_t arglen, void *arg)
6595 {
6596 #pragma unused(ifp, cmd, arglen, arg)
6597 return (EOPNOTSUPP);
6598 }
6599
6600 static errno_t
6601 ifp_if_demux(struct ifnet *ifp, struct mbuf *m, char *fh, protocol_family_t *pf)
6602 {
6603 #pragma unused(ifp, fh, pf)
6604 m_freem(m);
6605 return (EJUSTRETURN);
6606 }
6607
6608 static errno_t
6609 ifp_if_add_proto(struct ifnet *ifp, protocol_family_t pf,
6610 const struct ifnet_demux_desc *da, u_int32_t dc)
6611 {
6612 #pragma unused(ifp, pf, da, dc)
6613 return (EINVAL);
6614 }
6615
6616 static errno_t
6617 ifp_if_del_proto(struct ifnet *ifp, protocol_family_t pf)
6618 {
6619 #pragma unused(ifp, pf)
6620 return (EINVAL);
6621 }
6622
6623 static errno_t
6624 ifp_if_check_multi(struct ifnet *ifp, const struct sockaddr *sa)
6625 {
6626 #pragma unused(ifp, sa)
6627 return (EOPNOTSUPP);
6628 }
6629
6630 #if CONFIG_EMBEDDED
6631 static errno_t
6632 ifp_if_framer(struct ifnet *ifp, struct mbuf **m,
6633 const struct sockaddr *sa, const char *ll, const char *t,
6634 u_int32_t *pre, u_int32_t *post)
6635 #else
6636 static errno_t
6637 ifp_if_framer(struct ifnet *ifp, struct mbuf **m,
6638 const struct sockaddr *sa, const char *ll, const char *t)
6639 #endif /* !CONFIG_EMBEDDED */
6640 {
6641 #pragma unused(ifp, m, sa, ll, t)
6642 #if CONFIG_EMBEDDED
6643 return (ifp_if_framer_extended(ifp, m, sa, ll, t, pre, post));
6644 #else
6645 return (ifp_if_framer_extended(ifp, m, sa, ll, t, NULL, NULL));
6646 #endif /* !CONFIG_EMBEDDED */
6647 }
6648
6649 static errno_t
6650 ifp_if_framer_extended(struct ifnet *ifp, struct mbuf **m,
6651 const struct sockaddr *sa, const char *ll, const char *t,
6652 u_int32_t *pre, u_int32_t *post)
6653 {
6654 #pragma unused(ifp, sa, ll, t)
6655 m_freem(*m);
6656 *m = NULL;
6657
6658 if (pre != NULL)
6659 *pre = 0;
6660 if (post != NULL)
6661 *post = 0;
6662
6663 return (EJUSTRETURN);
6664 }
6665
6666 errno_t
6667 ifp_if_ioctl(struct ifnet *ifp, unsigned long cmd, void *arg)
6668 {
6669 #pragma unused(ifp, cmd, arg)
6670 return (EOPNOTSUPP);
6671 }
6672
6673 static errno_t
6674 ifp_if_set_bpf_tap(struct ifnet *ifp, bpf_tap_mode tm, bpf_packet_func f)
6675 {
6676 #pragma unused(ifp, tm, f)
6677 /* XXX not sure what to do here */
6678 return (0);
6679 }
6680
6681 static void
6682 ifp_if_free(struct ifnet *ifp)
6683 {
6684 #pragma unused(ifp)
6685 }
6686
6687 static void
6688 ifp_if_event(struct ifnet *ifp, const struct kev_msg *e)
6689 {
6690 #pragma unused(ifp, e)
6691 }
6692
6693 __private_extern__
6694 int dlil_if_acquire(u_int32_t family, const void *uniqueid,
6695 size_t uniqueid_len, struct ifnet **ifp)
6696 {
6697 struct ifnet *ifp1 = NULL;
6698 struct dlil_ifnet *dlifp1 = NULL;
6699 void *buf, *base, **pbuf;
6700 int ret = 0;
6701
6702 dlil_if_lock();
6703 TAILQ_FOREACH(dlifp1, &dlil_ifnet_head, dl_if_link) {
6704 ifp1 = (struct ifnet *)dlifp1;
6705
6706 if (ifp1->if_family != family)
6707 continue;
6708
6709 lck_mtx_lock(&dlifp1->dl_if_lock);
6710 /* same uniqueid and same len, or no uniqueid specified */
6711 if ((uniqueid_len == dlifp1->dl_if_uniqueid_len) &&
6712 bcmp(uniqueid, dlifp1->dl_if_uniqueid, uniqueid_len) == 0) {
6713 /* check for matching interface in use */
6714 if (dlifp1->dl_if_flags & DLIF_INUSE) {
6715 if (uniqueid_len) {
6716 ret = EBUSY;
6717 lck_mtx_unlock(&dlifp1->dl_if_lock);
6718 goto end;
6719 }
6720 } else {
6721 dlifp1->dl_if_flags |= (DLIF_INUSE|DLIF_REUSE);
6722 lck_mtx_unlock(&dlifp1->dl_if_lock);
6723 *ifp = ifp1;
6724 goto end;
6725 }
6726 }
6727 lck_mtx_unlock(&dlifp1->dl_if_lock);
6728 }
6729
6730 /* no interface found, allocate a new one */
6731 buf = zalloc(dlif_zone);
6732 if (buf == NULL) {
6733 ret = ENOMEM;
6734 goto end;
6735 }
6736 bzero(buf, dlif_bufsize);
6737
6738 /* Get the 64-bit aligned base address for this object */
6739 base = (void *)P2ROUNDUP((intptr_t)buf + sizeof (u_int64_t),
6740 sizeof (u_int64_t));
6741 VERIFY(((intptr_t)base + dlif_size) <= ((intptr_t)buf + dlif_bufsize));
6742
6743 /*
6744 * Wind back a pointer size from the aligned base and
6745 * save the original address so we can free it later.
6746 */
6747 pbuf = (void **)((intptr_t)base - sizeof (void *));
6748 *pbuf = buf;
6749 dlifp1 = base;
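/*
 * Resulting layout within the zone element (illustrative sketch):
 *
 *	buf                        pbuf          base (64-bit aligned)
 *	|-------- padding -------->|<- void * -->|<-- struct dlil_ifnet -->|
 *	                            (stores buf)
 *
 * so the original zalloc'ed address can always be recovered from the word
 * immediately preceding the aligned dlil_ifnet when it is freed later.
 */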
6750
6751 if (uniqueid_len) {
6752 MALLOC(dlifp1->dl_if_uniqueid, void *, uniqueid_len,
6753 M_NKE, M_WAITOK);
6754 if (dlifp1->dl_if_uniqueid == NULL) {
6755 zfree(dlif_zone, buf);
6756 ret = ENOMEM;
6757 goto end;
6758 }
6759 bcopy(uniqueid, dlifp1->dl_if_uniqueid, uniqueid_len);
6760 dlifp1->dl_if_uniqueid_len = uniqueid_len;
6761 }
6762
6763 ifp1 = (struct ifnet *)dlifp1;
6764 dlifp1->dl_if_flags = DLIF_INUSE;
6765 if (ifnet_debug) {
6766 dlifp1->dl_if_flags |= DLIF_DEBUG;
6767 dlifp1->dl_if_trace = dlil_if_trace;
6768 }
6769 ifp1->if_name = dlifp1->dl_if_namestorage;
6770 ifp1->if_xname = dlifp1->dl_if_xnamestorage;
6771
6772 /* initialize interface description */
6773 ifp1->if_desc.ifd_maxlen = IF_DESCSIZE;
6774 ifp1->if_desc.ifd_len = 0;
6775 ifp1->if_desc.ifd_desc = dlifp1->dl_if_descstorage;
6776
6777
6778 #if CONFIG_MACF_NET
6779 mac_ifnet_label_init(ifp1);
6780 #endif
6781
6782 if ((ret = dlil_alloc_local_stats(ifp1)) != 0) {
6783 DLIL_PRINTF("%s: failed to allocate if local stats, "
6784 "error: %d\n", __func__, ret);
6785 /* This probably shouldn't be fatal */
6786 ret = 0;
6787 }
6788
6789 lck_mtx_init(&dlifp1->dl_if_lock, ifnet_lock_group, ifnet_lock_attr);
6790 lck_rw_init(&ifp1->if_lock, ifnet_lock_group, ifnet_lock_attr);
6791 lck_mtx_init(&ifp1->if_ref_lock, ifnet_lock_group, ifnet_lock_attr);
6792 lck_mtx_init(&ifp1->if_flt_lock, ifnet_lock_group, ifnet_lock_attr);
6793 lck_mtx_init(&ifp1->if_addrconfig_lock, ifnet_lock_group,
6794 ifnet_lock_attr);
6795 lck_rw_init(&ifp1->if_llreach_lock, ifnet_lock_group, ifnet_lock_attr);
6796 #if INET
6797 lck_rw_init(&ifp1->if_inetdata_lock, ifnet_lock_group,
6798 ifnet_lock_attr);
6799 ifp1->if_inetdata = NULL;
6800 #endif
6801 #if INET6
6802 lck_rw_init(&ifp1->if_inet6data_lock, ifnet_lock_group,
6803 ifnet_lock_attr);
6804 ifp1->if_inet6data = NULL;
6805 #endif
6806 lck_rw_init(&ifp1->if_link_status_lock, ifnet_lock_group,
6807 ifnet_lock_attr);
6808 ifp1->if_link_status = NULL;
6809
6810 /* for send data paths */
6811 lck_mtx_init(&ifp1->if_start_lock, ifnet_snd_lock_group,
6812 ifnet_lock_attr);
6813 lck_mtx_init(&ifp1->if_cached_route_lock, ifnet_snd_lock_group,
6814 ifnet_lock_attr);
6815 lck_mtx_init(&ifp1->if_snd.ifcq_lock, ifnet_snd_lock_group,
6816 ifnet_lock_attr);
6817
6818 /* for receive data paths */
6819 lck_mtx_init(&ifp1->if_poll_lock, ifnet_rcv_lock_group,
6820 ifnet_lock_attr);
6821
6822 /* thread call allocation is done with sleeping zalloc */
6823 ifp1->if_dt_tcall = thread_call_allocate_with_options(dlil_dt_tcall_fn,
6824 ifp1, THREAD_CALL_PRIORITY_KERNEL, THREAD_CALL_OPTIONS_ONCE);
6825 if (ifp1->if_dt_tcall == NULL) {
6826 panic_plain("%s: couldn't create if_dt_tcall", __func__);
6827 /* NOTREACHED */
6828 }
6829
6830 TAILQ_INSERT_TAIL(&dlil_ifnet_head, dlifp1, dl_if_link);
6831
6832 *ifp = ifp1;
6833
6834 end:
6835 dlil_if_unlock();
6836
6837 VERIFY(dlifp1 == NULL || (IS_P2ALIGNED(dlifp1, sizeof (u_int64_t)) &&
6838 IS_P2ALIGNED(&ifp1->if_data, sizeof (u_int64_t))));
6839
6840 return (ret);
6841 }
6842
6843 __private_extern__ void
6844 dlil_if_release(ifnet_t ifp)
6845 {
6846 struct dlil_ifnet *dlifp = (struct dlil_ifnet *)ifp;
6847
6848 VERIFY(OSDecrementAtomic64(&net_api_stats.nas_ifnet_alloc_count) > 0);
6849 if (!(ifp->if_xflags & IFXF_ALLOC_KPI)) {
6850 VERIFY(OSDecrementAtomic64(&net_api_stats.nas_ifnet_alloc_os_count) > 0);
6851 }
6852
6853 ifnet_lock_exclusive(ifp);
6854 lck_mtx_lock(&dlifp->dl_if_lock);
6855 dlifp->dl_if_flags &= ~DLIF_INUSE;
6856 strlcpy(dlifp->dl_if_namestorage, ifp->if_name, IFNAMSIZ);
6857 ifp->if_name = dlifp->dl_if_namestorage;
6858 /* Reset external name (name + unit) */
6859 ifp->if_xname = dlifp->dl_if_xnamestorage;
6860 snprintf(__DECONST(char *, ifp->if_xname), IFXNAMSIZ,
6861 "%s?", ifp->if_name);
6862 lck_mtx_unlock(&dlifp->dl_if_lock);
6863 #if CONFIG_MACF_NET
6864 /*
6865 * We can either recycle the MAC label here or in dlil_if_acquire().
6866 * It seems logical to do it here but this means that anything that
6867 * still has a handle on ifp will now see it as unlabeled.
6868 * Since the interface is "dead" that may be OK. Revisit later.
6869 */
6870 mac_ifnet_label_recycle(ifp);
6871 #endif
6872 ifnet_lock_done(ifp);
6873 }
6874
6875 __private_extern__ void
6876 dlil_if_lock(void)
6877 {
6878 lck_mtx_lock(&dlil_ifnet_lock);
6879 }
6880
6881 __private_extern__ void
6882 dlil_if_unlock(void)
6883 {
6884 lck_mtx_unlock(&dlil_ifnet_lock);
6885 }
6886
6887 __private_extern__ void
6888 dlil_if_lock_assert(void)
6889 {
6890 LCK_MTX_ASSERT(&dlil_ifnet_lock, LCK_MTX_ASSERT_OWNED);
6891 }
6892
6893 __private_extern__ void
6894 dlil_proto_unplumb_all(struct ifnet *ifp)
6895 {
6896 /*
6897 * if_proto_hash[0-2] are for PF_INET, PF_INET6 and PF_VLAN, where
6898 * each bucket contains exactly one entry; PF_VLAN does not need an
6899 * explicit unplumb.
6900 *
6901 * if_proto_hash[3] is for other protocols; we expect anything
6902 * in this bucket to respond to the DETACHING event (which would
6903 * have happened by now) and do the unplumb then.
6904 */
6905 (void) proto_unplumb(PF_INET, ifp);
6906 #if INET6
6907 (void) proto_unplumb(PF_INET6, ifp);
6908 #endif /* INET6 */
6909 }
6910
6911 static void
6912 ifp_src_route_copyout(struct ifnet *ifp, struct route *dst)
6913 {
6914 lck_mtx_lock_spin(&ifp->if_cached_route_lock);
6915 lck_mtx_convert_spin(&ifp->if_cached_route_lock);
6916
6917 route_copyout(dst, &ifp->if_src_route, sizeof (*dst));
6918
6919 lck_mtx_unlock(&ifp->if_cached_route_lock);
6920 }
6921
6922 static void
6923 ifp_src_route_copyin(struct ifnet *ifp, struct route *src)
6924 {
6925 lck_mtx_lock_spin(&ifp->if_cached_route_lock);
6926 lck_mtx_convert_spin(&ifp->if_cached_route_lock);
6927
6928 if (ifp->if_fwd_cacheok) {
6929 route_copyin(src, &ifp->if_src_route, sizeof (*src));
6930 } else {
6931 ROUTE_RELEASE(src);
6932 }
6933 lck_mtx_unlock(&ifp->if_cached_route_lock);
6934 }
6935
6936 #if INET6
6937 static void
6938 ifp_src_route6_copyout(struct ifnet *ifp, struct route_in6 *dst)
6939 {
6940 lck_mtx_lock_spin(&ifp->if_cached_route_lock);
6941 lck_mtx_convert_spin(&ifp->if_cached_route_lock);
6942
6943 route_copyout((struct route *)dst, (struct route *)&ifp->if_src_route6,
6944 sizeof (*dst));
6945
6946 lck_mtx_unlock(&ifp->if_cached_route_lock);
6947 }
6948
6949 static void
6950 ifp_src_route6_copyin(struct ifnet *ifp, struct route_in6 *src)
6951 {
6952 lck_mtx_lock_spin(&ifp->if_cached_route_lock);
6953 lck_mtx_convert_spin(&ifp->if_cached_route_lock);
6954
6955 if (ifp->if_fwd_cacheok) {
6956 route_copyin((struct route *)src,
6957 (struct route *)&ifp->if_src_route6, sizeof (*src));
6958 } else {
6959 ROUTE_RELEASE(src);
6960 }
6961 lck_mtx_unlock(&ifp->if_cached_route_lock);
6962 }
6963 #endif /* INET6 */
6964
6965 struct rtentry *
6966 ifnet_cached_rtlookup_inet(struct ifnet *ifp, struct in_addr src_ip)
6967 {
6968 struct route src_rt;
6969 struct sockaddr_in *dst;
6970
6971 dst = (struct sockaddr_in *)(void *)(&src_rt.ro_dst);
6972
6973 ifp_src_route_copyout(ifp, &src_rt);
6974
6975 if (ROUTE_UNUSABLE(&src_rt) || src_ip.s_addr != dst->sin_addr.s_addr) {
6976 ROUTE_RELEASE(&src_rt);
6977 if (dst->sin_family != AF_INET) {
6978 bzero(&src_rt.ro_dst, sizeof (src_rt.ro_dst));
6979 dst->sin_len = sizeof (src_rt.ro_dst);
6980 dst->sin_family = AF_INET;
6981 }
6982 dst->sin_addr = src_ip;
6983
6984 VERIFY(src_rt.ro_rt == NULL);
6985 src_rt.ro_rt = rtalloc1_scoped((struct sockaddr *)dst,
6986 0, 0, ifp->if_index);
6987
6988 if (src_rt.ro_rt != NULL) {
6989 /* retain a ref, copyin consumes one */
6990 struct rtentry *rte = src_rt.ro_rt;
6991 RT_ADDREF(rte);
6992 ifp_src_route_copyin(ifp, &src_rt);
6993 src_rt.ro_rt = rte;
6994 }
6995 }
6996
6997 return (src_rt.ro_rt);
6998 }
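
/*
 * Hypothetical caller sketch (illustrative only, not taken from this file).
 * The rtentry returned above, if any, carries a reference held on behalf of
 * the caller, which must be dropped once the route is no longer needed:
 *
 *	struct rtentry *rt = ifnet_cached_rtlookup_inet(ifp, ip_src);
 *	if (rt != NULL) {
 *		... use rt for the forwarding decision ...
 *		rtfree(rt);
 *	}
 */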
6999
7000 #if INET6
7001 struct rtentry *
7002 ifnet_cached_rtlookup_inet6(struct ifnet *ifp, struct in6_addr *src_ip6)
7003 {
7004 struct route_in6 src_rt;
7005
7006 ifp_src_route6_copyout(ifp, &src_rt);
7007
7008 if (ROUTE_UNUSABLE(&src_rt) ||
7009 !IN6_ARE_ADDR_EQUAL(src_ip6, &src_rt.ro_dst.sin6_addr)) {
7010 ROUTE_RELEASE(&src_rt);
7011 if (src_rt.ro_dst.sin6_family != AF_INET6) {
7012 bzero(&src_rt.ro_dst, sizeof (src_rt.ro_dst));
7013 src_rt.ro_dst.sin6_len = sizeof (src_rt.ro_dst);
7014 src_rt.ro_dst.sin6_family = AF_INET6;
7015 }
7016 src_rt.ro_dst.sin6_scope_id = in6_addr2scopeid(ifp, src_ip6);
7017 bcopy(src_ip6, &src_rt.ro_dst.sin6_addr,
7018 sizeof (src_rt.ro_dst.sin6_addr));
7019
7020 if (src_rt.ro_rt == NULL) {
7021 src_rt.ro_rt = rtalloc1_scoped(
7022 (struct sockaddr *)&src_rt.ro_dst, 0, 0,
7023 ifp->if_index);
7024
7025 if (src_rt.ro_rt != NULL) {
7026 /* retain a ref, copyin consumes one */
7027 struct rtentry *rte = src_rt.ro_rt;
7028 RT_ADDREF(rte);
7029 ifp_src_route6_copyin(ifp, &src_rt);
7030 src_rt.ro_rt = rte;
7031 }
7032 }
7033 }
7034
7035 return (src_rt.ro_rt);
7036 }
7037 #endif /* INET6 */
7038
7039 void
7040 if_lqm_update(struct ifnet *ifp, int lqm, int locked)
7041 {
7042 struct kev_dl_link_quality_metric_data ev_lqm_data;
7043
7044 VERIFY(lqm >= IFNET_LQM_MIN && lqm <= IFNET_LQM_MAX);
7045
7046 /* Normalize to the nearest threshold edge */
7047 if (lqm >= 0 && lqm <= IFNET_LQM_THRESH_ABORT) {
7048 lqm = IFNET_LQM_THRESH_ABORT;
7049 atomic_bitset_32(&tcbinfo.ipi_flags,
7050 INPCBINFO_HANDLE_LQM_ABORT);
7051 inpcb_timer_sched(&tcbinfo, INPCB_TIMER_FAST);
7052 } else if (lqm > IFNET_LQM_THRESH_ABORT &&
7053 lqm <= IFNET_LQM_THRESH_MINIMALLY_VIABLE) {
7054 lqm = IFNET_LQM_THRESH_MINIMALLY_VIABLE;
7055 } else if (lqm > IFNET_LQM_THRESH_MINIMALLY_VIABLE &&
7056 lqm <= IFNET_LQM_THRESH_POOR) {
7057 lqm = IFNET_LQM_THRESH_POOR;
7058 } else if (lqm > IFNET_LQM_THRESH_POOR &&
7059 lqm <= IFNET_LQM_THRESH_GOOD) {
7060 lqm = IFNET_LQM_THRESH_GOOD;
7061 }
7062
7063 /*
7064 * Take the lock if needed
7065 */
7066 if (!locked)
7067 ifnet_lock_exclusive(ifp);
7068
7069 if (lqm == ifp->if_interface_state.lqm_state &&
7070 (ifp->if_interface_state.valid_bitmask &
7071 IF_INTERFACE_STATE_LQM_STATE_VALID)) {
7072 /*
7073 * Release the lock if it was not held by the caller
7074 */
7075 if (!locked)
7076 ifnet_lock_done(ifp);
7077 return; /* nothing to update */
7078 }
7079 ifp->if_interface_state.valid_bitmask |=
7080 IF_INTERFACE_STATE_LQM_STATE_VALID;
7081 ifp->if_interface_state.lqm_state = lqm;
7082
7083 /*
7084 * Don't want to hold the lock when issuing kernel events
7085 */
7086 ifnet_lock_done(ifp);
7087
7088 bzero(&ev_lqm_data, sizeof (ev_lqm_data));
7089 ev_lqm_data.link_quality_metric = lqm;
7090
7091 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_LINK_QUALITY_METRIC_CHANGED,
7092 (struct net_event_data *)&ev_lqm_data, sizeof (ev_lqm_data));
7093
7094 /*
7095 * Reacquire the lock for the caller
7096 */
7097 if (locked)
7098 ifnet_lock_exclusive(ifp);
7099 }
7100
7101 static void
7102 if_rrc_state_update(struct ifnet *ifp, unsigned int rrc_state)
7103 {
7104 struct kev_dl_rrc_state kev;
7105
7106 if (rrc_state == ifp->if_interface_state.rrc_state &&
7107 (ifp->if_interface_state.valid_bitmask &
7108 IF_INTERFACE_STATE_RRC_STATE_VALID))
7109 return;
7110
7111 ifp->if_interface_state.valid_bitmask |=
7112 IF_INTERFACE_STATE_RRC_STATE_VALID;
7113
7114 ifp->if_interface_state.rrc_state = rrc_state;
7115
7116 /*
7117 * Don't want to hold the lock when issuing kernel events
7118 */
7119 ifnet_lock_done(ifp);
7120
7121 bzero(&kev, sizeof(struct kev_dl_rrc_state));
7122 kev.rrc_state = rrc_state;
7123
7124 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_RRC_STATE_CHANGED,
7125 (struct net_event_data *)&kev, sizeof(struct kev_dl_rrc_state));
7126
7127 ifnet_lock_exclusive(ifp);
7128 }
7129
7130 errno_t
7131 if_state_update(struct ifnet *ifp,
7132 struct if_interface_state *if_interface_state)
7133 {
7134 u_short if_index_available = 0;
7135
7136 ifnet_lock_exclusive(ifp);
7137
7138 if ((ifp->if_type != IFT_CELLULAR) &&
7139 (if_interface_state->valid_bitmask &
7140 IF_INTERFACE_STATE_RRC_STATE_VALID)) {
7141 ifnet_lock_done(ifp);
7142 return (ENOTSUP);
7143 }
7144 if ((if_interface_state->valid_bitmask &
7145 IF_INTERFACE_STATE_LQM_STATE_VALID) &&
7146 (if_interface_state->lqm_state < IFNET_LQM_MIN ||
7147 if_interface_state->lqm_state > IFNET_LQM_MAX)) {
7148 ifnet_lock_done(ifp);
7149 return (EINVAL);
7150 }
7151 if ((if_interface_state->valid_bitmask &
7152 IF_INTERFACE_STATE_RRC_STATE_VALID) &&
7153 if_interface_state->rrc_state !=
7154 IF_INTERFACE_STATE_RRC_STATE_IDLE &&
7155 if_interface_state->rrc_state !=
7156 IF_INTERFACE_STATE_RRC_STATE_CONNECTED) {
7157 ifnet_lock_done(ifp);
7158 return (EINVAL);
7159 }
7160
7161 if (if_interface_state->valid_bitmask &
7162 IF_INTERFACE_STATE_LQM_STATE_VALID) {
7163 if_lqm_update(ifp, if_interface_state->lqm_state, 1);
7164 }
7165 if (if_interface_state->valid_bitmask &
7166 IF_INTERFACE_STATE_RRC_STATE_VALID) {
7167 if_rrc_state_update(ifp, if_interface_state->rrc_state);
7168 }
7169 if (if_interface_state->valid_bitmask &
7170 IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID) {
7171 ifp->if_interface_state.valid_bitmask |=
7172 IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
7173 ifp->if_interface_state.interface_availability =
7174 if_interface_state->interface_availability;
7175
7176 if (ifp->if_interface_state.interface_availability ==
7177 IF_INTERFACE_STATE_INTERFACE_AVAILABLE) {
7178 if_index_available = ifp->if_index;
7179 }
7180 }
7181 ifnet_lock_done(ifp);
7182
7183 /*
7184 * Check if the TCP connections on this interface should be
7185 * forced to send probe packets instead of waiting for TCP timers
7186 * to fire. This will be done when there is an explicit
7187 * notification that the interface became available.
7188 */
7189 if (if_index_available > 0)
7190 tcp_interface_send_probe(if_index_available);
7191
7192 return (0);
7193 }
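
/*
 * Hypothetical caller sketch (illustrative only): a control path reporting a
 * link-quality change would fill in just the fields it knows about, e.g.
 *
 *	struct if_interface_state s;
 *	bzero(&s, sizeof (s));
 *	s.valid_bitmask = IF_INTERFACE_STATE_LQM_STATE_VALID;
 *	s.lqm_state = IFNET_LQM_THRESH_GOOD;
 *	(void) if_state_update(ifp, &s);
 */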
7194
7195 void
7196 if_get_state(struct ifnet *ifp,
7197 struct if_interface_state *if_interface_state)
7198 {
7199 ifnet_lock_shared(ifp);
7200
7201 if_interface_state->valid_bitmask = 0;
7202
7203 if (ifp->if_interface_state.valid_bitmask &
7204 IF_INTERFACE_STATE_RRC_STATE_VALID) {
7205 if_interface_state->valid_bitmask |=
7206 IF_INTERFACE_STATE_RRC_STATE_VALID;
7207 if_interface_state->rrc_state =
7208 ifp->if_interface_state.rrc_state;
7209 }
7210 if (ifp->if_interface_state.valid_bitmask &
7211 IF_INTERFACE_STATE_LQM_STATE_VALID) {
7212 if_interface_state->valid_bitmask |=
7213 IF_INTERFACE_STATE_LQM_STATE_VALID;
7214 if_interface_state->lqm_state =
7215 ifp->if_interface_state.lqm_state;
7216 }
7217 if (ifp->if_interface_state.valid_bitmask &
7218 IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID) {
7219 if_interface_state->valid_bitmask |=
7220 IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
7221 if_interface_state->interface_availability =
7222 ifp->if_interface_state.interface_availability;
7223 }
7224
7225 ifnet_lock_done(ifp);
7226 }
7227
7228 errno_t
7229 if_probe_connectivity(struct ifnet *ifp, u_int32_t conn_probe)
7230 {
7231 ifnet_lock_exclusive(ifp);
7232 if (conn_probe > 1) {
7233 ifnet_lock_done(ifp);
7234 return (EINVAL);
7235 }
7236 if (conn_probe == 0)
7237 ifp->if_eflags &= ~IFEF_PROBE_CONNECTIVITY;
7238 else
7239 ifp->if_eflags |= IFEF_PROBE_CONNECTIVITY;
7240 ifnet_lock_done(ifp);
7241
7242 #if NECP
7243 necp_update_all_clients();
7244 #endif /* NECP */
7245
7246 tcp_probe_connectivity(ifp, conn_probe);
7247 return (0);
7248 }
7249
7250 /* for uuid.c */
7251 int
7252 uuid_get_ethernet(u_int8_t *node)
7253 {
7254 struct ifnet *ifp;
7255 struct sockaddr_dl *sdl;
7256
7257 ifnet_head_lock_shared();
7258 TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
7259 ifnet_lock_shared(ifp);
7260 IFA_LOCK_SPIN(ifp->if_lladdr);
7261 sdl = (struct sockaddr_dl *)(void *)ifp->if_lladdr->ifa_addr;
7262 if (sdl->sdl_type == IFT_ETHER) {
7263 memcpy(node, LLADDR(sdl), ETHER_ADDR_LEN);
7264 IFA_UNLOCK(ifp->if_lladdr);
7265 ifnet_lock_done(ifp);
7266 ifnet_head_done();
7267 return (0);
7268 }
7269 IFA_UNLOCK(ifp->if_lladdr);
7270 ifnet_lock_done(ifp);
7271 }
7272 ifnet_head_done();
7273
7274 return (-1);
7275 }
7276
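/*
 * The sysctl handlers below all follow the same pattern: snapshot the
 * current value, let sysctl_handle_int()/sysctl_handle_quad() process the
 * request, and only on a write (req->newptr != USER_ADDR_NULL) validate
 * the new value -- clamping it to a minimum or rejecting it outright --
 * before storing it back into the corresponding global.
 */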
7277 static int
7278 sysctl_rxpoll SYSCTL_HANDLER_ARGS
7279 {
7280 #pragma unused(arg1, arg2)
7281 uint32_t i;
7282 int err;
7283
7284 i = if_rxpoll;
7285
7286 err = sysctl_handle_int(oidp, &i, 0, req);
7287 if (err != 0 || req->newptr == USER_ADDR_NULL)
7288 return (err);
7289
7290 if (net_rxpoll == 0)
7291 return (ENXIO);
7292
7293 if_rxpoll = i;
7294 return (err);
7295 }
7296
7297 static int
7298 sysctl_rxpoll_mode_holdtime SYSCTL_HANDLER_ARGS
7299 {
7300 #pragma unused(arg1, arg2)
7301 uint64_t q;
7302 int err;
7303
7304 q = if_rxpoll_mode_holdtime;
7305
7306 err = sysctl_handle_quad(oidp, &q, 0, req);
7307 if (err != 0 || req->newptr == USER_ADDR_NULL)
7308 return (err);
7309
7310 if (q < IF_RXPOLL_MODE_HOLDTIME_MIN)
7311 q = IF_RXPOLL_MODE_HOLDTIME_MIN;
7312
7313 if_rxpoll_mode_holdtime = q;
7314
7315 return (err);
7316 }
7317
7318 static int
7319 sysctl_rxpoll_sample_holdtime SYSCTL_HANDLER_ARGS
7320 {
7321 #pragma unused(arg1, arg2)
7322 uint64_t q;
7323 int err;
7324
7325 q = if_rxpoll_sample_holdtime;
7326
7327 err = sysctl_handle_quad(oidp, &q, 0, req);
7328 if (err != 0 || req->newptr == USER_ADDR_NULL)
7329 return (err);
7330
7331 if (q < IF_RXPOLL_SAMPLETIME_MIN)
7332 q = IF_RXPOLL_SAMPLETIME_MIN;
7333
7334 if_rxpoll_sample_holdtime = q;
7335
7336 return (err);
7337 }
7338
7339 static int
7340 sysctl_rxpoll_interval_time SYSCTL_HANDLER_ARGS
7341 {
7342 #pragma unused(arg1, arg2)
7343 uint64_t q;
7344 int err;
7345
7346 q = if_rxpoll_interval_time;
7347
7348 err = sysctl_handle_quad(oidp, &q, 0, req);
7349 if (err != 0 || req->newptr == USER_ADDR_NULL)
7350 return (err);
7351
7352 if (q < IF_RXPOLL_INTERVALTIME_MIN)
7353 q = IF_RXPOLL_INTERVALTIME_MIN;
7354
7355 if_rxpoll_interval_time = q;
7356
7357 return (err);
7358 }
7359
7360 static int
7361 sysctl_rxpoll_wlowat SYSCTL_HANDLER_ARGS
7362 {
7363 #pragma unused(arg1, arg2)
7364 uint32_t i;
7365 int err;
7366
7367 i = if_rxpoll_wlowat;
7368
7369 err = sysctl_handle_int(oidp, &i, 0, req);
7370 if (err != 0 || req->newptr == USER_ADDR_NULL)
7371 return (err);
7372
7373 if (i == 0 || i >= if_rxpoll_whiwat)
7374 return (EINVAL);
7375
7376 if_rxpoll_wlowat = i;
7377 return (err);
7378 }
7379
7380 static int
7381 sysctl_rxpoll_whiwat SYSCTL_HANDLER_ARGS
7382 {
7383 #pragma unused(arg1, arg2)
7384 uint32_t i;
7385 int err;
7386
7387 i = if_rxpoll_whiwat;
7388
7389 err = sysctl_handle_int(oidp, &i, 0, req);
7390 if (err != 0 || req->newptr == USER_ADDR_NULL)
7391 return (err);
7392
7393 if (i <= if_rxpoll_wlowat)
7394 return (EINVAL);
7395
7396 if_rxpoll_whiwat = i;
7397 return (err);
7398 }
7399
7400 static int
7401 sysctl_sndq_maxlen SYSCTL_HANDLER_ARGS
7402 {
7403 #pragma unused(arg1, arg2)
7404 int i, err;
7405
7406 i = if_sndq_maxlen;
7407
7408 err = sysctl_handle_int(oidp, &i, 0, req);
7409 if (err != 0 || req->newptr == USER_ADDR_NULL)
7410 return (err);
7411
7412 if (i < IF_SNDQ_MINLEN)
7413 i = IF_SNDQ_MINLEN;
7414
7415 if_sndq_maxlen = i;
7416 return (err);
7417 }
7418
7419 static int
7420 sysctl_rcvq_maxlen SYSCTL_HANDLER_ARGS
7421 {
7422 #pragma unused(arg1, arg2)
7423 int i, err;
7424
7425 i = if_rcvq_maxlen;
7426
7427 err = sysctl_handle_int(oidp, &i, 0, req);
7428 if (err != 0 || req->newptr == USER_ADDR_NULL)
7429 return (err);
7430
7431 if (i < IF_RCVQ_MINLEN)
7432 i = IF_RCVQ_MINLEN;
7433
7434 if_rcvq_maxlen = i;
7435 return (err);
7436 }
7437
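/*
 * Report that a node (identified by an AF_LINK or AF_INET6 address) has
 * been detected on the link: decompose the address into its link-layer
 * and IPv6 parts, feed it to ND6, and post a KEV_DL_NODE_PRESENCE kernel
 * event carrying the RSSI, link quality and proximity metrics.
 */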
7438 void
7439 dlil_node_present(struct ifnet *ifp, struct sockaddr *sa,
7440 int32_t rssi, int lqm, int npm, u_int8_t srvinfo[48])
7441 {
7442 struct kev_dl_node_presence kev;
7443 struct sockaddr_dl *sdl;
7444 struct sockaddr_in6 *sin6;
7445
7446 VERIFY(ifp);
7447 VERIFY(sa);
7448 VERIFY(sa->sa_family == AF_LINK || sa->sa_family == AF_INET6);
7449
7450 bzero(&kev, sizeof (kev));
7451 sin6 = &kev.sin6_node_address;
7452 sdl = &kev.sdl_node_address;
7453 nd6_alt_node_addr_decompose(ifp, sa, sdl, sin6);
7454 kev.rssi = rssi;
7455 kev.link_quality_metric = lqm;
7456 kev.node_proximity_metric = npm;
7457 bcopy(srvinfo, kev.node_service_info, sizeof (kev.node_service_info));
7458
7459 nd6_alt_node_present(ifp, sin6, sdl, rssi, lqm, npm);
7460 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_PRESENCE,
7461 &kev.link_data, sizeof (kev));
7462 }
7463
7464 void
7465 dlil_node_absent(struct ifnet *ifp, struct sockaddr *sa)
7466 {
7467 struct kev_dl_node_absence kev;
7468 struct sockaddr_in6 *sin6;
7469 struct sockaddr_dl *sdl;
7470
7471 VERIFY(ifp);
7472 VERIFY(sa);
7473 VERIFY(sa->sa_family == AF_LINK || sa->sa_family == AF_INET6);
7474
7475 bzero(&kev, sizeof (kev));
7476 sin6 = &kev.sin6_node_address;
7477 sdl = &kev.sdl_node_address;
7478 nd6_alt_node_addr_decompose(ifp, sa, sdl, sin6);
7479
7480 nd6_alt_node_absent(ifp, sin6);
7481 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_ABSENCE,
7482 &kev.link_data, sizeof (kev));
7483 }
7484
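/*
 * Return a pointer to the raw link-layer address bytes of the given
 * sockaddr_dl.  When MAC policy checking of link-layer addresses is in
 * effect (dlil_lladdr_ckreq), callers that fail the "net.link.addr"
 * check are handed a fixed placeholder address instead; byte 0 of that
 * placeholder is 2, which appears intended to mark it as a locally
 * administered, non-unique address.
 */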
7485 const void *
7486 dlil_ifaddr_bytes(const struct sockaddr_dl *sdl, size_t *sizep,
7487 kauth_cred_t *credp)
7488 {
7489 const u_int8_t *bytes;
7490 size_t size;
7491
7492 bytes = CONST_LLADDR(sdl);
7493 size = sdl->sdl_alen;
7494
7495 #if CONFIG_MACF
7496 if (dlil_lladdr_ckreq) {
7497 switch (sdl->sdl_type) {
7498 case IFT_ETHER:
7499 case IFT_IEEE1394:
7500 break;
7501 default:
7502 credp = NULL;
7503 break;
7504 }
7505
7506 if (credp && mac_system_check_info(*credp, "net.link.addr")) {
7507 static const u_int8_t unspec[FIREWIRE_EUI64_LEN] = {
7508 [0] = 2
7509 };
7510
7511 bytes = unspec;
7512 }
7513 }
7514 #else
7515 #pragma unused(credp)
7516 #endif
7517
7518 if (sizep != NULL) *sizep = size;
7519 return (bytes);
7520 }
7521
7522 void
7523 dlil_report_issues(struct ifnet *ifp, u_int8_t modid[DLIL_MODIDLEN],
7524 u_int8_t info[DLIL_MODARGLEN])
7525 {
7526 struct kev_dl_issues kev;
7527 struct timeval tv;
7528
7529 VERIFY(ifp != NULL);
7530 VERIFY(modid != NULL);
7531 _CASSERT(sizeof (kev.modid) == DLIL_MODIDLEN);
7532 _CASSERT(sizeof (kev.info) == DLIL_MODARGLEN);
7533
7534 bzero(&kev, sizeof (kev));
7535
7536 microtime(&tv);
7537 kev.timestamp = tv.tv_sec;
7538 bcopy(modid, &kev.modid, DLIL_MODIDLEN);
7539 if (info != NULL)
7540 bcopy(info, &kev.info, DLIL_MODARGLEN);
7541
7542 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_ISSUES,
7543 &kev.link_data, sizeof (kev));
7544 }
7545
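/*
 * Handle the SIOCSIFOPPORTUNISTIC / SIOCGIFOPPORTUNISTIC ioctls.  An
 * illustrative (hypothetical) userland sketch, assuming the private
 * ifreq/ifr_opportunistic definitions are visible to the caller:
 *
 *	struct ifreq ifr;
 *	bzero(&ifr, sizeof (ifr));
 *	strlcpy(ifr.ifr_name, "en0", sizeof (ifr.ifr_name));
 *	ifr.ifr_opportunistic.ifo_flags = IFRIFOF_BLOCK_OPPORTUNISTIC;
 *	if (ioctl(s, SIOCSIFOPPORTUNISTIC, &ifr) == 0)	// s: socket fd
 *		printf("%u opportunistic flows\n",
 *		    ifr.ifr_opportunistic.ifo_inuse);
 *
 * The set path requires superuser; both paths report the number of
 * opportunistic TCP/UDP connections on the interface in ifo_inuse.
 */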
7546 errno_t
7547 ifnet_getset_opportunistic(ifnet_t ifp, u_long cmd, struct ifreq *ifr,
7548 struct proc *p)
7549 {
7550 u_int32_t level = IFNET_THROTTLE_OFF;
7551 errno_t result = 0;
7552
7553 VERIFY(cmd == SIOCSIFOPPORTUNISTIC || cmd == SIOCGIFOPPORTUNISTIC);
7554
7555 if (cmd == SIOCSIFOPPORTUNISTIC) {
7556 /*
7557 * XXX: Use priv_check_cred() instead of root check?
7558 */
7559 if ((result = proc_suser(p)) != 0)
7560 return (result);
7561
7562 if (ifr->ifr_opportunistic.ifo_flags ==
7563 IFRIFOF_BLOCK_OPPORTUNISTIC)
7564 level = IFNET_THROTTLE_OPPORTUNISTIC;
7565 else if (ifr->ifr_opportunistic.ifo_flags == 0)
7566 level = IFNET_THROTTLE_OFF;
7567 else
7568 result = EINVAL;
7569
7570 if (result == 0)
7571 result = ifnet_set_throttle(ifp, level);
7572 } else if ((result = ifnet_get_throttle(ifp, &level)) == 0) {
7573 ifr->ifr_opportunistic.ifo_flags = 0;
7574 if (level == IFNET_THROTTLE_OPPORTUNISTIC) {
7575 ifr->ifr_opportunistic.ifo_flags |=
7576 IFRIFOF_BLOCK_OPPORTUNISTIC;
7577 }
7578 }
7579
7580 /*
7581 * Return the count of current opportunistic connections
7582 * over the interface.
7583 */
7584 if (result == 0) {
7585 uint32_t flags = 0;
7586 flags |= (cmd == SIOCSIFOPPORTUNISTIC) ?
7587 INPCB_OPPORTUNISTIC_SETCMD : 0;
7588 flags |= (level == IFNET_THROTTLE_OPPORTUNISTIC) ?
7589 INPCB_OPPORTUNISTIC_THROTTLEON : 0;
7590 ifr->ifr_opportunistic.ifo_inuse =
7591 udp_count_opportunistic(ifp->if_index, flags) +
7592 tcp_count_opportunistic(ifp->if_index, flags);
7593 }
7594
7595 if (result == EALREADY)
7596 result = 0;
7597
7598 return (result);
7599 }
7600
7601 int
7602 ifnet_get_throttle(struct ifnet *ifp, u_int32_t *level)
7603 {
7604 struct ifclassq *ifq;
7605 int err = 0;
7606
7607 if (!(ifp->if_eflags & IFEF_TXSTART))
7608 return (ENXIO);
7609
7610 *level = IFNET_THROTTLE_OFF;
7611
7612 ifq = &ifp->if_snd;
7613 IFCQ_LOCK(ifq);
7614 /* Throttling works only for IFCQ, not ALTQ instances */
7615 if (IFCQ_IS_ENABLED(ifq))
7616 IFCQ_GET_THROTTLE(ifq, *level, err);
7617 IFCQ_UNLOCK(ifq);
7618
7619 return (err);
7620 }
7621
7622 int
7623 ifnet_set_throttle(struct ifnet *ifp, u_int32_t level)
7624 {
7625 struct ifclassq *ifq;
7626 int err = 0;
7627
7628 if (!(ifp->if_eflags & IFEF_TXSTART))
7629 return (ENXIO);
7630
7631 ifq = &ifp->if_snd;
7632
7633 switch (level) {
7634 case IFNET_THROTTLE_OFF:
7635 case IFNET_THROTTLE_OPPORTUNISTIC:
7636 break;
7637 default:
7638 return (EINVAL);
7639 }
7640
7641 IFCQ_LOCK(ifq);
7642 if (IFCQ_IS_ENABLED(ifq))
7643 IFCQ_SET_THROTTLE(ifq, level, err);
7644 IFCQ_UNLOCK(ifq);
7645
7646 if (err == 0) {
7647 printf("%s: throttling level set to %d\n", if_name(ifp),
7648 level);
7649 if (level == IFNET_THROTTLE_OFF)
7650 ifnet_start(ifp);
7651 }
7652
7653 return (err);
7654 }
7655
7656 errno_t
7657 ifnet_getset_log(ifnet_t ifp, u_long cmd, struct ifreq *ifr,
7658 struct proc *p)
7659 {
7660 #pragma unused(p)
7661 errno_t result = 0;
7662 uint32_t flags;
7663 int level, category, subcategory;
7664
7665 VERIFY(cmd == SIOCSIFLOG || cmd == SIOCGIFLOG);
7666
7667 if (cmd == SIOCSIFLOG) {
7668 if ((result = priv_check_cred(kauth_cred_get(),
7669 PRIV_NET_INTERFACE_CONTROL, 0)) != 0)
7670 return (result);
7671
7672 level = ifr->ifr_log.ifl_level;
7673 if (level < IFNET_LOG_MIN || level > IFNET_LOG_MAX)
7674 result = EINVAL;
7675
7676 flags = ifr->ifr_log.ifl_flags;
7677 if ((flags &= IFNET_LOGF_MASK) == 0)
7678 result = EINVAL;
7679
7680 category = ifr->ifr_log.ifl_category;
7681 subcategory = ifr->ifr_log.ifl_subcategory;
7682
7683 if (result == 0)
7684 result = ifnet_set_log(ifp, level, flags,
7685 category, subcategory);
7686 } else {
7687 result = ifnet_get_log(ifp, &level, &flags, &category,
7688 &subcategory);
7689 if (result == 0) {
7690 ifr->ifr_log.ifl_level = level;
7691 ifr->ifr_log.ifl_flags = flags;
7692 ifr->ifr_log.ifl_category = category;
7693 ifr->ifr_log.ifl_subcategory = subcategory;
7694 }
7695 }
7696
7697 return (result);
7698 }
7699
7700 int
7701 ifnet_set_log(struct ifnet *ifp, int32_t level, uint32_t flags,
7702 int32_t category, int32_t subcategory)
7703 {
7704 int err = 0;
7705
7706 VERIFY(level >= IFNET_LOG_MIN && level <= IFNET_LOG_MAX);
7707 VERIFY(flags & IFNET_LOGF_MASK);
7708
7709 /*
7710 * The logging level applies to all facilities; make sure to
7711 * update them all with the most current level.
7712 */
7713 flags |= ifp->if_log.flags;
7714
7715 if (ifp->if_output_ctl != NULL) {
7716 struct ifnet_log_params l;
7717
7718 bzero(&l, sizeof (l));
7719 l.level = level;
7720 l.flags = flags;
7721 l.flags &= ~IFNET_LOGF_DLIL;
7722 l.category = category;
7723 l.subcategory = subcategory;
7724
7725 /* Send this request to lower layers */
7726 if (l.flags != 0) {
7727 err = ifp->if_output_ctl(ifp, IFNET_CTL_SET_LOG,
7728 sizeof (l), &l);
7729 }
7730 } else if ((flags & ~IFNET_LOGF_DLIL) && ifp->if_output_ctl == NULL) {
7731 /*
7732 * If targeted to the lower layers without an output
7733 * control callback registered on the interface, just
7734 * silently ignore facilities other than ours.
7735 */
7736 flags &= IFNET_LOGF_DLIL;
7737 if (flags == 0 && (!(ifp->if_log.flags & IFNET_LOGF_DLIL)))
7738 level = 0;
7739 }
7740
7741 if (err == 0) {
7742 if ((ifp->if_log.level = level) == IFNET_LOG_DEFAULT)
7743 ifp->if_log.flags = 0;
7744 else
7745 ifp->if_log.flags |= flags;
7746
7747 log(LOG_INFO, "%s: logging level set to %d flags=%b "
7748 "arg=%b, category=%d subcategory=%d\n", if_name(ifp),
7749 ifp->if_log.level, ifp->if_log.flags,
7750 IFNET_LOGF_BITS, flags, IFNET_LOGF_BITS,
7751 category, subcategory);
7752 }
7753
7754 return (err);
7755 }
7756
7757 int
7758 ifnet_get_log(struct ifnet *ifp, int32_t *level, uint32_t *flags,
7759 int32_t *category, int32_t *subcategory)
7760 {
7761 if (level != NULL)
7762 *level = ifp->if_log.level;
7763 if (flags != NULL)
7764 *flags = ifp->if_log.flags;
7765 if (category != NULL)
7766 *category = ifp->if_log.category;
7767 if (subcategory != NULL)
7768 *subcategory = ifp->if_log.subcategory;
7769
7770 return (0);
7771 }
7772
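/*
 * Tell interested parties that the set of addresses of the given family
 * on this interface has changed: give PF a chance to update its tables,
 * then pass an IFNET_CTL_NOTIFY_ADDRESS control down to the driver via
 * its if_output_ctl callback (EOPNOTSUPP if none is registered).
 */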
7773 int
7774 ifnet_notify_address(struct ifnet *ifp, int af)
7775 {
7776 struct ifnet_notify_address_params na;
7777
7778 #if PF
7779 (void) pf_ifaddr_hook(ifp);
7780 #endif /* PF */
7781
7782 if (ifp->if_output_ctl == NULL)
7783 return (EOPNOTSUPP);
7784
7785 bzero(&na, sizeof (na));
7786 na.address_family = af;
7787
7788 return (ifp->if_output_ctl(ifp, IFNET_CTL_NOTIFY_ADDRESS,
7789 sizeof (na), &na));
7790 }
7791
7792 errno_t
7793 ifnet_flowid(struct ifnet *ifp, uint32_t *flowid)
7794 {
7795 if (ifp == NULL || flowid == NULL) {
7796 return (EINVAL);
7797 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
7798 !IF_FULLY_ATTACHED(ifp)) {
7799 return (ENXIO);
7800 }
7801
7802 *flowid = ifp->if_flowhash;
7803
7804 return (0);
7805 }
7806
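/*
 * Flow-control plumbing: ifnet_disable_output() records the interface's
 * current flow hash in the ifnet_fc_tree and marks the start thread as
 * flow controlled; a later flow advisory carrying that same hash (see
 * ifnet_flowadv() below) looks the entry up, removes it, and re-enables
 * output on the interface.
 */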
7807 errno_t
7808 ifnet_disable_output(struct ifnet *ifp)
7809 {
7810 int err;
7811
7812 if (ifp == NULL) {
7813 return (EINVAL);
7814 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
7815 !IF_FULLY_ATTACHED(ifp)) {
7816 return (ENXIO);
7817 }
7818
7819 if ((err = ifnet_fc_add(ifp)) == 0) {
7820 lck_mtx_lock_spin(&ifp->if_start_lock);
7821 ifp->if_start_flags |= IFSF_FLOW_CONTROLLED;
7822 lck_mtx_unlock(&ifp->if_start_lock);
7823 }
7824 return (err);
7825 }
7826
7827 errno_t
7828 ifnet_enable_output(struct ifnet *ifp)
7829 {
7830 if (ifp == NULL) {
7831 return (EINVAL);
7832 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
7833 !IF_FULLY_ATTACHED(ifp)) {
7834 return (ENXIO);
7835 }
7836
7837 ifnet_start_common(ifp, 1);
7838 return (0);
7839 }
7840
7841 void
7842 ifnet_flowadv(uint32_t flowhash)
7843 {
7844 struct ifnet_fc_entry *ifce;
7845 struct ifnet *ifp;
7846
7847 ifce = ifnet_fc_get(flowhash);
7848 if (ifce == NULL)
7849 return;
7850
7851 VERIFY(ifce->ifce_ifp != NULL);
7852 ifp = ifce->ifce_ifp;
7853
7854 /* flow hash gets recalculated per attach, so check */
7855 if (ifnet_is_attached(ifp, 1)) {
7856 if (ifp->if_flowhash == flowhash)
7857 (void) ifnet_enable_output(ifp);
7858 ifnet_decr_iorefcnt(ifp);
7859 }
7860 ifnet_fc_entry_free(ifce);
7861 }
7862
7863 /*
7864 * Function to compare ifnet_fc_entries in ifnet flow control tree
7865 */
7866 static inline int
7867 ifce_cmp(const struct ifnet_fc_entry *fc1, const struct ifnet_fc_entry *fc2)
7868 {
7869 return (fc1->ifce_flowhash - fc2->ifce_flowhash);
7870 }
7871
7872 static int
7873 ifnet_fc_add(struct ifnet *ifp)
7874 {
7875 struct ifnet_fc_entry keyfc, *ifce;
7876 uint32_t flowhash;
7877
7878 VERIFY(ifp != NULL && (ifp->if_eflags & IFEF_TXSTART));
7879 VERIFY(ifp->if_flowhash != 0);
7880 flowhash = ifp->if_flowhash;
7881
7882 bzero(&keyfc, sizeof (keyfc));
7883 keyfc.ifce_flowhash = flowhash;
7884
7885 lck_mtx_lock_spin(&ifnet_fc_lock);
7886 ifce = RB_FIND(ifnet_fc_tree, &ifnet_fc_tree, &keyfc);
7887 if (ifce != NULL && ifce->ifce_ifp == ifp) {
7888 /* Entry is already in ifnet_fc_tree, return */
7889 lck_mtx_unlock(&ifnet_fc_lock);
7890 return (0);
7891 }
7892
7893 if (ifce != NULL) {
7894 /*
7895 * There is a different fc entry with the same flow hash
7896 * but different ifp pointer. There can be a collision
7897 * on flow hash but the probability is low. Let's just
7898 * avoid adding a second one when there is a collision.
7899 */
7900 lck_mtx_unlock(&ifnet_fc_lock);
7901 return (EAGAIN);
7902 }
7903
7904 /* become regular mutex */
7905 lck_mtx_convert_spin(&ifnet_fc_lock);
7906
7907 ifce = zalloc_noblock(ifnet_fc_zone);
7908 if (ifce == NULL) {
7909 /* memory allocation failed */
7910 lck_mtx_unlock(&ifnet_fc_lock);
7911 return (ENOMEM);
7912 }
7913 bzero(ifce, ifnet_fc_zone_size);
7914
7915 ifce->ifce_flowhash = flowhash;
7916 ifce->ifce_ifp = ifp;
7917
7918 RB_INSERT(ifnet_fc_tree, &ifnet_fc_tree, ifce);
7919 lck_mtx_unlock(&ifnet_fc_lock);
7920 return (0);
7921 }
7922
7923 static struct ifnet_fc_entry *
7924 ifnet_fc_get(uint32_t flowhash)
7925 {
7926 struct ifnet_fc_entry keyfc, *ifce;
7927 struct ifnet *ifp;
7928
7929 bzero(&keyfc, sizeof (keyfc));
7930 keyfc.ifce_flowhash = flowhash;
7931
7932 lck_mtx_lock_spin(&ifnet_fc_lock);
7933 ifce = RB_FIND(ifnet_fc_tree, &ifnet_fc_tree, &keyfc);
7934 if (ifce == NULL) {
7935 /* Entry is not present in ifnet_fc_tree, return */
7936 lck_mtx_unlock(&ifnet_fc_lock);
7937 return (NULL);
7938 }
7939
7940 RB_REMOVE(ifnet_fc_tree, &ifnet_fc_tree, ifce);
7941
7942 VERIFY(ifce->ifce_ifp != NULL);
7943 ifp = ifce->ifce_ifp;
7944
7945 /* become regular mutex */
7946 lck_mtx_convert_spin(&ifnet_fc_lock);
7947
7948 if (!ifnet_is_attached(ifp, 0)) {
7949 /*
7950 * This ifp is not attached or in the process of being
7951 * detached; just don't process it.
7952 */
7953 ifnet_fc_entry_free(ifce);
7954 ifce = NULL;
7955 }
7956 lck_mtx_unlock(&ifnet_fc_lock);
7957
7958 return (ifce);
7959 }
7960
7961 static void
7962 ifnet_fc_entry_free(struct ifnet_fc_entry *ifce)
7963 {
7964 zfree(ifnet_fc_zone, ifce);
7965 }
7966
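/*
 * Compute a per-attach flow hash over the interface's identity and
 * configuration, salted with random values.  A zero hash is treated as
 * invalid (see the VERIFY in ifnet_fc_add()), so the seed is re-rolled
 * until a non-zero value is produced.
 */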
7967 static uint32_t
7968 ifnet_calc_flowhash(struct ifnet *ifp)
7969 {
7970 struct ifnet_flowhash_key fh __attribute__((aligned(8)));
7971 uint32_t flowhash = 0;
7972
7973 if (ifnet_flowhash_seed == 0)
7974 ifnet_flowhash_seed = RandomULong();
7975
7976 bzero(&fh, sizeof (fh));
7977
7978 (void) snprintf(fh.ifk_name, sizeof (fh.ifk_name), "%s", ifp->if_name);
7979 fh.ifk_unit = ifp->if_unit;
7980 fh.ifk_flags = ifp->if_flags;
7981 fh.ifk_eflags = ifp->if_eflags;
7982 fh.ifk_capabilities = ifp->if_capabilities;
7983 fh.ifk_capenable = ifp->if_capenable;
7984 fh.ifk_output_sched_model = ifp->if_output_sched_model;
7985 fh.ifk_rand1 = RandomULong();
7986 fh.ifk_rand2 = RandomULong();
7987
7988 try_again:
7989 flowhash = net_flowhash(&fh, sizeof (fh), ifnet_flowhash_seed);
7990 if (flowhash == 0) {
7991 /* try to get a non-zero flowhash */
7992 ifnet_flowhash_seed = RandomULong();
7993 goto try_again;
7994 }
7995
7996 return (flowhash);
7997 }
7998
7999 int
8000 ifnet_set_netsignature(struct ifnet *ifp, uint8_t family, uint8_t len,
8001 uint16_t flags, uint8_t *data)
8002 {
8003 #pragma unused(flags)
8004 int error = 0;
8005
8006 switch (family) {
8007 case AF_INET:
8008 if_inetdata_lock_exclusive(ifp);
8009 if (IN_IFEXTRA(ifp) != NULL) {
8010 if (len == 0) {
8011 /* Allow clearing the signature */
8012 IN_IFEXTRA(ifp)->netsig_len = 0;
8013 bzero(IN_IFEXTRA(ifp)->netsig,
8014 sizeof (IN_IFEXTRA(ifp)->netsig));
8015 if_inetdata_lock_done(ifp);
8016 break;
8017 } else if (len > sizeof (IN_IFEXTRA(ifp)->netsig)) {
8018 error = EINVAL;
8019 if_inetdata_lock_done(ifp);
8020 break;
8021 }
8022 IN_IFEXTRA(ifp)->netsig_len = len;
8023 bcopy(data, IN_IFEXTRA(ifp)->netsig, len);
8024 } else {
8025 error = ENOMEM;
8026 }
8027 if_inetdata_lock_done(ifp);
8028 break;
8029
8030 case AF_INET6:
8031 if_inet6data_lock_exclusive(ifp);
8032 if (IN6_IFEXTRA(ifp) != NULL) {
8033 if (len == 0) {
8034 /* Allow clearing the signature */
8035 IN6_IFEXTRA(ifp)->netsig_len = 0;
8036 bzero(IN6_IFEXTRA(ifp)->netsig,
8037 sizeof (IN6_IFEXTRA(ifp)->netsig));
8038 if_inet6data_lock_done(ifp);
8039 break;
8040 } else if (len > sizeof (IN6_IFEXTRA(ifp)->netsig)) {
8041 error = EINVAL;
8042 if_inet6data_lock_done(ifp);
8043 break;
8044 }
8045 IN6_IFEXTRA(ifp)->netsig_len = len;
8046 bcopy(data, IN6_IFEXTRA(ifp)->netsig, len);
8047 } else {
8048 error = ENOMEM;
8049 }
8050 if_inet6data_lock_done(ifp);
8051 break;
8052
8053 default:
8054 error = EINVAL;
8055 break;
8056 }
8057
8058 return (error);
8059 }
8060
8061 int
8062 ifnet_get_netsignature(struct ifnet *ifp, uint8_t family, uint8_t *len,
8063 uint16_t *flags, uint8_t *data)
8064 {
8065 int error = 0;
8066
8067 if (ifp == NULL || len == NULL || data == NULL)
8068 return (EINVAL);
8069
8070 switch (family) {
8071 case AF_INET:
8072 if_inetdata_lock_shared(ifp);
8073 if (IN_IFEXTRA(ifp) != NULL) {
8074 if (*len == 0 || *len < IN_IFEXTRA(ifp)->netsig_len) {
8075 error = EINVAL;
8076 if_inetdata_lock_done(ifp);
8077 break;
8078 }
8079 if ((*len = IN_IFEXTRA(ifp)->netsig_len) > 0)
8080 bcopy(IN_IFEXTRA(ifp)->netsig, data, *len);
8081 else
8082 error = ENOENT;
8083 } else {
8084 error = ENOMEM;
8085 }
8086 if_inetdata_lock_done(ifp);
8087 break;
8088
8089 case AF_INET6:
8090 if_inet6data_lock_shared(ifp);
8091 if (IN6_IFEXTRA(ifp) != NULL) {
8092 if (*len == 0 || *len < IN6_IFEXTRA(ifp)->netsig_len) {
8093 error = EINVAL;
8094 if_inet6data_lock_done(ifp);
8095 break;
8096 }
8097 if ((*len = IN6_IFEXTRA(ifp)->netsig_len) > 0)
8098 bcopy(IN6_IFEXTRA(ifp)->netsig, data, *len);
8099 else
8100 error = ENOENT;
8101 } else {
8102 error = ENOMEM;
8103 }
8104 if_inet6data_lock_done(ifp);
8105 break;
8106
8107 default:
8108 error = EINVAL;
8109 break;
8110 }
8111
8112 if (error == 0 && flags != NULL)
8113 *flags = 0;
8114
8115 return (error);
8116 }
8117
8118 #if INET6
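/*
 * Install up to NAT64_MAX_NUM_PREFIXES NAT64 prefixes on the interface.
 * A zero prefix length clears the corresponding slot; otherwise the
 * length must be one of the well-known NAT64 prefix lengths (32, 40, 48,
 * 56, 64 or 96 bits, per RFC 6052) and the prefix must not be
 * scope-embedded.  NECP clients are notified when at least one prefix
 * was set.
 */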
8119 int
8120 ifnet_set_nat64prefix(struct ifnet *ifp, struct ipv6_prefix *prefixes)
8121 {
8122 int i, error = 0, one_set = 0;
8123
8124 if_inet6data_lock_exclusive(ifp);
8125
8126 if (IN6_IFEXTRA(ifp) == NULL) {
8127 error = ENOMEM;
8128 goto out;
8129 }
8130
8131 for (i = 0; i < NAT64_MAX_NUM_PREFIXES; i++) {
8132 uint32_t prefix_len =
8133 prefixes[i].prefix_len;
8134 struct in6_addr *prefix =
8135 &prefixes[i].ipv6_prefix;
8136
8137 if (prefix_len == 0) {
8138 /* Allow clearing the prefix */
8139 IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len = 0;
8140 bzero(&IN6_IFEXTRA(ifp)->nat64_prefixes[i].ipv6_prefix,
8141 sizeof(struct in6_addr));
8142
8143 continue;
8144 } else if (prefix_len != NAT64_PREFIX_LEN_32 &&
8145 prefix_len != NAT64_PREFIX_LEN_40 &&
8146 prefix_len != NAT64_PREFIX_LEN_48 &&
8147 prefix_len != NAT64_PREFIX_LEN_56 &&
8148 prefix_len != NAT64_PREFIX_LEN_64 &&
8149 prefix_len != NAT64_PREFIX_LEN_96) {
8150 error = EINVAL;
8151 goto out;
8152 }
8153
8154 if (IN6_IS_SCOPE_EMBED(prefix)) {
8155 error = EINVAL;
8156 goto out;
8157 }
8158
8159 IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len = prefix_len;
8160 bcopy(prefix, &IN6_IFEXTRA(ifp)->nat64_prefixes[i].ipv6_prefix,
8161 sizeof(struct in6_addr));
8162 one_set = 1;
8163 }
8164
8165 out:
8166 if_inet6data_lock_done(ifp);
8167
8168 if (error == 0 && one_set != 0)
8169 necp_update_all_clients();
8170
8171 return (error);
8172 }
8173
8174 int
8175 ifnet_get_nat64prefix(struct ifnet *ifp, struct ipv6_prefix *prefixes)
8176 {
8177 int i, found_one = 0, error = 0;
8178
8179 if (ifp == NULL)
8180 return (EINVAL);
8181
8182 if_inet6data_lock_shared(ifp);
8183
8184 if (IN6_IFEXTRA(ifp) == NULL) {
8185 error = ENOMEM;
8186 goto out;
8187 }
8188
8189 for (i = 0; i < NAT64_MAX_NUM_PREFIXES; i++) {
8190 if (IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len != 0)
8191 found_one = 1;
8192 }
8193
8194 if (found_one == 0) {
8195 error = ENOENT;
8196 goto out;
8197 }
8198
8199 if (prefixes)
8200 bcopy(IN6_IFEXTRA(ifp)->nat64_prefixes, prefixes,
8201 sizeof(IN6_IFEXTRA(ifp)->nat64_prefixes));
8202
8203 out:
8204 if_inet6data_lock_done(ifp);
8205
8206 return (error);
8207 }
8208 #endif
8209
8210 static void
8211 dlil_output_cksum_dbg(struct ifnet *ifp, struct mbuf *m, uint32_t hoff,
8212 protocol_family_t pf)
8213 {
8214 #pragma unused(ifp)
8215 uint32_t did_sw;
8216
8217 if (!(hwcksum_dbg_mode & HWCKSUM_DBG_FINALIZE_FORCED) ||
8218 (m->m_pkthdr.csum_flags & (CSUM_TSO_IPV4|CSUM_TSO_IPV6)))
8219 return;
8220
8221 switch (pf) {
8222 case PF_INET:
8223 did_sw = in_finalize_cksum(m, hoff, m->m_pkthdr.csum_flags);
8224 if (did_sw & CSUM_DELAY_IP)
8225 hwcksum_dbg_finalized_hdr++;
8226 if (did_sw & CSUM_DELAY_DATA)
8227 hwcksum_dbg_finalized_data++;
8228 break;
8229 #if INET6
8230 case PF_INET6:
8231 /*
8232 * Checksum offload should not have been enabled when
8233 * extension headers exist; that also means that we
8234 * cannot force-finalize packets with extension headers.
8235 * Indicate to the callee that it should skip such cases
8236 * by setting optlen to -1.
8237 */
8238 did_sw = in6_finalize_cksum(m, hoff, -1, -1,
8239 m->m_pkthdr.csum_flags);
8240 if (did_sw & CSUM_DELAY_IPV6_DATA)
8241 hwcksum_dbg_finalized_data++;
8242 break;
8243 #endif /* INET6 */
8244 default:
8245 return;
8246 }
8247 }
8248
8249 static void
8250 dlil_input_cksum_dbg(struct ifnet *ifp, struct mbuf *m, char *frame_header,
8251 protocol_family_t pf)
8252 {
8253 uint16_t sum = 0;
8254 uint32_t hlen;
8255
8256 if (frame_header == NULL ||
8257 frame_header < (char *)mbuf_datastart(m) ||
8258 frame_header > (char *)m->m_data) {
8259 printf("%s: frame header pointer 0x%llx out of range "
8260 "[0x%llx,0x%llx] for mbuf 0x%llx\n", if_name(ifp),
8261 (uint64_t)VM_KERNEL_ADDRPERM(frame_header),
8262 (uint64_t)VM_KERNEL_ADDRPERM(mbuf_datastart(m)),
8263 (uint64_t)VM_KERNEL_ADDRPERM(m->m_data),
8264 (uint64_t)VM_KERNEL_ADDRPERM(m));
8265 return;
8266 }
8267 hlen = (m->m_data - frame_header);
8268
8269 switch (pf) {
8270 case PF_INET:
8271 #if INET6
8272 case PF_INET6:
8273 #endif /* INET6 */
8274 break;
8275 default:
8276 return;
8277 }
8278
8279 /*
8280 * Force partial checksum offload; useful to simulate cases
8281 * where the hardware does not support partial checksum offload,
8282 * in order to validate correctness throughout the layers above.
8283 */
8284 if (hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED) {
8285 uint32_t foff = hwcksum_dbg_partial_rxoff_forced;
8286
8287 if (foff > (uint32_t)m->m_pkthdr.len)
8288 return;
8289
8290 m->m_pkthdr.csum_flags &= ~CSUM_RX_FLAGS;
8291
8292 /* Compute 16-bit 1's complement sum from forced offset */
8293 sum = m_sum16(m, foff, (m->m_pkthdr.len - foff));
8294
8295 m->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PARTIAL);
8296 m->m_pkthdr.csum_rx_val = sum;
8297 m->m_pkthdr.csum_rx_start = (foff + hlen);
8298
8299 hwcksum_dbg_partial_forced++;
8300 hwcksum_dbg_partial_forced_bytes += m->m_pkthdr.len;
8301 }
8302
8303 /*
8304 * Partial checksum offload verification (and adjustment);
8305 * useful to validate and test cases where the hardware
8306 * supports partial checksum offload.
8307 */
8308 if ((m->m_pkthdr.csum_flags &
8309 (CSUM_DATA_VALID | CSUM_PARTIAL | CSUM_PSEUDO_HDR)) ==
8310 (CSUM_DATA_VALID | CSUM_PARTIAL)) {
8311 uint32_t rxoff;
8312
8313 /* Start offset must begin after frame header */
8314 rxoff = m->m_pkthdr.csum_rx_start;
8315 if (hlen > rxoff) {
8316 hwcksum_dbg_bad_rxoff++;
8317 if (dlil_verbose) {
8318 printf("%s: partial cksum start offset %d "
8319 "is less than frame header length %d for "
8320 "mbuf 0x%llx\n", if_name(ifp), rxoff, hlen,
8321 (uint64_t)VM_KERNEL_ADDRPERM(m));
8322 }
8323 return;
8324 }
8325 rxoff -= hlen;
8326
8327 if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED)) {
8328 /*
8329 * Compute the expected 16-bit 1's complement sum;
8330 * skip this if we've already computed it above
8331 * when partial checksum offload is forced.
8332 */
8333 sum = m_sum16(m, rxoff, (m->m_pkthdr.len - rxoff));
8334
8335 /* Hardware or driver is buggy */
8336 if (sum != m->m_pkthdr.csum_rx_val) {
8337 hwcksum_dbg_bad_cksum++;
8338 if (dlil_verbose) {
8339 printf("%s: bad partial cksum value "
8340 "0x%x (expected 0x%x) for mbuf "
8341 "0x%llx [rx_start %d]\n",
8342 if_name(ifp),
8343 m->m_pkthdr.csum_rx_val, sum,
8344 (uint64_t)VM_KERNEL_ADDRPERM(m),
8345 m->m_pkthdr.csum_rx_start);
8346 }
8347 return;
8348 }
8349 }
8350 hwcksum_dbg_verified++;
8351
8352 /*
8353 * This code allows us to emulate various pieces of hardware that
8354 * perform the 16-bit 1's complement sum beginning at various
8355 * start offset values.
8356 */
8357 if (hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_RXOFF_ADJ) {
8358 uint32_t aoff = hwcksum_dbg_partial_rxoff_adj;
8359
8360 if (aoff == rxoff || aoff > (uint32_t)m->m_pkthdr.len)
8361 return;
8362
8363 sum = m_adj_sum16(m, rxoff, aoff,
8364 m_pktlen(m) - aoff, sum);
8365
8366 m->m_pkthdr.csum_rx_val = sum;
8367 m->m_pkthdr.csum_rx_start = (aoff + hlen);
8368
8369 hwcksum_dbg_adjusted++;
8370 }
8371 }
8372 }
8373
8374 static int
8375 sysctl_hwcksum_dbg_mode SYSCTL_HANDLER_ARGS
8376 {
8377 #pragma unused(arg1, arg2)
8378 u_int32_t i;
8379 int err;
8380
8381 i = hwcksum_dbg_mode;
8382
8383 err = sysctl_handle_int(oidp, &i, 0, req);
8384 if (err != 0 || req->newptr == USER_ADDR_NULL)
8385 return (err);
8386
8387 if (hwcksum_dbg == 0)
8388 return (ENODEV);
8389
8390 if ((i & ~HWCKSUM_DBG_MASK) != 0)
8391 return (EINVAL);
8392
8393 hwcksum_dbg_mode = (i & HWCKSUM_DBG_MASK);
8394
8395 return (err);
8396 }
8397
8398 static int
8399 sysctl_hwcksum_dbg_partial_rxoff_forced SYSCTL_HANDLER_ARGS
8400 {
8401 #pragma unused(arg1, arg2)
8402 u_int32_t i;
8403 int err;
8404
8405 i = hwcksum_dbg_partial_rxoff_forced;
8406
8407 err = sysctl_handle_int(oidp, &i, 0, req);
8408 if (err != 0 || req->newptr == USER_ADDR_NULL)
8409 return (err);
8410
8411 if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED))
8412 return (ENODEV);
8413
8414 hwcksum_dbg_partial_rxoff_forced = i;
8415
8416 return (err);
8417 }
8418
8419 static int
8420 sysctl_hwcksum_dbg_partial_rxoff_adj SYSCTL_HANDLER_ARGS
8421 {
8422 #pragma unused(arg1, arg2)
8423 u_int32_t i;
8424 int err;
8425
8426 i = hwcksum_dbg_partial_rxoff_adj;
8427
8428 err = sysctl_handle_int(oidp, &i, 0, req);
8429 if (err != 0 || req->newptr == USER_ADDR_NULL)
8430 return (err);
8431
8432 if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_RXOFF_ADJ))
8433 return (ENODEV);
8434
8435 hwcksum_dbg_partial_rxoff_adj = i;
8436
8437 return (err);
8438 }
8439
8440 static int
8441 sysctl_tx_chain_len_stats SYSCTL_HANDLER_ARGS
8442 {
8443 #pragma unused(oidp, arg1, arg2)
8444 int err;
8445
8446 if (req->oldptr == USER_ADDR_NULL) {
8447
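/*
 * Nothing to do for a size probe; SYSCTL_OUT() below
 * reports the required length when oldptr is NULL.
 */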
8448 }
8449 if (req->newptr != USER_ADDR_NULL) {
8450 return (EPERM);
8451 }
8452 err = SYSCTL_OUT(req, &tx_chain_len_stats,
8453 sizeof(struct chain_len_stats));
8454
8455 return (err);
8456 }
8457
8458
8459 #if DEBUG || DEVELOPMENT
8460 /* Blob for sum16 verification */
8461 static uint8_t sumdata[] = {
8462 0x1f, 0x8b, 0x08, 0x08, 0x4c, 0xe5, 0x9a, 0x4f, 0x00, 0x03,
8463 0x5f, 0x00, 0x5d, 0x91, 0x41, 0x4e, 0xc4, 0x30, 0x0c, 0x45,
8464 0xf7, 0x9c, 0xc2, 0x07, 0x18, 0xf5, 0x0e, 0xb0, 0xe2, 0x00,
8465 0x48, 0x88, 0xa5, 0xdb, 0xba, 0x49, 0x34, 0x69, 0xdc, 0x71,
8466 0x92, 0xa9, 0xc2, 0x8a, 0x6b, 0x70, 0x3d, 0x4e, 0x82, 0x93,
8467 0xb4, 0x08, 0xd8, 0xc5, 0xb1, 0xfd, 0xff, 0xb3, 0xfd, 0x4c,
8468 0x42, 0x5f, 0x1f, 0x9f, 0x11, 0x12, 0x43, 0xb2, 0x04, 0x93,
8469 0xe0, 0x7b, 0x01, 0x0e, 0x14, 0x07, 0x78, 0xd1, 0x78, 0x75,
8470 0x71, 0x71, 0xe9, 0x08, 0x84, 0x46, 0xf2, 0xc7, 0x3b, 0x09,
8471 0xe7, 0xd1, 0xd3, 0x8a, 0x57, 0x92, 0x33, 0xcd, 0x39, 0xcc,
8472 0xb0, 0x91, 0x89, 0xe0, 0x42, 0x53, 0x8b, 0xb7, 0x8c, 0x42,
8473 0x60, 0xd9, 0x9f, 0x7a, 0x55, 0x19, 0x76, 0xcb, 0x10, 0x49,
8474 0x35, 0xac, 0x0b, 0x5a, 0x3c, 0xbb, 0x65, 0x51, 0x8c, 0x90,
8475 0x7c, 0x69, 0x45, 0x45, 0x81, 0xb4, 0x2b, 0x70, 0x82, 0x85,
8476 0x55, 0x91, 0x17, 0x90, 0xdc, 0x14, 0x1e, 0x35, 0x52, 0xdd,
8477 0x02, 0x16, 0xef, 0xb5, 0x40, 0x89, 0xe2, 0x46, 0x53, 0xad,
8478 0x93, 0x6e, 0x98, 0x30, 0xe5, 0x08, 0xb7, 0xcc, 0x03, 0xbc,
8479 0x71, 0x86, 0x09, 0x43, 0x0d, 0x52, 0xf5, 0xa2, 0xf5, 0xa2,
8480 0x56, 0x11, 0x8d, 0xa8, 0xf5, 0xee, 0x92, 0x3d, 0xfe, 0x8c,
8481 0x67, 0x71, 0x8b, 0x0e, 0x2d, 0x70, 0x77, 0xbe, 0xbe, 0xea,
8482 0xbf, 0x9a, 0x8d, 0x9c, 0x53, 0x53, 0xe5, 0xe0, 0x4b, 0x87,
8483 0x85, 0xd2, 0x45, 0x95, 0x30, 0xc1, 0xcc, 0xe0, 0x74, 0x54,
8484 0x13, 0x58, 0xe8, 0xe8, 0x79, 0xa2, 0x09, 0x73, 0xa4, 0x0e,
8485 0x39, 0x59, 0x0c, 0xe6, 0x9c, 0xb2, 0x4f, 0x06, 0x5b, 0x8e,
8486 0xcd, 0x17, 0x6c, 0x5e, 0x95, 0x4d, 0x70, 0xa2, 0x0a, 0xbf,
8487 0xa3, 0xcc, 0x03, 0xbc, 0x5a, 0xe7, 0x75, 0x06, 0x5e, 0x75,
8488 0xef, 0x58, 0x8e, 0x15, 0xd1, 0x0a, 0x18, 0xff, 0xdd, 0xe6,
8489 0x02, 0x3b, 0xb5, 0xb4, 0xa1, 0xe0, 0x72, 0xfc, 0xe3, 0xab,
8490 0x07, 0xe0, 0x4d, 0x65, 0xea, 0x92, 0xeb, 0xf2, 0x7b, 0x17,
8491 0x05, 0xce, 0xc6, 0xf6, 0x2b, 0xbb, 0x70, 0x3d, 0x00, 0x95,
8492 0xe0, 0x07, 0x52, 0x3b, 0x58, 0xfc, 0x7c, 0x69, 0x4d, 0xe9,
8493 0xf7, 0xa9, 0x66, 0x1e, 0x1e, 0xbe, 0x01, 0x69, 0x98, 0xfe,
8494 0xc8, 0x28, 0x02, 0x00, 0x00
8495 };
8496
8497 /* Precomputed 16-bit 1's complement sums for various spans of the above data */
8498 static struct {
8499 boolean_t init;
8500 uint16_t len;
8501 uint16_t sumr; /* reference */
8502 uint16_t sumrp; /* reference, precomputed */
8503 } sumtbl[] = {
8504 { FALSE, 0, 0, 0x0000 },
8505 { FALSE, 1, 0, 0x001f },
8506 { FALSE, 2, 0, 0x8b1f },
8507 { FALSE, 3, 0, 0x8b27 },
8508 { FALSE, 7, 0, 0x790e },
8509 { FALSE, 11, 0, 0xcb6d },
8510 { FALSE, 20, 0, 0x20dd },
8511 { FALSE, 27, 0, 0xbabd },
8512 { FALSE, 32, 0, 0xf3e8 },
8513 { FALSE, 37, 0, 0x197d },
8514 { FALSE, 43, 0, 0x9eae },
8515 { FALSE, 64, 0, 0x4678 },
8516 { FALSE, 127, 0, 0x9399 },
8517 { FALSE, 256, 0, 0xd147 },
8518 { FALSE, 325, 0, 0x0358 },
8519 };
8520 #define SUMTBL_MAX ((int)sizeof (sumtbl) / (int)sizeof (sumtbl[0]))
8521
8522 static void
8523 dlil_verify_sum16(void)
8524 {
8525 struct mbuf *m;
8526 uint8_t *buf;
8527 int n;
8528
8529 /* Make sure test data plus extra room for alignment fits in cluster */
8530 _CASSERT((sizeof (sumdata) + (sizeof (uint64_t) * 2)) <= MCLBYTES);
8531
8532 kprintf("DLIL: running SUM16 self-tests ... ");
8533
8534 m = m_getcl(M_WAITOK, MT_DATA, M_PKTHDR);
8535 MH_ALIGN(m, sizeof (uint32_t)); /* 32-bit starting alignment */
8536 buf = mtod(m, uint8_t *); /* base address */
8537
8538 for (n = 0; n < SUMTBL_MAX; n++) {
8539 uint16_t len = sumtbl[n].len;
8540 int i;
8541
8542 /* Verify for all possible alignments */
8543 for (i = 0; i < (int)sizeof (uint64_t); i++) {
8544 uint16_t sum, sumr;
8545 uint8_t *c;
8546
8547 /* Copy over test data to mbuf */
8548 VERIFY(len <= sizeof (sumdata));
8549 c = buf + i;
8550 bcopy(sumdata, c, len);
8551
8552 /* Zero-offset test (align by data pointer) */
8553 m->m_data = (caddr_t)c;
8554 m->m_len = len;
8555 sum = m_sum16(m, 0, len);
8556
8557 if (!sumtbl[n].init) {
8558 sumr = in_cksum_mbuf_ref(m, len, 0, 0);
8559 sumtbl[n].sumr = sumr;
8560 sumtbl[n].init = TRUE;
8561 } else {
8562 sumr = sumtbl[n].sumr;
8563 }
8564
8565 /* Something is horribly broken; stop now */
8566 if (sumr != sumtbl[n].sumrp) {
8567 panic_plain("\n%s: broken in_cksum_mbuf_ref() "
8568 "for len=%d align=%d sum=0x%04x "
8569 "[expected=0x%04x]\n", __func__,
8570 len, i, sum, sumr);
8571 /* NOTREACHED */
8572 } else if (sum != sumr) {
8573 panic_plain("\n%s: broken m_sum16() for len=%d "
8574 "align=%d sum=0x%04x [expected=0x%04x]\n",
8575 __func__, len, i, sum, sumr);
8576 /* NOTREACHED */
8577 }
8578
8579 /* Alignment test by offset (fixed data pointer) */
8580 m->m_data = (caddr_t)buf;
8581 m->m_len = i + len;
8582 sum = m_sum16(m, i, len);
8583
8584 /* Something is horribly broken; stop now */
8585 if (sum != sumr) {
8586 panic_plain("\n%s: broken m_sum16() for len=%d "
8587 "offset=%d sum=0x%04x [expected=0x%04x]\n",
8588 __func__, len, i, sum, sumr);
8589 /* NOTREACHED */
8590 }
8591 #if INET
8592 /* Simple sum16 contiguous buffer test by alignment */
8593 sum = b_sum16(c, len);
8594
8595 /* Something is horribly broken; stop now */
8596 if (sum != sumr) {
8597 panic_plain("\n%s: broken b_sum16() for len=%d "
8598 "align=%d sum=0x%04x [expected=0x%04x]\n",
8599 __func__, len, i, sum, sumr);
8600 /* NOTREACHED */
8601 }
8602 #endif /* INET */
8603 }
8604 }
8605 m_freem(m);
8606
8607 kprintf("PASSED\n");
8608 }
8609 #endif /* DEBUG || DEVELOPMENT */
8610
8611 #define CASE_STRINGIFY(x) case x: return #x
8612
8613 __private_extern__ const char *
8614 dlil_kev_dl_code_str(u_int32_t event_code)
8615 {
8616 switch (event_code) {
8617 CASE_STRINGIFY(KEV_DL_SIFFLAGS);
8618 CASE_STRINGIFY(KEV_DL_SIFMETRICS);
8619 CASE_STRINGIFY(KEV_DL_SIFMTU);
8620 CASE_STRINGIFY(KEV_DL_SIFPHYS);
8621 CASE_STRINGIFY(KEV_DL_SIFMEDIA);
8622 CASE_STRINGIFY(KEV_DL_SIFGENERIC);
8623 CASE_STRINGIFY(KEV_DL_ADDMULTI);
8624 CASE_STRINGIFY(KEV_DL_DELMULTI);
8625 CASE_STRINGIFY(KEV_DL_IF_ATTACHED);
8626 CASE_STRINGIFY(KEV_DL_IF_DETACHING);
8627 CASE_STRINGIFY(KEV_DL_IF_DETACHED);
8628 CASE_STRINGIFY(KEV_DL_LINK_OFF);
8629 CASE_STRINGIFY(KEV_DL_LINK_ON);
8630 CASE_STRINGIFY(KEV_DL_PROTO_ATTACHED);
8631 CASE_STRINGIFY(KEV_DL_PROTO_DETACHED);
8632 CASE_STRINGIFY(KEV_DL_LINK_ADDRESS_CHANGED);
8633 CASE_STRINGIFY(KEV_DL_WAKEFLAGS_CHANGED);
8634 CASE_STRINGIFY(KEV_DL_IF_IDLE_ROUTE_REFCNT);
8635 CASE_STRINGIFY(KEV_DL_IFCAP_CHANGED);
8636 CASE_STRINGIFY(KEV_DL_LINK_QUALITY_METRIC_CHANGED);
8637 CASE_STRINGIFY(KEV_DL_NODE_PRESENCE);
8638 CASE_STRINGIFY(KEV_DL_NODE_ABSENCE);
8639 CASE_STRINGIFY(KEV_DL_MASTER_ELECTED);
8640 CASE_STRINGIFY(KEV_DL_ISSUES);
8641 CASE_STRINGIFY(KEV_DL_IFDELEGATE_CHANGED);
8642 default:
8643 break;
8644 }
8645 return ("");
8646 }
8647
8648 /*
8649 * Mirror the arguments of ifnet_get_local_ports_extended()
8650 * ifindex
8651 * protocol
8652 * flags
8653 */
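/*
 * The reply is a bit string with one bit per 16-bit port number (65536
 * bits total); a set bit means the corresponding local port is in use on
 * the selected interface for the requested protocol and flags.
 */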
8654 static int
8655 sysctl_get_ports_used SYSCTL_HANDLER_ARGS
8656 {
8657 #pragma unused(oidp)
8658 int *name = (int *)arg1;
8659 int namelen = arg2;
8660 int error = 0;
8661 int idx;
8662 protocol_family_t protocol;
8663 u_int32_t flags;
8664 ifnet_t ifp = NULL;
8665 u_int8_t *bitfield = NULL;
8666
8667 if (req->newptr != USER_ADDR_NULL) {
8668 error = EPERM;
8669 goto done;
8670 }
8671 if (namelen != 3) {
8672 error = ENOENT;
8673 goto done;
8674 }
8675
8676 if (req->oldptr == USER_ADDR_NULL) {
8677 req->oldidx = bitstr_size(65536);
8678 goto done;
8679 }
8680 if (req->oldlen < bitstr_size(65536)) {
8681 error = ENOMEM;
8682 goto done;
8683 }
8684
8685 idx = name[0];
8686 protocol = name[1];
8687 flags = name[2];
8688
8689 ifnet_head_lock_shared();
8690 if (!IF_INDEX_IN_RANGE(idx)) {
8691 ifnet_head_done();
8692 error = ENOENT;
8693 goto done;
8694 }
8695 ifp = ifindex2ifnet[idx];
8696 ifnet_head_done();
8697
8698 bitfield = _MALLOC(bitstr_size(65536), M_TEMP, M_WAITOK | M_ZERO);
8699 if (bitfield == NULL) {
8700 error = ENOMEM;
8701 goto done;
8702 }
8703 error = ifnet_get_local_ports_extended(ifp, protocol, flags, bitfield);
8704 if (error != 0) {
8705 printf("%s: ifnet_get_local_ports_extended() error %d\n",
8706 __func__, error);
8707 goto done;
8708 }
8709 error = SYSCTL_OUT(req, bitfield, bitstr_size(65536));
8710 done:
8711 if (bitfield != NULL)
8712 _FREE(bitfield, M_TEMP);
8713 return (error);
8714 }
8715
8716 static void
8717 dlil_dt_tcall_fn(thread_call_param_t arg0, thread_call_param_t arg1)
8718 {
8719 #pragma unused(arg1)
8720 struct ifnet *ifp = arg0;
8721
8722 if (ifnet_is_attached(ifp, 1)) {
8723 nstat_ifnet_threshold_reached(ifp->if_index);
8724 ifnet_decr_iorefcnt(ifp);
8725 }
8726 }
8727
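/*
 * Data-threshold notification: when the interface's combined in/out byte
 * count has advanced past if_data_threshold, one caller wins the
 * OSCompareAndSwap64() below and schedules the thread call that tells
 * the NetworkStatistics layer; the deadline derived from
 * threshold_interval keeps the notifications rate limited.
 */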
8728 void
8729 ifnet_notify_data_threshold(struct ifnet *ifp)
8730 {
8731 uint64_t bytes = (ifp->if_ibytes + ifp->if_obytes);
8732 uint64_t oldbytes = ifp->if_dt_bytes;
8733
8734 ASSERT(ifp->if_dt_tcall != NULL);
8735
8736 /*
8737 * If we went over the threshold, notify NetworkStatistics.
8738 * We rate-limit it based on the threshold interval value.
8739 */
8740 if (threshold_notify && (bytes - oldbytes) > ifp->if_data_threshold &&
8741 OSCompareAndSwap64(oldbytes, bytes, &ifp->if_dt_bytes) &&
8742 !thread_call_isactive(ifp->if_dt_tcall)) {
8743 uint64_t tival = (threshold_interval * NSEC_PER_SEC);
8744 uint64_t now = mach_absolute_time(), deadline = now;
8745 uint64_t ival;
8746
8747 if (tival != 0) {
8748 nanoseconds_to_absolutetime(tival, &ival);
8749 clock_deadline_for_periodic_event(ival, now, &deadline);
8750 (void) thread_call_enter_delayed(ifp->if_dt_tcall,
8751 deadline);
8752 } else {
8753 (void) thread_call_enter(ifp->if_dt_tcall);
8754 }
8755 }
8756 }
8757
8758 #if (DEVELOPMENT || DEBUG)
8759 /*
8760 * The sysctl variable name contains the input parameters of
8761 * ifnet_get_keepalive_offload_frames()
8762 * ifp (interface index): name[0]
8763 * frames_array_count: name[1]
8764 * frame_data_offset: name[2]
8765 * The return length gives used_frames_count
8766 */
8767 static int
8768 sysctl_get_kao_frames SYSCTL_HANDLER_ARGS
8769 {
8770 #pragma unused(oidp)
8771 int *name = (int *)arg1;
8772 u_int namelen = arg2;
8773 int idx;
8774 ifnet_t ifp = NULL;
8775 u_int32_t frames_array_count;
8776 size_t frame_data_offset;
8777 u_int32_t used_frames_count;
8778 struct ifnet_keepalive_offload_frame *frames_array = NULL;
8779 int error = 0;
8780 u_int32_t i;
8781
8782 /*
8783 * Only root can look at other people's TCP keepalive frames
8784 */
8785 error = proc_suser(current_proc());
8786 if (error != 0)
8787 goto done;
8788 /*
8789 * Validate the input parameters
8790 */
8791 if (req->newptr != USER_ADDR_NULL) {
8792 error = EPERM;
8793 goto done;
8794 }
8795 if (namelen != 3) {
8796 error = EINVAL;
8797 goto done;
8798 }
8799 if (req->oldptr == USER_ADDR_NULL) {
8800 error = EINVAL;
8801 goto done;
8802 }
8803 if (req->oldlen == 0) {
8804 error = EINVAL;
8805 goto done;
8806 }
8807 idx = name[0];
8808 frames_array_count = name[1];
8809 frame_data_offset = name[2];
8810
8811 /* Make sure the passed buffer is large enough */
8812 if (frames_array_count * sizeof(struct ifnet_keepalive_offload_frame) >
8813 req->oldlen) {
8814 error = ENOMEM;
8815 goto done;
8816 }
8817
8818 ifnet_head_lock_shared();
8819 if (!IF_INDEX_IN_RANGE(idx)) {
8820 ifnet_head_done();
8821 error = ENOENT;
8822 goto done;
8823 }
8824 ifp = ifindex2ifnet[idx];
8825 ifnet_head_done();
8826
8827 frames_array = _MALLOC(frames_array_count *
8828 sizeof(struct ifnet_keepalive_offload_frame), M_TEMP, M_WAITOK);
8829 if (frames_array == NULL) {
8830 error = ENOMEM;
8831 goto done;
8832 }
8833
8834 error = ifnet_get_keepalive_offload_frames(ifp, frames_array,
8835 frames_array_count, frame_data_offset, &used_frames_count);
8836 if (error != 0) {
8837 printf("%s: ifnet_get_keepalive_offload_frames error %d\n",
8838 __func__, error);
8839 goto done;
8840 }
8841
8842 for (i = 0; i < used_frames_count; i++) {
8843 error = SYSCTL_OUT(req, frames_array + i,
8844 sizeof(struct ifnet_keepalive_offload_frame));
8845 if (error != 0) {
8846 goto done;
8847 }
8848 }
8849 done:
8850 if (frames_array != NULL)
8851 _FREE(frames_array, M_TEMP);
8852 return (error);
8853 }
8854 #endif /* DEVELOPMENT || DEBUG */
8855
8856 void
8857 ifnet_update_stats_per_flow(struct ifnet_stats_per_flow *ifs,
8858 struct ifnet *ifp)
8859 {
8860 tcp_update_stats_per_flow(ifs, ifp);
8861 }
8862
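/*
 * Receive-mitigation thread call: marks the interface's DLIL input
 * thread as having work waiting and wakes it if it is not already
 * running or if packets are still queued on rcvq_pkts.
 */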
8863 static void
8864 dlil_mit_tcall_fn(thread_call_param_t arg0, thread_call_param_t arg1)
8865 {
8866 #pragma unused(arg1)
8867 struct ifnet *ifp = (struct ifnet *)arg0;
8868 struct dlil_threading_info *inp = ifp->if_inp;
8869
8870 ifnet_lock_shared(ifp);
8871 if (!IF_FULLY_ATTACHED(ifp) || inp == NULL) {
8872 ifnet_lock_done(ifp);
8873 return;
8874 }
8875
8876 lck_mtx_lock_spin(&inp->input_lck);
8877 inp->input_waiting |= DLIL_INPUT_WAITING;
8878 if (!(inp->input_waiting & DLIL_INPUT_RUNNING) ||
8879 !qempty(&inp->rcvq_pkts)) {
8880 inp->wtot++;
8881 wakeup_one((caddr_t)&inp->input_waiting);
8882 }
8883 lck_mtx_unlock(&inp->input_lck);
8884 ifnet_lock_done(ifp);
8885 }