apple/xnu (xnu-4570.71.2) - bsd/net/dlil.c
1 /*
2 * Copyright (c) 1999-2018 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
30 * support for mandatory and extensible security protections. This notice
31 * is included in support of clause 2.2 (b) of the Apple Public License,
32 * Version 2.0.
33 */
34 #include <stddef.h>
35
36 #include <sys/param.h>
37 #include <sys/systm.h>
38 #include <sys/kernel.h>
39 #include <sys/malloc.h>
40 #include <sys/mbuf.h>
41 #include <sys/socket.h>
42 #include <sys/domain.h>
43 #include <sys/user.h>
44 #include <sys/random.h>
45 #include <sys/socketvar.h>
46 #include <net/if_dl.h>
47 #include <net/if.h>
48 #include <net/route.h>
49 #include <net/if_var.h>
50 #include <net/dlil.h>
51 #include <net/if_arp.h>
52 #include <net/iptap.h>
53 #include <net/pktap.h>
54 #include <sys/kern_event.h>
55 #include <sys/kdebug.h>
56 #include <sys/mcache.h>
57 #include <sys/syslog.h>
58 #include <sys/protosw.h>
59 #include <sys/priv.h>
60
61 #include <kern/assert.h>
62 #include <kern/task.h>
63 #include <kern/thread.h>
64 #include <kern/sched_prim.h>
65 #include <kern/locks.h>
66 #include <kern/zalloc.h>
67
68 #include <net/kpi_protocol.h>
69 #include <net/if_types.h>
70 #include <net/if_ipsec.h>
71 #include <net/if_llreach.h>
72 #include <net/if_utun.h>
73 #include <net/kpi_interfacefilter.h>
74 #include <net/classq/classq.h>
75 #include <net/classq/classq_sfb.h>
76 #include <net/flowhash.h>
77 #include <net/ntstat.h>
78 #include <net/if_llatbl.h>
79 #include <net/net_api_stats.h>
80 #include <net/if_ports_used.h>
81
82 #if INET
83 #include <netinet/in_var.h>
84 #include <netinet/igmp_var.h>
85 #include <netinet/ip_var.h>
86 #include <netinet/tcp.h>
87 #include <netinet/tcp_var.h>
88 #include <netinet/udp.h>
89 #include <netinet/udp_var.h>
90 #include <netinet/if_ether.h>
91 #include <netinet/in_pcb.h>
92 #include <netinet/in_tclass.h>
93 #endif /* INET */
94
95 #if INET6
96 #include <netinet6/in6_var.h>
97 #include <netinet6/nd6.h>
98 #include <netinet6/mld6_var.h>
99 #include <netinet6/scope6_var.h>
100 #endif /* INET6 */
101
102 #include <libkern/OSAtomic.h>
103 #include <libkern/tree.h>
104
105 #include <dev/random/randomdev.h>
106 #include <machine/machine_routines.h>
107
108 #include <mach/thread_act.h>
109 #include <mach/sdt.h>
110
111 #if CONFIG_MACF
112 #include <sys/kauth.h>
113 #include <security/mac_framework.h>
114 #include <net/ethernet.h>
115 #include <net/firewire.h>
116 #endif
117
118 #if PF
119 #include <net/pfvar.h>
120 #endif /* PF */
121 #include <net/pktsched/pktsched.h>
122
123 #if NECP
124 #include <net/necp.h>
125 #endif /* NECP */
126
127
128 #define DBG_LAYER_BEG DLILDBG_CODE(DBG_DLIL_STATIC, 0)
129 #define DBG_LAYER_END DLILDBG_CODE(DBG_DLIL_STATIC, 2)
130 #define DBG_FNC_DLIL_INPUT DLILDBG_CODE(DBG_DLIL_STATIC, (1 << 8))
131 #define DBG_FNC_DLIL_OUTPUT DLILDBG_CODE(DBG_DLIL_STATIC, (2 << 8))
132 #define DBG_FNC_DLIL_IFOUT DLILDBG_CODE(DBG_DLIL_STATIC, (3 << 8))
133
134 #define MAX_FRAME_TYPE_SIZE 4 /* LONGWORDS */
135 #define MAX_LINKADDR 4 /* LONGWORDS */
136 #define M_NKE M_IFADDR
137
138 #if 1
139 #define DLIL_PRINTF printf
140 #else
141 #define DLIL_PRINTF kprintf
142 #endif
143
144 #define IF_DATA_REQUIRE_ALIGNED_64(f) \
145 _CASSERT(!(offsetof(struct if_data_internal, f) % sizeof (u_int64_t)))
146
147 #define IFNET_IF_DATA_REQUIRE_ALIGNED_64(f) \
148 _CASSERT(!(offsetof(struct ifnet, if_data.f) % sizeof (u_int64_t)))
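/*
 * Editor's note (not part of the original source): both macros expand to
 * a compile-time assertion on the field offset.  For example,
 *
 *	IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes)
 *
 * becomes
 *
 *	_CASSERT(!(offsetof(struct if_data_internal, ifi_ibytes) %
 *	    sizeof (u_int64_t)))
 *
 * which fails the build unless the counter sits on an 8-byte boundary;
 * dlil_init() relies on this so that the 64-bit statistics can be updated
 * with atomic operations.
 */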
149
150 enum {
151 kProtoKPI_v1 = 1,
152 kProtoKPI_v2 = 2
153 };
154
155 /*
156 * List of if_proto structures in if_proto_hash[] is protected by
157 * the ifnet lock. The rest of the fields are initialized at protocol
158 * attach time and never change, thus no lock required as long as
159 * a reference to it is valid, via if_proto_ref().
160 */
161 struct if_proto {
162 SLIST_ENTRY(if_proto) next_hash;
163 u_int32_t refcount;
164 u_int32_t detached;
165 struct ifnet *ifp;
166 protocol_family_t protocol_family;
167 int proto_kpi;
168 union {
169 struct {
170 proto_media_input input;
171 proto_media_preout pre_output;
172 proto_media_event event;
173 proto_media_ioctl ioctl;
174 proto_media_detached detached;
175 proto_media_resolve_multi resolve_multi;
176 proto_media_send_arp send_arp;
177 } v1;
178 struct {
179 proto_media_input_v2 input;
180 proto_media_preout pre_output;
181 proto_media_event event;
182 proto_media_ioctl ioctl;
183 proto_media_detached detached;
184 proto_media_resolve_multi resolve_multi;
185 proto_media_send_arp send_arp;
186 } v2;
187 } kpi;
188 };
189
190 SLIST_HEAD(proto_hash_entry, if_proto);
191
192 #define DLIL_SDLDATALEN \
193 (DLIL_SDLMAXLEN - offsetof(struct sockaddr_dl, sdl_data[0]))
194
195 struct dlil_ifnet {
196 struct ifnet dl_if; /* public ifnet */
197 /*
198 * DLIL private fields, protected by dl_if_lock
199 */
200 decl_lck_mtx_data(, dl_if_lock);
201 TAILQ_ENTRY(dlil_ifnet) dl_if_link; /* dlil_ifnet link */
202 u_int32_t dl_if_flags; /* flags (below) */
203 u_int32_t dl_if_refcnt; /* refcnt */
204 void (*dl_if_trace)(struct dlil_ifnet *, int); /* ref trace callback */
205 void *dl_if_uniqueid; /* unique interface id */
206 size_t dl_if_uniqueid_len; /* length of the unique id */
207 char dl_if_namestorage[IFNAMSIZ]; /* interface name storage */
208 char dl_if_xnamestorage[IFXNAMSIZ]; /* external name storage */
209 struct {
210 struct ifaddr ifa; /* lladdr ifa */
211 u_int8_t asdl[DLIL_SDLMAXLEN]; /* addr storage */
212 u_int8_t msdl[DLIL_SDLMAXLEN]; /* mask storage */
213 } dl_if_lladdr;
214 u_int8_t dl_if_descstorage[IF_DESCSIZE]; /* desc storage */
215 struct dlil_threading_info dl_if_inpstorage; /* input thread storage */
216 ctrace_t dl_if_attach; /* attach PC stacktrace */
217 ctrace_t dl_if_detach; /* detach PC stacktrace */
218 };
219
220 /* Values for dl_if_flags (private to DLIL) */
221 #define DLIF_INUSE 0x1 /* DLIL ifnet recycler, ifnet in use */
222 #define DLIF_REUSE 0x2 /* DLIL ifnet recycler, ifnet is not new */
223 #define DLIF_DEBUG 0x4 /* has debugging info */
224
225 #define IF_REF_TRACE_HIST_SIZE 8 /* size of ref trace history */
226
227 /* For gdb */
228 __private_extern__ unsigned int if_ref_trace_hist_size = IF_REF_TRACE_HIST_SIZE;
229
230 struct dlil_ifnet_dbg {
231 struct dlil_ifnet dldbg_dlif; /* dlil_ifnet */
232 u_int16_t dldbg_if_refhold_cnt; /* # ifnet references */
233 u_int16_t dldbg_if_refrele_cnt; /* # ifnet releases */
234 /*
235 * Circular lists of ifnet_{reference,release} callers.
236 */
237 ctrace_t dldbg_if_refhold[IF_REF_TRACE_HIST_SIZE];
238 ctrace_t dldbg_if_refrele[IF_REF_TRACE_HIST_SIZE];
239 };
240
241 #define DLIL_TO_IFP(s) (&s->dl_if)
242 #define IFP_TO_DLIL(s) ((struct dlil_ifnet *)s)
243
244 struct ifnet_filter {
245 TAILQ_ENTRY(ifnet_filter) filt_next;
246 u_int32_t filt_skip;
247 u_int32_t filt_flags;
248 ifnet_t filt_ifp;
249 const char *filt_name;
250 void *filt_cookie;
251 protocol_family_t filt_protocol;
252 iff_input_func filt_input;
253 iff_output_func filt_output;
254 iff_event_func filt_event;
255 iff_ioctl_func filt_ioctl;
256 iff_detached_func filt_detached;
257 };
258
259 struct proto_input_entry;
260
261 static TAILQ_HEAD(, dlil_ifnet) dlil_ifnet_head;
262 static lck_grp_t *dlil_lock_group;
263 lck_grp_t *ifnet_lock_group;
264 static lck_grp_t *ifnet_head_lock_group;
265 static lck_grp_t *ifnet_snd_lock_group;
266 static lck_grp_t *ifnet_rcv_lock_group;
267 lck_attr_t *ifnet_lock_attr;
268 decl_lck_rw_data(static, ifnet_head_lock);
269 decl_lck_mtx_data(static, dlil_ifnet_lock);
270 u_int32_t dlil_filter_disable_tso_count = 0;
271
272 #if DEBUG
273 static unsigned int ifnet_debug = 1; /* debugging (enabled) */
274 #else
275 static unsigned int ifnet_debug; /* debugging (disabled) */
276 #endif /* !DEBUG */
277 static unsigned int dlif_size; /* size of dlil_ifnet to allocate */
278 static unsigned int dlif_bufsize; /* size of dlif_size + headroom */
279 static struct zone *dlif_zone; /* zone for dlil_ifnet */
280
281 #define DLIF_ZONE_MAX 64 /* maximum elements in zone */
282 #define DLIF_ZONE_NAME "ifnet" /* zone name */
283
284 static unsigned int dlif_filt_size; /* size of ifnet_filter */
285 static struct zone *dlif_filt_zone; /* zone for ifnet_filter */
286
287 #define DLIF_FILT_ZONE_MAX 8 /* maximum elements in zone */
288 #define DLIF_FILT_ZONE_NAME "ifnet_filter" /* zone name */
289
290 static unsigned int dlif_phash_size; /* size of ifnet proto hash table */
291 static struct zone *dlif_phash_zone; /* zone for ifnet proto hash table */
292
293 #define DLIF_PHASH_ZONE_MAX DLIF_ZONE_MAX /* maximum elements in zone */
294 #define DLIF_PHASH_ZONE_NAME "ifnet_proto_hash" /* zone name */
295
296 static unsigned int dlif_proto_size; /* size of if_proto */
297 static struct zone *dlif_proto_zone; /* zone for if_proto */
298
299 #define DLIF_PROTO_ZONE_MAX (DLIF_ZONE_MAX*2) /* maximum elements in zone */
300 #define DLIF_PROTO_ZONE_NAME "ifnet_proto" /* zone name */
301
302 static unsigned int dlif_tcpstat_size; /* size of tcpstat_local to allocate */
303 static unsigned int dlif_tcpstat_bufsize; /* size of dlif_tcpstat_size + headroom */
304 static struct zone *dlif_tcpstat_zone; /* zone for tcpstat_local */
305
306 #define DLIF_TCPSTAT_ZONE_MAX 1 /* maximum elements in zone */
307 #define DLIF_TCPSTAT_ZONE_NAME "ifnet_tcpstat" /* zone name */
308
309 static unsigned int dlif_udpstat_size; /* size of udpstat_local to allocate */
310 static unsigned int dlif_udpstat_bufsize; /* size of dlif_udpstat_size + headroom */
311 static struct zone *dlif_udpstat_zone; /* zone for udpstat_local */
312
313 #define DLIF_UDPSTAT_ZONE_MAX 1 /* maximum elements in zone */
314 #define DLIF_UDPSTAT_ZONE_NAME "ifnet_udpstat" /* zone name */
315
316 static u_int32_t net_rtref;
317
318 static struct dlil_main_threading_info dlil_main_input_thread_info;
319 __private_extern__ struct dlil_threading_info *dlil_main_input_thread =
320 (struct dlil_threading_info *)&dlil_main_input_thread_info;
321
322 static int dlil_event_internal(struct ifnet *ifp, struct kev_msg *msg, bool update_generation);
323 static int dlil_detach_filter_internal(interface_filter_t filter, int detached);
324 static void dlil_if_trace(struct dlil_ifnet *, int);
325 static void if_proto_ref(struct if_proto *);
326 static void if_proto_free(struct if_proto *);
327 static struct if_proto *find_attached_proto(struct ifnet *, u_int32_t);
328 static u_int32_t dlil_ifp_protolist(struct ifnet *ifp, protocol_family_t *list,
329 u_int32_t list_count);
330 static void if_flt_monitor_busy(struct ifnet *);
331 static void if_flt_monitor_unbusy(struct ifnet *);
332 static void if_flt_monitor_enter(struct ifnet *);
333 static void if_flt_monitor_leave(struct ifnet *);
334 static int dlil_interface_filters_input(struct ifnet *, struct mbuf **,
335 char **, protocol_family_t);
336 static int dlil_interface_filters_output(struct ifnet *, struct mbuf **,
337 protocol_family_t);
338 static struct ifaddr *dlil_alloc_lladdr(struct ifnet *,
339 const struct sockaddr_dl *);
340 static int ifnet_lookup(struct ifnet *);
341 static void if_purgeaddrs(struct ifnet *);
342
343 static errno_t ifproto_media_input_v1(struct ifnet *, protocol_family_t,
344 struct mbuf *, char *);
345 static errno_t ifproto_media_input_v2(struct ifnet *, protocol_family_t,
346 struct mbuf *);
347 static errno_t ifproto_media_preout(struct ifnet *, protocol_family_t,
348 mbuf_t *, const struct sockaddr *, void *, char *, char *);
349 static void ifproto_media_event(struct ifnet *, protocol_family_t,
350 const struct kev_msg *);
351 static errno_t ifproto_media_ioctl(struct ifnet *, protocol_family_t,
352 unsigned long, void *);
353 static errno_t ifproto_media_resolve_multi(ifnet_t, const struct sockaddr *,
354 struct sockaddr_dl *, size_t);
355 static errno_t ifproto_media_send_arp(struct ifnet *, u_short,
356 const struct sockaddr_dl *, const struct sockaddr *,
357 const struct sockaddr_dl *, const struct sockaddr *);
358
359 static errno_t ifp_if_input(struct ifnet *ifp, struct mbuf *m_head,
360 struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
361 boolean_t poll, struct thread *tp);
362 static void ifp_if_input_poll(struct ifnet *, u_int32_t, u_int32_t,
363 struct mbuf **, struct mbuf **, u_int32_t *, u_int32_t *);
364 static errno_t ifp_if_ctl(struct ifnet *, ifnet_ctl_cmd_t, u_int32_t, void *);
365 static errno_t ifp_if_demux(struct ifnet *, struct mbuf *, char *,
366 protocol_family_t *);
367 static errno_t ifp_if_add_proto(struct ifnet *, protocol_family_t,
368 const struct ifnet_demux_desc *, u_int32_t);
369 static errno_t ifp_if_del_proto(struct ifnet *, protocol_family_t);
370 static errno_t ifp_if_check_multi(struct ifnet *, const struct sockaddr *);
371 #if CONFIG_EMBEDDED
372 static errno_t ifp_if_framer(struct ifnet *, struct mbuf **,
373 const struct sockaddr *, const char *, const char *,
374 u_int32_t *, u_int32_t *);
375 #else
376 static errno_t ifp_if_framer(struct ifnet *, struct mbuf **,
377 const struct sockaddr *, const char *, const char *);
378 #endif /* CONFIG_EMBEDDED */
379 static errno_t ifp_if_framer_extended(struct ifnet *, struct mbuf **,
380 const struct sockaddr *, const char *, const char *,
381 u_int32_t *, u_int32_t *);
382 static errno_t ifp_if_set_bpf_tap(struct ifnet *, bpf_tap_mode, bpf_packet_func);
383 static void ifp_if_free(struct ifnet *);
384 static void ifp_if_event(struct ifnet *, const struct kev_msg *);
385 static __inline void ifp_inc_traffic_class_in(struct ifnet *, struct mbuf *);
386 static __inline void ifp_inc_traffic_class_out(struct ifnet *, struct mbuf *);
387
388 static void dlil_main_input_thread_func(void *, wait_result_t);
389 static void dlil_input_thread_func(void *, wait_result_t);
390 static void dlil_rxpoll_input_thread_func(void *, wait_result_t);
391 static int dlil_create_input_thread(ifnet_t, struct dlil_threading_info *);
392 static void dlil_terminate_input_thread(struct dlil_threading_info *);
393 static void dlil_input_stats_add(const struct ifnet_stat_increment_param *,
394 struct dlil_threading_info *, boolean_t);
395 static void dlil_input_stats_sync(struct ifnet *, struct dlil_threading_info *);
396 static void dlil_input_packet_list_common(struct ifnet *, struct mbuf *,
397 u_int32_t, ifnet_model_t, boolean_t);
398 static errno_t ifnet_input_common(struct ifnet *, struct mbuf *, struct mbuf *,
399 const struct ifnet_stat_increment_param *, boolean_t, boolean_t);
400
401 #if DEBUG || DEVELOPMENT
402 static void dlil_verify_sum16(void);
403 #endif /* DEBUG || DEVELOPMENT */
404 static void dlil_output_cksum_dbg(struct ifnet *, struct mbuf *, uint32_t,
405 protocol_family_t);
406 static void dlil_input_cksum_dbg(struct ifnet *, struct mbuf *, char *,
407 protocol_family_t);
408
409 static void ifnet_detacher_thread_func(void *, wait_result_t);
410 static int ifnet_detacher_thread_cont(int);
411 static void ifnet_detach_final(struct ifnet *);
412 static void ifnet_detaching_enqueue(struct ifnet *);
413 static struct ifnet *ifnet_detaching_dequeue(void);
414
415 static void ifnet_start_thread_fn(void *, wait_result_t);
416 static void ifnet_poll_thread_fn(void *, wait_result_t);
417 static void ifnet_poll(struct ifnet *);
418 static errno_t ifnet_enqueue_common(struct ifnet *, void *,
419 classq_pkt_type_t, boolean_t, boolean_t *);
420
421 static void ifp_src_route_copyout(struct ifnet *, struct route *);
422 static void ifp_src_route_copyin(struct ifnet *, struct route *);
423 #if INET6
424 static void ifp_src_route6_copyout(struct ifnet *, struct route_in6 *);
425 static void ifp_src_route6_copyin(struct ifnet *, struct route_in6 *);
426 #endif /* INET6 */
427
428 static int sysctl_rxpoll SYSCTL_HANDLER_ARGS;
429 static int sysctl_rxpoll_mode_holdtime SYSCTL_HANDLER_ARGS;
430 static int sysctl_rxpoll_sample_holdtime SYSCTL_HANDLER_ARGS;
431 static int sysctl_rxpoll_interval_time SYSCTL_HANDLER_ARGS;
432 static int sysctl_rxpoll_wlowat SYSCTL_HANDLER_ARGS;
433 static int sysctl_rxpoll_whiwat SYSCTL_HANDLER_ARGS;
434 static int sysctl_sndq_maxlen SYSCTL_HANDLER_ARGS;
435 static int sysctl_rcvq_maxlen SYSCTL_HANDLER_ARGS;
436 static int sysctl_hwcksum_dbg_mode SYSCTL_HANDLER_ARGS;
437 static int sysctl_hwcksum_dbg_partial_rxoff_forced SYSCTL_HANDLER_ARGS;
438 static int sysctl_hwcksum_dbg_partial_rxoff_adj SYSCTL_HANDLER_ARGS;
439
440 struct chain_len_stats tx_chain_len_stats;
441 static int sysctl_tx_chain_len_stats SYSCTL_HANDLER_ARGS;
442
443 #if TEST_INPUT_THREAD_TERMINATION
444 static int sysctl_input_thread_termination_spin SYSCTL_HANDLER_ARGS;
445 #endif /* TEST_INPUT_THREAD_TERMINATION */
446
447 /* The following are protected by dlil_ifnet_lock */
448 static TAILQ_HEAD(, ifnet) ifnet_detaching_head;
449 static u_int32_t ifnet_detaching_cnt;
450 static void *ifnet_delayed_run; /* wait channel for detaching thread */
451
452 decl_lck_mtx_data(static, ifnet_fc_lock);
453
454 static uint32_t ifnet_flowhash_seed;
455
456 struct ifnet_flowhash_key {
457 char ifk_name[IFNAMSIZ];
458 uint32_t ifk_unit;
459 uint32_t ifk_flags;
460 uint32_t ifk_eflags;
461 uint32_t ifk_capabilities;
462 uint32_t ifk_capenable;
463 uint32_t ifk_output_sched_model;
464 uint32_t ifk_rand1;
465 uint32_t ifk_rand2;
466 };
467
468 /* Flow control entry per interface */
469 struct ifnet_fc_entry {
470 RB_ENTRY(ifnet_fc_entry) ifce_entry;
471 u_int32_t ifce_flowhash;
472 struct ifnet *ifce_ifp;
473 };
474
475 static uint32_t ifnet_calc_flowhash(struct ifnet *);
476 static int ifce_cmp(const struct ifnet_fc_entry *,
477 const struct ifnet_fc_entry *);
478 static int ifnet_fc_add(struct ifnet *);
479 static struct ifnet_fc_entry *ifnet_fc_get(u_int32_t);
480 static void ifnet_fc_entry_free(struct ifnet_fc_entry *);
481
482 /* protected by ifnet_fc_lock */
483 RB_HEAD(ifnet_fc_tree, ifnet_fc_entry) ifnet_fc_tree;
484 RB_PROTOTYPE(ifnet_fc_tree, ifnet_fc_entry, ifce_entry, ifce_cmp);
485 RB_GENERATE(ifnet_fc_tree, ifnet_fc_entry, ifce_entry, ifce_cmp);
486
487 static unsigned int ifnet_fc_zone_size; /* sizeof ifnet_fc_entry */
488 static struct zone *ifnet_fc_zone; /* ifnet_fc_entry zone */
489
490 #define IFNET_FC_ZONE_NAME "ifnet_fc_zone"
491 #define IFNET_FC_ZONE_MAX 32
492
493 extern void bpfdetach(struct ifnet *);
494 extern void proto_input_run(void);
495
496 extern uint32_t udp_count_opportunistic(unsigned int ifindex,
497 u_int32_t flags);
498 extern uint32_t tcp_count_opportunistic(unsigned int ifindex,
499 u_int32_t flags);
500
501 __private_extern__ void link_rtrequest(int, struct rtentry *, struct sockaddr *);
502
503 #if CONFIG_MACF
504 #ifdef CONFIG_EMBEDDED
505 int dlil_lladdr_ckreq = 1;
506 #else
507 int dlil_lladdr_ckreq = 0;
508 #endif
509 #endif
510
511 #if DEBUG
512 int dlil_verbose = 1;
513 #else
514 int dlil_verbose = 0;
515 #endif /* DEBUG */
516 #if IFNET_INPUT_SANITY_CHK
517 /* sanity checking of input packet lists received */
518 static u_int32_t dlil_input_sanity_check = 0;
519 #endif /* IFNET_INPUT_SANITY_CHK */
520 /* rate limit debug messages */
521 struct timespec dlil_dbgrate = { 1, 0 };
522
523 SYSCTL_DECL(_net_link_generic_system);
524
525 SYSCTL_INT(_net_link_generic_system, OID_AUTO, dlil_verbose,
526 CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_verbose, 0, "Log DLIL error messages");
527
528 #define IF_SNDQ_MINLEN 32
529 u_int32_t if_sndq_maxlen = IFQ_MAXLEN;
530 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, sndq_maxlen,
531 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_sndq_maxlen, IFQ_MAXLEN,
532 sysctl_sndq_maxlen, "I", "Default transmit queue max length");
533
534 #define IF_RCVQ_MINLEN 32
535 #define IF_RCVQ_MAXLEN 256
536 u_int32_t if_rcvq_maxlen = IF_RCVQ_MAXLEN;
537 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rcvq_maxlen,
538 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rcvq_maxlen, IFQ_MAXLEN,
539 sysctl_rcvq_maxlen, "I", "Default receive queue max length");
540
541 #define IF_RXPOLL_DECAY 2 /* ilog2 of EWMA decay rate (4) */
542 static u_int32_t if_rxpoll_decay = IF_RXPOLL_DECAY;
543 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_decay,
544 CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_decay, IF_RXPOLL_DECAY,
545 "ilog2 of EWMA decay rate of avg inbound packets");
546
547 #define IF_RXPOLL_MODE_HOLDTIME_MIN (10ULL * 1000 * 1000) /* 10 ms */
548 #define IF_RXPOLL_MODE_HOLDTIME (1000ULL * 1000 * 1000) /* 1 sec */
549 static u_int64_t if_rxpoll_mode_holdtime = IF_RXPOLL_MODE_HOLDTIME;
550 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_freeze_time,
551 CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_mode_holdtime,
552 IF_RXPOLL_MODE_HOLDTIME, sysctl_rxpoll_mode_holdtime,
553 "Q", "input poll mode freeze time");
554
555 #define IF_RXPOLL_SAMPLETIME_MIN (1ULL * 1000 * 1000) /* 1 ms */
556 #define IF_RXPOLL_SAMPLETIME (10ULL * 1000 * 1000) /* 10 ms */
557 static u_int64_t if_rxpoll_sample_holdtime = IF_RXPOLL_SAMPLETIME;
558 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_sample_time,
559 CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_sample_holdtime,
560 IF_RXPOLL_SAMPLETIME, sysctl_rxpoll_sample_holdtime,
561 "Q", "input poll sampling time");
562
563 #define IF_RXPOLL_INTERVALTIME_MIN (1ULL * 1000) /* 1 us */
564 #define IF_RXPOLL_INTERVALTIME (1ULL * 1000 * 1000) /* 1 ms */
565 static u_int64_t if_rxpoll_interval_time = IF_RXPOLL_INTERVALTIME;
566 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_interval_time,
567 CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_interval_time,
568 IF_RXPOLL_INTERVALTIME, sysctl_rxpoll_interval_time,
569 "Q", "input poll interval (time)");
570
571 #define IF_RXPOLL_INTERVAL_PKTS 0 /* 0 (disabled) */
572 static u_int32_t if_rxpoll_interval_pkts = IF_RXPOLL_INTERVAL_PKTS;
573 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_interval_pkts,
574 CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_interval_pkts,
575 IF_RXPOLL_INTERVAL_PKTS, "input poll interval (packets)");
576
577 #define IF_RXPOLL_WLOWAT 10
578 static u_int32_t if_rxpoll_wlowat = IF_RXPOLL_WLOWAT;
579 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_wakeups_lowat,
580 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_wlowat,
581 IF_RXPOLL_WLOWAT, sysctl_rxpoll_wlowat,
582 "I", "input poll wakeup low watermark");
583
584 #define IF_RXPOLL_WHIWAT 100
585 static u_int32_t if_rxpoll_whiwat = IF_RXPOLL_WHIWAT;
586 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_wakeups_hiwat,
587 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_whiwat,
588 IF_RXPOLL_WHIWAT, sysctl_rxpoll_whiwat,
589 "I", "input poll wakeup high watermark");
590
591 static u_int32_t if_rxpoll_max = 0; /* 0 (automatic) */
592 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_max,
593 CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_max, 0,
594 "max packets per poll call");
595
596 static u_int32_t if_rxpoll = 1;
597 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll,
598 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll, 0,
599 sysctl_rxpoll, "I", "enable opportunistic input polling");
600
601 #if TEST_INPUT_THREAD_TERMINATION
602 static u_int32_t if_input_thread_termination_spin = 0;
603 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, input_thread_termination_spin,
604 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
605 &if_input_thread_termination_spin, 0,
606 sysctl_input_thread_termination_spin,
607 "I", "input thread termination spin limit");
608 #endif /* TEST_INPUT_THREAD_TERMINATION */
609
610 static u_int32_t cur_dlil_input_threads = 0;
611 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, dlil_input_threads,
612 CTLFLAG_RD | CTLFLAG_LOCKED, &cur_dlil_input_threads, 0,
613 "Current number of DLIL input threads");
614
615 #if IFNET_INPUT_SANITY_CHK
616 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, dlil_input_sanity_check,
617 CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_input_sanity_check, 0,
618 "Turn on sanity checking in DLIL input");
619 #endif /* IFNET_INPUT_SANITY_CHK */
620
621 static u_int32_t if_flowadv = 1;
622 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, flow_advisory,
623 CTLFLAG_RW | CTLFLAG_LOCKED, &if_flowadv, 1,
624 "enable flow-advisory mechanism");
625
626 static u_int32_t if_delaybased_queue = 1;
627 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, delaybased_queue,
628 CTLFLAG_RW | CTLFLAG_LOCKED, &if_delaybased_queue, 1,
629 "enable delay based dynamic queue sizing");
630
631 static uint64_t hwcksum_in_invalidated = 0;
632 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
633 hwcksum_in_invalidated, CTLFLAG_RD | CTLFLAG_LOCKED,
634 &hwcksum_in_invalidated, "inbound packets with invalidated hardware cksum");
635
636 uint32_t hwcksum_dbg = 0;
637 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_dbg,
638 CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg, 0,
639 "enable hardware cksum debugging");
640
641 u_int32_t ifnet_start_delayed = 0;
642 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, start_delayed,
643 CTLFLAG_RW | CTLFLAG_LOCKED, &ifnet_start_delayed, 0,
644 "number of times start was delayed");
645
646 u_int32_t ifnet_delay_start_disabled = 0;
647 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, start_delay_disabled,
648 CTLFLAG_RW | CTLFLAG_LOCKED, &ifnet_delay_start_disabled, 0,
649 "number of times start was delayed");
650
651 #define HWCKSUM_DBG_PARTIAL_FORCED 0x1 /* forced partial checksum */
652 #define HWCKSUM_DBG_PARTIAL_RXOFF_ADJ 0x2 /* adjust start offset */
653 #define HWCKSUM_DBG_FINALIZE_FORCED 0x10 /* forced finalize */
654 #define HWCKSUM_DBG_MASK \
655 (HWCKSUM_DBG_PARTIAL_FORCED | HWCKSUM_DBG_PARTIAL_RXOFF_ADJ | \
656 HWCKSUM_DBG_FINALIZE_FORCED)
657
658 static uint32_t hwcksum_dbg_mode = 0;
659 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, hwcksum_dbg_mode,
660 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg_mode,
661 0, sysctl_hwcksum_dbg_mode, "I", "hardware cksum debugging mode");
662
663 static uint64_t hwcksum_dbg_partial_forced = 0;
664 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
665 hwcksum_dbg_partial_forced, CTLFLAG_RD | CTLFLAG_LOCKED,
666 &hwcksum_dbg_partial_forced, "packets forced using partial cksum");
667
668 static uint64_t hwcksum_dbg_partial_forced_bytes = 0;
669 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
670 hwcksum_dbg_partial_forced_bytes, CTLFLAG_RD | CTLFLAG_LOCKED,
671 &hwcksum_dbg_partial_forced_bytes, "bytes forced using partial cksum");
672
673 static uint32_t hwcksum_dbg_partial_rxoff_forced = 0;
674 SYSCTL_PROC(_net_link_generic_system, OID_AUTO,
675 hwcksum_dbg_partial_rxoff_forced, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
676 &hwcksum_dbg_partial_rxoff_forced, 0,
677 sysctl_hwcksum_dbg_partial_rxoff_forced, "I",
678 "forced partial cksum rx offset");
679
680 static uint32_t hwcksum_dbg_partial_rxoff_adj = 0;
681 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, hwcksum_dbg_partial_rxoff_adj,
682 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg_partial_rxoff_adj,
683 0, sysctl_hwcksum_dbg_partial_rxoff_adj, "I",
684 "adjusted partial cksum rx offset");
685
686 static uint64_t hwcksum_dbg_verified = 0;
687 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
688 hwcksum_dbg_verified, CTLFLAG_RD | CTLFLAG_LOCKED,
689 &hwcksum_dbg_verified, "packets verified for having good checksum");
690
691 static uint64_t hwcksum_dbg_bad_cksum = 0;
692 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
693 hwcksum_dbg_bad_cksum, CTLFLAG_RD | CTLFLAG_LOCKED,
694 &hwcksum_dbg_bad_cksum, "packets with bad hardware calculated checksum");
695
696 static uint64_t hwcksum_dbg_bad_rxoff = 0;
697 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
698 hwcksum_dbg_bad_rxoff, CTLFLAG_RD | CTLFLAG_LOCKED,
699 &hwcksum_dbg_bad_rxoff, "packets with invalid rxoff");
700
701 static uint64_t hwcksum_dbg_adjusted = 0;
702 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
703 hwcksum_dbg_adjusted, CTLFLAG_RD | CTLFLAG_LOCKED,
704 &hwcksum_dbg_adjusted, "packets with rxoff adjusted");
705
706 static uint64_t hwcksum_dbg_finalized_hdr = 0;
707 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
708 hwcksum_dbg_finalized_hdr, CTLFLAG_RD | CTLFLAG_LOCKED,
709 &hwcksum_dbg_finalized_hdr, "finalized headers");
710
711 static uint64_t hwcksum_dbg_finalized_data = 0;
712 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
713 hwcksum_dbg_finalized_data, CTLFLAG_RD | CTLFLAG_LOCKED,
714 &hwcksum_dbg_finalized_data, "finalized payloads");
715
716 uint32_t hwcksum_tx = 1;
717 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_tx,
718 CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_tx, 0,
719 "enable transmit hardware checksum offload");
720
721 uint32_t hwcksum_rx = 1;
722 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_rx,
723 CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_rx, 0,
724 "enable receive hardware checksum offload");
725
726 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, tx_chain_len_stats,
727 CTLFLAG_RD | CTLFLAG_LOCKED, 0, 9,
728 sysctl_tx_chain_len_stats, "S", "");
729
730 uint32_t tx_chain_len_count = 0;
731 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, tx_chain_len_count,
732 CTLFLAG_RW | CTLFLAG_LOCKED, &tx_chain_len_count, 0, "");
733
734 static uint32_t threshold_notify = 1; /* enable/disable */
735 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, threshold_notify,
736 CTLFLAG_RW | CTLFLAG_LOCKED, &threshold_notify, 0, "");
737
738 static uint32_t threshold_interval = 2; /* in seconds */
739 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, threshold_interval,
740 CTLFLAG_RW | CTLFLAG_LOCKED, &threshold_interval, 0, "");
741
742 #if (DEVELOPMENT || DEBUG)
743 static int sysctl_get_kao_frames SYSCTL_HANDLER_ARGS;
744 SYSCTL_NODE(_net_link_generic_system, OID_AUTO, get_kao_frames,
745 CTLFLAG_RD | CTLFLAG_LOCKED, sysctl_get_kao_frames, "");
746 #endif /* DEVELOPMENT || DEBUG */
747
748 struct net_api_stats net_api_stats;
749 SYSCTL_STRUCT(_net, OID_AUTO, api_stats, CTLFLAG_RD|CTLFLAG_LOCKED,
750 &net_api_stats, net_api_stats, "");
751
752
753 unsigned int net_rxpoll = 1;
754 unsigned int net_affinity = 1;
755 static kern_return_t dlil_affinity_set(struct thread *, u_int32_t);
756
757 extern u_int32_t inject_buckets;
758
759 static lck_grp_attr_t *dlil_grp_attributes = NULL;
760 static lck_attr_t *dlil_lck_attributes = NULL;
761
762 /* DLIL data threshold thread call */
763 static void dlil_dt_tcall_fn(thread_call_param_t, thread_call_param_t);
764
765 static void dlil_mit_tcall_fn(thread_call_param_t, thread_call_param_t);
766
767 uint32_t dlil_rcv_mit_pkts_min = 5;
768 uint32_t dlil_rcv_mit_pkts_max = 64;
769 uint32_t dlil_rcv_mit_interval = (500 * 1000);
770
771 #if (DEVELOPMENT || DEBUG)
772 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rcv_mit_pkts_min,
773 CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_rcv_mit_pkts_min, 0, "");
774 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rcv_mit_pkts_max,
775 CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_rcv_mit_pkts_max, 0, "");
776 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rcv_mit_interval,
777 CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_rcv_mit_interval, 0, "");
778 #endif /* DEVELOPMENT || DEBUG */
779
780
781 #define DLIL_INPUT_CHECK(m, ifp) { \
782 struct ifnet *_rcvif = mbuf_pkthdr_rcvif(m); \
783 if (_rcvif == NULL || (ifp != lo_ifp && _rcvif != ifp) || \
784 !(mbuf_flags(m) & MBUF_PKTHDR)) { \
785 panic_plain("%s: invalid mbuf %p\n", __func__, m); \
786 /* NOTREACHED */ \
787 } \
788 }
789
790 #define DLIL_EWMA(old, new, decay) do { \
791 u_int32_t _avg; \
792 if ((_avg = (old)) > 0) \
793 _avg = (((_avg << (decay)) - _avg) + (new)) >> (decay); \
794 else \
795 _avg = (new); \
796 (old) = _avg; \
797 } while (0)
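/*
 * Worked example of DLIL_EWMA (editor's note, not part of the original
 * source): with decay = 2 (the IF_RXPOLL_DECAY default above) the update
 * is avg = ((avg << 2) - avg + new) >> 2, i.e. avg = (3 * avg + new) / 4,
 * an exponentially weighted moving average that gives the newest sample a
 * weight of 1/4.  For old = 40 and new = 8:
 *
 *	((40 << 2) - 40 + 8) >> 2 = (160 - 40 + 8) / 4 = 32
 */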
798
799 #define MBPS (1ULL * 1000 * 1000)
800 #define GBPS (MBPS * 1000)
801
802 struct rxpoll_time_tbl {
803 u_int64_t speed; /* downlink speed */
804 u_int32_t plowat; /* packets low watermark */
805 u_int32_t phiwat; /* packets high watermark */
806 u_int32_t blowat; /* bytes low watermark */
807 u_int32_t bhiwat; /* bytes high watermark */
808 };
809
810 static struct rxpoll_time_tbl rxpoll_tbl[] = {
811 { 10 * MBPS, 2, 8, (1 * 1024), (6 * 1024) },
812 { 100 * MBPS, 10, 40, (4 * 1024), (64 * 1024) },
813 { 1 * GBPS, 10, 40, (4 * 1024), (64 * 1024) },
814 { 10 * GBPS, 10, 40, (4 * 1024), (64 * 1024) },
815 { 100 * GBPS, 10, 40, (4 * 1024), (64 * 1024) },
816 { 0, 0, 0, 0, 0 }
817 };
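/*
 * Sketch of how a speed-bracketed table like this is typically consulted
 * (editor's illustration; the actual consumer is outside this excerpt, and
 * the helper name below is hypothetical): walk the rows in ascending speed
 * order, keep the last row whose speed does not exceed the interface's
 * downlink rate, and stop at the all-zero sentinel row.
 */
static inline const struct rxpoll_time_tbl *
rxpoll_tbl_lookup_sketch(u_int64_t ifspeed)
{
	const struct rxpoll_time_tbl *t, *best = &rxpoll_tbl[0];

	for (t = rxpoll_tbl; t->speed != 0; t++) {
		if (t->speed <= ifspeed)
			best = t;	/* highest bracket at or below ifspeed */
		else
			break;
	}
	return (best);
}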
818
819 int
820 proto_hash_value(u_int32_t protocol_family)
821 {
822 /*
823 * dlil_proto_unplumb_all() depends on the mapping between
824 * the hash bucket index and the protocol family defined
825 * here; future changes must be applied there as well.
826 */
827 switch (protocol_family) {
828 case PF_INET:
829 return (0);
830 case PF_INET6:
831 return (1);
832 case PF_VLAN:
833 return (2);
834 case PF_UNSPEC:
835 default:
836 return (3);
837 }
838 }
839
840 /*
841 * Caller must already be holding ifnet lock.
842 */
843 static struct if_proto *
844 find_attached_proto(struct ifnet *ifp, u_int32_t protocol_family)
845 {
846 struct if_proto *proto = NULL;
847 u_int32_t i = proto_hash_value(protocol_family);
848
849 ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);
850
851 if (ifp->if_proto_hash != NULL)
852 proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
853
854 while (proto != NULL && proto->protocol_family != protocol_family)
855 proto = SLIST_NEXT(proto, next_hash);
856
857 if (proto != NULL)
858 if_proto_ref(proto);
859
860 return (proto);
861 }
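/*
 * Usage sketch (editor's illustration, not part of the original source):
 * the caller must hold the ifnet lock across the lookup, and the returned
 * if_proto carries a reference (taken via if_proto_ref()) that must later
 * be dropped with if_proto_free():
 *
 *	struct if_proto *proto;
 *
 *	ifnet_lock_shared(ifp);
 *	proto = find_attached_proto(ifp, PF_INET);
 *	ifnet_lock_done(ifp);
 *	if (proto != NULL) {
 *		... use proto ...
 *		if_proto_free(proto);
 *	}
 */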
862
863 static void
864 if_proto_ref(struct if_proto *proto)
865 {
866 atomic_add_32(&proto->refcount, 1);
867 }
868
869 extern void if_rtproto_del(struct ifnet *ifp, int protocol);
870
871 static void
872 if_proto_free(struct if_proto *proto)
873 {
874 u_int32_t oldval;
875 struct ifnet *ifp = proto->ifp;
876 u_int32_t proto_family = proto->protocol_family;
877 struct kev_dl_proto_data ev_pr_data;
878
879 oldval = atomic_add_32_ov(&proto->refcount, -1);
880 if (oldval > 1)
881 return;
882
883 /* No more reference on this, protocol must have been detached */
884 VERIFY(proto->detached);
885
886 if (proto->proto_kpi == kProtoKPI_v1) {
887 if (proto->kpi.v1.detached)
888 proto->kpi.v1.detached(ifp, proto->protocol_family);
889 }
890 if (proto->proto_kpi == kProtoKPI_v2) {
891 if (proto->kpi.v2.detached)
892 proto->kpi.v2.detached(ifp, proto->protocol_family);
893 }
894
895 /*
896 * Cleanup routes that may still be in the routing table for that
897 * interface/protocol pair.
898 */
899 if_rtproto_del(ifp, proto_family);
900
901 /*
902 * The reserved field carries the number of protocols still attached
903 * (subject to change)
904 */
905 ifnet_lock_shared(ifp);
906 ev_pr_data.proto_family = proto_family;
907 ev_pr_data.proto_remaining_count = dlil_ifp_protolist(ifp, NULL, 0);
908 ifnet_lock_done(ifp);
909
910 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_DETACHED,
911 (struct net_event_data *)&ev_pr_data,
912 sizeof (struct kev_dl_proto_data));
913
914 if (ev_pr_data.proto_remaining_count == 0) {
915 /*
916 * The protocol count has gone to zero, mark the interface down.
917 * This used to be done by configd.KernelEventMonitor, but that
918 * is inherently prone to races (rdar://problem/30810208).
919 */
920 (void) ifnet_set_flags(ifp, 0, IFF_UP);
921 (void) ifnet_ioctl(ifp, 0, SIOCSIFFLAGS, NULL);
922 dlil_post_sifflags_msg(ifp);
923 }
924
925 zfree(dlif_proto_zone, proto);
926 }
927
928 __private_extern__ void
929 ifnet_lock_assert(struct ifnet *ifp, ifnet_lock_assert_t what)
930 {
931 #if !MACH_ASSERT
932 #pragma unused(ifp)
933 #endif
934 unsigned int type = 0;
935 int ass = 1;
936
937 switch (what) {
938 case IFNET_LCK_ASSERT_EXCLUSIVE:
939 type = LCK_RW_ASSERT_EXCLUSIVE;
940 break;
941
942 case IFNET_LCK_ASSERT_SHARED:
943 type = LCK_RW_ASSERT_SHARED;
944 break;
945
946 case IFNET_LCK_ASSERT_OWNED:
947 type = LCK_RW_ASSERT_HELD;
948 break;
949
950 case IFNET_LCK_ASSERT_NOTOWNED:
951 /* nothing to do here for RW lock; bypass assert */
952 ass = 0;
953 break;
954
955 default:
956 panic("bad ifnet assert type: %d", what);
957 /* NOTREACHED */
958 }
959 if (ass)
960 LCK_RW_ASSERT(&ifp->if_lock, type);
961 }
962
963 __private_extern__ void
964 ifnet_lock_shared(struct ifnet *ifp)
965 {
966 lck_rw_lock_shared(&ifp->if_lock);
967 }
968
969 __private_extern__ void
970 ifnet_lock_exclusive(struct ifnet *ifp)
971 {
972 lck_rw_lock_exclusive(&ifp->if_lock);
973 }
974
975 __private_extern__ void
976 ifnet_lock_done(struct ifnet *ifp)
977 {
978 lck_rw_done(&ifp->if_lock);
979 }
980
981 #if INET
982 __private_extern__ void
983 if_inetdata_lock_shared(struct ifnet *ifp)
984 {
985 lck_rw_lock_shared(&ifp->if_inetdata_lock);
986 }
987
988 __private_extern__ void
989 if_inetdata_lock_exclusive(struct ifnet *ifp)
990 {
991 lck_rw_lock_exclusive(&ifp->if_inetdata_lock);
992 }
993
994 __private_extern__ void
995 if_inetdata_lock_done(struct ifnet *ifp)
996 {
997 lck_rw_done(&ifp->if_inetdata_lock);
998 }
999 #endif
1000
1001 #if INET6
1002 __private_extern__ void
1003 if_inet6data_lock_shared(struct ifnet *ifp)
1004 {
1005 lck_rw_lock_shared(&ifp->if_inet6data_lock);
1006 }
1007
1008 __private_extern__ void
1009 if_inet6data_lock_exclusive(struct ifnet *ifp)
1010 {
1011 lck_rw_lock_exclusive(&ifp->if_inet6data_lock);
1012 }
1013
1014 __private_extern__ void
1015 if_inet6data_lock_done(struct ifnet *ifp)
1016 {
1017 lck_rw_done(&ifp->if_inet6data_lock);
1018 }
1019 #endif
1020
1021 __private_extern__ void
1022 ifnet_head_lock_shared(void)
1023 {
1024 lck_rw_lock_shared(&ifnet_head_lock);
1025 }
1026
1027 __private_extern__ void
1028 ifnet_head_lock_exclusive(void)
1029 {
1030 lck_rw_lock_exclusive(&ifnet_head_lock);
1031 }
1032
1033 __private_extern__ void
1034 ifnet_head_done(void)
1035 {
1036 lck_rw_done(&ifnet_head_lock);
1037 }
1038
1039 __private_extern__ void
1040 ifnet_head_assert_exclusive(void)
1041 {
1042 LCK_RW_ASSERT(&ifnet_head_lock, LCK_RW_ASSERT_EXCLUSIVE);
1043 }
1044
1045 /*
1046 * dlil_ifp_protolist
1047 * - get the list of protocols attached to the interface, or just the number
1048 * of attached protocols
1049 * - if the number returned is greater than 'list_count', truncation occurred
1050 *
1051 * Note:
1052 * - caller must already be holding ifnet lock.
1053 */
1054 static u_int32_t
1055 dlil_ifp_protolist(struct ifnet *ifp, protocol_family_t *list,
1056 u_int32_t list_count)
1057 {
1058 u_int32_t count = 0;
1059 int i;
1060
1061 ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);
1062
1063 if (ifp->if_proto_hash == NULL)
1064 goto done;
1065
1066 for (i = 0; i < PROTO_HASH_SLOTS; i++) {
1067 struct if_proto *proto;
1068 SLIST_FOREACH(proto, &ifp->if_proto_hash[i], next_hash) {
1069 if (list != NULL && count < list_count) {
1070 list[count] = proto->protocol_family;
1071 }
1072 count++;
1073 }
1074 }
1075 done:
1076 return (count);
1077 }
1078
1079 __private_extern__ u_int32_t
1080 if_get_protolist(struct ifnet * ifp, u_int32_t *protolist, u_int32_t count)
1081 {
1082 ifnet_lock_shared(ifp);
1083 count = dlil_ifp_protolist(ifp, protolist, count);
1084 ifnet_lock_done(ifp);
1085 return (count);
1086 }
1087
1088 __private_extern__ void
1089 if_free_protolist(u_int32_t *list)
1090 {
1091 _FREE(list, M_TEMP);
1092 }
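/*
 * Usage sketch (editor's illustration, not part of the original source):
 * callers typically ask for the count first, allocate a buffer, then fetch
 * the list; a return value larger than the count passed in means the list
 * was truncated.
 *
 *	u_int32_t count, *list;
 *
 *	count = if_get_protolist(ifp, NULL, 0);
 *	MALLOC(list, u_int32_t *, count * sizeof (*list), M_TEMP, M_WAITOK);
 *	if (list != NULL) {
 *		count = if_get_protolist(ifp, list, count);
 *		... use list[0..count-1] ...
 *		if_free_protolist(list);
 *	}
 */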
1093
1094 __private_extern__ void
1095 dlil_post_msg(struct ifnet *ifp, u_int32_t event_subclass,
1096 u_int32_t event_code, struct net_event_data *event_data,
1097 u_int32_t event_data_len)
1098 {
1099 struct net_event_data ev_data;
1100 struct kev_msg ev_msg;
1101
1102 bzero(&ev_msg, sizeof (ev_msg));
1103 bzero(&ev_data, sizeof (ev_data));
1104 /*
1105 * A net event always starts with a net_event_data structure;
1106 * the caller can either generate a simple net event or
1107 * provide a longer event structure to post.
1108 */
1109 ev_msg.vendor_code = KEV_VENDOR_APPLE;
1110 ev_msg.kev_class = KEV_NETWORK_CLASS;
1111 ev_msg.kev_subclass = event_subclass;
1112 ev_msg.event_code = event_code;
1113
1114 if (event_data == NULL) {
1115 event_data = &ev_data;
1116 event_data_len = sizeof (struct net_event_data);
1117 }
1118
1119 strlcpy(&event_data->if_name[0], ifp->if_name, IFNAMSIZ);
1120 event_data->if_family = ifp->if_family;
1121 event_data->if_unit = (u_int32_t)ifp->if_unit;
1122
1123 ev_msg.dv[0].data_length = event_data_len;
1124 ev_msg.dv[0].data_ptr = event_data;
1125 ev_msg.dv[1].data_length = 0;
1126
1127 /* Don't update interface generation for quality and RRC state changes */
1128 bool update_generation = (event_subclass != KEV_DL_SUBCLASS ||
1129 (event_code != KEV_DL_LINK_QUALITY_METRIC_CHANGED &&
1130 event_code != KEV_DL_RRC_STATE_CHANGED));
1131
1132 dlil_event_internal(ifp, &ev_msg, update_generation);
1133 }
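/*
 * Usage sketch (editor's illustration, not part of the original source):
 * passing NULL event data posts a bare net_event_data, e.g.
 *
 *	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_LINK_ON, NULL, 0);
 *
 * while callers with extra payload pass a larger structure that begins
 * with a net_event_data, as if_proto_free() above does with its
 * struct kev_dl_proto_data.
 */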
1134
1135 __private_extern__ int
1136 dlil_alloc_local_stats(struct ifnet *ifp)
1137 {
1138 int ret = EINVAL;
1139 void *buf, *base, **pbuf;
1140
1141 if (ifp == NULL)
1142 goto end;
1143
1144 if (ifp->if_tcp_stat == NULL && ifp->if_udp_stat == NULL) {
1145 /* allocate tcpstat_local structure */
1146 buf = zalloc(dlif_tcpstat_zone);
1147 if (buf == NULL) {
1148 ret = ENOMEM;
1149 goto end;
1150 }
1151 bzero(buf, dlif_tcpstat_bufsize);
1152
1153 /* Get the 64-bit aligned base address for this object */
1154 base = (void *)P2ROUNDUP((intptr_t)buf + sizeof (u_int64_t),
1155 sizeof (u_int64_t));
1156 VERIFY(((intptr_t)base + dlif_tcpstat_size) <=
1157 ((intptr_t)buf + dlif_tcpstat_bufsize));
1158
1159 /*
1160 * Wind back a pointer size from the aligned base and
1161 * save the original address so we can free it later.
1162 */
1163 pbuf = (void **)((intptr_t)base - sizeof (void *));
1164 *pbuf = buf;
1165 ifp->if_tcp_stat = base;
1166
1167 /* allocate udpstat_local structure */
1168 buf = zalloc(dlif_udpstat_zone);
1169 if (buf == NULL) {
1170 ret = ENOMEM;
1171 goto end;
1172 }
1173 bzero(buf, dlif_udpstat_bufsize);
1174
1175 /* Get the 64-bit aligned base address for this object */
1176 base = (void *)P2ROUNDUP((intptr_t)buf + sizeof (u_int64_t),
1177 sizeof (u_int64_t));
1178 VERIFY(((intptr_t)base + dlif_udpstat_size) <=
1179 ((intptr_t)buf + dlif_udpstat_bufsize));
1180
1181 /*
1182 * Wind back a pointer size from the aligned base and
1183 * save the original address so we can free it later.
1184 */
1185 pbuf = (void **)((intptr_t)base - sizeof (void *));
1186 *pbuf = buf;
1187 ifp->if_udp_stat = base;
1188
1189 VERIFY(IS_P2ALIGNED(ifp->if_tcp_stat, sizeof (u_int64_t)) &&
1190 IS_P2ALIGNED(ifp->if_udp_stat, sizeof (u_int64_t)));
1191
1192 ret = 0;
1193 }
1194
1195 if (ifp->if_ipv4_stat == NULL) {
1196 MALLOC(ifp->if_ipv4_stat, struct if_tcp_ecn_stat *,
1197 sizeof (struct if_tcp_ecn_stat), M_TEMP, M_WAITOK|M_ZERO);
1198 if (ifp->if_ipv4_stat == NULL) {
1199 ret = ENOMEM;
1200 goto end;
1201 }
1202 }
1203
1204 if (ifp->if_ipv6_stat == NULL) {
1205 MALLOC(ifp->if_ipv6_stat, struct if_tcp_ecn_stat *,
1206 sizeof (struct if_tcp_ecn_stat), M_TEMP, M_WAITOK|M_ZERO);
1207 if (ifp->if_ipv6_stat == NULL) {
1208 ret = ENOMEM;
1209 goto end;
1210 }
1211 }
1212 end:
1213 if (ret != 0) {
1214 if (ifp->if_tcp_stat != NULL) {
1215 pbuf = (void **)
1216 ((intptr_t)ifp->if_tcp_stat - sizeof (void *));
1217 zfree(dlif_tcpstat_zone, *pbuf);
1218 ifp->if_tcp_stat = NULL;
1219 }
1220 if (ifp->if_udp_stat != NULL) {
1221 pbuf = (void **)
1222 ((intptr_t)ifp->if_udp_stat - sizeof (void *));
1223 zfree(dlif_udpstat_zone, *pbuf);
1224 ifp->if_udp_stat = NULL;
1225 }
1226 if (ifp->if_ipv4_stat != NULL) {
1227 FREE(ifp->if_ipv4_stat, M_TEMP);
1228 ifp->if_ipv4_stat = NULL;
1229 }
1230 if (ifp->if_ipv6_stat != NULL) {
1231 FREE(ifp->if_ipv6_stat, M_TEMP);
1232 ifp->if_ipv6_stat = NULL;
1233 }
1234 }
1235
1236 return (ret);
1237 }
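/*
 * Generic form of the alignment trick used above (editor's sketch, not
 * part of the original source; the helper name is hypothetical).  The
 * zone element is sized with sizeof (void *) + sizeof (u_int64_t) of
 * headroom, the usable base is rounded up to a 64-bit boundary, and the
 * raw zalloc() pointer is stashed in the slot just below the base so the
 * free path can recover it, exactly as the error path above does.
 */
static inline void *
zalloc_aligned64_sketch(struct zone *z, size_t bufsize)
{
	void *buf, *base, **pbuf;

	if ((buf = zalloc(z)) == NULL)
		return (NULL);
	bzero(buf, bufsize);

	/* first 64-bit aligned address past the hidden-pointer slot */
	base = (void *)P2ROUNDUP((intptr_t)buf + sizeof (u_int64_t),
	    sizeof (u_int64_t));

	/* remember the original allocation for the matching zfree() */
	pbuf = (void **)((intptr_t)base - sizeof (void *));
	*pbuf = buf;

	return (base);
}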
1238
1239 static int
1240 dlil_create_input_thread(ifnet_t ifp, struct dlil_threading_info *inp)
1241 {
1242 thread_continue_t func;
1243 u_int32_t limit;
1244 int error;
1245
1246 /* NULL ifp indicates the main input thread, called at dlil_init time */
1247 if (ifp == NULL) {
1248 func = dlil_main_input_thread_func;
1249 VERIFY(inp == dlil_main_input_thread);
1250 (void) strlcat(inp->input_name,
1251 "main_input", DLIL_THREADNAME_LEN);
1252 } else if (net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) {
1253 func = dlil_rxpoll_input_thread_func;
1254 VERIFY(inp != dlil_main_input_thread);
1255 (void) snprintf(inp->input_name, DLIL_THREADNAME_LEN,
1256 "%s_input_poll", if_name(ifp));
1257 } else {
1258 func = dlil_input_thread_func;
1259 VERIFY(inp != dlil_main_input_thread);
1260 (void) snprintf(inp->input_name, DLIL_THREADNAME_LEN,
1261 "%s_input", if_name(ifp));
1262 }
1263 VERIFY(inp->input_thr == THREAD_NULL);
1264
1265 inp->lck_grp = lck_grp_alloc_init(inp->input_name, dlil_grp_attributes);
1266 lck_mtx_init(&inp->input_lck, inp->lck_grp, dlil_lck_attributes);
1267
1268 inp->mode = IFNET_MODEL_INPUT_POLL_OFF;
1269 inp->ifp = ifp; /* NULL for main input thread */
1270
1271 net_timerclear(&inp->mode_holdtime);
1272 net_timerclear(&inp->mode_lasttime);
1273 net_timerclear(&inp->sample_holdtime);
1274 net_timerclear(&inp->sample_lasttime);
1275 net_timerclear(&inp->dbg_lasttime);
1276
1277 /*
1278 * For interfaces that support opportunistic polling, set the
1279 * low and high watermarks for outstanding inbound packets/bytes.
1280 * Also define freeze times for transitioning between modes
1281 * and updating the average.
1282 */
1283 if (ifp != NULL && net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) {
1284 limit = MAX(if_rcvq_maxlen, IF_RCVQ_MINLEN);
1285 (void) dlil_rxpoll_set_params(ifp, NULL, FALSE);
1286 } else {
1287 limit = (u_int32_t)-1;
1288 }
1289
1290 _qinit(&inp->rcvq_pkts, Q_DROPTAIL, limit, QP_MBUF);
1291 if (inp == dlil_main_input_thread) {
1292 struct dlil_main_threading_info *inpm =
1293 (struct dlil_main_threading_info *)inp;
1294 _qinit(&inpm->lo_rcvq_pkts, Q_DROPTAIL, limit, QP_MBUF);
1295 }
1296
1297 error = kernel_thread_start(func, inp, &inp->input_thr);
1298 if (error == KERN_SUCCESS) {
1299 ml_thread_policy(inp->input_thr, MACHINE_GROUP,
1300 (MACHINE_NETWORK_GROUP|MACHINE_NETWORK_NETISR));
1301 /*
1302 * We create an affinity set so that the matching workloop
1303 * thread or the starter thread (for loopback) can be
1304 * scheduled on the same processor set as the input thread.
1305 */
1306 if (net_affinity) {
1307 struct thread *tp = inp->input_thr;
1308 u_int32_t tag;
1309 /*
1310 * Randomize to reduce the probability
1311 * of affinity tag namespace collision.
1312 */
1313 read_frandom(&tag, sizeof (tag));
1314 if (dlil_affinity_set(tp, tag) == KERN_SUCCESS) {
1315 thread_reference(tp);
1316 inp->tag = tag;
1317 inp->net_affinity = TRUE;
1318 }
1319 }
1320 } else if (inp == dlil_main_input_thread) {
1321 panic_plain("%s: couldn't create main input thread", __func__);
1322 /* NOTREACHED */
1323 } else {
1324 panic_plain("%s: couldn't create %s input thread", __func__,
1325 if_name(ifp));
1326 /* NOTREACHED */
1327 }
1328 OSAddAtomic(1, &cur_dlil_input_threads);
1329
1330 return (error);
1331 }
1332
1333 #if TEST_INPUT_THREAD_TERMINATION
1334 static int
1335 sysctl_input_thread_termination_spin SYSCTL_HANDLER_ARGS
1336 {
1337 #pragma unused(arg1, arg2)
1338 uint32_t i;
1339 int err;
1340
1341 i = if_input_thread_termination_spin;
1342
1343 err = sysctl_handle_int(oidp, &i, 0, req);
1344 if (err != 0 || req->newptr == USER_ADDR_NULL)
1345 return (err);
1346
1347 if (net_rxpoll == 0)
1348 return (ENXIO);
1349
1350 if_input_thread_termination_spin = i;
1351 return (err);
1352 }
1353 #endif /* TEST_INPUT_THREAD_TERMINATION */
1354
1355 static void
1356 dlil_clean_threading_info(struct dlil_threading_info *inp)
1357 {
1358 lck_mtx_destroy(&inp->input_lck, inp->lck_grp);
1359 lck_grp_free(inp->lck_grp);
1360
1361 inp->input_waiting = 0;
1362 inp->wtot = 0;
1363 bzero(inp->input_name, sizeof (inp->input_name));
1364 inp->ifp = NULL;
1365 VERIFY(qhead(&inp->rcvq_pkts) == NULL && qempty(&inp->rcvq_pkts));
1366 qlimit(&inp->rcvq_pkts) = 0;
1367 bzero(&inp->stats, sizeof (inp->stats));
1368
1369 VERIFY(!inp->net_affinity);
1370 inp->input_thr = THREAD_NULL;
1371 VERIFY(inp->wloop_thr == THREAD_NULL);
1372 VERIFY(inp->poll_thr == THREAD_NULL);
1373 VERIFY(inp->tag == 0);
1374
1375 inp->mode = IFNET_MODEL_INPUT_POLL_OFF;
1376 bzero(&inp->tstats, sizeof (inp->tstats));
1377 bzero(&inp->pstats, sizeof (inp->pstats));
1378 bzero(&inp->sstats, sizeof (inp->sstats));
1379
1380 net_timerclear(&inp->mode_holdtime);
1381 net_timerclear(&inp->mode_lasttime);
1382 net_timerclear(&inp->sample_holdtime);
1383 net_timerclear(&inp->sample_lasttime);
1384 net_timerclear(&inp->dbg_lasttime);
1385
1386 #if IFNET_INPUT_SANITY_CHK
1387 inp->input_mbuf_cnt = 0;
1388 #endif /* IFNET_INPUT_SANITY_CHK */
1389 }
1390
1391 static void
1392 dlil_terminate_input_thread(struct dlil_threading_info *inp)
1393 {
1394 struct ifnet *ifp = inp->ifp;
1395
1396 VERIFY(current_thread() == inp->input_thr);
1397 VERIFY(inp != dlil_main_input_thread);
1398
1399 OSAddAtomic(-1, &cur_dlil_input_threads);
1400
1401 #if TEST_INPUT_THREAD_TERMINATION
1402 { /* do something useless that won't get optimized away */
1403 uint32_t v = 1;
1404 for (uint32_t i = 0;
1405 i < if_input_thread_termination_spin;
1406 i++) {
1407 v = (i + 1) * v;
1408 }
1409 printf("the value is %d\n", v);
1410 }
1411 #endif /* TEST_INPUT_THREAD_TERMINATION */
1412
1413 lck_mtx_lock_spin(&inp->input_lck);
1414 VERIFY((inp->input_waiting & DLIL_INPUT_TERMINATE) != 0);
1415 inp->input_waiting |= DLIL_INPUT_TERMINATE_COMPLETE;
1416 wakeup_one((caddr_t)&inp->input_waiting);
1417 lck_mtx_unlock(&inp->input_lck);
1418
1419 /* for the extra refcnt from kernel_thread_start() */
1420 thread_deallocate(current_thread());
1421
1422 if (dlil_verbose) {
1423 printf("%s: input thread terminated\n",
1424 if_name(ifp));
1425 }
1426
1427 /* this is the end */
1428 thread_terminate(current_thread());
1429 /* NOTREACHED */
1430 }
1431
1432 static kern_return_t
1433 dlil_affinity_set(struct thread *tp, u_int32_t tag)
1434 {
1435 thread_affinity_policy_data_t policy;
1436
1437 bzero(&policy, sizeof (policy));
1438 policy.affinity_tag = tag;
1439 return (thread_policy_set(tp, THREAD_AFFINITY_POLICY,
1440 (thread_policy_t)&policy, THREAD_AFFINITY_POLICY_COUNT));
1441 }
1442
1443 void
1444 dlil_init(void)
1445 {
1446 thread_t thread = THREAD_NULL;
1447
1448 /*
1449 * The following fields must be 64-bit aligned for atomic operations.
1450 */
1451 IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets);
1452 IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors);
1453 IF_DATA_REQUIRE_ALIGNED_64(ifi_opackets);
1454 IF_DATA_REQUIRE_ALIGNED_64(ifi_oerrors);
1455 IF_DATA_REQUIRE_ALIGNED_64(ifi_collisions);
1456 IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes);
1457 IF_DATA_REQUIRE_ALIGNED_64(ifi_obytes);
1458 IF_DATA_REQUIRE_ALIGNED_64(ifi_imcasts);
1459 IF_DATA_REQUIRE_ALIGNED_64(ifi_omcasts);
1460 IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops);
1461 IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto);
1462 IF_DATA_REQUIRE_ALIGNED_64(ifi_alignerrs);
1463 IF_DATA_REQUIRE_ALIGNED_64(ifi_dt_bytes);
1464 IF_DATA_REQUIRE_ALIGNED_64(ifi_fpackets);
1465 IF_DATA_REQUIRE_ALIGNED_64(ifi_fbytes);
1466
1467 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets);
1468 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors);
1469 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_opackets);
1470 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_oerrors);
1471 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_collisions);
1472 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes);
1473 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_obytes);
1474 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_imcasts);
1475 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_omcasts);
1476 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops);
1477 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto);
1478 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_alignerrs);
1479 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_dt_bytes);
1480 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_fpackets);
1481 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_fbytes);
1482
1483 /*
1484 * These IF_HWASSIST_ flags must be equal to their IFNET_* counterparts.
1485 */
1486 _CASSERT(IF_HWASSIST_CSUM_IP == IFNET_CSUM_IP);
1487 _CASSERT(IF_HWASSIST_CSUM_TCP == IFNET_CSUM_TCP);
1488 _CASSERT(IF_HWASSIST_CSUM_UDP == IFNET_CSUM_UDP);
1489 _CASSERT(IF_HWASSIST_CSUM_IP_FRAGS == IFNET_CSUM_FRAGMENT);
1490 _CASSERT(IF_HWASSIST_CSUM_FRAGMENT == IFNET_IP_FRAGMENT);
1491 _CASSERT(IF_HWASSIST_CSUM_TCPIPV6 == IFNET_CSUM_TCPIPV6);
1492 _CASSERT(IF_HWASSIST_CSUM_UDPIPV6 == IFNET_CSUM_UDPIPV6);
1493 _CASSERT(IF_HWASSIST_CSUM_FRAGMENT_IPV6 == IFNET_IPV6_FRAGMENT);
1494 _CASSERT(IF_HWASSIST_CSUM_PARTIAL == IFNET_CSUM_PARTIAL);
1495 _CASSERT(IF_HWASSIST_CSUM_ZERO_INVERT == IFNET_CSUM_ZERO_INVERT);
1496 _CASSERT(IF_HWASSIST_VLAN_TAGGING == IFNET_VLAN_TAGGING);
1497 _CASSERT(IF_HWASSIST_VLAN_MTU == IFNET_VLAN_MTU);
1498 _CASSERT(IF_HWASSIST_TSO_V4 == IFNET_TSO_IPV4);
1499 _CASSERT(IF_HWASSIST_TSO_V6 == IFNET_TSO_IPV6);
1500
1501 /*
1502 * ... as well as the mbuf checksum flags counterparts.
1503 */
1504 _CASSERT(CSUM_IP == IF_HWASSIST_CSUM_IP);
1505 _CASSERT(CSUM_TCP == IF_HWASSIST_CSUM_TCP);
1506 _CASSERT(CSUM_UDP == IF_HWASSIST_CSUM_UDP);
1507 _CASSERT(CSUM_IP_FRAGS == IF_HWASSIST_CSUM_IP_FRAGS);
1508 _CASSERT(CSUM_FRAGMENT == IF_HWASSIST_CSUM_FRAGMENT);
1509 _CASSERT(CSUM_TCPIPV6 == IF_HWASSIST_CSUM_TCPIPV6);
1510 _CASSERT(CSUM_UDPIPV6 == IF_HWASSIST_CSUM_UDPIPV6);
1511 _CASSERT(CSUM_FRAGMENT_IPV6 == IF_HWASSIST_CSUM_FRAGMENT_IPV6);
1512 _CASSERT(CSUM_PARTIAL == IF_HWASSIST_CSUM_PARTIAL);
1513 _CASSERT(CSUM_ZERO_INVERT == IF_HWASSIST_CSUM_ZERO_INVERT);
1514 _CASSERT(CSUM_VLAN_TAG_VALID == IF_HWASSIST_VLAN_TAGGING);
1515
1516 /*
1517 * Make sure we have at least IF_LLREACH_MAXLEN in the llreach info.
1518 */
1519 _CASSERT(IF_LLREACH_MAXLEN <= IF_LLREACHINFO_ADDRLEN);
1520 _CASSERT(IFNET_LLREACHINFO_ADDRLEN == IF_LLREACHINFO_ADDRLEN);
1521
1522 _CASSERT(IFRLOGF_DLIL == IFNET_LOGF_DLIL);
1523 _CASSERT(IFRLOGF_FAMILY == IFNET_LOGF_FAMILY);
1524 _CASSERT(IFRLOGF_DRIVER == IFNET_LOGF_DRIVER);
1525 _CASSERT(IFRLOGF_FIRMWARE == IFNET_LOGF_FIRMWARE);
1526
1527 _CASSERT(IFRLOGCAT_CONNECTIVITY == IFNET_LOGCAT_CONNECTIVITY);
1528 _CASSERT(IFRLOGCAT_QUALITY == IFNET_LOGCAT_QUALITY);
1529 _CASSERT(IFRLOGCAT_PERFORMANCE == IFNET_LOGCAT_PERFORMANCE);
1530
1531 _CASSERT(IFRTYPE_FAMILY_ANY == IFNET_FAMILY_ANY);
1532 _CASSERT(IFRTYPE_FAMILY_LOOPBACK == IFNET_FAMILY_LOOPBACK);
1533 _CASSERT(IFRTYPE_FAMILY_ETHERNET == IFNET_FAMILY_ETHERNET);
1534 _CASSERT(IFRTYPE_FAMILY_SLIP == IFNET_FAMILY_SLIP);
1535 _CASSERT(IFRTYPE_FAMILY_TUN == IFNET_FAMILY_TUN);
1536 _CASSERT(IFRTYPE_FAMILY_VLAN == IFNET_FAMILY_VLAN);
1537 _CASSERT(IFRTYPE_FAMILY_PPP == IFNET_FAMILY_PPP);
1538 _CASSERT(IFRTYPE_FAMILY_PVC == IFNET_FAMILY_PVC);
1539 _CASSERT(IFRTYPE_FAMILY_DISC == IFNET_FAMILY_DISC);
1540 _CASSERT(IFRTYPE_FAMILY_MDECAP == IFNET_FAMILY_MDECAP);
1541 _CASSERT(IFRTYPE_FAMILY_GIF == IFNET_FAMILY_GIF);
1542 _CASSERT(IFRTYPE_FAMILY_FAITH == IFNET_FAMILY_FAITH);
1543 _CASSERT(IFRTYPE_FAMILY_STF == IFNET_FAMILY_STF);
1544 _CASSERT(IFRTYPE_FAMILY_FIREWIRE == IFNET_FAMILY_FIREWIRE);
1545 _CASSERT(IFRTYPE_FAMILY_BOND == IFNET_FAMILY_BOND);
1546 _CASSERT(IFRTYPE_FAMILY_CELLULAR == IFNET_FAMILY_CELLULAR);
1547
1548 _CASSERT(IFRTYPE_SUBFAMILY_ANY == IFNET_SUBFAMILY_ANY);
1549 _CASSERT(IFRTYPE_SUBFAMILY_USB == IFNET_SUBFAMILY_USB);
1550 _CASSERT(IFRTYPE_SUBFAMILY_BLUETOOTH == IFNET_SUBFAMILY_BLUETOOTH);
1551 _CASSERT(IFRTYPE_SUBFAMILY_WIFI == IFNET_SUBFAMILY_WIFI);
1552 _CASSERT(IFRTYPE_SUBFAMILY_THUNDERBOLT == IFNET_SUBFAMILY_THUNDERBOLT);
1553 _CASSERT(IFRTYPE_SUBFAMILY_RESERVED == IFNET_SUBFAMILY_RESERVED);
1554 _CASSERT(IFRTYPE_SUBFAMILY_INTCOPROC == IFNET_SUBFAMILY_INTCOPROC);
1555
1556 _CASSERT(DLIL_MODIDLEN == IFNET_MODIDLEN);
1557 _CASSERT(DLIL_MODARGLEN == IFNET_MODARGLEN);
1558
1559 PE_parse_boot_argn("net_affinity", &net_affinity,
1560 sizeof (net_affinity));
1561
1562 PE_parse_boot_argn("net_rxpoll", &net_rxpoll, sizeof (net_rxpoll));
1563
1564 PE_parse_boot_argn("net_rtref", &net_rtref, sizeof (net_rtref));
1565
1566 PE_parse_boot_argn("ifnet_debug", &ifnet_debug, sizeof (ifnet_debug));
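/*
 * Note (illustrative, not part of the original file): each of these is a
 * boot-arg, so on a development kernel they can be toggled from the
 * bootloader with something like
 *
 *	nvram boot-args="net_rxpoll=0 ifnet_debug=1"
 *
 * to disable opportunistic input polling and enable the debug-sized
 * dlil_ifnet allocations set up below.
 */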
1567
1568 dlif_size = (ifnet_debug == 0) ? sizeof (struct dlil_ifnet) :
1569 sizeof (struct dlil_ifnet_dbg);
1570 /* Enforce 64-bit alignment for dlil_ifnet structure */
1571 dlif_bufsize = dlif_size + sizeof (void *) + sizeof (u_int64_t);
1572 dlif_bufsize = P2ROUNDUP(dlif_bufsize, sizeof (u_int64_t));
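/*
 * Illustrative note (assumption, not from the original source): P2ROUNDUP()
 * is taken here to round its first argument up to the next multiple of the
 * second, roughly ((x + (align - 1)) & ~(align - 1)).  With that reading, a
 * hypothetical dlif_size of 1510 bytes on LP64 gives
 *
 *	dlif_bufsize = 1510 + sizeof (void *) + sizeof (u_int64_t)
 *	             = 1510 + 8 + 8 = 1526
 *	dlif_bufsize = P2ROUNDUP(1526, 8) = 1528
 *
 * leaving room for a backpointer plus enough slack so that the returned
 * dlil_ifnet can always be placed on an 8-byte boundary.
 */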
1573 dlif_zone = zinit(dlif_bufsize, DLIF_ZONE_MAX * dlif_bufsize,
1574 0, DLIF_ZONE_NAME);
1575 if (dlif_zone == NULL) {
1576 panic_plain("%s: failed allocating %s", __func__,
1577 DLIF_ZONE_NAME);
1578 /* NOTREACHED */
1579 }
1580 zone_change(dlif_zone, Z_EXPAND, TRUE);
1581 zone_change(dlif_zone, Z_CALLERACCT, FALSE);
1582
1583 dlif_filt_size = sizeof (struct ifnet_filter);
1584 dlif_filt_zone = zinit(dlif_filt_size,
1585 DLIF_FILT_ZONE_MAX * dlif_filt_size, 0, DLIF_FILT_ZONE_NAME);
1586 if (dlif_filt_zone == NULL) {
1587 panic_plain("%s: failed allocating %s", __func__,
1588 DLIF_FILT_ZONE_NAME);
1589 /* NOTREACHED */
1590 }
1591 zone_change(dlif_filt_zone, Z_EXPAND, TRUE);
1592 zone_change(dlif_filt_zone, Z_CALLERACCT, FALSE);
1593
1594 dlif_phash_size = sizeof (struct proto_hash_entry) * PROTO_HASH_SLOTS;
1595 dlif_phash_zone = zinit(dlif_phash_size,
1596 DLIF_PHASH_ZONE_MAX * dlif_phash_size, 0, DLIF_PHASH_ZONE_NAME);
1597 if (dlif_phash_zone == NULL) {
1598 panic_plain("%s: failed allocating %s", __func__,
1599 DLIF_PHASH_ZONE_NAME);
1600 /* NOTREACHED */
1601 }
1602 zone_change(dlif_phash_zone, Z_EXPAND, TRUE);
1603 zone_change(dlif_phash_zone, Z_CALLERACCT, FALSE);
1604
1605 dlif_proto_size = sizeof (struct if_proto);
1606 dlif_proto_zone = zinit(dlif_proto_size,
1607 DLIF_PROTO_ZONE_MAX * dlif_proto_size, 0, DLIF_PROTO_ZONE_NAME);
1608 if (dlif_proto_zone == NULL) {
1609 panic_plain("%s: failed allocating %s", __func__,
1610 DLIF_PROTO_ZONE_NAME);
1611 /* NOTREACHED */
1612 }
1613 zone_change(dlif_proto_zone, Z_EXPAND, TRUE);
1614 zone_change(dlif_proto_zone, Z_CALLERACCT, FALSE);
1615
1616 dlif_tcpstat_size = sizeof (struct tcpstat_local);
1617 /* Enforce 64-bit alignment for tcpstat_local structure */
1618 dlif_tcpstat_bufsize =
1619 dlif_tcpstat_size + sizeof (void *) + sizeof (u_int64_t);
1620 dlif_tcpstat_bufsize =
1621 P2ROUNDUP(dlif_tcpstat_bufsize, sizeof (u_int64_t));
1622 dlif_tcpstat_zone = zinit(dlif_tcpstat_bufsize,
1623 DLIF_TCPSTAT_ZONE_MAX * dlif_tcpstat_bufsize, 0,
1624 DLIF_TCPSTAT_ZONE_NAME);
1625 if (dlif_tcpstat_zone == NULL) {
1626 panic_plain("%s: failed allocating %s", __func__,
1627 DLIF_TCPSTAT_ZONE_NAME);
1628 /* NOTREACHED */
1629 }
1630 zone_change(dlif_tcpstat_zone, Z_EXPAND, TRUE);
1631 zone_change(dlif_tcpstat_zone, Z_CALLERACCT, FALSE);
1632
1633 dlif_udpstat_size = sizeof (struct udpstat_local);
1634 /* Enforce 64-bit alignment for udpstat_local structure */
1635 dlif_udpstat_bufsize =
1636 dlif_udpstat_size + sizeof (void *) + sizeof (u_int64_t);
1637 dlif_udpstat_bufsize =
1638 P2ROUNDUP(dlif_udpstat_bufsize, sizeof (u_int64_t));
1639 dlif_udpstat_zone = zinit(dlif_udpstat_bufsize,
1640 DLIF_UDPSTAT_ZONE_MAX * dlif_udpstat_bufsize, 0,
1641 DLIF_UDPSTAT_ZONE_NAME);
1642 if (dlif_udpstat_zone == NULL) {
1643 panic_plain("%s: failed allocating %s", __func__,
1644 DLIF_UDPSTAT_ZONE_NAME);
1645 /* NOTREACHED */
1646 }
1647 zone_change(dlif_udpstat_zone, Z_EXPAND, TRUE);
1648 zone_change(dlif_udpstat_zone, Z_CALLERACCT, FALSE);
1649
1650 ifnet_llreach_init();
1651 eventhandler_lists_ctxt_init(&ifnet_evhdlr_ctxt);
1652
1653 TAILQ_INIT(&dlil_ifnet_head);
1654 TAILQ_INIT(&ifnet_head);
1655 TAILQ_INIT(&ifnet_detaching_head);
1656 TAILQ_INIT(&ifnet_ordered_head);
1657
1658 /* Setup the lock groups we will use */
1659 dlil_grp_attributes = lck_grp_attr_alloc_init();
1660
1661 dlil_lock_group = lck_grp_alloc_init("DLIL internal locks",
1662 dlil_grp_attributes);
1663 ifnet_lock_group = lck_grp_alloc_init("ifnet locks",
1664 dlil_grp_attributes);
1665 ifnet_head_lock_group = lck_grp_alloc_init("ifnet head lock",
1666 dlil_grp_attributes);
1667 ifnet_rcv_lock_group = lck_grp_alloc_init("ifnet rcv locks",
1668 dlil_grp_attributes);
1669 ifnet_snd_lock_group = lck_grp_alloc_init("ifnet snd locks",
1670 dlil_grp_attributes);
1671
1672 /* Setup the lock attributes we will use */
1673 dlil_lck_attributes = lck_attr_alloc_init();
1674
1675 ifnet_lock_attr = lck_attr_alloc_init();
1676
1677 lck_rw_init(&ifnet_head_lock, ifnet_head_lock_group,
1678 dlil_lck_attributes);
1679 lck_mtx_init(&dlil_ifnet_lock, dlil_lock_group, dlil_lck_attributes);
1680
1681 /* Setup interface flow control related items */
1682 lck_mtx_init(&ifnet_fc_lock, dlil_lock_group, dlil_lck_attributes);
1683
1684 ifnet_fc_zone_size = sizeof (struct ifnet_fc_entry);
1685 ifnet_fc_zone = zinit(ifnet_fc_zone_size,
1686 IFNET_FC_ZONE_MAX * ifnet_fc_zone_size, 0, IFNET_FC_ZONE_NAME);
1687 if (ifnet_fc_zone == NULL) {
1688 panic_plain("%s: failed allocating %s", __func__,
1689 IFNET_FC_ZONE_NAME);
1690 /* NOTREACHED */
1691 }
1692 zone_change(ifnet_fc_zone, Z_EXPAND, TRUE);
1693 zone_change(ifnet_fc_zone, Z_CALLERACCT, FALSE);
1694
1695 /* Initialize interface address subsystem */
1696 ifa_init();
1697
1698 #if PF
1699 /* Initialize the packet filter */
1700 pfinit();
1701 #endif /* PF */
1702
1703 /* Initialize queue algorithms */
1704 classq_init();
1705
1706 /* Initialize packet schedulers */
1707 pktsched_init();
1708
1709 /* Initialize flow advisory subsystem */
1710 flowadv_init();
1711
1712 /* Initialize the pktap virtual interface */
1713 pktap_init();
1714
1715 /* Initialize the service class to dscp map */
1716 net_qos_map_init();
1717
1718 /* Initialize the interface port list */
1719 if_ports_used_init();
1720
1721 #if DEBUG || DEVELOPMENT
1722 /* Run self-tests */
1723 dlil_verify_sum16();
1724 #endif /* DEBUG || DEVELOPMENT */
1725
1726 /* Initialize link layer table */
1727 lltable_glbl_init();
1728
1729 /*
1730 * Create and start up the main DLIL input thread and the interface
1731 * detacher thread once everything is initialized.
1732 */
1733 dlil_create_input_thread(NULL, dlil_main_input_thread);
1734
1735 if (kernel_thread_start(ifnet_detacher_thread_func,
1736 NULL, &thread) != KERN_SUCCESS) {
1737 panic_plain("%s: couldn't create detacher thread", __func__);
1738 /* NOTREACHED */
1739 }
1740 thread_deallocate(thread);
1741
1742 }
1743
1744 static void
1745 if_flt_monitor_busy(struct ifnet *ifp)
1746 {
1747 LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
1748
1749 ++ifp->if_flt_busy;
1750 VERIFY(ifp->if_flt_busy != 0);
1751 }
1752
1753 static void
1754 if_flt_monitor_unbusy(struct ifnet *ifp)
1755 {
1756 if_flt_monitor_leave(ifp);
1757 }
1758
1759 static void
1760 if_flt_monitor_enter(struct ifnet *ifp)
1761 {
1762 LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
1763
1764 while (ifp->if_flt_busy) {
1765 ++ifp->if_flt_waiters;
1766 (void) msleep(&ifp->if_flt_head, &ifp->if_flt_lock,
1767 (PZERO - 1), "if_flt_monitor", NULL);
1768 }
1769 if_flt_monitor_busy(ifp);
1770 }
1771
1772 static void
1773 if_flt_monitor_leave(struct ifnet *ifp)
1774 {
1775 LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
1776
1777 VERIFY(ifp->if_flt_busy != 0);
1778 --ifp->if_flt_busy;
1779
1780 if (ifp->if_flt_busy == 0 && ifp->if_flt_waiters > 0) {
1781 ifp->if_flt_waiters = 0;
1782 wakeup(&ifp->if_flt_head);
1783 }
1784 }
1785
1786 __private_extern__ int
1787 dlil_attach_filter(struct ifnet *ifp, const struct iff_filter *if_filter,
1788 interface_filter_t *filter_ref, u_int32_t flags)
1789 {
1790 int retval = 0;
1791 struct ifnet_filter *filter = NULL;
1792
1793 ifnet_head_lock_shared();
1794 /* Check that the interface is in the global list */
1795 if (!ifnet_lookup(ifp)) {
1796 retval = ENXIO;
1797 goto done;
1798 }
1799
1800 filter = zalloc(dlif_filt_zone);
1801 if (filter == NULL) {
1802 retval = ENOMEM;
1803 goto done;
1804 }
1805 bzero(filter, dlif_filt_size);
1806
1807 /* refcnt held above during lookup */
1808 filter->filt_flags = flags;
1809 filter->filt_ifp = ifp;
1810 filter->filt_cookie = if_filter->iff_cookie;
1811 filter->filt_name = if_filter->iff_name;
1812 filter->filt_protocol = if_filter->iff_protocol;
1813 /*
1814 * Do not install filter callbacks for internal coproc interface
1815 */
1816 if (!IFNET_IS_INTCOPROC(ifp)) {
1817 filter->filt_input = if_filter->iff_input;
1818 filter->filt_output = if_filter->iff_output;
1819 filter->filt_event = if_filter->iff_event;
1820 filter->filt_ioctl = if_filter->iff_ioctl;
1821 }
1822 filter->filt_detached = if_filter->iff_detached;
1823
1824 lck_mtx_lock(&ifp->if_flt_lock);
1825 if_flt_monitor_enter(ifp);
1826
1827 LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
1828 TAILQ_INSERT_TAIL(&ifp->if_flt_head, filter, filt_next);
1829
1830 if_flt_monitor_leave(ifp);
1831 lck_mtx_unlock(&ifp->if_flt_lock);
1832
1833 *filter_ref = filter;
1834
1835 /*
1836 * Bump filter count and route_generation ID to let TCP
1837 * know it shouldn't do TSO on this connection
1838 */
1839 if ((filter->filt_flags & DLIL_IFF_TSO) == 0) {
1840 OSAddAtomic(1, &dlil_filter_disable_tso_count);
1841 routegenid_update();
1842 }
1843 OSIncrementAtomic64(&net_api_stats.nas_iflt_attach_count);
1844 INC_ATOMIC_INT64_LIM(net_api_stats.nas_iflt_attach_total);
1845 if ((filter->filt_flags & DLIL_IFF_INTERNAL)) {
1846 INC_ATOMIC_INT64_LIM(net_api_stats.nas_iflt_attach_os_total);
1847 }
1848 if (dlil_verbose) {
1849 printf("%s: %s filter attached\n", if_name(ifp),
1850 if_filter->iff_name);
1851 }
1852 done:
1853 ifnet_head_done();
1854 if (retval != 0 && ifp != NULL) {
1855 DLIL_PRINTF("%s: failed to attach %s (err=%d)\n",
1856 if_name(ifp), if_filter->iff_name, retval);
1857 }
1858 if (retval != 0 && filter != NULL)
1859 zfree(dlif_filt_zone, filter);
1860
1861 return (retval);
1862 }
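/*
 * Usage sketch (illustrative only, not part of the original file): a kext
 * would normally reach this code through the interface filter KPI rather
 * than calling dlil_attach_filter() directly.  Assuming the iflt_attach()
 * wrapper from kpi_interfacefilter.h, a minimal caller might look like:
 *
 *	static interface_filter_t my_filt_ref;		// hypothetical
 *
 *	static void
 *	my_detached(void *cookie, ifnet_t ifp)
 *	{
 *		// last callback for this filter; release cookie state here
 *	}
 *
 *	static errno_t
 *	my_attach(ifnet_t ifp)
 *	{
 *		struct iff_filter f;
 *
 *		bzero(&f, sizeof (f));
 *		f.iff_cookie = NULL;
 *		f.iff_name = "com.example.myfilter";	// hypothetical
 *		f.iff_protocol = 0;			// see all protocols
 *		f.iff_detached = my_detached;
 *		return (iflt_attach(ifp, &f, &my_filt_ref));
 *	}
 *
 * Attaching any filter without DLIL_IFF_TSO set bumps
 * dlil_filter_disable_tso_count above, which lets TCP know it should stop
 * doing TSO while such filters remain attached.
 */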
1863
1864 static int
1865 dlil_detach_filter_internal(interface_filter_t filter, int detached)
1866 {
1867 int retval = 0;
1868
1869 if (detached == 0) {
1870 ifnet_t ifp = NULL;
1871
1872 ifnet_head_lock_shared();
1873 TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
1874 interface_filter_t entry = NULL;
1875
1876 lck_mtx_lock(&ifp->if_flt_lock);
1877 TAILQ_FOREACH(entry, &ifp->if_flt_head, filt_next) {
1878 if (entry != filter || entry->filt_skip)
1879 continue;
1880 /*
1881 * We've found a match; since it's possible
1882 * that the thread gets blocked in the monitor,
1883 * we do the lock dance. Interface should
1884 * not be detached since we still have a use
1885 * count held during filter attach.
1886 */
1887 entry->filt_skip = 1; /* skip input/output */
1888 lck_mtx_unlock(&ifp->if_flt_lock);
1889 ifnet_head_done();
1890
1891 lck_mtx_lock(&ifp->if_flt_lock);
1892 if_flt_monitor_enter(ifp);
1893 LCK_MTX_ASSERT(&ifp->if_flt_lock,
1894 LCK_MTX_ASSERT_OWNED);
1895
1896 /* Remove the filter from the list */
1897 TAILQ_REMOVE(&ifp->if_flt_head, filter,
1898 filt_next);
1899
1900 if_flt_monitor_leave(ifp);
1901 lck_mtx_unlock(&ifp->if_flt_lock);
1902 if (dlil_verbose) {
1903 printf("%s: %s filter detached\n",
1904 if_name(ifp), filter->filt_name);
1905 }
1906 goto destroy;
1907 }
1908 lck_mtx_unlock(&ifp->if_flt_lock);
1909 }
1910 ifnet_head_done();
1911
1912 /* filter parameter is not a valid filter ref */
1913 retval = EINVAL;
1914 goto done;
1915 }
1916
1917 if (dlil_verbose)
1918 printf("%s filter detached\n", filter->filt_name);
1919
1920 destroy:
1921
1922 /* Call the detached function if there is one */
1923 if (filter->filt_detached)
1924 filter->filt_detached(filter->filt_cookie, filter->filt_ifp);
1925
1926 /*
1927 * Decrease the filter count and bump route_generation ID to let TCP
1928 * know it should reevaluate whether to do TSO
1929 */
1930 if ((filter->filt_flags & DLIL_IFF_TSO) == 0) {
1931 OSAddAtomic(-1, &dlil_filter_disable_tso_count);
1932 routegenid_update();
1933 }
1934
1935 VERIFY(OSDecrementAtomic64(&net_api_stats.nas_iflt_attach_count) > 0);
1936
1937 /* Free the filter */
1938 zfree(dlif_filt_zone, filter);
1939 filter = NULL;
1940 done:
1941 if (retval != 0 && filter != NULL) {
1942 DLIL_PRINTF("failed to detach %s filter (err=%d)\n",
1943 filter->filt_name, retval);
1944 }
1945
1946 return (retval);
1947 }
1948
1949 __private_extern__ void
1950 dlil_detach_filter(interface_filter_t filter)
1951 {
1952 if (filter == NULL)
1953 return;
1954 dlil_detach_filter_internal(filter, 0);
1955 }
1956
1957 /*
1958 * Main input thread:
1959 *
1960 * a) handles all inbound packets for lo0
1961 * b) handles all inbound packets for interfaces with no dedicated
1962 * input thread (e.g. anything but Ethernet/PDP or those that support
1963 * opportunistic polling.)
1964 * c) protocol registrations
1965 * d) packet injections
1966 */
1967 __attribute__((noreturn))
1968 static void
1969 dlil_main_input_thread_func(void *v, wait_result_t w)
1970 {
1971 #pragma unused(w)
1972 struct dlil_main_threading_info *inpm = v;
1973 struct dlil_threading_info *inp = v;
1974
1975 VERIFY(inp == dlil_main_input_thread);
1976 VERIFY(inp->ifp == NULL);
1977 VERIFY(inp->mode == IFNET_MODEL_INPUT_POLL_OFF);
1978
1979 while (1) {
1980 struct mbuf *m = NULL, *m_loop = NULL;
1981 u_int32_t m_cnt, m_cnt_loop;
1982 boolean_t proto_req;
1983
1984 lck_mtx_lock_spin(&inp->input_lck);
1985
1986 /* Wait until there is work to be done */
1987 while (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) {
1988 inp->input_waiting &= ~DLIL_INPUT_RUNNING;
1989 (void) msleep(&inp->input_waiting, &inp->input_lck,
1990 (PZERO - 1) | PSPIN, inp->input_name, NULL);
1991 }
1992
1993 inp->input_waiting |= DLIL_INPUT_RUNNING;
1994 inp->input_waiting &= ~DLIL_INPUT_WAITING;
1995
1996 /* Main input thread cannot be terminated */
1997 VERIFY(!(inp->input_waiting & DLIL_INPUT_TERMINATE));
1998
1999 proto_req = (inp->input_waiting &
2000 (DLIL_PROTO_WAITING | DLIL_PROTO_REGISTER));
2001
2002 /* Packets for non-dedicated interfaces other than lo0 */
2003 m_cnt = qlen(&inp->rcvq_pkts);
2004 m = _getq_all(&inp->rcvq_pkts, NULL, NULL, NULL);
2005
2006 /* Packets exclusive to lo0 */
2007 m_cnt_loop = qlen(&inpm->lo_rcvq_pkts);
2008 m_loop = _getq_all(&inpm->lo_rcvq_pkts, NULL, NULL, NULL);
2009
2010 inp->wtot = 0;
2011
2012 lck_mtx_unlock(&inp->input_lck);
2013
2014 /*
2015 * NOTE warning %%% attention !!!!
2016 * We should think about putting some thread starvation
2017 * safeguards if we deal with long chains of packets.
2018 */
2019 if (m_loop != NULL)
2020 dlil_input_packet_list_extended(lo_ifp, m_loop,
2021 m_cnt_loop, inp->mode);
2022
2023 if (m != NULL)
2024 dlil_input_packet_list_extended(NULL, m,
2025 m_cnt, inp->mode);
2026
2027 if (proto_req)
2028 proto_input_run();
2029 }
2030
2031 /* NOTREACHED */
2032 VERIFY(0); /* we should never get here */
2033 }
2034
2035 /*
2036 * Input thread for interfaces with legacy input model.
2037 */
2038 static void
2039 dlil_input_thread_func(void *v, wait_result_t w)
2040 {
2041 #pragma unused(w)
2042 char thread_name[MAXTHREADNAMESIZE];
2043 struct dlil_threading_info *inp = v;
2044 struct ifnet *ifp = inp->ifp;
2045
2046 /* Construct the name for this thread, and then apply it. */
2047 bzero(thread_name, sizeof(thread_name));
2048 snprintf(thread_name, sizeof(thread_name), "dlil_input_%s", ifp->if_xname);
2049 thread_set_thread_name(inp->input_thr, thread_name);
2050
2051 VERIFY(inp != dlil_main_input_thread);
2052 VERIFY(ifp != NULL);
2053 VERIFY(!(ifp->if_eflags & IFEF_RXPOLL) || !net_rxpoll);
2054 VERIFY(inp->mode == IFNET_MODEL_INPUT_POLL_OFF);
2055
2056 while (1) {
2057 struct mbuf *m = NULL;
2058 u_int32_t m_cnt;
2059
2060 lck_mtx_lock_spin(&inp->input_lck);
2061
2062 /* Wait until there is work to be done */
2063 while (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) {
2064 inp->input_waiting &= ~DLIL_INPUT_RUNNING;
2065 (void) msleep(&inp->input_waiting, &inp->input_lck,
2066 (PZERO - 1) | PSPIN, inp->input_name, NULL);
2067 }
2068
2069 inp->input_waiting |= DLIL_INPUT_RUNNING;
2070 inp->input_waiting &= ~DLIL_INPUT_WAITING;
2071
2072 /*
2073 * Protocol registration and injection must always use
2074 * the main input thread; in theory the latter can utilize
2075 * the corresponding input thread where the packet arrived
2076 * on, but that requires our knowing the interface in advance
2077 * (and the benefits might not worth the trouble.)
2078 */
2079 VERIFY(!(inp->input_waiting &
2080 (DLIL_PROTO_WAITING|DLIL_PROTO_REGISTER)));
2081
2082 /* Packets for this interface */
2083 m_cnt = qlen(&inp->rcvq_pkts);
2084 m = _getq_all(&inp->rcvq_pkts, NULL, NULL, NULL);
2085
2086 if (inp->input_waiting & DLIL_INPUT_TERMINATE) {
2087 lck_mtx_unlock(&inp->input_lck);
2088
2089 /* Free up pending packets */
2090 if (m != NULL)
2091 mbuf_freem_list(m);
2092
2093 dlil_terminate_input_thread(inp);
2094 /* NOTREACHED */
2095 return;
2096 }
2097
2098 inp->wtot = 0;
2099
2100 dlil_input_stats_sync(ifp, inp);
2101
2102 lck_mtx_unlock(&inp->input_lck);
2103
2104 /*
2105 * NOTE warning %%% attention !!!!
2106 * We should think about putting some thread starvation
2107 * safeguards if we deal with long chains of packets.
2108 */
2109 if (m != NULL)
2110 dlil_input_packet_list_extended(NULL, m,
2111 m_cnt, inp->mode);
2112 }
2113
2114 /* NOTREACHED */
2115 VERIFY(0); /* we should never get here */
2116 }
2117
2118 /*
2119 * Input thread for interfaces with opportunistic polling input model.
2120 */
2121 static void
2122 dlil_rxpoll_input_thread_func(void *v, wait_result_t w)
2123 {
2124 #pragma unused(w)
2125 struct dlil_threading_info *inp = v;
2126 struct ifnet *ifp = inp->ifp;
2127 struct timespec ts;
2128
2129 VERIFY(inp != dlil_main_input_thread);
2130 VERIFY(ifp != NULL && (ifp->if_eflags & IFEF_RXPOLL));
2131
2132 while (1) {
2133 struct mbuf *m = NULL;
2134 u_int32_t m_cnt, m_size, poll_req = 0;
2135 ifnet_model_t mode;
2136 struct timespec now, delta;
2137 u_int64_t ival;
2138
2139 lck_mtx_lock_spin(&inp->input_lck);
2140
2141 if ((ival = inp->rxpoll_ival) < IF_RXPOLL_INTERVALTIME_MIN)
2142 ival = IF_RXPOLL_INTERVALTIME_MIN;
2143
2144 /* Link parameters changed? */
2145 if (ifp->if_poll_update != 0) {
2146 ifp->if_poll_update = 0;
2147 (void) dlil_rxpoll_set_params(ifp, NULL, TRUE);
2148 }
2149
2150 /* Current operating mode */
2151 mode = inp->mode;
2152
2153 /* Wait until there is work to be done */
2154 while (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) {
2155 inp->input_waiting &= ~DLIL_INPUT_RUNNING;
2156 (void) msleep(&inp->input_waiting, &inp->input_lck,
2157 (PZERO - 1) | PSPIN, inp->input_name, NULL);
2158 }
2159
2160 inp->input_waiting |= DLIL_INPUT_RUNNING;
2161 inp->input_waiting &= ~DLIL_INPUT_WAITING;
2162
2163 /*
2164 * Protocol registration and injection must always use
2165 * the main input thread; in theory the latter can utilize
2166 * the corresponding input thread on which the packet arrived,
2167 * but that requires our knowing the interface in advance
2168 * (and the benefits might not be worth the trouble.)
2169 */
2170 VERIFY(!(inp->input_waiting &
2171 (DLIL_PROTO_WAITING|DLIL_PROTO_REGISTER)));
2172
2173 if (inp->input_waiting & DLIL_INPUT_TERMINATE) {
2174 /* Free up pending packets */
2175 lck_mtx_convert_spin(&inp->input_lck);
2176 _flushq(&inp->rcvq_pkts);
2177 if (inp->input_mit_tcall != NULL) {
2178 if (thread_call_isactive(inp->input_mit_tcall))
2179 thread_call_cancel(inp->input_mit_tcall);
2180 }
2181 lck_mtx_unlock(&inp->input_lck);
2182
2183 dlil_terminate_input_thread(inp);
2184 /* NOTREACHED */
2185 return;
2186 }
2187
2188 /* Total count of all packets */
2189 m_cnt = qlen(&inp->rcvq_pkts);
2190
2191 /* Total bytes of all packets */
2192 m_size = qsize(&inp->rcvq_pkts);
2193
2194 /* Packets for this interface */
2195 m = _getq_all(&inp->rcvq_pkts, NULL, NULL, NULL);
2196 VERIFY(m != NULL || m_cnt == 0);
2197
2198 nanouptime(&now);
2199 if (!net_timerisset(&inp->sample_lasttime))
2200 *(&inp->sample_lasttime) = *(&now);
2201
2202 net_timersub(&now, &inp->sample_lasttime, &delta);
2203 if (if_rxpoll && net_timerisset(&inp->sample_holdtime)) {
2204 u_int32_t ptot, btot;
2205
2206 /* Accumulate statistics for current sampling */
2207 PKTCNTR_ADD(&inp->sstats, m_cnt, m_size);
2208
2209 if (net_timercmp(&delta, &inp->sample_holdtime, <))
2210 goto skip;
2211
2212 *(&inp->sample_lasttime) = *(&now);
2213
2214 /* Calculate min/max of inbound bytes */
2215 btot = (u_int32_t)inp->sstats.bytes;
2216 if (inp->rxpoll_bmin == 0 || inp->rxpoll_bmin > btot)
2217 inp->rxpoll_bmin = btot;
2218 if (btot > inp->rxpoll_bmax)
2219 inp->rxpoll_bmax = btot;
2220
2221 /* Calculate EWMA of inbound bytes */
2222 DLIL_EWMA(inp->rxpoll_bavg, btot, if_rxpoll_decay);
2223
2224 /* Calculate min/max of inbound packets */
2225 ptot = (u_int32_t)inp->sstats.packets;
2226 if (inp->rxpoll_pmin == 0 || inp->rxpoll_pmin > ptot)
2227 inp->rxpoll_pmin = ptot;
2228 if (ptot > inp->rxpoll_pmax)
2229 inp->rxpoll_pmax = ptot;
2230
2231 /* Calculate EWMA of inbound packets */
2232 DLIL_EWMA(inp->rxpoll_pavg, ptot, if_rxpoll_decay);
2233
2234 /* Reset sampling statistics */
2235 PKTCNTR_CLEAR(&inp->sstats);
2236
2237 /* Calculate EWMA of wakeup requests */
2238 DLIL_EWMA(inp->rxpoll_wavg, inp->wtot, if_rxpoll_decay);
2239 inp->wtot = 0;
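/*
 * Illustrative note (assumption, not from the original source): DLIL_EWMA()
 * is taken here to compute a standard exponentially weighted moving average
 * with a power-of-two decay, roughly
 *
 *	avg = ((avg << decay) - avg + new) >> decay
 *
 * With if_rxpoll_decay == 2, an existing rxpoll_pavg of 100 and a new
 * sample of 20 would yield (400 - 100 + 20) >> 2 = 80, i.e. each new
 * sample is weighted by 1/2^decay.
 */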
2240
2241 if (dlil_verbose) {
2242 if (!net_timerisset(&inp->dbg_lasttime))
2243 *(&inp->dbg_lasttime) = *(&now);
2244 net_timersub(&now, &inp->dbg_lasttime, &delta);
2245 if (net_timercmp(&delta, &dlil_dbgrate, >=)) {
2246 *(&inp->dbg_lasttime) = *(&now);
2247 printf("%s: [%s] pkts avg %d max %d "
2248 "limits [%d/%d], wreq avg %d "
2249 "limits [%d/%d], bytes avg %d "
2250 "limits [%d/%d]\n", if_name(ifp),
2251 (inp->mode ==
2252 IFNET_MODEL_INPUT_POLL_ON) ?
2253 "ON" : "OFF", inp->rxpoll_pavg,
2254 inp->rxpoll_pmax,
2255 inp->rxpoll_plowat,
2256 inp->rxpoll_phiwat,
2257 inp->rxpoll_wavg,
2258 inp->rxpoll_wlowat,
2259 inp->rxpoll_whiwat,
2260 inp->rxpoll_bavg,
2261 inp->rxpoll_blowat,
2262 inp->rxpoll_bhiwat);
2263 }
2264 }
2265
2266 /* Perform mode transition, if necessary */
2267 if (!net_timerisset(&inp->mode_lasttime))
2268 *(&inp->mode_lasttime) = *(&now);
2269
2270 net_timersub(&now, &inp->mode_lasttime, &delta);
2271 if (net_timercmp(&delta, &inp->mode_holdtime, <))
2272 goto skip;
2273
2274 if (inp->rxpoll_pavg <= inp->rxpoll_plowat &&
2275 inp->rxpoll_bavg <= inp->rxpoll_blowat &&
2276 inp->mode != IFNET_MODEL_INPUT_POLL_OFF) {
2277 mode = IFNET_MODEL_INPUT_POLL_OFF;
2278 } else if (inp->rxpoll_pavg >= inp->rxpoll_phiwat &&
2279 (inp->rxpoll_bavg >= inp->rxpoll_bhiwat ||
2280 inp->rxpoll_wavg >= inp->rxpoll_whiwat) &&
2281 inp->mode != IFNET_MODEL_INPUT_POLL_ON) {
2282 mode = IFNET_MODEL_INPUT_POLL_ON;
2283 }
2284
2285 if (mode != inp->mode) {
2286 inp->mode = mode;
2287 *(&inp->mode_lasttime) = *(&now);
2288 poll_req++;
2289 }
2290 }
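/*
 * Note (illustrative): the transition above is deliberately asymmetric.
 * Polling turns ON only when the packet EWMA crosses its high watermark
 * and either the byte or the wakeup EWMA does too, but it turns OFF as
 * soon as the packet and byte EWMAs fall below their low watermarks,
 * presumably so that brief bursts do not flap the interface in and out
 * of polling mode.
 */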
2291 skip:
2292 dlil_input_stats_sync(ifp, inp);
2293
2294 lck_mtx_unlock(&inp->input_lck);
2295
2296 /*
2297 * If there's a mode change and interface is still attached,
2298 * perform a downcall to the driver for the new mode. Also
2299 * hold an IO refcnt on the interface to prevent it from
2300 * being detached (will be released below.)
2301 */
2302 if (poll_req != 0 && ifnet_is_attached(ifp, 1)) {
2303 struct ifnet_model_params p = { mode, { 0 } };
2304 errno_t err;
2305
2306 if (dlil_verbose) {
2307 printf("%s: polling is now %s, "
2308 "pkts avg %d max %d limits [%d/%d], "
2309 "wreq avg %d limits [%d/%d], "
2310 "bytes avg %d limits [%d/%d]\n",
2311 if_name(ifp),
2312 (mode == IFNET_MODEL_INPUT_POLL_ON) ?
2313 "ON" : "OFF", inp->rxpoll_pavg,
2314 inp->rxpoll_pmax, inp->rxpoll_plowat,
2315 inp->rxpoll_phiwat, inp->rxpoll_wavg,
2316 inp->rxpoll_wlowat, inp->rxpoll_whiwat,
2317 inp->rxpoll_bavg, inp->rxpoll_blowat,
2318 inp->rxpoll_bhiwat);
2319 }
2320
2321 if ((err = ((*ifp->if_input_ctl)(ifp,
2322 IFNET_CTL_SET_INPUT_MODEL, sizeof (p), &p))) != 0) {
2323 printf("%s: error setting polling mode "
2324 "to %s (%d)\n", if_name(ifp),
2325 (mode == IFNET_MODEL_INPUT_POLL_ON) ?
2326 "ON" : "OFF", err);
2327 }
2328
2329 switch (mode) {
2330 case IFNET_MODEL_INPUT_POLL_OFF:
2331 ifnet_set_poll_cycle(ifp, NULL);
2332 inp->rxpoll_offreq++;
2333 if (err != 0)
2334 inp->rxpoll_offerr++;
2335 break;
2336
2337 case IFNET_MODEL_INPUT_POLL_ON:
2338 net_nsectimer(&ival, &ts);
2339 ifnet_set_poll_cycle(ifp, &ts);
2340 ifnet_poll(ifp);
2341 inp->rxpoll_onreq++;
2342 if (err != 0)
2343 inp->rxpoll_onerr++;
2344 break;
2345
2346 default:
2347 VERIFY(0);
2348 /* NOTREACHED */
2349 }
2350
2351 /* Release the IO refcnt */
2352 ifnet_decr_iorefcnt(ifp);
2353 }
2354
2355 /*
2356 * NOTE warning %%% attention !!!!
2357 * We should think about putting some thread starvation
2358 * safeguards if we deal with long chains of packets.
2359 */
2360 if (m != NULL)
2361 dlil_input_packet_list_extended(NULL, m, m_cnt, mode);
2362 }
2363
2364 /* NOTREACHED */
2365 VERIFY(0); /* we should never get here */
2366 }
2367
2368 /*
2369 * Must be called on an attached ifnet (caller is expected to check.)
2370 * Caller may pass NULL for poll parameters to indicate "auto-tuning."
2371 */
2372 errno_t
2373 dlil_rxpoll_set_params(struct ifnet *ifp, struct ifnet_poll_params *p,
2374 boolean_t locked)
2375 {
2376 struct dlil_threading_info *inp;
2377 u_int64_t sample_holdtime, inbw;
2378
2379 VERIFY(ifp != NULL);
2380 if (!(ifp->if_eflags & IFEF_RXPOLL) || (inp = ifp->if_inp) == NULL)
2381 return (ENXIO);
2382
2383 if (p != NULL) {
2384 if ((p->packets_lowat == 0 && p->packets_hiwat != 0) ||
2385 (p->packets_lowat != 0 && p->packets_hiwat == 0))
2386 return (EINVAL);
2387 if (p->packets_lowat != 0 && /* hiwat must be non-zero */
2388 p->packets_lowat >= p->packets_hiwat)
2389 return (EINVAL);
2390 if ((p->bytes_lowat == 0 && p->bytes_hiwat != 0) ||
2391 (p->bytes_lowat != 0 && p->bytes_hiwat == 0))
2392 return (EINVAL);
2393 if (p->bytes_lowat != 0 && /* hiwat must be non-zero */
2394 p->bytes_lowat >= p->bytes_hiwat)
2395 return (EINVAL);
2396 if (p->interval_time != 0 &&
2397 p->interval_time < IF_RXPOLL_INTERVALTIME_MIN)
2398 p->interval_time = IF_RXPOLL_INTERVALTIME_MIN;
2399 }
2400
2401 if (!locked)
2402 lck_mtx_lock(&inp->input_lck);
2403
2404 LCK_MTX_ASSERT(&inp->input_lck, LCK_MTX_ASSERT_OWNED);
2405
2406 /*
2407 * Normally, we'd reset the parameters to the auto-tuned values
2408 * if the input thread detects a change in link rate. If the
2409 * driver provides its own parameters right after a link rate
2410 * changes, but before the input thread gets to run, we want to
2411 * make sure to keep the driver's values. Clearing if_poll_update
2412 * will achieve that.
2413 */
2414 if (p != NULL && !locked && ifp->if_poll_update != 0)
2415 ifp->if_poll_update = 0;
2416
2417 if ((inbw = ifnet_input_linkrate(ifp)) == 0 && p == NULL) {
2418 sample_holdtime = 0; /* polling is disabled */
2419 inp->rxpoll_wlowat = inp->rxpoll_plowat =
2420 inp->rxpoll_blowat = 0;
2421 inp->rxpoll_whiwat = inp->rxpoll_phiwat =
2422 inp->rxpoll_bhiwat = (u_int32_t)-1;
2423 inp->rxpoll_plim = 0;
2424 inp->rxpoll_ival = IF_RXPOLL_INTERVALTIME_MIN;
2425 } else {
2426 u_int32_t plowat, phiwat, blowat, bhiwat, plim;
2427 u_int64_t ival;
2428 unsigned int n, i;
2429
2430 for (n = 0, i = 0; rxpoll_tbl[i].speed != 0; i++) {
2431 if (inbw < rxpoll_tbl[i].speed)
2432 break;
2433 n = i;
2434 }
2435 /* auto-tune if caller didn't specify a value */
2436 plowat = ((p == NULL || p->packets_lowat == 0) ?
2437 rxpoll_tbl[n].plowat : p->packets_lowat);
2438 phiwat = ((p == NULL || p->packets_hiwat == 0) ?
2439 rxpoll_tbl[n].phiwat : p->packets_hiwat);
2440 blowat = ((p == NULL || p->bytes_lowat == 0) ?
2441 rxpoll_tbl[n].blowat : p->bytes_lowat);
2442 bhiwat = ((p == NULL || p->bytes_hiwat == 0) ?
2443 rxpoll_tbl[n].bhiwat : p->bytes_hiwat);
2444 plim = ((p == NULL || p->packets_limit == 0) ?
2445 if_rxpoll_max : p->packets_limit);
2446 ival = ((p == NULL || p->interval_time == 0) ?
2447 if_rxpoll_interval_time : p->interval_time);
2448
2449 VERIFY(plowat != 0 && phiwat != 0);
2450 VERIFY(blowat != 0 && bhiwat != 0);
2451 VERIFY(ival >= IF_RXPOLL_INTERVALTIME_MIN);
2452
2453 sample_holdtime = if_rxpoll_sample_holdtime;
2454 inp->rxpoll_wlowat = if_rxpoll_wlowat;
2455 inp->rxpoll_whiwat = if_rxpoll_whiwat;
2456 inp->rxpoll_plowat = plowat;
2457 inp->rxpoll_phiwat = phiwat;
2458 inp->rxpoll_blowat = blowat;
2459 inp->rxpoll_bhiwat = bhiwat;
2460 inp->rxpoll_plim = plim;
2461 inp->rxpoll_ival = ival;
2462 }
2463
2464 net_nsectimer(&if_rxpoll_mode_holdtime, &inp->mode_holdtime);
2465 net_nsectimer(&sample_holdtime, &inp->sample_holdtime);
2466
2467 if (dlil_verbose) {
2468 printf("%s: speed %llu bps, sample per %llu nsec, "
2469 "poll interval %llu nsec, pkts per poll %u, "
2470 "pkt limits [%u/%u], wreq limits [%u/%u], "
2471 "bytes limits [%u/%u]\n", if_name(ifp),
2472 inbw, sample_holdtime, inp->rxpoll_ival, inp->rxpoll_plim,
2473 inp->rxpoll_plowat, inp->rxpoll_phiwat, inp->rxpoll_wlowat,
2474 inp->rxpoll_whiwat, inp->rxpoll_blowat, inp->rxpoll_bhiwat);
2475 }
2476
2477 if (!locked)
2478 lck_mtx_unlock(&inp->input_lck);
2479
2480 return (0);
2481 }
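/*
 * Usage sketch (illustrative, under the assumption that the public
 * ifnet_set_poll_params() KPI from kpi_interface.h forwards to this
 * routine): a driver that wants to cap each poll at 64 packets while
 * leaving the watermarks auto-tuned could do something like
 *
 *	struct ifnet_poll_params p;
 *
 *	bzero(&p, sizeof (p));
 *	p.packets_limit = 64;		// hypothetical value
 *	(void) ifnet_set_poll_params(ifp, &p);
 *
 * Zeroed fields fall back to the rxpoll_tbl[] entry selected by the
 * current input link rate, as computed above.
 */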
2482
2483 /*
2484 * Must be called on an attached ifnet (caller is expected to check.)
2485 */
2486 errno_t
2487 dlil_rxpoll_get_params(struct ifnet *ifp, struct ifnet_poll_params *p)
2488 {
2489 struct dlil_threading_info *inp;
2490
2491 VERIFY(ifp != NULL && p != NULL);
2492 if (!(ifp->if_eflags & IFEF_RXPOLL) || (inp = ifp->if_inp) == NULL)
2493 return (ENXIO);
2494
2495 bzero(p, sizeof (*p));
2496
2497 lck_mtx_lock(&inp->input_lck);
2498 p->packets_limit = inp->rxpoll_plim;
2499 p->packets_lowat = inp->rxpoll_plowat;
2500 p->packets_hiwat = inp->rxpoll_phiwat;
2501 p->bytes_lowat = inp->rxpoll_blowat;
2502 p->bytes_hiwat = inp->rxpoll_bhiwat;
2503 p->interval_time = inp->rxpoll_ival;
2504 lck_mtx_unlock(&inp->input_lck);
2505
2506 return (0);
2507 }
2508
2509 errno_t
2510 ifnet_input(struct ifnet *ifp, struct mbuf *m_head,
2511 const struct ifnet_stat_increment_param *s)
2512 {
2513 return (ifnet_input_common(ifp, m_head, NULL, s, FALSE, FALSE));
2514 }
2515
2516 errno_t
2517 ifnet_input_extended(struct ifnet *ifp, struct mbuf *m_head,
2518 struct mbuf *m_tail, const struct ifnet_stat_increment_param *s)
2519 {
2520 return (ifnet_input_common(ifp, m_head, m_tail, s, TRUE, FALSE));
2521 }
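/*
 * Usage sketch (illustrative only): a typical driver receive path builds
 * an mbuf packet chain, fills in the per-batch statistics and hands the
 * chain to ifnet_input_extended(), e.g.
 *
 *	struct ifnet_stat_increment_param s;
 *
 *	bzero(&s, sizeof (s));
 *	s.packets_in = cnt;	// exact number of packets in the chain
 *	s.bytes_in = len;	// total bytes (an approximation is fine)
 *	(void) ifnet_input_extended(ifp, m_head, m_tail, &s);
 *
 * The non-extended ifnet_input() variant walks the chain and counts the
 * packets itself, as ifnet_input_common() below shows.
 */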
2522
2523 static errno_t
2524 ifnet_input_common(struct ifnet *ifp, struct mbuf *m_head, struct mbuf *m_tail,
2525 const struct ifnet_stat_increment_param *s, boolean_t ext, boolean_t poll)
2526 {
2527 dlil_input_func input_func;
2528 struct ifnet_stat_increment_param _s;
2529 u_int32_t m_cnt = 0, m_size = 0;
2530 struct mbuf *last;
2531 errno_t err = 0;
2532
2533 if ((m_head == NULL && !poll) || (s == NULL && ext)) {
2534 if (m_head != NULL)
2535 mbuf_freem_list(m_head);
2536 return (EINVAL);
2537 }
2538
2539 VERIFY(m_head != NULL || (s == NULL && m_tail == NULL && !ext && poll));
2540 VERIFY(m_tail == NULL || ext);
2541 VERIFY(s != NULL || !ext);
2542
2543 /*
2544 * Drop the packet(s) if the parameters are invalid, or if the
2545 * interface is no longer attached; else hold an IO refcnt to
2546 * prevent it from being detached (will be released below.)
2547 */
2548 if (ifp == NULL || (ifp != lo_ifp && !ifnet_is_attached(ifp, 1))) {
2549 if (m_head != NULL)
2550 mbuf_freem_list(m_head);
2551 return (EINVAL);
2552 }
2553
2554 input_func = ifp->if_input_dlil;
2555 VERIFY(input_func != NULL);
2556
2557 if (m_tail == NULL) {
2558 last = m_head;
2559 while (m_head != NULL) {
2560 #if IFNET_INPUT_SANITY_CHK
2561 if (dlil_input_sanity_check != 0)
2562 DLIL_INPUT_CHECK(last, ifp);
2563 #endif /* IFNET_INPUT_SANITY_CHK */
2564 m_cnt++;
2565 m_size += m_length(last);
2566 if (mbuf_nextpkt(last) == NULL)
2567 break;
2568 last = mbuf_nextpkt(last);
2569 }
2570 m_tail = last;
2571 } else {
2572 #if IFNET_INPUT_SANITY_CHK
2573 if (dlil_input_sanity_check != 0) {
2574 last = m_head;
2575 while (1) {
2576 DLIL_INPUT_CHECK(last, ifp);
2577 m_cnt++;
2578 m_size += m_length(last);
2579 if (mbuf_nextpkt(last) == NULL)
2580 break;
2581 last = mbuf_nextpkt(last);
2582 }
2583 } else {
2584 m_cnt = s->packets_in;
2585 m_size = s->bytes_in;
2586 last = m_tail;
2587 }
2588 #else
2589 m_cnt = s->packets_in;
2590 m_size = s->bytes_in;
2591 last = m_tail;
2592 #endif /* IFNET_INPUT_SANITY_CHK */
2593 }
2594
2595 if (last != m_tail) {
2596 panic_plain("%s: invalid input packet chain for %s, "
2597 "tail mbuf %p instead of %p\n", __func__, if_name(ifp),
2598 m_tail, last);
2599 }
2600
2601 /*
2602 * Assert packet count only for the extended variant, for backwards
2603 * compatibility, since this came directly from the device driver.
2604 * Relax this assertion for input bytes, as the driver may have
2605 * included the link-layer headers in the computation; hence
2606 * m_size is just an approximation.
2607 */
2608 if (ext && s->packets_in != m_cnt) {
2609 panic_plain("%s: input packet count mismatch for %s, "
2610 "%d instead of %d\n", __func__, if_name(ifp),
2611 s->packets_in, m_cnt);
2612 }
2613
2614 if (s == NULL) {
2615 bzero(&_s, sizeof (_s));
2616 s = &_s;
2617 } else {
2618 _s = *s;
2619 }
2620 _s.packets_in = m_cnt;
2621 _s.bytes_in = m_size;
2622
2623 err = (*input_func)(ifp, m_head, m_tail, s, poll, current_thread());
2624
2625 if (ifp != lo_ifp) {
2626 /* Release the IO refcnt */
2627 ifnet_decr_iorefcnt(ifp);
2628 }
2629
2630 return (err);
2631 }
2632
2633
2634 errno_t
2635 dlil_output_handler(struct ifnet *ifp, struct mbuf *m)
2636 {
2637 return (ifp->if_output(ifp, m));
2638 }
2639
2640 errno_t
2641 dlil_input_handler(struct ifnet *ifp, struct mbuf *m_head,
2642 struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
2643 boolean_t poll, struct thread *tp)
2644 {
2645 struct dlil_threading_info *inp;
2646 u_int32_t m_cnt = s->packets_in;
2647 u_int32_t m_size = s->bytes_in;
2648
2649 if ((inp = ifp->if_inp) == NULL)
2650 inp = dlil_main_input_thread;
2651
2652 /*
2653 * If there is a matching DLIL input thread associated with an
2654 * affinity set, associate this thread with the same set. We
2655 * will only do this once.
2656 */
2657 lck_mtx_lock_spin(&inp->input_lck);
2658 if (inp != dlil_main_input_thread && inp->net_affinity && tp != NULL &&
2659 ((!poll && inp->wloop_thr == THREAD_NULL) ||
2660 (poll && inp->poll_thr == THREAD_NULL))) {
2661 u_int32_t tag = inp->tag;
2662
2663 if (poll) {
2664 VERIFY(inp->poll_thr == THREAD_NULL);
2665 inp->poll_thr = tp;
2666 } else {
2667 VERIFY(inp->wloop_thr == THREAD_NULL);
2668 inp->wloop_thr = tp;
2669 }
2670 lck_mtx_unlock(&inp->input_lck);
2671
2672 /* Associate the current thread with the new affinity tag */
2673 (void) dlil_affinity_set(tp, tag);
2674
2675 /*
2676 * Take a reference on the current thread; during detach,
2677 * we will need to refer to it in order to tear down its
2678 * affinity.
2679 */
2680 thread_reference(tp);
2681 lck_mtx_lock_spin(&inp->input_lck);
2682 }
2683
2684 VERIFY(m_head != NULL || (m_tail == NULL && m_cnt == 0));
2685
2686 /*
2687 * Because of looped-back multicast we cannot stuff the ifp in
2688 * the rcvif of the packet header: loopback (lo0) packets use a
2689 * dedicated list so that we can later associate them with lo_ifp
2690 * on their way up the stack. Packets for other interfaces without
2691 * dedicated input threads go to the regular list.
2692 */
2693 if (m_head != NULL) {
2694 if (inp == dlil_main_input_thread && ifp == lo_ifp) {
2695 struct dlil_main_threading_info *inpm =
2696 (struct dlil_main_threading_info *)inp;
2697 _addq_multi(&inpm->lo_rcvq_pkts, m_head, m_tail,
2698 m_cnt, m_size);
2699 } else {
2700 _addq_multi(&inp->rcvq_pkts, m_head, m_tail,
2701 m_cnt, m_size);
2702 }
2703 }
2704
2705 #if IFNET_INPUT_SANITY_CHK
2706 if (dlil_input_sanity_check != 0) {
2707 u_int32_t count;
2708 struct mbuf *m0;
2709
2710 for (m0 = m_head, count = 0; m0; m0 = mbuf_nextpkt(m0))
2711 count++;
2712
2713 if (count != m_cnt) {
2714 panic_plain("%s: invalid packet count %d "
2715 "(expected %d)\n", if_name(ifp),
2716 count, m_cnt);
2717 /* NOTREACHED */
2718 }
2719
2720 inp->input_mbuf_cnt += m_cnt;
2721 }
2722 #endif /* IFNET_INPUT_SANITY_CHK */
2723
2724 dlil_input_stats_add(s, inp, poll);
2725 /*
2726 * If we're using the main input thread, synchronize the
2727 * stats now since we have the interface context. All
2728 * other cases involving dedicated input threads will
2729 * have their stats synchronized there.
2730 */
2731 if (inp == dlil_main_input_thread)
2732 dlil_input_stats_sync(ifp, inp);
2733
2734 if (inp->input_mit_tcall &&
2735 qlen(&inp->rcvq_pkts) >= dlil_rcv_mit_pkts_min &&
2736 qlen(&inp->rcvq_pkts) < dlil_rcv_mit_pkts_max &&
2737 (ifp->if_family == IFNET_FAMILY_ETHERNET ||
2738 ifp->if_type == IFT_CELLULAR)
2739 ) {
2740 if (!thread_call_isactive(inp->input_mit_tcall)) {
2741 uint64_t deadline;
2742 clock_interval_to_deadline(dlil_rcv_mit_interval,
2743 1, &deadline);
2744 (void) thread_call_enter_delayed(
2745 inp->input_mit_tcall, deadline);
2746 }
2747 } else {
2748 inp->input_waiting |= DLIL_INPUT_WAITING;
2749 if (!(inp->input_waiting & DLIL_INPUT_RUNNING)) {
2750 inp->wtot++;
2751 wakeup_one((caddr_t)&inp->input_waiting);
2752 }
2753 }
2754 lck_mtx_unlock(&inp->input_lck);
2755
2756 return (0);
2757 }
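/*
 * Note (illustrative): the thread-call path above implements a simple form
 * of receive mitigation.  While the receive queue of an Ethernet or
 * cellular interface holds at least dlil_rcv_mit_pkts_min but fewer than
 * dlil_rcv_mit_pkts_max packets, the immediate wakeup of the input thread
 * is replaced by a thread call scheduled dlil_rcv_mit_interval in the
 * future, so several small bursts are processed in one pass; outside that
 * band (or for other interface types) the input thread is woken right away.
 */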
2758
2759
2760 static void
2761 ifnet_start_common(struct ifnet *ifp, boolean_t resetfc)
2762 {
2763 if (!(ifp->if_eflags & IFEF_TXSTART))
2764 return;
2765 /*
2766 * If the starter thread is inactive, signal it to do work,
2767 * unless the interface is being flow controlled from below,
2768 * e.g. a virtual interface being flow controlled by a real
2769 * network interface beneath it, or it's been disabled via
2770 * a call to ifnet_disable_output().
2771 */
2772 lck_mtx_lock_spin(&ifp->if_start_lock);
2773 if (resetfc) {
2774 ifp->if_start_flags &= ~IFSF_FLOW_CONTROLLED;
2775 } else if (ifp->if_start_flags & IFSF_FLOW_CONTROLLED) {
2776 lck_mtx_unlock(&ifp->if_start_lock);
2777 return;
2778 }
2779 ifp->if_start_req++;
2780 if (!ifp->if_start_active && ifp->if_start_thread != THREAD_NULL &&
2781 (resetfc || !(ifp->if_eflags & IFEF_ENQUEUE_MULTI) ||
2782 IFCQ_LEN(&ifp->if_snd) >= ifp->if_start_delay_qlen ||
2783 ifp->if_start_delayed == 0)) {
2784 (void) thread_wakeup_thread((caddr_t)&ifp->if_start_thread,
2785 ifp->if_start_thread);
2786 }
2787 lck_mtx_unlock(&ifp->if_start_lock);
2788 }
2789
2790 void
2791 ifnet_start(struct ifnet *ifp)
2792 {
2793 ifnet_start_common(ifp, FALSE);
2794 }
2795
2796 static void
2797 ifnet_start_thread_fn(void *v, wait_result_t w)
2798 {
2799 #pragma unused(w)
2800 struct ifnet *ifp = v;
2801 char ifname[IFNAMSIZ + 1];
2802 char thread_name[MAXTHREADNAMESIZE];
2803 struct timespec *ts = NULL;
2804 struct ifclassq *ifq = &ifp->if_snd;
2805 struct timespec delay_start_ts;
2806
2807 /* Construct the name for this thread, and then apply it. */
2808 bzero(thread_name, sizeof(thread_name));
2809 (void) snprintf(thread_name, sizeof (thread_name),
2810 "ifnet_start_%s", ifp->if_xname);
2811 thread_set_thread_name(ifp->if_start_thread, thread_name);
2812
2813 /*
2814 * Treat the dedicated starter thread for lo0 as equivalent to
2815 * the driver workloop thread; if net_affinity is enabled for
2816 * the main input thread, associate this starter thread with it
2817 * by binding them with the same affinity tag. This is done
2818 * only once (as we only have one lo_ifp which never goes away.)
2819 */
2820 if (ifp == lo_ifp) {
2821 struct dlil_threading_info *inp = dlil_main_input_thread;
2822 struct thread *tp = current_thread();
2823
2824 lck_mtx_lock(&inp->input_lck);
2825 if (inp->net_affinity) {
2826 u_int32_t tag = inp->tag;
2827
2828 VERIFY(inp->wloop_thr == THREAD_NULL);
2829 VERIFY(inp->poll_thr == THREAD_NULL);
2830 inp->wloop_thr = tp;
2831 lck_mtx_unlock(&inp->input_lck);
2832
2833 /* Associate this thread with the affinity tag */
2834 (void) dlil_affinity_set(tp, tag);
2835 } else {
2836 lck_mtx_unlock(&inp->input_lck);
2837 }
2838 }
2839
2840 (void) snprintf(ifname, sizeof (ifname), "%s_starter", if_name(ifp));
2841
2842 lck_mtx_lock_spin(&ifp->if_start_lock);
2843
2844 for (;;) {
2845 if (ifp->if_start_thread != NULL) {
2846 (void) msleep(&ifp->if_start_thread,
2847 &ifp->if_start_lock,
2848 (PZERO - 1) | PSPIN, ifname, ts);
2849 }
2850 /* interface is detached? */
2851 if (ifp->if_start_thread == THREAD_NULL) {
2852 ifnet_set_start_cycle(ifp, NULL);
2853 lck_mtx_unlock(&ifp->if_start_lock);
2854 ifnet_purge(ifp);
2855
2856 if (dlil_verbose) {
2857 printf("%s: starter thread terminated\n",
2858 if_name(ifp));
2859 }
2860
2861 /* for the extra refcnt from kernel_thread_start() */
2862 thread_deallocate(current_thread());
2863 /* this is the end */
2864 thread_terminate(current_thread());
2865 /* NOTREACHED */
2866 return;
2867 }
2868
2869 ifp->if_start_active = 1;
2870
2871 for (;;) {
2872 u_int32_t req = ifp->if_start_req;
2873 if (!IFCQ_IS_EMPTY(ifq) &&
2874 (ifp->if_eflags & IFEF_ENQUEUE_MULTI) &&
2875 ifp->if_start_delayed == 0 &&
2876 IFCQ_LEN(ifq) < ifp->if_start_delay_qlen &&
2877 (ifp->if_eflags & IFEF_DELAY_START)) {
2878 ifp->if_start_delayed = 1;
2879 ifnet_start_delayed++;
2880 break;
2881 } else {
2882 ifp->if_start_delayed = 0;
2883 }
2884 lck_mtx_unlock(&ifp->if_start_lock);
2885
2886 /*
2887 * If no longer attached, don't call start because ifp
2888 * is being destroyed; else hold an IO refcnt to
2889 * prevent the interface from being detached (will be
2890 * released below.)
2891 */
2892 if (!ifnet_is_attached(ifp, 1)) {
2893 lck_mtx_lock_spin(&ifp->if_start_lock);
2894 break;
2895 }
2896
2897 /* invoke the driver's start routine */
2898 ((*ifp->if_start)(ifp));
2899
2900 /*
2901 * Release the io ref count taken by ifnet_is_attached.
2902 */
2903 ifnet_decr_iorefcnt(ifp);
2904
2905 lck_mtx_lock_spin(&ifp->if_start_lock);
2906
2907 /*
2908 * If there's no pending request or if the
2909 * interface has been disabled, we're done.
2910 */
2911 if (req == ifp->if_start_req ||
2912 (ifp->if_start_flags & IFSF_FLOW_CONTROLLED)) {
2913 break;
2914 }
2915 }
2916
2917 ifp->if_start_req = 0;
2918 ifp->if_start_active = 0;
2919
2920 /*
2921 * Wakeup N ns from now if rate-controlled by TBR, and if
2922 * there are still packets in the send queue which haven't
2923 * been dequeued so far; else sleep indefinitely (ts = NULL)
2924 * until ifnet_start() is called again.
2925 */
2926 ts = ((IFCQ_TBR_IS_ENABLED(ifq) && !IFCQ_IS_EMPTY(ifq)) ?
2927 &ifp->if_start_cycle : NULL);
2928
2929 if (ts == NULL && ifp->if_start_delayed == 1) {
2930 delay_start_ts.tv_sec = 0;
2931 delay_start_ts.tv_nsec = ifp->if_start_delay_timeout;
2932 ts = &delay_start_ts;
2933 }
2934
2935 if (ts != NULL && ts->tv_sec == 0 && ts->tv_nsec == 0)
2936 ts = NULL;
2937 }
2938
2939 /* NOTREACHED */
2940 }
2941
2942 void
2943 ifnet_set_start_cycle(struct ifnet *ifp, struct timespec *ts)
2944 {
2945 if (ts == NULL)
2946 bzero(&ifp->if_start_cycle, sizeof (ifp->if_start_cycle));
2947 else
2948 *(&ifp->if_start_cycle) = *ts;
2949
2950 if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose)
2951 printf("%s: restart interval set to %lu nsec\n",
2952 if_name(ifp), ts->tv_nsec);
2953 }
2954
2955 static void
2956 ifnet_poll(struct ifnet *ifp)
2957 {
2958 /*
2959 * If the poller thread is inactive, signal it to do work.
2960 */
2961 lck_mtx_lock_spin(&ifp->if_poll_lock);
2962 ifp->if_poll_req++;
2963 if (!ifp->if_poll_active && ifp->if_poll_thread != THREAD_NULL) {
2964 wakeup_one((caddr_t)&ifp->if_poll_thread);
2965 }
2966 lck_mtx_unlock(&ifp->if_poll_lock);
2967 }
2968
2969 static void
2970 ifnet_poll_thread_fn(void *v, wait_result_t w)
2971 {
2972 #pragma unused(w)
2973 struct dlil_threading_info *inp;
2974 struct ifnet *ifp = v;
2975 char ifname[IFNAMSIZ + 1];
2976 struct timespec *ts = NULL;
2977 struct ifnet_stat_increment_param s;
2978
2979 snprintf(ifname, sizeof (ifname), "%s_poller",
2980 if_name(ifp));
2981 bzero(&s, sizeof (s));
2982
2983 lck_mtx_lock_spin(&ifp->if_poll_lock);
2984
2985 inp = ifp->if_inp;
2986 VERIFY(inp != NULL);
2987
2988 for (;;) {
2989 if (ifp->if_poll_thread != THREAD_NULL) {
2990 (void) msleep(&ifp->if_poll_thread, &ifp->if_poll_lock,
2991 (PZERO - 1) | PSPIN, ifname, ts);
2992 }
2993
2994 /* interface is detached (maybe while asleep)? */
2995 if (ifp->if_poll_thread == THREAD_NULL) {
2996 ifnet_set_poll_cycle(ifp, NULL);
2997 lck_mtx_unlock(&ifp->if_poll_lock);
2998
2999 if (dlil_verbose) {
3000 printf("%s: poller thread terminated\n",
3001 if_name(ifp));
3002 }
3003
3004 /* for the extra refcnt from kernel_thread_start() */
3005 thread_deallocate(current_thread());
3006 /* this is the end */
3007 thread_terminate(current_thread());
3008 /* NOTREACHED */
3009 return;
3010 }
3011
3012 ifp->if_poll_active = 1;
3013 for (;;) {
3014 struct mbuf *m_head, *m_tail;
3015 u_int32_t m_lim, m_cnt, m_totlen;
3016 u_int16_t req = ifp->if_poll_req;
3017
3018 lck_mtx_unlock(&ifp->if_poll_lock);
3019
3020 /*
3021 * If no longer attached, there's nothing to do;
3022 * else hold an IO refcnt to prevent the interface
3023 * from being detached (will be released below.)
3024 */
3025 if (!ifnet_is_attached(ifp, 1)) {
3026 lck_mtx_lock_spin(&ifp->if_poll_lock);
3027 break;
3028 }
3029
3030 m_lim = (inp->rxpoll_plim != 0) ? inp->rxpoll_plim :
3031 MAX((qlimit(&inp->rcvq_pkts)),
3032 (inp->rxpoll_phiwat << 2));
3033
3034 if (dlil_verbose > 1) {
3035 printf("%s: polling up to %d pkts, "
3036 "pkts avg %d max %d, wreq avg %d, "
3037 "bytes avg %d\n",
3038 if_name(ifp), m_lim,
3039 inp->rxpoll_pavg, inp->rxpoll_pmax,
3040 inp->rxpoll_wavg, inp->rxpoll_bavg);
3041 }
3042
3043 /* invoke the driver's input poll routine */
3044 ((*ifp->if_input_poll)(ifp, 0, m_lim, &m_head, &m_tail,
3045 &m_cnt, &m_totlen));
3046
3047 if (m_head != NULL) {
3048 VERIFY(m_tail != NULL && m_cnt > 0);
3049
3050 if (dlil_verbose > 1) {
3051 printf("%s: polled %d pkts, "
3052 "pkts avg %d max %d, wreq avg %d, "
3053 "bytes avg %d\n",
3054 if_name(ifp), m_cnt,
3055 inp->rxpoll_pavg, inp->rxpoll_pmax,
3056 inp->rxpoll_wavg, inp->rxpoll_bavg);
3057 }
3058
3059 /* stats are required for extended variant */
3060 s.packets_in = m_cnt;
3061 s.bytes_in = m_totlen;
3062
3063 (void) ifnet_input_common(ifp, m_head, m_tail,
3064 &s, TRUE, TRUE);
3065 } else {
3066 if (dlil_verbose > 1) {
3067 printf("%s: no packets, "
3068 "pkts avg %d max %d, wreq avg %d, "
3069 "bytes avg %d\n",
3070 if_name(ifp), inp->rxpoll_pavg,
3071 inp->rxpoll_pmax, inp->rxpoll_wavg,
3072 inp->rxpoll_bavg);
3073 }
3074
3075 (void) ifnet_input_common(ifp, NULL, NULL,
3076 NULL, FALSE, TRUE);
3077 }
3078
3079 /* Release the io ref count */
3080 ifnet_decr_iorefcnt(ifp);
3081
3082 lck_mtx_lock_spin(&ifp->if_poll_lock);
3083
3084 /* if there's no pending request, we're done */
3085 if (req == ifp->if_poll_req) {
3086 break;
3087 }
3088 }
3089 ifp->if_poll_req = 0;
3090 ifp->if_poll_active = 0;
3091
3092 /*
3093 * Wakeup N ns from now, else sleep indefinitely (ts = NULL)
3094 * until ifnet_poll() is called again.
3095 */
3096 ts = &ifp->if_poll_cycle;
3097 if (ts->tv_sec == 0 && ts->tv_nsec == 0)
3098 ts = NULL;
3099 }
3100
3101 /* NOTREACHED */
3102 }
3103
3104 void
3105 ifnet_set_poll_cycle(struct ifnet *ifp, struct timespec *ts)
3106 {
3107 if (ts == NULL)
3108 bzero(&ifp->if_poll_cycle, sizeof (ifp->if_poll_cycle));
3109 else
3110 *(&ifp->if_poll_cycle) = *ts;
3111
3112 if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose)
3113 printf("%s: poll interval set to %lu nsec\n",
3114 if_name(ifp), ts->tv_nsec);
3115 }
3116
3117 void
3118 ifnet_purge(struct ifnet *ifp)
3119 {
3120 if (ifp != NULL && (ifp->if_eflags & IFEF_TXSTART))
3121 if_qflush(ifp, 0);
3122 }
3123
3124 void
3125 ifnet_update_sndq(struct ifclassq *ifq, cqev_t ev)
3126 {
3127 IFCQ_LOCK_ASSERT_HELD(ifq);
3128
3129 if (!(IFCQ_IS_READY(ifq)))
3130 return;
3131
3132 if (IFCQ_TBR_IS_ENABLED(ifq)) {
3133 struct tb_profile tb = { ifq->ifcq_tbr.tbr_rate_raw,
3134 ifq->ifcq_tbr.tbr_percent, 0 };
3135 (void) ifclassq_tbr_set(ifq, &tb, FALSE);
3136 }
3137
3138 ifclassq_update(ifq, ev);
3139 }
3140
3141 void
3142 ifnet_update_rcv(struct ifnet *ifp, cqev_t ev)
3143 {
3144 switch (ev) {
3145 case CLASSQ_EV_LINK_BANDWIDTH:
3146 if (net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL))
3147 ifp->if_poll_update++;
3148 break;
3149
3150 default:
3151 break;
3152 }
3153 }
3154
3155 errno_t
3156 ifnet_set_output_sched_model(struct ifnet *ifp, u_int32_t model)
3157 {
3158 struct ifclassq *ifq;
3159 u_int32_t omodel;
3160 errno_t err;
3161
3162 if (ifp == NULL || model >= IFNET_SCHED_MODEL_MAX)
3163 return (EINVAL);
3164 else if (!(ifp->if_eflags & IFEF_TXSTART))
3165 return (ENXIO);
3166
3167 ifq = &ifp->if_snd;
3168 IFCQ_LOCK(ifq);
3169 omodel = ifp->if_output_sched_model;
3170 ifp->if_output_sched_model = model;
3171 if ((err = ifclassq_pktsched_setup(ifq)) != 0)
3172 ifp->if_output_sched_model = omodel;
3173 IFCQ_UNLOCK(ifq);
3174
3175 return (err);
3176 }
3177
3178 errno_t
3179 ifnet_set_sndq_maxlen(struct ifnet *ifp, u_int32_t maxqlen)
3180 {
3181 if (ifp == NULL)
3182 return (EINVAL);
3183 else if (!(ifp->if_eflags & IFEF_TXSTART))
3184 return (ENXIO);
3185
3186 ifclassq_set_maxlen(&ifp->if_snd, maxqlen);
3187
3188 return (0);
3189 }
3190
3191 errno_t
3192 ifnet_get_sndq_maxlen(struct ifnet *ifp, u_int32_t *maxqlen)
3193 {
3194 if (ifp == NULL || maxqlen == NULL)
3195 return (EINVAL);
3196 else if (!(ifp->if_eflags & IFEF_TXSTART))
3197 return (ENXIO);
3198
3199 *maxqlen = ifclassq_get_maxlen(&ifp->if_snd);
3200
3201 return (0);
3202 }
3203
3204 errno_t
3205 ifnet_get_sndq_len(struct ifnet *ifp, u_int32_t *pkts)
3206 {
3207 errno_t err;
3208
3209 if (ifp == NULL || pkts == NULL)
3210 err = EINVAL;
3211 else if (!(ifp->if_eflags & IFEF_TXSTART))
3212 err = ENXIO;
3213 else
3214 err = ifclassq_get_len(&ifp->if_snd, MBUF_SC_UNSPEC,
3215 pkts, NULL);
3216
3217 return (err);
3218 }
3219
3220 errno_t
3221 ifnet_get_service_class_sndq_len(struct ifnet *ifp, mbuf_svc_class_t sc,
3222 u_int32_t *pkts, u_int32_t *bytes)
3223 {
3224 errno_t err;
3225
3226 if (ifp == NULL || !MBUF_VALID_SC(sc) ||
3227 (pkts == NULL && bytes == NULL))
3228 err = EINVAL;
3229 else if (!(ifp->if_eflags & IFEF_TXSTART))
3230 err = ENXIO;
3231 else
3232 err = ifclassq_get_len(&ifp->if_snd, sc, pkts, bytes);
3233
3234 return (err);
3235 }
3236
3237 errno_t
3238 ifnet_set_rcvq_maxlen(struct ifnet *ifp, u_int32_t maxqlen)
3239 {
3240 struct dlil_threading_info *inp;
3241
3242 if (ifp == NULL)
3243 return (EINVAL);
3244 else if (!(ifp->if_eflags & IFEF_RXPOLL) || ifp->if_inp == NULL)
3245 return (ENXIO);
3246
3247 if (maxqlen == 0)
3248 maxqlen = if_rcvq_maxlen;
3249 else if (maxqlen < IF_RCVQ_MINLEN)
3250 maxqlen = IF_RCVQ_MINLEN;
3251
3252 inp = ifp->if_inp;
3253 lck_mtx_lock(&inp->input_lck);
3254 qlimit(&inp->rcvq_pkts) = maxqlen;
3255 lck_mtx_unlock(&inp->input_lck);
3256
3257 return (0);
3258 }
3259
3260 errno_t
3261 ifnet_get_rcvq_maxlen(struct ifnet *ifp, u_int32_t *maxqlen)
3262 {
3263 struct dlil_threading_info *inp;
3264
3265 if (ifp == NULL || maxqlen == NULL)
3266 return (EINVAL);
3267 else if (!(ifp->if_eflags & IFEF_RXPOLL) || ifp->if_inp == NULL)
3268 return (ENXIO);
3269
3270 inp = ifp->if_inp;
3271 lck_mtx_lock(&inp->input_lck);
3272 *maxqlen = qlimit(&inp->rcvq_pkts);
3273 lck_mtx_unlock(&inp->input_lck);
3274 return (0);
3275 }
3276
3277 void
3278 ifnet_enqueue_multi_setup(struct ifnet *ifp, uint16_t delay_qlen,
3279 uint16_t delay_timeout)
3280 {
3281 if (delay_qlen > 0 && delay_timeout > 0) {
3282 ifp->if_eflags |= IFEF_ENQUEUE_MULTI;
3283 ifp->if_start_delay_qlen = min(100, delay_qlen);
3284 ifp->if_start_delay_timeout = min(20000, delay_timeout);
3285 /* convert timeout to nanoseconds */
3286 ifp->if_start_delay_timeout *= 1000;
3287 kprintf("%s: forced IFEF_ENQUEUE_MULTI qlen %u timeout %u\n",
3288 ifp->if_xname, (uint32_t)delay_qlen,
3289 (uint32_t)delay_timeout);
3290 } else {
3291 ifp->if_eflags &= ~IFEF_ENQUEUE_MULTI;
3292 }
3293 }
3294
3295 static inline errno_t
3296 ifnet_enqueue_common(struct ifnet *ifp, void *p, classq_pkt_type_t ptype,
3297 boolean_t flush, boolean_t *pdrop)
3298 {
3299 volatile uint64_t *fg_ts = NULL;
3300 volatile uint64_t *rt_ts = NULL;
3301 struct mbuf *m = p;
3302 struct timespec now;
3303 u_int64_t now_nsec = 0;
3304 int error = 0;
3305
3306 ASSERT(ifp->if_eflags & IFEF_TXSTART);
3307
3308 /*
3309 * If packet already carries a timestamp, either from dlil_output()
3310 * or from flowswitch, use it here. Otherwise, record timestamp.
3311 * PKTF_TS_VALID is always cleared prior to entering classq, i.e.
3312 * the timestamp value is used internally there.
3313 */
3314 switch (ptype) {
3315 case QP_MBUF:
3316 ASSERT(m->m_flags & M_PKTHDR);
3317 ASSERT(m->m_nextpkt == NULL);
3318
3319 if (!(m->m_pkthdr.pkt_flags & PKTF_TS_VALID) ||
3320 m->m_pkthdr.pkt_timestamp == 0) {
3321 nanouptime(&now);
3322 net_timernsec(&now, &now_nsec);
3323 m->m_pkthdr.pkt_timestamp = now_nsec;
3324 }
3325 m->m_pkthdr.pkt_flags &= ~PKTF_TS_VALID;
3326 /*
3327 * If the packet service class is not background,
3328 * update the timestamp to indicate recent activity
3329 * on a foreground socket.
3330 */
3331 if ((m->m_pkthdr.pkt_flags & PKTF_FLOW_ID) &&
3332 m->m_pkthdr.pkt_flowsrc == FLOWSRC_INPCB) {
3333 if (!(m->m_pkthdr.pkt_flags & PKTF_SO_BACKGROUND)) {
3334 ifp->if_fg_sendts = _net_uptime;
3335 if (fg_ts != NULL)
3336 *fg_ts = _net_uptime;
3337 }
3338 if (m->m_pkthdr.pkt_flags & PKTF_SO_REALTIME) {
3339 ifp->if_rt_sendts = _net_uptime;
3340 if (rt_ts != NULL)
3341 *rt_ts = _net_uptime;
3342 }
3343 }
3344 break;
3345
3346
3347 default:
3348 VERIFY(0);
3349 /* NOTREACHED */
3350 }
3351
3352 if (ifp->if_eflags & IFEF_ENQUEUE_MULTI) {
3353 if (now_nsec == 0) {
3354 nanouptime(&now);
3355 net_timernsec(&now, &now_nsec);
3356 }
3357 /*
3358 * If the driver chose to delay the start callback for
3359 * coalescing multiple packets, then use the following
3360 * heuristics to make sure that the start callback is
3361 * delayed only when bulk data transfer is detected.
3362 * 1. number of packets enqueued in (delay_win * 2) is
3363 * greater than or equal to the delay qlen.
3364 * 2. If delay_start is enabled it will stay enabled for
3365 * another 10 idle windows. This is to take into account
3366 * variable RTT and burst traffic.
3367 * 3. If the time elapsed since last enqueue is more
3368 * than 200ms we disable delaying the start callback. This is
3369 * to take idle time into account.
3370 */
3371 u_int64_t dwin = (ifp->if_start_delay_timeout << 1);
3372 if (ifp->if_start_delay_swin > 0) {
3373 if ((ifp->if_start_delay_swin + dwin) > now_nsec) {
3374 ifp->if_start_delay_cnt++;
3375 } else if ((now_nsec - ifp->if_start_delay_swin)
3376 >= (200 * 1000 * 1000)) {
3377 ifp->if_start_delay_swin = now_nsec;
3378 ifp->if_start_delay_cnt = 1;
3379 ifp->if_start_delay_idle = 0;
3380 if (ifp->if_eflags & IFEF_DELAY_START) {
3381 ifp->if_eflags &=
3382 ~(IFEF_DELAY_START);
3383 ifnet_delay_start_disabled++;
3384 }
3385 } else {
3386 if (ifp->if_start_delay_cnt >=
3387 ifp->if_start_delay_qlen) {
3388 ifp->if_eflags |= IFEF_DELAY_START;
3389 ifp->if_start_delay_idle = 0;
3390 } else {
3391 if (ifp->if_start_delay_idle >= 10) {
3392 ifp->if_eflags &= ~(IFEF_DELAY_START);
3393 ifnet_delay_start_disabled++;
3394 } else {
3395 ifp->if_start_delay_idle++;
3396 }
3397 }
3398 ifp->if_start_delay_swin = now_nsec;
3399 ifp->if_start_delay_cnt = 1;
3400 }
3401 } else {
3402 ifp->if_start_delay_swin = now_nsec;
3403 ifp->if_start_delay_cnt = 1;
3404 ifp->if_start_delay_idle = 0;
3405 ifp->if_eflags &= ~(IFEF_DELAY_START);
3406 }
3407 } else {
3408 ifp->if_eflags &= ~(IFEF_DELAY_START);
3409 }
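/*
 * Worked example of the heuristic above (illustrative numbers): with
 * if_start_delay_qlen of 10 and if_start_delay_timeout of 1ms, dwin is
 * 2ms.  If 10 or more packets are enqueued within a 2ms window,
 * IFEF_DELAY_START is set so the starter thread can batch the dequeues;
 * if subsequent windows stay below that count 10 times in a row, or the
 * gap since the last window exceeds 200ms, IFEF_DELAY_START is cleared
 * and the start callback is no longer deferred.
 */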
3410
3411 switch (ptype) {
3412 case QP_MBUF:
3413 /* enqueue the packet (caller consumes object) */
3414 error = ifclassq_enqueue(&ifp->if_snd, m, QP_MBUF, pdrop);
3415 m = NULL;
3416 break;
3417
3418
3419 default:
3420 break;
3421 }
3422
3423 /*
3424 * Tell the driver to start dequeueing; do this even when the queue
3425 * for the packet is suspended (EQSUSPENDED), as the driver could still
3426 * be dequeueing from other unsuspended queues.
3427 */
3428 if (!(ifp->if_eflags & IFEF_ENQUEUE_MULTI) &&
3429 ((error == 0 && flush) || error == EQFULL || error == EQSUSPENDED))
3430 ifnet_start(ifp);
3431
3432 return (error);
3433 }
3434
3435 errno_t
3436 ifnet_enqueue(struct ifnet *ifp, struct mbuf *m)
3437 {
3438 boolean_t pdrop;
3439 return (ifnet_enqueue_mbuf(ifp, m, TRUE, &pdrop));
3440 }
3441
3442 errno_t
3443 ifnet_enqueue_mbuf(struct ifnet *ifp, struct mbuf *m, boolean_t flush,
3444 boolean_t *pdrop)
3445 {
3446 if (ifp == NULL || m == NULL || !(m->m_flags & M_PKTHDR) ||
3447 m->m_nextpkt != NULL) {
3448 if (m != NULL) {
3449 m_freem_list(m);
3450 *pdrop = TRUE;
3451 }
3452 return (EINVAL);
3453 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3454 !IF_FULLY_ATTACHED(ifp)) {
3455 /* flag tested without lock for performance */
3456 m_freem(m);
3457 *pdrop = TRUE;
3458 return (ENXIO);
3459 } else if (!(ifp->if_flags & IFF_UP)) {
3460 m_freem(m);
3461 *pdrop = TRUE;
3462 return (ENETDOWN);
3463 }
3464
3465 return (ifnet_enqueue_common(ifp, m, QP_MBUF, flush, pdrop));
3466 }
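/*
 * Illustrative sketch (mydrv_* names are hypothetical): a driver using
 * the pull-based output model hands fully-framed packets to the
 * interface send queue and lets the scheduler invoke its start
 * callback later:
 *
 *	static errno_t
 *	mydrv_output(ifnet_t ifp, mbuf_t m)
 *	{
 *		// ifnet_enqueue() consumes the mbuf and calls
 *		// ifnet_start() when appropriate.
 *		return (ifnet_enqueue(ifp, m));
 *	}
 */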
3467
3468
3469 errno_t
3470 ifnet_dequeue(struct ifnet *ifp, struct mbuf **mp)
3471 {
3472 errno_t rc;
3473 classq_pkt_type_t ptype;
3474 if (ifp == NULL || mp == NULL)
3475 return (EINVAL);
3476 else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3477 ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX)
3478 return (ENXIO);
3479 if (!ifnet_is_attached(ifp, 1))
3480 return (ENXIO);
3481
3482 rc = ifclassq_dequeue(&ifp->if_snd, 1, CLASSQ_DEQUEUE_MAX_BYTE_LIMIT,
3483 (void **)mp, NULL, NULL, NULL, &ptype);
3484 VERIFY((*mp == NULL) || (ptype == QP_MBUF));
3485 ifnet_decr_iorefcnt(ifp);
3486
3487 return (rc);
3488 }
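/*
 * Illustrative sketch (mydrv_* names are hypothetical): a driver's
 * start callback can drain the send queue one packet at a time:
 *
 *	static void
 *	mydrv_start(ifnet_t ifp)
 *	{
 *		mbuf_t m;
 *
 *		// stop when the queue is empty or an error is returned
 *		while (ifnet_dequeue(ifp, &m) == 0 && m != NULL)
 *			mydrv_hw_transmit(ifp, m);
 *	}
 */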
3489
3490 errno_t
3491 ifnet_dequeue_service_class(struct ifnet *ifp, mbuf_svc_class_t sc,
3492 struct mbuf **mp)
3493 {
3494 errno_t rc;
3495 classq_pkt_type_t ptype;
3496 if (ifp == NULL || mp == NULL || !MBUF_VALID_SC(sc))
3497 return (EINVAL);
3498 else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3499 ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX)
3500 return (ENXIO);
3501 if (!ifnet_is_attached(ifp, 1))
3502 return (ENXIO);
3503
3504 rc = ifclassq_dequeue_sc(&ifp->if_snd, sc, 1,
3505 CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, (void **)mp, NULL, NULL,
3506 NULL, &ptype);
3507 VERIFY((*mp == NULL) || (ptype == QP_MBUF));
3508 ifnet_decr_iorefcnt(ifp);
3509 return (rc);
3510 }
3511
3512 errno_t
3513 ifnet_dequeue_multi(struct ifnet *ifp, u_int32_t pkt_limit,
3514 struct mbuf **head, struct mbuf **tail, u_int32_t *cnt, u_int32_t *len)
3515 {
3516 errno_t rc;
3517 classq_pkt_type_t ptype;
3518 if (ifp == NULL || head == NULL || pkt_limit < 1)
3519 return (EINVAL);
3520 else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3521 ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX)
3522 return (ENXIO);
3523 if (!ifnet_is_attached(ifp, 1))
3524 return (ENXIO);
3525
3526 rc = ifclassq_dequeue(&ifp->if_snd, pkt_limit,
3527 CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, (void **)head, (void **)tail, cnt,
3528 len, &ptype);
3529 VERIFY((*head == NULL) || (ptype == QP_MBUF));
3530 ifnet_decr_iorefcnt(ifp);
3531 return (rc);
3532 }
3533
3534 errno_t
3535 ifnet_dequeue_multi_bytes(struct ifnet *ifp, u_int32_t byte_limit,
3536 struct mbuf **head, struct mbuf **tail, u_int32_t *cnt, u_int32_t *len)
3537 {
3538 errno_t rc;
3539 classq_pkt_type_t ptype;
3540 if (ifp == NULL || head == NULL || byte_limit < 1)
3541 return (EINVAL);
3542 else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3543 ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX)
3544 return (ENXIO);
3545 if (!ifnet_is_attached(ifp, 1))
3546 return (ENXIO);
3547
3548 rc = ifclassq_dequeue(&ifp->if_snd, CLASSQ_DEQUEUE_MAX_PKT_LIMIT,
3549 byte_limit, (void **)head, (void **)tail, cnt, len, &ptype);
3550 VERIFY((*head == NULL) || (ptype == QP_MBUF));
3551 ifnet_decr_iorefcnt(ifp);
3552 return (rc);
3553 }
3554
3555 errno_t
3556 ifnet_dequeue_service_class_multi(struct ifnet *ifp, mbuf_svc_class_t sc,
3557 u_int32_t pkt_limit, struct mbuf **head, struct mbuf **tail, u_int32_t *cnt,
3558 u_int32_t *len)
3559 {
3560 errno_t rc;
3561 classq_pkt_type_t ptype;
3562 if (ifp == NULL || head == NULL || pkt_limit < 1 ||
3563 !MBUF_VALID_SC(sc))
3564 return (EINVAL);
3565 else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3566 ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX)
3567 return (ENXIO);
3568 if (!ifnet_is_attached(ifp, 1))
3569 return (ENXIO);
3570
3571 rc = ifclassq_dequeue_sc(&ifp->if_snd, sc, pkt_limit,
3572 CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, (void **)head,
3573 (void **)tail, cnt, len, &ptype);
3574 VERIFY((*head == NULL) || (ptype == QP_MBUF));
3575 ifnet_decr_iorefcnt(ifp);
3576 return (rc);
3577 }
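/*
 * Illustrative sketch (hypothetical driver): the *_multi variants let
 * the start callback pull a whole chain in one call for batched
 * transmission:
 *
 *	mbuf_t head = NULL, tail = NULL;
 *	u_int32_t cnt = 0, len = 0;
 *
 *	if (ifnet_dequeue_multi(ifp, 32, &head, &tail, &cnt, &len) == 0 &&
 *	    head != NULL)
 *		mydrv_hw_transmit_chain(ifp, head, cnt, len);
 */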
3578
3579 #if !CONFIG_EMBEDDED
3580 errno_t
3581 ifnet_framer_stub(struct ifnet *ifp, struct mbuf **m,
3582 const struct sockaddr *dest, const char *dest_linkaddr,
3583 const char *frame_type, u_int32_t *pre, u_int32_t *post)
3584 {
3585 if (pre != NULL)
3586 *pre = 0;
3587 if (post != NULL)
3588 *post = 0;
3589
3590 return (ifp->if_framer_legacy(ifp, m, dest, dest_linkaddr, frame_type));
3591 }
3592 #endif /* !CONFIG_EMBEDDED */
3593
3594 static int
3595 dlil_interface_filters_input(struct ifnet *ifp, struct mbuf **m_p,
3596 char **frame_header_p, protocol_family_t protocol_family)
3597 {
3598 struct ifnet_filter *filter;
3599
3600 /*
3601 * Pass the inbound packet to the interface filters
3602 */
3603 lck_mtx_lock_spin(&ifp->if_flt_lock);
3604 /* prevent filter list from changing in case we drop the lock */
3605 if_flt_monitor_busy(ifp);
3606 TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
3607 int result;
3608
3609 if (!filter->filt_skip && filter->filt_input != NULL &&
3610 (filter->filt_protocol == 0 ||
3611 filter->filt_protocol == protocol_family)) {
3612 lck_mtx_unlock(&ifp->if_flt_lock);
3613
3614 result = (*filter->filt_input)(filter->filt_cookie,
3615 ifp, protocol_family, m_p, frame_header_p);
3616
3617 lck_mtx_lock_spin(&ifp->if_flt_lock);
3618 if (result != 0) {
3619 /* we're done with the filter list */
3620 if_flt_monitor_unbusy(ifp);
3621 lck_mtx_unlock(&ifp->if_flt_lock);
3622 return (result);
3623 }
3624 }
3625 }
3626 /* we're done with the filter list */
3627 if_flt_monitor_unbusy(ifp);
3628 lck_mtx_unlock(&ifp->if_flt_lock);
3629
3630 /*
3631 * Strip away the M_PROTO1 bit prior to sending the packet up the stack,
3632 * as it is meant to be local to a subsystem (if_bridge uses M_PROTO1)
3633 */
3634 if (*m_p != NULL)
3635 (*m_p)->m_flags &= ~M_PROTO1;
3636
3637 return (0);
3638 }
3639
3640 static int
3641 dlil_interface_filters_output(struct ifnet *ifp, struct mbuf **m_p,
3642 protocol_family_t protocol_family)
3643 {
3644 struct ifnet_filter *filter;
3645
3646 /*
3647 * Pass the outbound packet to the interface filters
3648 */
3649 lck_mtx_lock_spin(&ifp->if_flt_lock);
3650 /* prevent filter list from changing in case we drop the lock */
3651 if_flt_monitor_busy(ifp);
3652 TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
3653 int result;
3654
3655 if (!filter->filt_skip && filter->filt_output != NULL &&
3656 (filter->filt_protocol == 0 ||
3657 filter->filt_protocol == protocol_family)) {
3658 lck_mtx_unlock(&ifp->if_flt_lock);
3659
3660 result = filter->filt_output(filter->filt_cookie, ifp,
3661 protocol_family, m_p);
3662
3663 lck_mtx_lock_spin(&ifp->if_flt_lock);
3664 if (result != 0) {
3665 /* we're done with the filter list */
3666 if_flt_monitor_unbusy(ifp);
3667 lck_mtx_unlock(&ifp->if_flt_lock);
3668 return (result);
3669 }
3670 }
3671 }
3672 /* we're done with the filter list */
3673 if_flt_monitor_unbusy(ifp);
3674 lck_mtx_unlock(&ifp->if_flt_lock);
3675
3676 return (0);
3677 }
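/*
 * Illustrative sketch (myfilt_* names are hypothetical): an interface
 * filter's input callback returns 0 to let the packet continue through
 * the remaining filters and up the stack, EJUSTRETURN if it has taken
 * ownership of the mbuf, or any other error to have the caller free
 * the packet and stop processing it:
 *
 *	static errno_t
 *	myfilt_input(void *cookie, ifnet_t ifp, protocol_family_t proto,
 *	    mbuf_t *data, char **frame_ptr)
 *	{
 *		if (!myfilt_interested(proto))
 *			return (0);		// pass it along untouched
 *		myfilt_consume(*data);
 *		return (EJUSTRETURN);		// filter now owns the mbuf
 *	}
 */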
3678
3679 static void
3680 dlil_ifproto_input(struct if_proto * ifproto, mbuf_t m)
3681 {
3682 int error;
3683
3684 if (ifproto->proto_kpi == kProtoKPI_v1) {
3685 /* Version 1 protocols get one packet at a time */
3686 while (m != NULL) {
3687 char * frame_header;
3688 mbuf_t next_packet;
3689
3690 next_packet = m->m_nextpkt;
3691 m->m_nextpkt = NULL;
3692 frame_header = m->m_pkthdr.pkt_hdr;
3693 m->m_pkthdr.pkt_hdr = NULL;
3694 error = (*ifproto->kpi.v1.input)(ifproto->ifp,
3695 ifproto->protocol_family, m, frame_header);
3696 if (error != 0 && error != EJUSTRETURN)
3697 m_freem(m);
3698 m = next_packet;
3699 }
3700 } else if (ifproto->proto_kpi == kProtoKPI_v2) {
3701 /* Version 2 protocols support packet lists */
3702 error = (*ifproto->kpi.v2.input)(ifproto->ifp,
3703 ifproto->protocol_family, m);
3704 if (error != 0 && error != EJUSTRETURN)
3705 m_freem_list(m);
3706 }
3707 }
3708
3709 static void
3710 dlil_input_stats_add(const struct ifnet_stat_increment_param *s,
3711 struct dlil_threading_info *inp, boolean_t poll)
3712 {
3713 struct ifnet_stat_increment_param *d = &inp->stats;
3714
3715 if (s->packets_in != 0)
3716 d->packets_in += s->packets_in;
3717 if (s->bytes_in != 0)
3718 d->bytes_in += s->bytes_in;
3719 if (s->errors_in != 0)
3720 d->errors_in += s->errors_in;
3721
3722 if (s->packets_out != 0)
3723 d->packets_out += s->packets_out;
3724 if (s->bytes_out != 0)
3725 d->bytes_out += s->bytes_out;
3726 if (s->errors_out != 0)
3727 d->errors_out += s->errors_out;
3728
3729 if (s->collisions != 0)
3730 d->collisions += s->collisions;
3731 if (s->dropped != 0)
3732 d->dropped += s->dropped;
3733
3734 if (poll)
3735 PKTCNTR_ADD(&inp->tstats, s->packets_in, s->bytes_in);
3736 }
3737
3738 static void
3739 dlil_input_stats_sync(struct ifnet *ifp, struct dlil_threading_info *inp)
3740 {
3741 struct ifnet_stat_increment_param *s = &inp->stats;
3742
3743 /*
3744 * Use of atomic operations is unavoidable here because
3745 * these stats may also be incremented elsewhere via KPIs.
3746 */
3747 if (s->packets_in != 0) {
3748 atomic_add_64(&ifp->if_data.ifi_ipackets, s->packets_in);
3749 s->packets_in = 0;
3750 }
3751 if (s->bytes_in != 0) {
3752 atomic_add_64(&ifp->if_data.ifi_ibytes, s->bytes_in);
3753 s->bytes_in = 0;
3754 }
3755 if (s->errors_in != 0) {
3756 atomic_add_64(&ifp->if_data.ifi_ierrors, s->errors_in);
3757 s->errors_in = 0;
3758 }
3759
3760 if (s->packets_out != 0) {
3761 atomic_add_64(&ifp->if_data.ifi_opackets, s->packets_out);
3762 s->packets_out = 0;
3763 }
3764 if (s->bytes_out != 0) {
3765 atomic_add_64(&ifp->if_data.ifi_obytes, s->bytes_out);
3766 s->bytes_out = 0;
3767 }
3768 if (s->errors_out != 0) {
3769 atomic_add_64(&ifp->if_data.ifi_oerrors, s->errors_out);
3770 s->errors_out = 0;
3771 }
3772
3773 if (s->collisions != 0) {
3774 atomic_add_64(&ifp->if_data.ifi_collisions, s->collisions);
3775 s->collisions = 0;
3776 }
3777 if (s->dropped != 0) {
3778 atomic_add_64(&ifp->if_data.ifi_iqdrops, s->dropped);
3779 s->dropped = 0;
3780 }
3781
3782 if (ifp->if_data_threshold != 0) {
3783 lck_mtx_convert_spin(&inp->input_lck);
3784 ifnet_notify_data_threshold(ifp);
3785 }
3786
3787 /*
3788 * No need for atomic operations as they are modified here
3789 * only from within the DLIL input thread context.
3790 */
3791 if (inp->tstats.packets != 0) {
3792 inp->pstats.ifi_poll_packets += inp->tstats.packets;
3793 inp->tstats.packets = 0;
3794 }
3795 if (inp->tstats.bytes != 0) {
3796 inp->pstats.ifi_poll_bytes += inp->tstats.bytes;
3797 inp->tstats.bytes = 0;
3798 }
3799 }
3800
3801 __private_extern__ void
3802 dlil_input_packet_list(struct ifnet *ifp, struct mbuf *m)
3803 {
3804 return (dlil_input_packet_list_common(ifp, m, 0,
3805 IFNET_MODEL_INPUT_POLL_OFF, FALSE));
3806 }
3807
3808 __private_extern__ void
3809 dlil_input_packet_list_extended(struct ifnet *ifp, struct mbuf *m,
3810 u_int32_t cnt, ifnet_model_t mode)
3811 {
3812 return (dlil_input_packet_list_common(ifp, m, cnt, mode, TRUE));
3813 }
3814
3815 static void
3816 dlil_input_packet_list_common(struct ifnet *ifp_param, struct mbuf *m,
3817 u_int32_t cnt, ifnet_model_t mode, boolean_t ext)
3818 {
3819 int error = 0;
3820 protocol_family_t protocol_family;
3821 mbuf_t next_packet;
3822 ifnet_t ifp = ifp_param;
3823 char * frame_header;
3824 struct if_proto * last_ifproto = NULL;
3825 mbuf_t pkt_first = NULL;
3826 mbuf_t * pkt_next = NULL;
3827 u_int32_t poll_thresh = 0, poll_ival = 0;
3828
3829 KERNEL_DEBUG(DBG_FNC_DLIL_INPUT | DBG_FUNC_START, 0, 0, 0, 0, 0);
3830
3831 if (ext && mode == IFNET_MODEL_INPUT_POLL_ON && cnt > 1 &&
3832 (poll_ival = if_rxpoll_interval_pkts) > 0)
3833 poll_thresh = cnt;
3834
3835 while (m != NULL) {
3836 struct if_proto *ifproto = NULL;
3837 int iorefcnt = 0;
3838 uint32_t pktf_mask; /* pkt flags to preserve */
3839
3840 if (ifp_param == NULL)
3841 ifp = m->m_pkthdr.rcvif;
3842
3843 if ((ifp->if_eflags & IFEF_RXPOLL) && poll_thresh != 0 &&
3844 poll_ival > 0 && (--poll_thresh % poll_ival) == 0)
3845 ifnet_poll(ifp);
3846
3847 /* Check if this mbuf looks valid */
3848 MBUF_INPUT_CHECK(m, ifp);
3849
3850 next_packet = m->m_nextpkt;
3851 m->m_nextpkt = NULL;
3852 frame_header = m->m_pkthdr.pkt_hdr;
3853 m->m_pkthdr.pkt_hdr = NULL;
3854
3855 /*
3856 * Get an IO reference count if the interface is not
3857 * loopback (lo0) and it is attached; lo0 never goes
3858 * away, so optimize for that.
3859 */
3860 if (ifp != lo_ifp) {
3861 if (!ifnet_is_attached(ifp, 1)) {
3862 m_freem(m);
3863 goto next;
3864 }
3865 iorefcnt = 1;
3866 /*
3867 * Preserve the time stamp if it was set.
3868 */
3869 pktf_mask = PKTF_TS_VALID;
3870 } else {
3871 /*
3872 * If this arrived on lo0, preserve interface addr
3873 * info to allow for connectivity between loopback
3874 * and local interface addresses.
3875 */
3876 pktf_mask = (PKTF_LOOP|PKTF_IFAINFO);
3877 }
3878
3879 /* make sure packet comes in clean */
3880 m_classifier_init(m, pktf_mask);
3881
3882 ifp_inc_traffic_class_in(ifp, m);
3883
3884 /* find which protocol family this packet is for */
3885 ifnet_lock_shared(ifp);
3886 error = (*ifp->if_demux)(ifp, m, frame_header,
3887 &protocol_family);
3888 ifnet_lock_done(ifp);
3889 if (error != 0) {
3890 if (error == EJUSTRETURN)
3891 goto next;
3892 protocol_family = 0;
3893 }
3894
3895 if (hwcksum_dbg != 0 && !(ifp->if_flags & IFF_LOOPBACK) &&
3896 !(m->m_pkthdr.pkt_flags & PKTF_LOOP))
3897 dlil_input_cksum_dbg(ifp, m, frame_header,
3898 protocol_family);
3899
3900 /*
3901 * For partial checksum offload, we expect the driver to
3902 * set the start offset indicating the start of the span
3903 * that is covered by the hardware-computed checksum;
3904 * adjust this start offset accordingly because the data
3905 * pointer has been advanced beyond the link-layer header.
3906 *
3907 * Don't adjust if the interface is a bridge member, as
3908 * the adjustment will occur from the context of the
3909 * bridge interface during input.
3910 */
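/*
 * Illustrative example: on a typical Ethernet interface m_data has been
 * advanced 14 bytes (the link-layer header) past frame_header, so adj
 * below is 14 and csum_rx_start is reduced by 14 to stay relative to
 * the current data pointer.
 */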
3911 if (ifp->if_bridge == NULL && (m->m_pkthdr.csum_flags &
3912 (CSUM_DATA_VALID | CSUM_PARTIAL)) ==
3913 (CSUM_DATA_VALID | CSUM_PARTIAL)) {
3914 int adj;
3915
3916 if (frame_header == NULL ||
3917 frame_header < (char *)mbuf_datastart(m) ||
3918 frame_header > (char *)m->m_data ||
3919 (adj = (m->m_data - frame_header)) >
3920 m->m_pkthdr.csum_rx_start) {
3921 m->m_pkthdr.csum_data = 0;
3922 m->m_pkthdr.csum_flags &= ~CSUM_DATA_VALID;
3923 hwcksum_in_invalidated++;
3924 } else {
3925 m->m_pkthdr.csum_rx_start -= adj;
3926 }
3927 }
3928
3929 pktap_input(ifp, protocol_family, m, frame_header);
3930
3931 if (m->m_flags & (M_BCAST|M_MCAST))
3932 atomic_add_64(&ifp->if_imcasts, 1);
3933
3934 /* run interface filters, exclude VLAN packets PR-3586856 */
3935 if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) == 0) {
3936 error = dlil_interface_filters_input(ifp, &m,
3937 &frame_header, protocol_family);
3938 if (error != 0) {
3939 if (error != EJUSTRETURN)
3940 m_freem(m);
3941 goto next;
3942 }
3943 }
3944 if (error != 0 || ((m->m_flags & M_PROMISC) != 0)) {
3945 m_freem(m);
3946 goto next;
3947 }
3948
3949 /* Lookup the protocol attachment to this interface */
3950 if (protocol_family == 0) {
3951 ifproto = NULL;
3952 } else if (last_ifproto != NULL && last_ifproto->ifp == ifp &&
3953 (last_ifproto->protocol_family == protocol_family)) {
3954 VERIFY(ifproto == NULL);
3955 ifproto = last_ifproto;
3956 if_proto_ref(last_ifproto);
3957 } else {
3958 VERIFY(ifproto == NULL);
3959 ifnet_lock_shared(ifp);
3960 /* callee holds a proto refcnt upon success */
3961 ifproto = find_attached_proto(ifp, protocol_family);
3962 ifnet_lock_done(ifp);
3963 }
3964 if (ifproto == NULL) {
3965 /* no protocol for this packet, discard */
3966 m_freem(m);
3967 goto next;
3968 }
3969 if (ifproto != last_ifproto) {
3970 if (last_ifproto != NULL) {
3971 /* pass up the list for the previous protocol */
3972 dlil_ifproto_input(last_ifproto, pkt_first);
3973 pkt_first = NULL;
3974 if_proto_free(last_ifproto);
3975 }
3976 last_ifproto = ifproto;
3977 if_proto_ref(ifproto);
3978 }
3979 /* extend the list */
3980 m->m_pkthdr.pkt_hdr = frame_header;
3981 if (pkt_first == NULL) {
3982 pkt_first = m;
3983 } else {
3984 *pkt_next = m;
3985 }
3986 pkt_next = &m->m_nextpkt;
3987
3988 next:
3989 if (next_packet == NULL && last_ifproto != NULL) {
3990 /* pass up the last list of packets */
3991 dlil_ifproto_input(last_ifproto, pkt_first);
3992 if_proto_free(last_ifproto);
3993 last_ifproto = NULL;
3994 }
3995 if (ifproto != NULL) {
3996 if_proto_free(ifproto);
3997 ifproto = NULL;
3998 }
3999
4000 m = next_packet;
4001
4002 /* update the driver's multicast filter, if needed */
4003 if (ifp->if_updatemcasts > 0 && if_mcasts_update(ifp) == 0)
4004 ifp->if_updatemcasts = 0;
4005 if (iorefcnt == 1)
4006 ifnet_decr_iorefcnt(ifp);
4007 }
4008
4009 KERNEL_DEBUG(DBG_FNC_DLIL_INPUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
4010 }
4011
4012 errno_t
4013 if_mcasts_update(struct ifnet *ifp)
4014 {
4015 errno_t err;
4016
4017 err = ifnet_ioctl(ifp, 0, SIOCADDMULTI, NULL);
4018 if (err == EAFNOSUPPORT)
4019 err = 0;
4020 printf("%s: %s %d suspended link-layer multicast membership(s) "
4021 "(err=%d)\n", if_name(ifp),
4022 (err == 0 ? "successfully restored" : "failed to restore"),
4023 ifp->if_updatemcasts, err);
4024
4025 /* just return success */
4026 return (0);
4027 }
4028
4029 /* If ifp is set, we will increment the generation for the interface */
4030 int
4031 dlil_post_complete_msg(struct ifnet *ifp, struct kev_msg *event)
4032 {
4033 if (ifp != NULL) {
4034 ifnet_increment_generation(ifp);
4035 }
4036
4037 #if NECP
4038 necp_update_all_clients();
4039 #endif /* NECP */
4040
4041 return (kev_post_msg(event));
4042 }
4043
4044 __private_extern__ void
4045 dlil_post_sifflags_msg(struct ifnet * ifp)
4046 {
4047 struct kev_msg ev_msg;
4048 struct net_event_data ev_data;
4049
4050 bzero(&ev_data, sizeof (ev_data));
4051 bzero(&ev_msg, sizeof (ev_msg));
4052 ev_msg.vendor_code = KEV_VENDOR_APPLE;
4053 ev_msg.kev_class = KEV_NETWORK_CLASS;
4054 ev_msg.kev_subclass = KEV_DL_SUBCLASS;
4055 ev_msg.event_code = KEV_DL_SIFFLAGS;
4056 strlcpy(&ev_data.if_name[0], ifp->if_name, IFNAMSIZ);
4057 ev_data.if_family = ifp->if_family;
4058 ev_data.if_unit = (u_int32_t) ifp->if_unit;
4059 ev_msg.dv[0].data_length = sizeof(struct net_event_data);
4060 ev_msg.dv[0].data_ptr = &ev_data;
4061 ev_msg.dv[1].data_length = 0;
4062 dlil_post_complete_msg(ifp, &ev_msg);
4063 }
4064
4065 #define TMP_IF_PROTO_ARR_SIZE 10
4066 static int
4067 dlil_event_internal(struct ifnet *ifp, struct kev_msg *event, bool update_generation)
4068 {
4069 struct ifnet_filter *filter = NULL;
4070 struct if_proto *proto = NULL;
4071 int if_proto_count = 0;
4072 struct if_proto **tmp_ifproto_arr = NULL;
4073 struct if_proto *tmp_ifproto_stack_arr[TMP_IF_PROTO_ARR_SIZE] = {NULL};
4074 int tmp_ifproto_arr_idx = 0;
4075 bool tmp_malloc = false;
4076
4077 /*
4078 * Pass the event to the interface filters
4079 */
4080 lck_mtx_lock_spin(&ifp->if_flt_lock);
4081 /* prevent filter list from changing in case we drop the lock */
4082 if_flt_monitor_busy(ifp);
4083 TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
4084 if (filter->filt_event != NULL) {
4085 lck_mtx_unlock(&ifp->if_flt_lock);
4086
4087 filter->filt_event(filter->filt_cookie, ifp,
4088 filter->filt_protocol, event);
4089
4090 lck_mtx_lock_spin(&ifp->if_flt_lock);
4091 }
4092 }
4093 /* we're done with the filter list */
4094 if_flt_monitor_unbusy(ifp);
4095 lck_mtx_unlock(&ifp->if_flt_lock);
4096
4097 /* Get an io ref count if the interface is attached */
4098 if (!ifnet_is_attached(ifp, 1))
4099 goto done;
4100
4101 /*
4102 * An embedded tmp_list_entry in if_proto may still get
4103 * overwritten by another thread after giving up the ifnet lock,
4104 * therefore we avoid embedded pointers here.
4105 */
4106 ifnet_lock_shared(ifp);
4107 if_proto_count = dlil_ifp_protolist(ifp, NULL, 0);
4108 if (if_proto_count) {
4109 int i;
4110 VERIFY(ifp->if_proto_hash != NULL);
4111 if (if_proto_count <= TMP_IF_PROTO_ARR_SIZE) {
4112 tmp_ifproto_arr = tmp_ifproto_stack_arr;
4113 } else {
4114 MALLOC(tmp_ifproto_arr, struct if_proto **,
4115 sizeof (*tmp_ifproto_arr) * if_proto_count,
4116 M_TEMP, M_ZERO);
4117 if (tmp_ifproto_arr == NULL) {
4118 ifnet_lock_done(ifp);
4119 goto cleanup;
4120 }
4121 tmp_malloc = true;
4122 }
4123
4124 for (i = 0; i < PROTO_HASH_SLOTS; i++) {
4125 SLIST_FOREACH(proto, &ifp->if_proto_hash[i],
4126 next_hash) {
4127 if_proto_ref(proto);
4128 tmp_ifproto_arr[tmp_ifproto_arr_idx] = proto;
4129 tmp_ifproto_arr_idx++;
4130 }
4131 }
4132 VERIFY(if_proto_count == tmp_ifproto_arr_idx);
4133 }
4134 ifnet_lock_done(ifp);
4135
4136 for (tmp_ifproto_arr_idx = 0; tmp_ifproto_arr_idx < if_proto_count;
4137 tmp_ifproto_arr_idx++) {
4138 proto = tmp_ifproto_arr[tmp_ifproto_arr_idx];
4139 VERIFY(proto != NULL);
4140 proto_media_event eventp =
4141 (proto->proto_kpi == kProtoKPI_v1 ?
4142 proto->kpi.v1.event :
4143 proto->kpi.v2.event);
4144
4145 if (eventp != NULL) {
4146 eventp(ifp, proto->protocol_family,
4147 event);
4148 }
4149 if_proto_free(proto);
4150 }
4151
4152 cleanup:
4153 if (tmp_malloc) {
4154 FREE(tmp_ifproto_arr, M_TEMP);
4155 }
4156
4157 /* Pass the event to the interface */
4158 if (ifp->if_event != NULL)
4159 ifp->if_event(ifp, event);
4160
4161 /* Release the io ref count */
4162 ifnet_decr_iorefcnt(ifp);
4163 done:
4164 return (dlil_post_complete_msg(update_generation ? ifp : NULL, event));
4165 }
4166
4167 errno_t
4168 ifnet_event(ifnet_t ifp, struct kern_event_msg *event)
4169 {
4170 struct kev_msg kev_msg;
4171 int result = 0;
4172
4173 if (ifp == NULL || event == NULL)
4174 return (EINVAL);
4175
4176 bzero(&kev_msg, sizeof (kev_msg));
4177 kev_msg.vendor_code = event->vendor_code;
4178 kev_msg.kev_class = event->kev_class;
4179 kev_msg.kev_subclass = event->kev_subclass;
4180 kev_msg.event_code = event->event_code;
4181 kev_msg.dv[0].data_ptr = &event->event_data[0];
4182 kev_msg.dv[0].data_length = event->total_size - KEV_MSG_HEADER_SIZE;
4183 kev_msg.dv[1].data_length = 0;
4184
4185 result = dlil_event_internal(ifp, &kev_msg, TRUE);
4186
4187 return (result);
4188 }
4189
4190 #if CONFIG_MACF_NET
4191 #include <netinet/ip6.h>
4192 #include <netinet/ip.h>
4193 static int
4194 dlil_get_socket_type(struct mbuf **mp, int family, int raw)
4195 {
4196 struct mbuf *m;
4197 struct ip *ip;
4198 struct ip6_hdr *ip6;
4199 int type = SOCK_RAW;
4200
4201 if (!raw) {
4202 switch (family) {
4203 case PF_INET:
4204 m = m_pullup(*mp, sizeof(struct ip));
4205 if (m == NULL)
4206 break;
4207 *mp = m;
4208 ip = mtod(m, struct ip *);
4209 if (ip->ip_p == IPPROTO_TCP)
4210 type = SOCK_STREAM;
4211 else if (ip->ip_p == IPPROTO_UDP)
4212 type = SOCK_DGRAM;
4213 break;
4214 case PF_INET6:
4215 m = m_pullup(*mp, sizeof(struct ip6_hdr));
4216 if (m == NULL)
4217 break;
4218 *mp = m;
4219 ip6 = mtod(m, struct ip6_hdr *);
4220 if (ip6->ip6_nxt == IPPROTO_TCP)
4221 type = SOCK_STREAM;
4222 else if (ip6->ip6_nxt == IPPROTO_UDP)
4223 type = SOCK_DGRAM;
4224 break;
4225 }
4226 }
4227
4228 return (type);
4229 }
4230 #endif
4231
4232 static void
4233 dlil_count_chain_len(mbuf_t m, struct chain_len_stats *cls)
4234 {
4235 mbuf_t n = m;
4236 int chainlen = 0;
4237
4238 while (n != NULL) {
4239 chainlen++;
4240 n = n->m_next;
4241 }
4242 switch (chainlen) {
4243 case 0:
4244 break;
4245 case 1:
4246 atomic_add_64(&cls->cls_one, 1);
4247 break;
4248 case 2:
4249 atomic_add_64(&cls->cls_two, 1);
4250 break;
4251 case 3:
4252 atomic_add_64(&cls->cls_three, 1);
4253 break;
4254 case 4:
4255 atomic_add_64(&cls->cls_four, 1);
4256 break;
4257 case 5:
4258 default:
4259 atomic_add_64(&cls->cls_five_or_more, 1);
4260 break;
4261 }
4262 }
4263
4264 /*
4265 * dlil_output
4266 *
4267 * Caller should have a lock on the protocol domain if the protocol
4268 * doesn't support finer grained locking. In most cases, the lock
4269 * will be held from the socket layer and won't be released until
4270 * we return back to the socket layer.
4271 *
4272 * This does mean that we must take a protocol lock before we take
4273 * an interface lock if we're going to take both. This makes sense
4274 * because a protocol is likely to interact with an ifp while it
4275 * is under the protocol lock.
4276 *
4277 * An advisory code will be returned if adv is not null. This
4278 * can be used to provide feedback about interface queues to the
4279 * application.
4280 */
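/*
 * Illustrative note on the advisory code: in the transmit paths below,
 * a return of EQFULL from the driver or queue is reported to the caller
 * as FADV_FLOW_CONTROLLED and EQSUSPENDED as FADV_SUSPENDED, after
 * which the error itself is cleared so the packet is not treated as an
 * output failure.
 */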
4281 errno_t
4282 dlil_output(ifnet_t ifp, protocol_family_t proto_family, mbuf_t packetlist,
4283 void *route, const struct sockaddr *dest, int raw, struct flowadv *adv)
4284 {
4285 char *frame_type = NULL;
4286 char *dst_linkaddr = NULL;
4287 int retval = 0;
4288 char frame_type_buffer[MAX_FRAME_TYPE_SIZE * 4];
4289 char dst_linkaddr_buffer[MAX_LINKADDR * 4];
4290 struct if_proto *proto = NULL;
4291 mbuf_t m;
4292 mbuf_t send_head = NULL;
4293 mbuf_t *send_tail = &send_head;
4294 int iorefcnt = 0;
4295 u_int32_t pre = 0, post = 0;
4296 u_int32_t fpkts = 0, fbytes = 0;
4297 int32_t flen = 0;
4298 struct timespec now;
4299 u_int64_t now_nsec;
4300
4301 KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_START, 0, 0, 0, 0, 0);
4302
4303 /*
4304 * Get an io refcnt if the interface is attached to prevent ifnet_detach
4305 * from happening while this operation is in progress
4306 */
4307 if (!ifnet_is_attached(ifp, 1)) {
4308 retval = ENXIO;
4309 goto cleanup;
4310 }
4311 iorefcnt = 1;
4312
4313 VERIFY(ifp->if_output_dlil != NULL);
4314
4315 /* update the driver's multicast filter, if needed */
4316 if (ifp->if_updatemcasts > 0 && if_mcasts_update(ifp) == 0)
4317 ifp->if_updatemcasts = 0;
4318
4319 frame_type = frame_type_buffer;
4320 dst_linkaddr = dst_linkaddr_buffer;
4321
4322 if (raw == 0) {
4323 ifnet_lock_shared(ifp);
4324 /* callee holds a proto refcnt upon success */
4325 proto = find_attached_proto(ifp, proto_family);
4326 if (proto == NULL) {
4327 ifnet_lock_done(ifp);
4328 retval = ENXIO;
4329 goto cleanup;
4330 }
4331 ifnet_lock_done(ifp);
4332 }
4333
4334 preout_again:
4335 if (packetlist == NULL)
4336 goto cleanup;
4337
4338 m = packetlist;
4339 packetlist = packetlist->m_nextpkt;
4340 m->m_nextpkt = NULL;
4341
4342 if (raw == 0) {
4343 proto_media_preout preoutp = (proto->proto_kpi == kProtoKPI_v1 ?
4344 proto->kpi.v1.pre_output : proto->kpi.v2.pre_output);
4345 retval = 0;
4346 if (preoutp != NULL) {
4347 retval = preoutp(ifp, proto_family, &m, dest, route,
4348 frame_type, dst_linkaddr);
4349
4350 if (retval != 0) {
4351 if (retval == EJUSTRETURN)
4352 goto preout_again;
4353 m_freem(m);
4354 goto cleanup;
4355 }
4356 }
4357 }
4358
4359 #if CONFIG_MACF_NET
4360 retval = mac_ifnet_check_transmit(ifp, m, proto_family,
4361 dlil_get_socket_type(&m, proto_family, raw));
4362 if (retval != 0) {
4363 m_freem(m);
4364 goto cleanup;
4365 }
4366 #endif
4367
4368 do {
4369 #if CONFIG_DTRACE
4370 if (!raw && proto_family == PF_INET) {
4371 struct ip *ip = mtod(m, struct ip *);
4372 DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL,
4373 struct ip *, ip, struct ifnet *, ifp,
4374 struct ip *, ip, struct ip6_hdr *, NULL);
4375
4376 } else if (!raw && proto_family == PF_INET6) {
4377 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
4378 DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL,
4379 struct ip6_hdr *, ip6, struct ifnet *, ifp,
4380 struct ip *, NULL, struct ip6_hdr *, ip6);
4381 }
4382 #endif /* CONFIG_DTRACE */
4383
4384 if (raw == 0 && ifp->if_framer != NULL) {
4385 int rcvif_set = 0;
4386
4387 /*
4388 * If this is a broadcast packet that needs to be
4389 * looped back into the system, set the inbound ifp
4390 * to that of the outbound ifp. This will allow
4391 * us to determine that it is a legitimate packet
4392 * for the system. Only set the ifp if it's not
4393 * already set, just to be safe.
4394 */
4395 if ((m->m_flags & (M_BCAST | M_LOOP)) &&
4396 m->m_pkthdr.rcvif == NULL) {
4397 m->m_pkthdr.rcvif = ifp;
4398 rcvif_set = 1;
4399 }
4400
4401 retval = ifp->if_framer(ifp, &m, dest, dst_linkaddr,
4402 frame_type, &pre, &post);
4403 if (retval != 0) {
4404 if (retval != EJUSTRETURN)
4405 m_freem(m);
4406 goto next;
4407 }
4408
4409 /*
4410 * For partial checksum offload, adjust the start
4411 * and stuff offsets based on the prepended header.
4412 */
4413 if ((m->m_pkthdr.csum_flags &
4414 (CSUM_DATA_VALID | CSUM_PARTIAL)) ==
4415 (CSUM_DATA_VALID | CSUM_PARTIAL)) {
4416 m->m_pkthdr.csum_tx_stuff += pre;
4417 m->m_pkthdr.csum_tx_start += pre;
4418 }
4419
4420 if (hwcksum_dbg != 0 && !(ifp->if_flags & IFF_LOOPBACK))
4421 dlil_output_cksum_dbg(ifp, m, pre,
4422 proto_family);
4423
4424 /*
4425 * Clear the ifp if it was set above, and to be
4426 * safe, only if it is still the same as the
4427 * outbound ifp we have in context. If it was
4428 * looped back, then a copy of it was sent to the
4429 * loopback interface with the rcvif set, and we
4430 * are clearing the one that will go down to the
4431 * layer below.
4432 */
4433 if (rcvif_set && m->m_pkthdr.rcvif == ifp)
4434 m->m_pkthdr.rcvif = NULL;
4435 }
4436
4437 /*
4438 * Let interface filters (if any) do their thing ...
4439 */
4440 /* Do not pass VLAN tagged packets to filters PR-3586856 */
4441 if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) == 0) {
4442 retval = dlil_interface_filters_output(ifp,
4443 &m, proto_family);
4444 if (retval != 0) {
4445 if (retval != EJUSTRETURN)
4446 m_freem(m);
4447 goto next;
4448 }
4449 }
4450 /*
4451 * Strip away the M_PROTO1 bit prior to sending the packet
4452 * to the driver, as this field may be used by the driver
4453 */
4454 m->m_flags &= ~M_PROTO1;
4455
4456 /*
4457 * If the underlying interface is not capable of handling a
4458 * packet whose data portion spans across physically disjoint
4459 * pages, we need to "normalize" the packet so that we pass
4460 * down a chain of mbufs where each mbuf points to a span that
4461 * resides within a single system page. If the packet does
4462 * not cross page(s), the following is a no-op.
4463 */
4464 if (!(ifp->if_hwassist & IFNET_MULTIPAGES)) {
4465 if ((m = m_normalize(m)) == NULL)
4466 goto next;
4467 }
4468
4469 /*
4470 * If this is a TSO packet, make sure the interface still
4471 * advertises TSO capability.
4472 */
4473 if (TSO_IPV4_NOTOK(ifp, m) || TSO_IPV6_NOTOK(ifp, m)) {
4474 retval = EMSGSIZE;
4475 m_freem(m);
4476 goto cleanup;
4477 }
4478
4479 ifp_inc_traffic_class_out(ifp, m);
4480 pktap_output(ifp, proto_family, m, pre, post);
4481
4482 /*
4483 * Count the number of elements in the mbuf chain
4484 */
4485 if (tx_chain_len_count) {
4486 dlil_count_chain_len(m, &tx_chain_len_stats);
4487 }
4488
4489 /*
4490 * Record timestamp; ifnet_enqueue() will use this info
4491 * rather than redoing the work. An optimization could
4492 * involve doing this just once at the top, if there are
4493 * no interface filters attached, but that's probably
4494 * not a big deal.
4495 */
4496 nanouptime(&now);
4497 net_timernsec(&now, &now_nsec);
4498 (void) mbuf_set_timestamp(m, now_nsec, TRUE);
4499
4500 /*
4501 * Discard partial sum information if this packet originated
4502 * from another interface; the packet would already have the
4503 * final checksum and we shouldn't recompute it.
4504 */
4505 if ((m->m_pkthdr.pkt_flags & PKTF_FORWARDED) &&
4506 (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID|CSUM_PARTIAL)) ==
4507 (CSUM_DATA_VALID|CSUM_PARTIAL)) {
4508 m->m_pkthdr.csum_flags &= ~CSUM_TX_FLAGS;
4509 m->m_pkthdr.csum_data = 0;
4510 }
4511
4512 /*
4513 * Finally, call the driver.
4514 */
4515 if (ifp->if_eflags & (IFEF_SENDLIST | IFEF_ENQUEUE_MULTI)) {
4516 if (m->m_pkthdr.pkt_flags & PKTF_FORWARDED) {
4517 flen += (m_pktlen(m) - (pre + post));
4518 m->m_pkthdr.pkt_flags &= ~PKTF_FORWARDED;
4519 }
4520 *send_tail = m;
4521 send_tail = &m->m_nextpkt;
4522 } else {
4523 if (m->m_pkthdr.pkt_flags & PKTF_FORWARDED) {
4524 flen = (m_pktlen(m) - (pre + post));
4525 m->m_pkthdr.pkt_flags &= ~PKTF_FORWARDED;
4526 } else {
4527 flen = 0;
4528 }
4529 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START,
4530 0, 0, 0, 0, 0);
4531 retval = (*ifp->if_output_dlil)(ifp, m);
4532 if (retval == EQFULL || retval == EQSUSPENDED) {
4533 if (adv != NULL && adv->code == FADV_SUCCESS) {
4534 adv->code = (retval == EQFULL ?
4535 FADV_FLOW_CONTROLLED :
4536 FADV_SUSPENDED);
4537 }
4538 retval = 0;
4539 }
4540 if (retval == 0 && flen > 0) {
4541 fbytes += flen;
4542 fpkts++;
4543 }
4544 if (retval != 0 && dlil_verbose) {
4545 printf("%s: output error on %s retval = %d\n",
4546 __func__, if_name(ifp),
4547 retval);
4548 }
4549 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END,
4550 0, 0, 0, 0, 0);
4551 }
4552 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
4553
4554 next:
4555 m = packetlist;
4556 if (m != NULL) {
4557 packetlist = packetlist->m_nextpkt;
4558 m->m_nextpkt = NULL;
4559 }
4560 } while (m != NULL);
4561
4562 if (send_head != NULL) {
4563 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START,
4564 0, 0, 0, 0, 0);
4565 if (ifp->if_eflags & IFEF_SENDLIST) {
4566 retval = (*ifp->if_output_dlil)(ifp, send_head);
4567 if (retval == EQFULL || retval == EQSUSPENDED) {
4568 if (adv != NULL) {
4569 adv->code = (retval == EQFULL ?
4570 FADV_FLOW_CONTROLLED :
4571 FADV_SUSPENDED);
4572 }
4573 retval = 0;
4574 }
4575 if (retval == 0 && flen > 0) {
4576 fbytes += flen;
4577 fpkts++;
4578 }
4579 if (retval != 0 && dlil_verbose) {
4580 printf("%s: output error on %s retval = %d\n",
4581 __func__, if_name(ifp), retval);
4582 }
4583 } else {
4584 struct mbuf *send_m;
4585 int enq_cnt = 0;
4586 VERIFY(ifp->if_eflags & IFEF_ENQUEUE_MULTI);
4587 while (send_head != NULL) {
4588 send_m = send_head;
4589 send_head = send_m->m_nextpkt;
4590 send_m->m_nextpkt = NULL;
4591 retval = (*ifp->if_output_dlil)(ifp, send_m);
4592 if (retval == EQFULL || retval == EQSUSPENDED) {
4593 if (adv != NULL) {
4594 adv->code = (retval == EQFULL ?
4595 FADV_FLOW_CONTROLLED :
4596 FADV_SUSPENDED);
4597 }
4598 retval = 0;
4599 }
4600 if (retval == 0) {
4601 enq_cnt++;
4602 if (flen > 0)
4603 fpkts++;
4604 }
4605 if (retval != 0 && dlil_verbose) {
4606 printf("%s: output error on %s "
4607 "retval = %d\n",
4608 __func__, if_name(ifp), retval);
4609 }
4610 }
4611 if (enq_cnt > 0) {
4612 fbytes += flen;
4613 ifnet_start(ifp);
4614 }
4615 }
4616 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
4617 }
4618
4619 KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
4620
4621 cleanup:
4622 if (fbytes > 0)
4623 ifp->if_fbytes += fbytes;
4624 if (fpkts > 0)
4625 ifp->if_fpackets += fpkts;
4626 if (proto != NULL)
4627 if_proto_free(proto);
4628 if (packetlist) /* if any packets are left, clean up */
4629 mbuf_freem_list(packetlist);
4630 if (retval == EJUSTRETURN)
4631 retval = 0;
4632 if (iorefcnt == 1)
4633 ifnet_decr_iorefcnt(ifp);
4634
4635 return (retval);
4636 }
4637
4638 errno_t
4639 ifnet_ioctl(ifnet_t ifp, protocol_family_t proto_fam, u_long ioctl_code,
4640 void *ioctl_arg)
4641 {
4642 struct ifnet_filter *filter;
4643 int retval = EOPNOTSUPP;
4644 int result = 0;
4645
4646 if (ifp == NULL || ioctl_code == 0)
4647 return (EINVAL);
4648
4649 /* Get an io ref count if the interface is attached */
4650 if (!ifnet_is_attached(ifp, 1))
4651 return (EOPNOTSUPP);
4652
4653 /*
4654 * Run the interface filters first.
4655 * We want to run all filters before calling the protocol,
4656 * interface family, or interface.
4657 */
4658 lck_mtx_lock_spin(&ifp->if_flt_lock);
4659 /* prevent filter list from changing in case we drop the lock */
4660 if_flt_monitor_busy(ifp);
4661 TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
4662 if (filter->filt_ioctl != NULL && (filter->filt_protocol == 0 ||
4663 filter->filt_protocol == proto_fam)) {
4664 lck_mtx_unlock(&ifp->if_flt_lock);
4665
4666 result = filter->filt_ioctl(filter->filt_cookie, ifp,
4667 proto_fam, ioctl_code, ioctl_arg);
4668
4669 lck_mtx_lock_spin(&ifp->if_flt_lock);
4670
4671 /* Only update retval if no one has handled the ioctl */
4672 if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
4673 if (result == ENOTSUP)
4674 result = EOPNOTSUPP;
4675 retval = result;
4676 if (retval != 0 && retval != EOPNOTSUPP) {
4677 /* we're done with the filter list */
4678 if_flt_monitor_unbusy(ifp);
4679 lck_mtx_unlock(&ifp->if_flt_lock);
4680 goto cleanup;
4681 }
4682 }
4683 }
4684 }
4685 /* we're done with the filter list */
4686 if_flt_monitor_unbusy(ifp);
4687 lck_mtx_unlock(&ifp->if_flt_lock);
4688
4689 /* Allow the protocol to handle the ioctl */
4690 if (proto_fam != 0) {
4691 struct if_proto *proto;
4692
4693 /* callee holds a proto refcnt upon success */
4694 ifnet_lock_shared(ifp);
4695 proto = find_attached_proto(ifp, proto_fam);
4696 ifnet_lock_done(ifp);
4697 if (proto != NULL) {
4698 proto_media_ioctl ioctlp =
4699 (proto->proto_kpi == kProtoKPI_v1 ?
4700 proto->kpi.v1.ioctl : proto->kpi.v2.ioctl);
4701 result = EOPNOTSUPP;
4702 if (ioctlp != NULL)
4703 result = ioctlp(ifp, proto_fam, ioctl_code,
4704 ioctl_arg);
4705 if_proto_free(proto);
4706
4707 /* Only update retval if no one has handled the ioctl */
4708 if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
4709 if (result == ENOTSUP)
4710 result = EOPNOTSUPP;
4711 retval = result;
4712 if (retval && retval != EOPNOTSUPP)
4713 goto cleanup;
4714 }
4715 }
4716 }
4717
4718 /* retval is either 0 or EOPNOTSUPP */
4719
4720 /*
4721 * Let the interface handle this ioctl.
4722 * If it returns EOPNOTSUPP, ignore that, we may have
4723 * already handled this in the protocol or family.
4724 */
4725 if (ifp->if_ioctl)
4726 result = (*ifp->if_ioctl)(ifp, ioctl_code, ioctl_arg);
4727
4728 /* Only update retval if no one has handled the ioctl */
4729 if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
4730 if (result == ENOTSUP)
4731 result = EOPNOTSUPP;
4732 retval = result;
4733 if (retval && retval != EOPNOTSUPP) {
4734 goto cleanup;
4735 }
4736 }
4737
4738 cleanup:
4739 if (retval == EJUSTRETURN)
4740 retval = 0;
4741
4742 ifnet_decr_iorefcnt(ifp);
4743
4744 return (retval);
4745 }
4746
4747 __private_extern__ errno_t
4748 dlil_set_bpf_tap(ifnet_t ifp, bpf_tap_mode mode, bpf_packet_func callback)
4749 {
4750 errno_t error = 0;
4751
4752
4753 if (ifp->if_set_bpf_tap) {
4754 /* Get an io reference on the interface if it is attached */
4755 if (!ifnet_is_attached(ifp, 1))
4756 return (ENXIO);
4757 error = ifp->if_set_bpf_tap(ifp, mode, callback);
4758 ifnet_decr_iorefcnt(ifp);
4759 }
4760 return (error);
4761 }
4762
4763 errno_t
4764 dlil_resolve_multi(struct ifnet *ifp, const struct sockaddr *proto_addr,
4765 struct sockaddr *ll_addr, size_t ll_len)
4766 {
4767 errno_t result = EOPNOTSUPP;
4768 struct if_proto *proto;
4769 const struct sockaddr *verify;
4770 proto_media_resolve_multi resolvep;
4771
4772 if (!ifnet_is_attached(ifp, 1))
4773 return (result);
4774
4775 bzero(ll_addr, ll_len);
4776
4777 /* Call the protocol first; callee holds a proto refcnt upon success */
4778 ifnet_lock_shared(ifp);
4779 proto = find_attached_proto(ifp, proto_addr->sa_family);
4780 ifnet_lock_done(ifp);
4781 if (proto != NULL) {
4782 resolvep = (proto->proto_kpi == kProtoKPI_v1 ?
4783 proto->kpi.v1.resolve_multi : proto->kpi.v2.resolve_multi);
4784 if (resolvep != NULL)
4785 result = resolvep(ifp, proto_addr,
4786 (struct sockaddr_dl *)(void *)ll_addr, ll_len);
4787 if_proto_free(proto);
4788 }
4789
4790 /* Let the interface verify the multicast address */
4791 if ((result == EOPNOTSUPP || result == 0) && ifp->if_check_multi) {
4792 if (result == 0)
4793 verify = ll_addr;
4794 else
4795 verify = proto_addr;
4796 result = ifp->if_check_multi(ifp, verify);
4797 }
4798
4799 ifnet_decr_iorefcnt(ifp);
4800 return (result);
4801 }
4802
4803 __private_extern__ errno_t
4804 dlil_send_arp_internal(ifnet_t ifp, u_short arpop,
4805 const struct sockaddr_dl *sender_hw, const struct sockaddr *sender_proto,
4806 const struct sockaddr_dl *target_hw, const struct sockaddr *target_proto)
4807 {
4808 struct if_proto *proto;
4809 errno_t result = 0;
4810
4811 /* callee holds a proto refcnt upon success */
4812 ifnet_lock_shared(ifp);
4813 proto = find_attached_proto(ifp, target_proto->sa_family);
4814 ifnet_lock_done(ifp);
4815 if (proto == NULL) {
4816 result = ENOTSUP;
4817 } else {
4818 proto_media_send_arp arpp;
4819 arpp = (proto->proto_kpi == kProtoKPI_v1 ?
4820 proto->kpi.v1.send_arp : proto->kpi.v2.send_arp);
4821 if (arpp == NULL) {
4822 result = ENOTSUP;
4823 } else {
4824 switch (arpop) {
4825 case ARPOP_REQUEST:
4826 arpstat.txrequests++;
4827 if (target_hw != NULL)
4828 arpstat.txurequests++;
4829 break;
4830 case ARPOP_REPLY:
4831 arpstat.txreplies++;
4832 break;
4833 }
4834 result = arpp(ifp, arpop, sender_hw, sender_proto,
4835 target_hw, target_proto);
4836 }
4837 if_proto_free(proto);
4838 }
4839
4840 return (result);
4841 }
4842
4843 struct net_thread_marks { };
4844 static const struct net_thread_marks net_thread_marks_base = { };
4845
4846 __private_extern__ const net_thread_marks_t net_thread_marks_none =
4847 &net_thread_marks_base;
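/*
 * The mark tokens work by pointer arithmetic against the zero-sized
 * base object above: net_thread_marks_push() returns &base[pop], where
 * pop is the subset of the requested bits that were not already set on
 * the current thread.  net_thread_marks_pop() recovers that subset from
 * the pointer distance and clears exactly those bits, so nested
 * push/pop pairs compose safely.
 */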
4848
4849 __private_extern__ net_thread_marks_t
4850 net_thread_marks_push(u_int32_t push)
4851 {
4852 static const char *const base = (const void*)&net_thread_marks_base;
4853 u_int32_t pop = 0;
4854
4855 if (push != 0) {
4856 struct uthread *uth = get_bsdthread_info(current_thread());
4857
4858 pop = push & ~uth->uu_network_marks;
4859 if (pop != 0)
4860 uth->uu_network_marks |= pop;
4861 }
4862
4863 return ((net_thread_marks_t)&base[pop]);
4864 }
4865
4866 __private_extern__ net_thread_marks_t
4867 net_thread_unmarks_push(u_int32_t unpush)
4868 {
4869 static const char *const base = (const void*)&net_thread_marks_base;
4870 u_int32_t unpop = 0;
4871
4872 if (unpush != 0) {
4873 struct uthread *uth = get_bsdthread_info(current_thread());
4874
4875 unpop = unpush & uth->uu_network_marks;
4876 if (unpop != 0)
4877 uth->uu_network_marks &= ~unpop;
4878 }
4879
4880 return ((net_thread_marks_t)&base[unpop]);
4881 }
4882
4883 __private_extern__ void
4884 net_thread_marks_pop(net_thread_marks_t popx)
4885 {
4886 static const char *const base = (const void*)&net_thread_marks_base;
4887 const ptrdiff_t pop = (const char *)popx - (const char *)base;
4888
4889 if (pop != 0) {
4890 static const ptrdiff_t ones = (ptrdiff_t)(u_int32_t)~0U;
4891 struct uthread *uth = get_bsdthread_info(current_thread());
4892
4893 VERIFY((pop & ones) == pop);
4894 VERIFY((ptrdiff_t)(uth->uu_network_marks & pop) == pop);
4895 uth->uu_network_marks &= ~pop;
4896 }
4897 }
4898
4899 __private_extern__ void
4900 net_thread_unmarks_pop(net_thread_marks_t unpopx)
4901 {
4902 static const char *const base = (const void*)&net_thread_marks_base;
4903 ptrdiff_t unpop = (const char *)unpopx - (const char *)base;
4904
4905 if (unpop != 0) {
4906 static const ptrdiff_t ones = (ptrdiff_t)(u_int32_t)~0U;
4907 struct uthread *uth = get_bsdthread_info(current_thread());
4908
4909 VERIFY((unpop & ones) == unpop);
4910 VERIFY((ptrdiff_t)(uth->uu_network_marks & unpop) == 0);
4911 uth->uu_network_marks |= unpop;
4912 }
4913 }
4914
4915 __private_extern__ u_int32_t
4916 net_thread_is_marked(u_int32_t check)
4917 {
4918 if (check != 0) {
4919 struct uthread *uth = get_bsdthread_info(current_thread());
4920 return (uth->uu_network_marks & check);
4921 }
4922 else
4923 return (0);
4924 }
4925
4926 __private_extern__ u_int32_t
4927 net_thread_is_unmarked(u_int32_t check)
4928 {
4929 if (check != 0) {
4930 struct uthread *uth = get_bsdthread_info(current_thread());
4931 return (~uth->uu_network_marks & check);
4932 }
4933 else
4934 return (0);
4935 }
4936
4937 static __inline__ int
4938 _is_announcement(const struct sockaddr_in * sender_sin,
4939 const struct sockaddr_in * target_sin)
4940 {
4941 if (sender_sin == NULL) {
4942 return (FALSE);
4943 }
4944 return (sender_sin->sin_addr.s_addr == target_sin->sin_addr.s_addr);
4945 }
4946
4947 __private_extern__ errno_t
4948 dlil_send_arp(ifnet_t ifp, u_short arpop, const struct sockaddr_dl *sender_hw,
4949 const struct sockaddr *sender_proto, const struct sockaddr_dl *target_hw,
4950 const struct sockaddr *target_proto0, u_int32_t rtflags)
4951 {
4952 errno_t result = 0;
4953 const struct sockaddr_in * sender_sin;
4954 const struct sockaddr_in * target_sin;
4955 struct sockaddr_inarp target_proto_sinarp;
4956 struct sockaddr *target_proto = (void *)(uintptr_t)target_proto0;
4957
4958 if (target_proto == NULL || (sender_proto != NULL &&
4959 sender_proto->sa_family != target_proto->sa_family))
4960 return (EINVAL);
4961
4962 /*
4963 * If the target is a (default) router, provide that
4964 * information to the send_arp callback routine.
4965 */
4966 if (rtflags & RTF_ROUTER) {
4967 bcopy(target_proto, &target_proto_sinarp,
4968 sizeof (struct sockaddr_in));
4969 target_proto_sinarp.sin_other |= SIN_ROUTER;
4970 target_proto = (struct sockaddr *)&target_proto_sinarp;
4971 }
4972
4973 /*
4974 * If this is an ARP request and the target IP is IPv4LL,
4975 * send the request on all interfaces. The exception is
4976 * an announcement, which must only appear on the specific
4977 * interface.
4978 */
4979 sender_sin = (struct sockaddr_in *)(void *)(uintptr_t)sender_proto;
4980 target_sin = (struct sockaddr_in *)(void *)(uintptr_t)target_proto;
4981 if (target_proto->sa_family == AF_INET &&
4982 IN_LINKLOCAL(ntohl(target_sin->sin_addr.s_addr)) &&
4983 ipv4_ll_arp_aware != 0 && arpop == ARPOP_REQUEST &&
4984 !_is_announcement(target_sin, sender_sin)) {
4985 ifnet_t *ifp_list;
4986 u_int32_t count;
4987 u_int32_t ifp_on;
4988
4989 result = ENOTSUP;
4990
4991 if (ifnet_list_get(IFNET_FAMILY_ANY, &ifp_list, &count) == 0) {
4992 for (ifp_on = 0; ifp_on < count; ifp_on++) {
4993 errno_t new_result;
4994 ifaddr_t source_hw = NULL;
4995 ifaddr_t source_ip = NULL;
4996 struct sockaddr_in source_ip_copy;
4997 struct ifnet *cur_ifp = ifp_list[ifp_on];
4998
4999 /*
5000 * Only ARP on interfaces marked for IPv4LL
5001 * ARPing. This may mean that we don't ARP on
5002 * the interface the subnet route points to.
5003 */
5004 if (!(cur_ifp->if_eflags & IFEF_ARPLL))
5005 continue;
5006
5007 /* Find the source IP address */
5008 ifnet_lock_shared(cur_ifp);
5009 source_hw = cur_ifp->if_lladdr;
5010 TAILQ_FOREACH(source_ip, &cur_ifp->if_addrhead,
5011 ifa_link) {
5012 IFA_LOCK(source_ip);
5013 if (source_ip->ifa_addr != NULL &&
5014 source_ip->ifa_addr->sa_family ==
5015 AF_INET) {
5016 /* Copy the source IP address */
5017 source_ip_copy =
5018 *(struct sockaddr_in *)
5019 (void *)source_ip->ifa_addr;
5020 IFA_UNLOCK(source_ip);
5021 break;
5022 }
5023 IFA_UNLOCK(source_ip);
5024 }
5025
5026 /* No IP Source, don't arp */
5027 if (source_ip == NULL) {
5028 ifnet_lock_done(cur_ifp);
5029 continue;
5030 }
5031
5032 IFA_ADDREF(source_hw);
5033 ifnet_lock_done(cur_ifp);
5034
5035 /* Send the ARP */
5036 new_result = dlil_send_arp_internal(cur_ifp,
5037 arpop, (struct sockaddr_dl *)(void *)
5038 source_hw->ifa_addr,
5039 (struct sockaddr *)&source_ip_copy, NULL,
5040 target_proto);
5041
5042 IFA_REMREF(source_hw);
5043 if (result == ENOTSUP) {
5044 result = new_result;
5045 }
5046 }
5047 ifnet_list_free(ifp_list);
5048 }
5049 } else {
5050 result = dlil_send_arp_internal(ifp, arpop, sender_hw,
5051 sender_proto, target_hw, target_proto);
5052 }
5053
5054 return (result);
5055 }
5056
5057 /*
5058 * Caller must hold ifnet head lock.
5059 */
5060 static int
5061 ifnet_lookup(struct ifnet *ifp)
5062 {
5063 struct ifnet *_ifp;
5064
5065 LCK_RW_ASSERT(&ifnet_head_lock, LCK_RW_ASSERT_HELD);
5066 TAILQ_FOREACH(_ifp, &ifnet_head, if_link) {
5067 if (_ifp == ifp)
5068 break;
5069 }
5070 return (_ifp != NULL);
5071 }
5072
5073 /*
5074 * Caller has to pass a non-zero refio argument to get an
5075 * IO reference count. This will prevent ifnet_detach from
5076 * being called when there are outstanding io reference counts.
5077 */
5078 int
5079 ifnet_is_attached(struct ifnet *ifp, int refio)
5080 {
5081 int ret;
5082
5083 lck_mtx_lock_spin(&ifp->if_ref_lock);
5084 if ((ret = IF_FULLY_ATTACHED(ifp))) {
5085 if (refio > 0)
5086 ifp->if_refio++;
5087 }
5088 lck_mtx_unlock(&ifp->if_ref_lock);
5089
5090 return (ret);
5091 }
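/*
 * Typical usage pattern (illustrative):
 *
 *	if (!ifnet_is_attached(ifp, 1))
 *		return (ENXIO);		// no IO reference was taken
 *	// ... safe to use ifp; ifnet_detach is held off ...
 *	ifnet_decr_iorefcnt(ifp);	// drop the IO reference
 */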
5092
5093 /*
5094 * Caller must ensure the interface is attached; the assumption is that
5095 * there is at least an outstanding IO reference count held already.
5096 * Most callers would call ifnet_is_attached() instead.
5097 */
5098 void
5099 ifnet_incr_iorefcnt(struct ifnet *ifp)
5100 {
5101 lck_mtx_lock_spin(&ifp->if_ref_lock);
5102 VERIFY(IF_FULLY_ATTACHED(ifp));
5103 VERIFY(ifp->if_refio > 0);
5104 ifp->if_refio++;
5105 lck_mtx_unlock(&ifp->if_ref_lock);
5106 }
5107
5108 void
5109 ifnet_decr_iorefcnt(struct ifnet *ifp)
5110 {
5111 lck_mtx_lock_spin(&ifp->if_ref_lock);
5112 VERIFY(ifp->if_refio > 0);
5113 VERIFY(ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING));
5114 ifp->if_refio--;
5115
5116 /*
5117 * If there are no more outstanding IO references, wake up the
5118 * ifnet_detach thread if the detaching flag is set.
5119 */
5120 if (ifp->if_refio == 0 && (ifp->if_refflags & IFRF_DETACHING))
5121 wakeup(&(ifp->if_refio));
5122
5123 lck_mtx_unlock(&ifp->if_ref_lock);
5124 }
5125
5126 static void
5127 dlil_if_trace(struct dlil_ifnet *dl_if, int refhold)
5128 {
5129 struct dlil_ifnet_dbg *dl_if_dbg = (struct dlil_ifnet_dbg *)dl_if;
5130 ctrace_t *tr;
5131 u_int32_t idx;
5132 u_int16_t *cnt;
5133
5134 if (!(dl_if->dl_if_flags & DLIF_DEBUG)) {
5135 panic("%s: dl_if %p has no debug structure", __func__, dl_if);
5136 /* NOTREACHED */
5137 }
5138
5139 if (refhold) {
5140 cnt = &dl_if_dbg->dldbg_if_refhold_cnt;
5141 tr = dl_if_dbg->dldbg_if_refhold;
5142 } else {
5143 cnt = &dl_if_dbg->dldbg_if_refrele_cnt;
5144 tr = dl_if_dbg->dldbg_if_refrele;
5145 }
5146
5147 idx = atomic_add_16_ov(cnt, 1) % IF_REF_TRACE_HIST_SIZE;
5148 ctrace_record(&tr[idx]);
5149 }
5150
5151 errno_t
5152 dlil_if_ref(struct ifnet *ifp)
5153 {
5154 struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
5155
5156 if (dl_if == NULL)
5157 return (EINVAL);
5158
5159 lck_mtx_lock_spin(&dl_if->dl_if_lock);
5160 ++dl_if->dl_if_refcnt;
5161 if (dl_if->dl_if_refcnt == 0) {
5162 panic("%s: wraparound refcnt for ifp=%p", __func__, ifp);
5163 /* NOTREACHED */
5164 }
5165 if (dl_if->dl_if_trace != NULL)
5166 (*dl_if->dl_if_trace)(dl_if, TRUE);
5167 lck_mtx_unlock(&dl_if->dl_if_lock);
5168
5169 return (0);
5170 }
5171
5172 errno_t
5173 dlil_if_free(struct ifnet *ifp)
5174 {
5175 struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
5176 bool need_release = FALSE;
5177
5178 if (dl_if == NULL)
5179 return (EINVAL);
5180
5181 lck_mtx_lock_spin(&dl_if->dl_if_lock);
5182 switch (dl_if->dl_if_refcnt) {
5183 case 0:
5184 panic("%s: negative refcnt for ifp=%p", __func__, ifp);
5185 /* NOTREACHED */
5186 break;
5187 case 1:
5188 if ((ifp->if_refflags & IFRF_EMBRYONIC) != 0) {
5189 need_release = TRUE;
5190 }
5191 break;
5192 default:
5193 break;
5194 }
5195 --dl_if->dl_if_refcnt;
5196 if (dl_if->dl_if_trace != NULL)
5197 (*dl_if->dl_if_trace)(dl_if, FALSE);
5198 lck_mtx_unlock(&dl_if->dl_if_lock);
5199 if (need_release) {
5200 dlil_if_release(ifp);
5201 }
5202 return (0);
5203 }
5204
5205 static errno_t
5206 dlil_attach_protocol_internal(struct if_proto *proto,
5207 const struct ifnet_demux_desc *demux_list, u_int32_t demux_count,
5208 uint32_t * proto_count)
5209 {
5210 struct kev_dl_proto_data ev_pr_data;
5211 struct ifnet *ifp = proto->ifp;
5212 int retval = 0;
5213 u_int32_t hash_value = proto_hash_value(proto->protocol_family);
5214 struct if_proto *prev_proto;
5215 struct if_proto *_proto;
5216
5217 /* callee holds a proto refcnt upon success */
5218 ifnet_lock_exclusive(ifp);
5219 _proto = find_attached_proto(ifp, proto->protocol_family);
5220 if (_proto != NULL) {
5221 ifnet_lock_done(ifp);
5222 if_proto_free(_proto);
5223 return (EEXIST);
5224 }
5225
5226 /*
5227 * Call the family module's add_proto routine so it can refine the
5228 * demux descriptors as it wishes.
5229 */
5230 retval = ifp->if_add_proto(ifp, proto->protocol_family, demux_list,
5231 demux_count);
5232 if (retval) {
5233 ifnet_lock_done(ifp);
5234 return (retval);
5235 }
5236
5237 /*
5238 * Insert the protocol in the hash
5239 */
5240 prev_proto = SLIST_FIRST(&ifp->if_proto_hash[hash_value]);
5241 while (prev_proto != NULL && SLIST_NEXT(prev_proto, next_hash) != NULL)
5242 prev_proto = SLIST_NEXT(prev_proto, next_hash);
5243 if (prev_proto)
5244 SLIST_INSERT_AFTER(prev_proto, proto, next_hash);
5245 else
5246 SLIST_INSERT_HEAD(&ifp->if_proto_hash[hash_value],
5247 proto, next_hash);
5248
5249 /* hold a proto refcnt for attach */
5250 if_proto_ref(proto);
5251
5252 /*
5253 * The reserved field carries the number of protocols still attached
5254 * (subject to change)
5255 */
5256 ev_pr_data.proto_family = proto->protocol_family;
5257 ev_pr_data.proto_remaining_count = dlil_ifp_protolist(ifp, NULL, 0);
5258
5259 ifnet_lock_done(ifp);
5260
5261 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_ATTACHED,
5262 (struct net_event_data *)&ev_pr_data,
5263 sizeof (struct kev_dl_proto_data));
5264 if (proto_count != NULL) {
5265 *proto_count = ev_pr_data.proto_remaining_count;
5266 }
5267 return (retval);
5268 }
5269
5270 errno_t
5271 ifnet_attach_protocol(ifnet_t ifp, protocol_family_t protocol,
5272 const struct ifnet_attach_proto_param *proto_details)
5273 {
5274 int retval = 0;
5275 struct if_proto *ifproto = NULL;
5276 uint32_t proto_count = 0;
5277
5278 ifnet_head_lock_shared();
5279 if (ifp == NULL || protocol == 0 || proto_details == NULL) {
5280 retval = EINVAL;
5281 goto end;
5282 }
5283 /* Check that the interface is in the global list */
5284 if (!ifnet_lookup(ifp)) {
5285 retval = ENXIO;
5286 goto end;
5287 }
5288
5289 ifproto = zalloc(dlif_proto_zone);
5290 if (ifproto == NULL) {
5291 retval = ENOMEM;
5292 goto end;
5293 }
5294 bzero(ifproto, dlif_proto_size);
5295
5296 /* refcnt held above during lookup */
5297 ifproto->ifp = ifp;
5298 ifproto->protocol_family = protocol;
5299 ifproto->proto_kpi = kProtoKPI_v1;
5300 ifproto->kpi.v1.input = proto_details->input;
5301 ifproto->kpi.v1.pre_output = proto_details->pre_output;
5302 ifproto->kpi.v1.event = proto_details->event;
5303 ifproto->kpi.v1.ioctl = proto_details->ioctl;
5304 ifproto->kpi.v1.detached = proto_details->detached;
5305 ifproto->kpi.v1.resolve_multi = proto_details->resolve;
5306 ifproto->kpi.v1.send_arp = proto_details->send_arp;
5307
5308 retval = dlil_attach_protocol_internal(ifproto,
5309 proto_details->demux_list, proto_details->demux_count,
5310 &proto_count);
5311
5312 end:
5313 if (retval != 0 && retval != EEXIST && ifp != NULL) {
5314 DLIL_PRINTF("%s: failed to attach v1 protocol %d (err=%d)\n",
5315 if_name(ifp), protocol, retval);
5316 } else {
5317 if (dlil_verbose) {
5318 printf("%s: attached v1 protocol %d (count = %d)\n",
5319 if_name(ifp),
5320 protocol, proto_count);
5321 }
5322 }
5323 ifnet_head_done();
5324 if (retval == 0) {
5325 /*
5326 * A protocol has been attached, mark the interface up.
5327 * This used to be done by configd.KernelEventMonitor, but that
5328 * is inherently prone to races (rdar://problem/30810208).
5329 */
5330 (void) ifnet_set_flags(ifp, IFF_UP, IFF_UP);
5331 (void) ifnet_ioctl(ifp, 0, SIOCSIFFLAGS, NULL);
5332 dlil_post_sifflags_msg(ifp);
5333 } else if (ifproto != NULL) {
5334 zfree(dlif_proto_zone, ifproto);
5335 }
5336 return (retval);
5337 }
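/*
 * Illustrative sketch (myproto_* names and PF_MYPROTO are hypothetical):
 *
 *	struct ifnet_attach_proto_param pr;
 *	errno_t error;
 *
 *	bzero(&pr, sizeof (pr));
 *	pr.input = myproto_input;
 *	pr.pre_output = myproto_pre_output;
 *	pr.demux_list = myproto_demux;
 *	pr.demux_count = myproto_demux_count;
 *	error = ifnet_attach_protocol(ifp, PF_MYPROTO, &pr);
 */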
5338
5339 errno_t
5340 ifnet_attach_protocol_v2(ifnet_t ifp, protocol_family_t protocol,
5341 const struct ifnet_attach_proto_param_v2 *proto_details)
5342 {
5343 int retval = 0;
5344 struct if_proto *ifproto = NULL;
5345 uint32_t proto_count = 0;
5346
5347 ifnet_head_lock_shared();
5348 if (ifp == NULL || protocol == 0 || proto_details == NULL) {
5349 retval = EINVAL;
5350 goto end;
5351 }
5352 /* Check that the interface is in the global list */
5353 if (!ifnet_lookup(ifp)) {
5354 retval = ENXIO;
5355 goto end;
5356 }
5357
5358 ifproto = zalloc(dlif_proto_zone);
5359 if (ifproto == NULL) {
5360 retval = ENOMEM;
5361 goto end;
5362 }
5363 bzero(ifproto, sizeof(*ifproto));
5364
5365 /* refcnt held above during lookup */
5366 ifproto->ifp = ifp;
5367 ifproto->protocol_family = protocol;
5368 ifproto->proto_kpi = kProtoKPI_v2;
5369 ifproto->kpi.v2.input = proto_details->input;
5370 ifproto->kpi.v2.pre_output = proto_details->pre_output;
5371 ifproto->kpi.v2.event = proto_details->event;
5372 ifproto->kpi.v2.ioctl = proto_details->ioctl;
5373 ifproto->kpi.v2.detached = proto_details->detached;
5374 ifproto->kpi.v2.resolve_multi = proto_details->resolve;
5375 ifproto->kpi.v2.send_arp = proto_details->send_arp;
5376
5377 retval = dlil_attach_protocol_internal(ifproto,
5378 proto_details->demux_list, proto_details->demux_count,
5379 &proto_count);
5380
5381 end:
5382 if (retval != 0 && retval != EEXIST && ifp != NULL) {
5383 DLIL_PRINTF("%s: failed to attach v2 protocol %d (err=%d)\n",
5384 if_name(ifp), protocol, retval);
5385 } else {
5386 if (dlil_verbose) {
5387 printf("%s: attached v2 protocol %d (count = %d)\n",
5388 if_name(ifp),
5389 protocol, proto_count);
5390 }
5391 }
5392 ifnet_head_done();
5393 if (retval == 0) {
5394 /*
5395 * A protocol has been attached, mark the interface up.
5396 * This used to be done by configd.KernelEventMonitor, but that
5397 * is inherently prone to races (rdar://problem/30810208).
5398 */
5399 (void) ifnet_set_flags(ifp, IFF_UP, IFF_UP);
5400 (void) ifnet_ioctl(ifp, 0, SIOCSIFFLAGS, NULL);
5401 dlil_post_sifflags_msg(ifp);
5402 } else if (ifproto != NULL) {
5403 zfree(dlif_proto_zone, ifproto);
5404 }
5405 return (retval);
5406 }
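/*
 * Illustrative sketch (not part of dlil.c): a hypothetical kernel protocol
 * module would typically fill in a v2 parameter block and attach itself
 * roughly as below.  All "example_" names and EXAMPLE_PROTO_FAMILY are
 * placeholders, and the use of DLIL_DESC_ETYPE2 assumes an interface that
 * demultiplexes on Ethernet ethertypes; only the fields consumed above
 * (demux_list, demux_count and the kpi.v2 callbacks) are relied upon.
 */
#define	EXAMPLE_PROTO_FAMILY	((protocol_family_t)1000)	/* placeholder */

static errno_t
example_proto_input(ifnet_t ifp, protocol_family_t protocol, mbuf_t m)
{
#pragma unused(ifp, protocol)
	/* a real module would hand the chain to its own input path */
	m_freem(m);
	return (0);
}

static errno_t
example_attach_proto(ifnet_t ifp)
{
	struct ifnet_attach_proto_param_v2 param;
	struct ifnet_demux_desc desc;
	u_int16_t etype = htons(0x88B5);	/* local experimental ethertype */

	bzero(&desc, sizeof (desc));
	desc.type = DLIL_DESC_ETYPE2;		/* match on ethertype */
	desc.data = &etype;
	desc.datalen = sizeof (etype);

	bzero(&param, sizeof (param));
	param.demux_list = &desc;
	param.demux_count = 1;
	param.input = example_proto_input;	/* other callbacks left NULL */

	return (ifnet_attach_protocol_v2(ifp, EXAMPLE_PROTO_FAMILY, &param));
}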
5407
5408 errno_t
5409 ifnet_detach_protocol(ifnet_t ifp, protocol_family_t proto_family)
5410 {
5411 struct if_proto *proto = NULL;
5412 int retval = 0;
5413
5414 if (ifp == NULL || proto_family == 0) {
5415 retval = EINVAL;
5416 goto end;
5417 }
5418
5419 ifnet_lock_exclusive(ifp);
5420 /* callee holds a proto refcnt upon success */
5421 proto = find_attached_proto(ifp, proto_family);
5422 if (proto == NULL) {
5423 retval = ENXIO;
5424 ifnet_lock_done(ifp);
5425 goto end;
5426 }
5427
5428 /* call family module del_proto */
5429 if (ifp->if_del_proto)
5430 ifp->if_del_proto(ifp, proto->protocol_family);
5431
5432 SLIST_REMOVE(&ifp->if_proto_hash[proto_hash_value(proto_family)],
5433 proto, if_proto, next_hash);
5434
5435 if (proto->proto_kpi == kProtoKPI_v1) {
5436 proto->kpi.v1.input = ifproto_media_input_v1;
5437 proto->kpi.v1.pre_output = ifproto_media_preout;
5438 proto->kpi.v1.event = ifproto_media_event;
5439 proto->kpi.v1.ioctl = ifproto_media_ioctl;
5440 proto->kpi.v1.resolve_multi = ifproto_media_resolve_multi;
5441 proto->kpi.v1.send_arp = ifproto_media_send_arp;
5442 } else {
5443 proto->kpi.v2.input = ifproto_media_input_v2;
5444 proto->kpi.v2.pre_output = ifproto_media_preout;
5445 proto->kpi.v2.event = ifproto_media_event;
5446 proto->kpi.v2.ioctl = ifproto_media_ioctl;
5447 proto->kpi.v2.resolve_multi = ifproto_media_resolve_multi;
5448 proto->kpi.v2.send_arp = ifproto_media_send_arp;
5449 }
5450 proto->detached = 1;
5451 ifnet_lock_done(ifp);
5452
5453 if (dlil_verbose) {
5454 printf("%s: detached %s protocol %d\n", if_name(ifp),
5455 (proto->proto_kpi == kProtoKPI_v1) ?
5456 "v1" : "v2", proto_family);
5457 }
5458
5459 /* release proto refcnt held during protocol attach */
5460 if_proto_free(proto);
5461
5462 /*
5463 * Release proto refcnt held during lookup; the rest of
5464 * protocol detach steps will happen when the last proto
5465 * reference is released.
5466 */
5467 if_proto_free(proto);
5468
5469 end:
5470 return (retval);
5471 }
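/*
 * Illustrative usage (not part of dlil.c): the hypothetical module above
 * undoes its attachment with a single call, e.g.
 *
 *	(void) ifnet_detach_protocol(ifp, EXAMPLE_PROTO_FAMILY);
 *
 * Note that the two if_proto_free() calls above drop the attach-time and
 * lookup references respectively; the remaining detach work runs only when
 * the last protocol reference is released.
 */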
5472
5473
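/*
 * The ifproto_media_* routines below are inert placeholders.  When a
 * protocol is detached (see ifnet_detach_protocol above), the if_proto's
 * KPI callbacks are redirected to these stubs, so a caller that still
 * holds a reference gets a harmless ENXIO (or a no-op for events) instead
 * of calling into a protocol module that may already be gone.
 */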
5474 static errno_t
5475 ifproto_media_input_v1(struct ifnet *ifp, protocol_family_t protocol,
5476 struct mbuf *packet, char *header)
5477 {
5478 #pragma unused(ifp, protocol, packet, header)
5479 return (ENXIO);
5480 }
5481
5482 static errno_t
5483 ifproto_media_input_v2(struct ifnet *ifp, protocol_family_t protocol,
5484 struct mbuf *packet)
5485 {
5486 #pragma unused(ifp, protocol, packet)
5487 return (ENXIO);
5488
5489 }
5490
5491 static errno_t
5492 ifproto_media_preout(struct ifnet *ifp, protocol_family_t protocol,
5493 mbuf_t *packet, const struct sockaddr *dest, void *route, char *frame_type,
5494 char *link_layer_dest)
5495 {
5496 #pragma unused(ifp, protocol, packet, dest, route, frame_type, link_layer_dest)
5497 return (ENXIO);
5498
5499 }
5500
5501 static void
5502 ifproto_media_event(struct ifnet *ifp, protocol_family_t protocol,
5503 const struct kev_msg *event)
5504 {
5505 #pragma unused(ifp, protocol, event)
5506 }
5507
5508 static errno_t
5509 ifproto_media_ioctl(struct ifnet *ifp, protocol_family_t protocol,
5510 unsigned long command, void *argument)
5511 {
5512 #pragma unused(ifp, protocol, command, argument)
5513 return (ENXIO);
5514 }
5515
5516 static errno_t
5517 ifproto_media_resolve_multi(ifnet_t ifp, const struct sockaddr *proto_addr,
5518 struct sockaddr_dl *out_ll, size_t ll_len)
5519 {
5520 #pragma unused(ifp, proto_addr, out_ll, ll_len)
5521 return (ENXIO);
5522 }
5523
5524 static errno_t
5525 ifproto_media_send_arp(struct ifnet *ifp, u_short arpop,
5526 const struct sockaddr_dl *sender_hw, const struct sockaddr *sender_proto,
5527 const struct sockaddr_dl *target_hw, const struct sockaddr *target_proto)
5528 {
5529 #pragma unused(ifp, arpop, sender_hw, sender_proto, target_hw, target_proto)
5530 return (ENXIO);
5531 }
5532
5533 extern int if_next_index(void);
5534 extern int tcp_ecn_outbound;
5535
5536 errno_t
5537 ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr)
5538 {
5539 struct ifnet *tmp_if;
5540 struct ifaddr *ifa;
5541 struct if_data_internal if_data_saved;
5542 struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
5543 struct dlil_threading_info *dl_inp;
5544 u_int32_t sflags = 0;
5545 int err;
5546
5547 if (ifp == NULL)
5548 return (EINVAL);
5549
5550 /*
5551 * Serialize ifnet attach using dlil_ifnet_lock, in order to
5552 * prevent the interface from being configured while it is
5553 * embryonic, as ifnet_head_lock is dropped and reacquired
5554 * below prior to marking the ifnet with IFRF_ATTACHED.
5555 */
5556 dlil_if_lock();
5557 ifnet_head_lock_exclusive();
5558 /* Verify we aren't already on the list */
5559 TAILQ_FOREACH(tmp_if, &ifnet_head, if_link) {
5560 if (tmp_if == ifp) {
5561 ifnet_head_done();
5562 dlil_if_unlock();
5563 return (EEXIST);
5564 }
5565 }
5566
5567 lck_mtx_lock_spin(&ifp->if_ref_lock);
5568 if (!(ifp->if_refflags & IFRF_EMBRYONIC)) {
5569 panic_plain("%s: flags mismatch (embryonic not set) ifp=%p",
5570 __func__, ifp);
5571 /* NOTREACHED */
5572 }
5573 lck_mtx_unlock(&ifp->if_ref_lock);
5574
5575 ifnet_lock_exclusive(ifp);
5576
5577 /* Sanity check */
5578 VERIFY(ifp->if_detaching_link.tqe_next == NULL);
5579 VERIFY(ifp->if_detaching_link.tqe_prev == NULL);
5580
5581 if (ll_addr != NULL) {
5582 if (ifp->if_addrlen == 0) {
5583 ifp->if_addrlen = ll_addr->sdl_alen;
5584 } else if (ll_addr->sdl_alen != ifp->if_addrlen) {
5585 ifnet_lock_done(ifp);
5586 ifnet_head_done();
5587 dlil_if_unlock();
5588 return (EINVAL);
5589 }
5590 }
5591
5592 /*
5593 * Allow interfaces without protocol families to attach
5594 * only if they have the necessary fields filled out.
5595 */
5596 if (ifp->if_add_proto == NULL || ifp->if_del_proto == NULL) {
5597 DLIL_PRINTF("%s: Attempt to attach interface without "
5598 "family module - %d\n", __func__, ifp->if_family);
5599 ifnet_lock_done(ifp);
5600 ifnet_head_done();
5601 dlil_if_unlock();
5602 return (ENODEV);
5603 }
5604
5605 /* Allocate protocol hash table */
5606 VERIFY(ifp->if_proto_hash == NULL);
5607 ifp->if_proto_hash = zalloc(dlif_phash_zone);
5608 if (ifp->if_proto_hash == NULL) {
5609 ifnet_lock_done(ifp);
5610 ifnet_head_done();
5611 dlil_if_unlock();
5612 return (ENOBUFS);
5613 }
5614 bzero(ifp->if_proto_hash, dlif_phash_size);
5615
5616 lck_mtx_lock_spin(&ifp->if_flt_lock);
5617 VERIFY(TAILQ_EMPTY(&ifp->if_flt_head));
5618 TAILQ_INIT(&ifp->if_flt_head);
5619 VERIFY(ifp->if_flt_busy == 0);
5620 VERIFY(ifp->if_flt_waiters == 0);
5621 lck_mtx_unlock(&ifp->if_flt_lock);
5622
5623 if (!(dl_if->dl_if_flags & DLIF_REUSE)) {
5624 VERIFY(LIST_EMPTY(&ifp->if_multiaddrs));
5625 LIST_INIT(&ifp->if_multiaddrs);
5626 }
5627
5628 VERIFY(ifp->if_allhostsinm == NULL);
5629 VERIFY(TAILQ_EMPTY(&ifp->if_addrhead));
5630 TAILQ_INIT(&ifp->if_addrhead);
5631
5632 if (ifp->if_index == 0) {
5633 int idx = if_next_index();
5634
5635 if (idx == -1) {
5636 ifp->if_index = 0;
5637 ifnet_lock_done(ifp);
5638 ifnet_head_done();
5639 dlil_if_unlock();
5640 return (ENOBUFS);
5641 }
5642 ifp->if_index = idx;
5643 }
5644 /* There should not be anything occupying this slot */
5645 VERIFY(ifindex2ifnet[ifp->if_index] == NULL);
5646
5647 /* allocate (if needed) and initialize a link address */
5648 ifa = dlil_alloc_lladdr(ifp, ll_addr);
5649 if (ifa == NULL) {
5650 ifnet_lock_done(ifp);
5651 ifnet_head_done();
5652 dlil_if_unlock();
5653 return (ENOBUFS);
5654 }
5655
5656 VERIFY(ifnet_addrs[ifp->if_index - 1] == NULL);
5657 ifnet_addrs[ifp->if_index - 1] = ifa;
5658
5659 /* make this address the first on the list */
5660 IFA_LOCK(ifa);
5661 /* hold a reference for ifnet_addrs[] */
5662 IFA_ADDREF_LOCKED(ifa);
5663 /* if_attach_link_ifa() holds a reference for ifa_link */
5664 if_attach_link_ifa(ifp, ifa);
5665 IFA_UNLOCK(ifa);
5666
5667 #if CONFIG_MACF_NET
5668 mac_ifnet_label_associate(ifp);
5669 #endif
5670
5671 TAILQ_INSERT_TAIL(&ifnet_head, ifp, if_link);
5672 ifindex2ifnet[ifp->if_index] = ifp;
5673
5674 /* Hold a reference to the underlying dlil_ifnet */
5675 ifnet_reference(ifp);
5676
5677 	/* Clear stats (save and restore other fields that we care about) */
5678 if_data_saved = ifp->if_data;
5679 bzero(&ifp->if_data, sizeof (ifp->if_data));
5680 ifp->if_data.ifi_type = if_data_saved.ifi_type;
5681 ifp->if_data.ifi_typelen = if_data_saved.ifi_typelen;
5682 ifp->if_data.ifi_physical = if_data_saved.ifi_physical;
5683 ifp->if_data.ifi_addrlen = if_data_saved.ifi_addrlen;
5684 ifp->if_data.ifi_hdrlen = if_data_saved.ifi_hdrlen;
5685 ifp->if_data.ifi_mtu = if_data_saved.ifi_mtu;
5686 ifp->if_data.ifi_baudrate = if_data_saved.ifi_baudrate;
5687 ifp->if_data.ifi_hwassist = if_data_saved.ifi_hwassist;
5688 ifp->if_data.ifi_tso_v4_mtu = if_data_saved.ifi_tso_v4_mtu;
5689 ifp->if_data.ifi_tso_v6_mtu = if_data_saved.ifi_tso_v6_mtu;
5690 ifnet_touch_lastchange(ifp);
5691
5692 VERIFY(ifp->if_output_sched_model == IFNET_SCHED_MODEL_NORMAL ||
5693 ifp->if_output_sched_model == IFNET_SCHED_MODEL_DRIVER_MANAGED ||
5694 ifp->if_output_sched_model == IFNET_SCHED_MODEL_FQ_CODEL);
5695
5696 /* By default, use SFB and enable flow advisory */
5697 sflags = PKTSCHEDF_QALG_SFB;
5698 if (if_flowadv)
5699 sflags |= PKTSCHEDF_QALG_FLOWCTL;
5700
5701 if (if_delaybased_queue)
5702 sflags |= PKTSCHEDF_QALG_DELAYBASED;
5703
5704 if (ifp->if_output_sched_model ==
5705 IFNET_SCHED_MODEL_DRIVER_MANAGED)
5706 sflags |= PKTSCHEDF_QALG_DRIVER_MANAGED;
5707
5708 /* Initialize transmit queue(s) */
5709 err = ifclassq_setup(ifp, sflags, (dl_if->dl_if_flags & DLIF_REUSE));
5710 if (err != 0) {
5711 panic_plain("%s: ifp=%p couldn't initialize transmit queue; "
5712 "err=%d", __func__, ifp, err);
5713 /* NOTREACHED */
5714 }
5715
5716 /* Sanity checks on the input thread storage */
5717 dl_inp = &dl_if->dl_if_inpstorage;
5718 bzero(&dl_inp->stats, sizeof (dl_inp->stats));
5719 VERIFY(dl_inp->input_waiting == 0);
5720 VERIFY(dl_inp->wtot == 0);
5721 VERIFY(dl_inp->ifp == NULL);
5722 VERIFY(qhead(&dl_inp->rcvq_pkts) == NULL && qempty(&dl_inp->rcvq_pkts));
5723 VERIFY(qlimit(&dl_inp->rcvq_pkts) == 0);
5724 VERIFY(!dl_inp->net_affinity);
5725 VERIFY(ifp->if_inp == NULL);
5726 VERIFY(dl_inp->input_thr == THREAD_NULL);
5727 VERIFY(dl_inp->wloop_thr == THREAD_NULL);
5728 VERIFY(dl_inp->poll_thr == THREAD_NULL);
5729 VERIFY(dl_inp->tag == 0);
5730 VERIFY(dl_inp->mode == IFNET_MODEL_INPUT_POLL_OFF);
5731 bzero(&dl_inp->tstats, sizeof (dl_inp->tstats));
5732 bzero(&dl_inp->pstats, sizeof (dl_inp->pstats));
5733 bzero(&dl_inp->sstats, sizeof (dl_inp->sstats));
5734 #if IFNET_INPUT_SANITY_CHK
5735 VERIFY(dl_inp->input_mbuf_cnt == 0);
5736 #endif /* IFNET_INPUT_SANITY_CHK */
5737
5738 /*
5739 * A specific DLIL input thread is created per Ethernet/cellular
5740 * interface or for an interface which supports opportunistic
5741 * input polling. Pseudo interfaces or other types of interfaces
5742 * use the main input thread instead.
5743 */
5744 if ((net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) ||
5745 ifp->if_type == IFT_ETHER || ifp->if_type == IFT_CELLULAR) {
5746 ifp->if_inp = dl_inp;
5747 err = dlil_create_input_thread(ifp, ifp->if_inp);
5748 if (err != 0) {
5749 panic_plain("%s: ifp=%p couldn't get an input thread; "
5750 "err=%d", __func__, ifp, err);
5751 /* NOTREACHED */
5752 }
5753 }
5754
5755 if (ifp->if_inp != NULL && ifp->if_inp->input_mit_tcall == NULL) {
5756 ifp->if_inp->input_mit_tcall =
5757 thread_call_allocate_with_priority(dlil_mit_tcall_fn,
5758 ifp, THREAD_CALL_PRIORITY_KERNEL);
5759 }
5760
5761 /*
5762 * If the driver supports the new transmit model, calculate flow hash
5763 * and create a workloop starter thread to invoke the if_start callback
5764 * where the packets may be dequeued and transmitted.
5765 */
5766 if (ifp->if_eflags & IFEF_TXSTART) {
5767 ifp->if_flowhash = ifnet_calc_flowhash(ifp);
5768 VERIFY(ifp->if_flowhash != 0);
5769 VERIFY(ifp->if_start_thread == THREAD_NULL);
5770
5771 ifnet_set_start_cycle(ifp, NULL);
5772 ifp->if_start_active = 0;
5773 ifp->if_start_req = 0;
5774 ifp->if_start_flags = 0;
5775 VERIFY(ifp->if_start != NULL);
5776 if ((err = kernel_thread_start(ifnet_start_thread_fn,
5777 ifp, &ifp->if_start_thread)) != KERN_SUCCESS) {
5778 panic_plain("%s: "
5779 "ifp=%p couldn't get a start thread; "
5780 "err=%d", __func__, ifp, err);
5781 /* NOTREACHED */
5782 }
5783 ml_thread_policy(ifp->if_start_thread, MACHINE_GROUP,
5784 (MACHINE_NETWORK_GROUP|MACHINE_NETWORK_WORKLOOP));
5785 } else {
5786 ifp->if_flowhash = 0;
5787 }
5788
5789 /*
5790 * If the driver supports the new receive model, create a poller
5791 * thread to invoke if_input_poll callback where the packets may
5792 * be dequeued from the driver and processed for reception.
5793 */
5794 if (ifp->if_eflags & IFEF_RXPOLL) {
5795 VERIFY(ifp->if_input_poll != NULL);
5796 VERIFY(ifp->if_input_ctl != NULL);
5797 VERIFY(ifp->if_poll_thread == THREAD_NULL);
5798
5799 ifnet_set_poll_cycle(ifp, NULL);
5800 ifp->if_poll_update = 0;
5801 ifp->if_poll_active = 0;
5802 ifp->if_poll_req = 0;
5803 if ((err = kernel_thread_start(ifnet_poll_thread_fn, ifp,
5804 &ifp->if_poll_thread)) != KERN_SUCCESS) {
5805 panic_plain("%s: ifp=%p couldn't get a poll thread; "
5806 "err=%d", __func__, ifp, err);
5807 /* NOTREACHED */
5808 }
5809 ml_thread_policy(ifp->if_poll_thread, MACHINE_GROUP,
5810 (MACHINE_NETWORK_GROUP|MACHINE_NETWORK_WORKLOOP));
5811 }
5812
5813 VERIFY(ifp->if_desc.ifd_maxlen == IF_DESCSIZE);
5814 VERIFY(ifp->if_desc.ifd_len == 0);
5815 VERIFY(ifp->if_desc.ifd_desc != NULL);
5816
5817 /* Record attach PC stacktrace */
5818 ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_attach);
5819
5820 ifp->if_updatemcasts = 0;
5821 if (!LIST_EMPTY(&ifp->if_multiaddrs)) {
5822 struct ifmultiaddr *ifma;
5823 LIST_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
5824 IFMA_LOCK(ifma);
5825 if (ifma->ifma_addr->sa_family == AF_LINK ||
5826 ifma->ifma_addr->sa_family == AF_UNSPEC)
5827 ifp->if_updatemcasts++;
5828 IFMA_UNLOCK(ifma);
5829 }
5830
5831 printf("%s: attached with %d suspended link-layer multicast "
5832 "membership(s)\n", if_name(ifp),
5833 ifp->if_updatemcasts);
5834 }
5835
5836 /* Clear logging parameters */
5837 bzero(&ifp->if_log, sizeof (ifp->if_log));
5838
5839 /* Clear foreground/realtime activity timestamps */
5840 ifp->if_fg_sendts = 0;
5841 ifp->if_rt_sendts = 0;
5842
5843 VERIFY(ifp->if_delegated.ifp == NULL);
5844 VERIFY(ifp->if_delegated.type == 0);
5845 VERIFY(ifp->if_delegated.family == 0);
5846 VERIFY(ifp->if_delegated.subfamily == 0);
5847 VERIFY(ifp->if_delegated.expensive == 0);
5848
5849 VERIFY(ifp->if_agentids == NULL);
5850 VERIFY(ifp->if_agentcount == 0);
5851
5852 /* Reset interface state */
5853 bzero(&ifp->if_interface_state, sizeof(ifp->if_interface_state));
5854 ifp->if_interface_state.valid_bitmask |=
5855 IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
5856 ifp->if_interface_state.interface_availability =
5857 IF_INTERFACE_STATE_INTERFACE_AVAILABLE;
5858
5859 /* Initialize Link Quality Metric (loopback [lo0] is always good) */
5860 if (ifp == lo_ifp) {
5861 ifp->if_interface_state.lqm_state = IFNET_LQM_THRESH_GOOD;
5862 ifp->if_interface_state.valid_bitmask |=
5863 IF_INTERFACE_STATE_LQM_STATE_VALID;
5864 } else {
5865 ifp->if_interface_state.lqm_state = IFNET_LQM_THRESH_UNKNOWN;
5866 }
5867
5868 /*
5869 * Enable ECN capability on this interface depending on the
5870 * value of ECN global setting
5871 */
5872 if (tcp_ecn_outbound == 2 && !IFNET_IS_CELLULAR(ifp)) {
5873 ifp->if_eflags |= IFEF_ECN_ENABLE;
5874 ifp->if_eflags &= ~IFEF_ECN_DISABLE;
5875 }
5876
5877 /*
5878 * Built-in Cyclops always on policy for WiFi infra
5879 */
5880 if (IFNET_IS_WIFI_INFRA(ifp) && net_qos_policy_wifi_enabled != 0) {
5881 errno_t error;
5882
5883 error = if_set_qosmarking_mode(ifp,
5884 IFRTYPE_QOSMARKING_FASTLANE);
5885 if (error != 0) {
5886 printf("%s if_set_qosmarking_mode(%s) error %d\n",
5887 __func__, ifp->if_xname, error);
5888 } else {
5889 ifp->if_eflags |= IFEF_QOSMARKING_ENABLED;
5890 #if (DEVELOPMENT || DEBUG)
5891 printf("%s fastlane enabled on %s\n",
5892 __func__, ifp->if_xname);
5893 #endif /* (DEVELOPMENT || DEBUG) */
5894 }
5895 }
5896
5897 ifnet_lock_done(ifp);
5898 ifnet_head_done();
5899
5900
5901 lck_mtx_lock(&ifp->if_cached_route_lock);
5902 /* Enable forwarding cached route */
5903 ifp->if_fwd_cacheok = 1;
5904 /* Clean up any existing cached routes */
5905 ROUTE_RELEASE(&ifp->if_fwd_route);
5906 bzero(&ifp->if_fwd_route, sizeof (ifp->if_fwd_route));
5907 ROUTE_RELEASE(&ifp->if_src_route);
5908 bzero(&ifp->if_src_route, sizeof (ifp->if_src_route));
5909 ROUTE_RELEASE(&ifp->if_src_route6);
5910 bzero(&ifp->if_src_route6, sizeof (ifp->if_src_route6));
5911 lck_mtx_unlock(&ifp->if_cached_route_lock);
5912
5913 ifnet_llreach_ifattach(ifp, (dl_if->dl_if_flags & DLIF_REUSE));
5914
5915 /*
5916 * Allocate and attach IGMPv3/MLDv2 interface specific variables
5917 * and trees; do this before the ifnet is marked as attached.
5918 * The ifnet keeps the reference to the info structures even after
5919 * the ifnet is detached, since the network-layer records still
5920 * refer to the info structures even after that. This also
5921 * makes it possible for them to still function after the ifnet
5922 * is recycled or reattached.
5923 */
5924 #if INET
5925 if (IGMP_IFINFO(ifp) == NULL) {
5926 IGMP_IFINFO(ifp) = igmp_domifattach(ifp, M_WAITOK);
5927 VERIFY(IGMP_IFINFO(ifp) != NULL);
5928 } else {
5929 VERIFY(IGMP_IFINFO(ifp)->igi_ifp == ifp);
5930 igmp_domifreattach(IGMP_IFINFO(ifp));
5931 }
5932 #endif /* INET */
5933 #if INET6
5934 if (MLD_IFINFO(ifp) == NULL) {
5935 MLD_IFINFO(ifp) = mld_domifattach(ifp, M_WAITOK);
5936 VERIFY(MLD_IFINFO(ifp) != NULL);
5937 } else {
5938 VERIFY(MLD_IFINFO(ifp)->mli_ifp == ifp);
5939 mld_domifreattach(MLD_IFINFO(ifp));
5940 }
5941 #endif /* INET6 */
5942
5943 VERIFY(ifp->if_data_threshold == 0);
5944 VERIFY(ifp->if_dt_tcall != NULL);
5945
5946 /*
5947 * Finally, mark this ifnet as attached.
5948 */
5949 lck_mtx_lock(rnh_lock);
5950 ifnet_lock_exclusive(ifp);
5951 lck_mtx_lock_spin(&ifp->if_ref_lock);
5952 ifp->if_refflags = IFRF_ATTACHED; /* clears embryonic */
5953 lck_mtx_unlock(&ifp->if_ref_lock);
5954 if (net_rtref) {
5955 /* boot-args override; enable idle notification */
5956 (void) ifnet_set_idle_flags_locked(ifp, IFRF_IDLE_NOTIFY,
5957 IFRF_IDLE_NOTIFY);
5958 } else {
5959 /* apply previous request(s) to set the idle flags, if any */
5960 (void) ifnet_set_idle_flags_locked(ifp, ifp->if_idle_new_flags,
5961 ifp->if_idle_new_flags_mask);
5962
5963 }
5964 ifnet_lock_done(ifp);
5965 lck_mtx_unlock(rnh_lock);
5966 dlil_if_unlock();
5967
5968 #if PF
5969 /*
5970 * Attach packet filter to this interface, if enabled.
5971 */
5972 pf_ifnet_hook(ifp, 1);
5973 #endif /* PF */
5974
5975 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_ATTACHED, NULL, 0);
5976
5977 if (dlil_verbose) {
5978 printf("%s: attached%s\n", if_name(ifp),
5979 (dl_if->dl_if_flags & DLIF_REUSE) ? " (recycled)" : "");
5980 }
5981
5982 return (0);
5983 }
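/*
 * Illustrative sketch (not part of dlil.c): a driver built on the ifnet
 * KPIs normally reaches ifnet_attach() via ifnet_allocate().  The field
 * names below are from struct ifnet_init_params; the "example_" callbacks
 * are placeholders with minimal bodies.  Note that add_proto/del_proto are
 * effectively mandatory, since ifnet_attach() above rejects an interface
 * without them (ENODEV).
 */
static errno_t
example_output(ifnet_t ifp, mbuf_t m)
{
#pragma unused(ifp)
	m_freem(m);		/* a real driver hands m to its hardware */
	return (0);
}

static errno_t
example_demux(ifnet_t ifp, mbuf_t m, char *frame_header,
    protocol_family_t *protocol)
{
#pragma unused(ifp, m, frame_header, protocol)
	return (ENOENT);	/* no attached protocol matched */
}

static errno_t
example_add_proto(ifnet_t ifp, protocol_family_t protocol,
    const struct ifnet_demux_desc *demux, u_int32_t count)
{
#pragma unused(ifp, protocol, demux, count)
	return (0);
}

static errno_t
example_del_proto(ifnet_t ifp, protocol_family_t protocol)
{
#pragma unused(ifp, protocol)
	return (0);
}

static errno_t
example_driver_attach(ifnet_t *ifpp)
{
	struct ifnet_init_params init;
	errno_t err;

	bzero(&init, sizeof (init));
	init.name = "exam";			/* hypothetical name */
	init.unit = 0;
	init.family = IFNET_FAMILY_ETHERNET;
	init.type = IFT_ETHER;
	init.output = example_output;
	init.demux = example_demux;
	init.add_proto = example_add_proto;
	init.del_proto = example_del_proto;

	err = ifnet_allocate(&init, ifpp);
	if (err != 0)
		return (err);

	/* NULL link address is legal; an Ethernet driver passes its MAC */
	err = ifnet_attach(*ifpp, NULL);
	if (err != 0)
		ifnet_release(*ifpp);
	return (err);
}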
5984
5985 /*
5986  * Prepare the storage for the first/permanent link address, which
5987  * must have the same lifetime as the ifnet itself. Although the link
5988 * address gets removed from if_addrhead and ifnet_addrs[] at detach time,
5989 * its location in memory must never change as it may still be referred
5990 * to by some parts of the system afterwards (unfortunate implementation
5991 * artifacts inherited from BSD.)
5992 *
5993 * Caller must hold ifnet lock as writer.
5994 */
5995 static struct ifaddr *
5996 dlil_alloc_lladdr(struct ifnet *ifp, const struct sockaddr_dl *ll_addr)
5997 {
5998 struct ifaddr *ifa, *oifa;
5999 struct sockaddr_dl *asdl, *msdl;
6000 char workbuf[IFNAMSIZ*2];
6001 int namelen, masklen, socksize;
6002 struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
6003
6004 ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_EXCLUSIVE);
6005 VERIFY(ll_addr == NULL || ll_addr->sdl_alen == ifp->if_addrlen);
6006
6007 namelen = snprintf(workbuf, sizeof (workbuf), "%s",
6008 if_name(ifp));
6009 masklen = offsetof(struct sockaddr_dl, sdl_data[0])
6010 + ((namelen > 0) ? namelen : 0);
6011 socksize = masklen + ifp->if_addrlen;
6012 #define ROUNDUP(a) (1 + (((a) - 1) | (sizeof (u_int32_t) - 1)))
6013 if ((u_int32_t)socksize < sizeof (struct sockaddr_dl))
6014 socksize = sizeof(struct sockaddr_dl);
6015 socksize = ROUNDUP(socksize);
6016 #undef ROUNDUP
6017
6018 ifa = ifp->if_lladdr;
6019 if (socksize > DLIL_SDLMAXLEN ||
6020 (ifa != NULL && ifa != &dl_if->dl_if_lladdr.ifa)) {
6021 /*
6022 * Rare, but in the event that the link address requires
6023 * more storage space than DLIL_SDLMAXLEN, allocate the
6024 		 * largest possible storage for address and mask, such
6025 * that we can reuse the same space when if_addrlen grows.
6026 * This same space will be used when if_addrlen shrinks.
6027 */
6028 if (ifa == NULL || ifa == &dl_if->dl_if_lladdr.ifa) {
6029 int ifasize = sizeof (*ifa) + 2 * SOCK_MAXADDRLEN;
6030 ifa = _MALLOC(ifasize, M_IFADDR, M_WAITOK | M_ZERO);
6031 if (ifa == NULL)
6032 return (NULL);
6033 ifa_lock_init(ifa);
6034 /* Don't set IFD_ALLOC, as this is permanent */
6035 ifa->ifa_debug = IFD_LINK;
6036 }
6037 IFA_LOCK(ifa);
6038 /* address and mask sockaddr_dl locations */
6039 asdl = (struct sockaddr_dl *)(ifa + 1);
6040 bzero(asdl, SOCK_MAXADDRLEN);
6041 msdl = (struct sockaddr_dl *)(void *)
6042 ((char *)asdl + SOCK_MAXADDRLEN);
6043 bzero(msdl, SOCK_MAXADDRLEN);
6044 } else {
6045 VERIFY(ifa == NULL || ifa == &dl_if->dl_if_lladdr.ifa);
6046 /*
6047 * Use the storage areas for address and mask within the
6048 * dlil_ifnet structure. This is the most common case.
6049 */
6050 if (ifa == NULL) {
6051 ifa = &dl_if->dl_if_lladdr.ifa;
6052 ifa_lock_init(ifa);
6053 /* Don't set IFD_ALLOC, as this is permanent */
6054 ifa->ifa_debug = IFD_LINK;
6055 }
6056 IFA_LOCK(ifa);
6057 /* address and mask sockaddr_dl locations */
6058 asdl = (struct sockaddr_dl *)(void *)&dl_if->dl_if_lladdr.asdl;
6059 bzero(asdl, sizeof (dl_if->dl_if_lladdr.asdl));
6060 msdl = (struct sockaddr_dl *)(void *)&dl_if->dl_if_lladdr.msdl;
6061 bzero(msdl, sizeof (dl_if->dl_if_lladdr.msdl));
6062 }
6063
6064 /* hold a permanent reference for the ifnet itself */
6065 IFA_ADDREF_LOCKED(ifa);
6066 oifa = ifp->if_lladdr;
6067 ifp->if_lladdr = ifa;
6068
6069 VERIFY(ifa->ifa_debug == IFD_LINK);
6070 ifa->ifa_ifp = ifp;
6071 ifa->ifa_rtrequest = link_rtrequest;
6072 ifa->ifa_addr = (struct sockaddr *)asdl;
6073 asdl->sdl_len = socksize;
6074 asdl->sdl_family = AF_LINK;
6075 if (namelen > 0) {
6076 bcopy(workbuf, asdl->sdl_data, min(namelen,
6077 sizeof (asdl->sdl_data)));
6078 asdl->sdl_nlen = namelen;
6079 } else {
6080 asdl->sdl_nlen = 0;
6081 }
6082 asdl->sdl_index = ifp->if_index;
6083 asdl->sdl_type = ifp->if_type;
6084 if (ll_addr != NULL) {
6085 asdl->sdl_alen = ll_addr->sdl_alen;
6086 bcopy(CONST_LLADDR(ll_addr), LLADDR(asdl), asdl->sdl_alen);
6087 } else {
6088 asdl->sdl_alen = 0;
6089 }
6090 ifa->ifa_netmask = (struct sockaddr *)msdl;
6091 msdl->sdl_len = masklen;
6092 while (namelen > 0)
6093 msdl->sdl_data[--namelen] = 0xff;
6094 IFA_UNLOCK(ifa);
6095
6096 if (oifa != NULL)
6097 IFA_REMREF(oifa);
6098
6099 return (ifa);
6100 }
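/*
 * Worked example for the sizing above (illustrative, assuming the usual
 * sockaddr_dl layout with 8 bytes ahead of sdl_data): for an interface
 * named "en0" with a 6-byte link-layer address, namelen is 3, masklen is
 * 8 + 3 = 11 and masklen + if_addrlen is 17.  That is smaller than
 * sizeof (struct sockaddr_dl), so socksize is bumped to the full structure
 * size and then rounded up to a multiple of sizeof (u_int32_t), which is
 * small enough for the common in-structure (dl_if_lladdr) storage path.
 */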
6101
6102 static void
6103 if_purgeaddrs(struct ifnet *ifp)
6104 {
6105 #if INET
6106 in_purgeaddrs(ifp);
6107 #endif /* INET */
6108 #if INET6
6109 in6_purgeaddrs(ifp);
6110 #endif /* INET6 */
6111 }
6112
6113 errno_t
6114 ifnet_detach(ifnet_t ifp)
6115 {
6116 struct ifnet *delegated_ifp;
6117 struct nd_ifinfo *ndi = NULL;
6118
6119 if (ifp == NULL)
6120 return (EINVAL);
6121
6122 ndi = ND_IFINFO(ifp);
6123 if (NULL != ndi)
6124 ndi->cga_initialized = FALSE;
6125
6126 lck_mtx_lock(rnh_lock);
6127 ifnet_head_lock_exclusive();
6128 ifnet_lock_exclusive(ifp);
6129
6130 /*
6131 * Check to see if this interface has previously triggered
6132 * aggressive protocol draining; if so, decrement the global
6133 * refcnt and clear PR_AGGDRAIN on the route domain if
6134 * there are no more of such an interface around.
6135 */
6136 (void) ifnet_set_idle_flags_locked(ifp, 0, ~0);
6137
6138 lck_mtx_lock_spin(&ifp->if_ref_lock);
6139 if (!(ifp->if_refflags & IFRF_ATTACHED)) {
6140 lck_mtx_unlock(&ifp->if_ref_lock);
6141 ifnet_lock_done(ifp);
6142 ifnet_head_done();
6143 lck_mtx_unlock(rnh_lock);
6144 return (EINVAL);
6145 } else if (ifp->if_refflags & IFRF_DETACHING) {
6146 /* Interface has already been detached */
6147 lck_mtx_unlock(&ifp->if_ref_lock);
6148 ifnet_lock_done(ifp);
6149 ifnet_head_done();
6150 lck_mtx_unlock(rnh_lock);
6151 return (ENXIO);
6152 }
6153 VERIFY(!(ifp->if_refflags & IFRF_EMBRYONIC));
6154 /* Indicate this interface is being detached */
6155 ifp->if_refflags &= ~IFRF_ATTACHED;
6156 ifp->if_refflags |= IFRF_DETACHING;
6157 lck_mtx_unlock(&ifp->if_ref_lock);
6158
6159 if (dlil_verbose) {
6160 printf("%s: detaching\n", if_name(ifp));
6161 }
6162
6163 /* clean up flow control entry object if there's any */
6164 if (ifp->if_eflags & IFEF_TXSTART) {
6165 ifnet_flowadv(ifp->if_flowhash);
6166 }
6167
6168 /* Reset ECN enable/disable flags */
6169 ifp->if_eflags &= ~IFEF_ECN_DISABLE;
6170 ifp->if_eflags &= ~IFEF_ECN_ENABLE;
6171
6172 /*
6173 * Remove ifnet from the ifnet_head, ifindex2ifnet[]; it will
6174 * no longer be visible during lookups from this point.
6175 */
6176 VERIFY(ifindex2ifnet[ifp->if_index] == ifp);
6177 TAILQ_REMOVE(&ifnet_head, ifp, if_link);
6178 ifp->if_link.tqe_next = NULL;
6179 ifp->if_link.tqe_prev = NULL;
6180 if (ifp->if_ordered_link.tqe_next != NULL ||
6181 ifp->if_ordered_link.tqe_prev != NULL) {
6182 ifnet_remove_from_ordered_list(ifp);
6183 }
6184 ifindex2ifnet[ifp->if_index] = NULL;
6185
6186 /* 18717626 - reset IFEF_IPV4_ROUTER and IFEF_IPV6_ROUTER */
6187 ifp->if_eflags &= ~(IFEF_IPV4_ROUTER | IFEF_IPV6_ROUTER);
6188
6189 /* Record detach PC stacktrace */
6190 ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_detach);
6191
6192 /* Clear logging parameters */
6193 bzero(&ifp->if_log, sizeof (ifp->if_log));
6194
6195 /* Clear delegated interface info (reference released below) */
6196 delegated_ifp = ifp->if_delegated.ifp;
6197 bzero(&ifp->if_delegated, sizeof (ifp->if_delegated));
6198
6199 /* Reset interface state */
6200 bzero(&ifp->if_interface_state, sizeof(ifp->if_interface_state));
6201
6202 ifnet_lock_done(ifp);
6203 ifnet_head_done();
6204 lck_mtx_unlock(rnh_lock);
6205
6206
6207 /* Release reference held on the delegated interface */
6208 if (delegated_ifp != NULL)
6209 ifnet_release(delegated_ifp);
6210
6211 /* Reset Link Quality Metric (unless loopback [lo0]) */
6212 if (ifp != lo_ifp)
6213 if_lqm_update(ifp, IFNET_LQM_THRESH_OFF, 0);
6214
6215 /* Reset TCP local statistics */
6216 if (ifp->if_tcp_stat != NULL)
6217 bzero(ifp->if_tcp_stat, sizeof(*ifp->if_tcp_stat));
6218
6219 /* Reset UDP local statistics */
6220 if (ifp->if_udp_stat != NULL)
6221 bzero(ifp->if_udp_stat, sizeof(*ifp->if_udp_stat));
6222
6223 /* Reset ifnet IPv4 stats */
6224 if (ifp->if_ipv4_stat != NULL)
6225 bzero(ifp->if_ipv4_stat, sizeof(*ifp->if_ipv4_stat));
6226
6227 /* Reset ifnet IPv6 stats */
6228 if (ifp->if_ipv6_stat != NULL)
6229 bzero(ifp->if_ipv6_stat, sizeof(*ifp->if_ipv6_stat));
6230
6231 /* Release memory held for interface link status report */
6232 if (ifp->if_link_status != NULL) {
6233 FREE(ifp->if_link_status, M_TEMP);
6234 ifp->if_link_status = NULL;
6235 }
6236
6237 /* Clear agent IDs */
6238 if (ifp->if_agentids != NULL) {
6239 FREE(ifp->if_agentids, M_NETAGENT);
6240 ifp->if_agentids = NULL;
6241 }
6242 ifp->if_agentcount = 0;
6243
6244
6245 /* Let BPF know we're detaching */
6246 bpfdetach(ifp);
6247
6248 /* Mark the interface as DOWN */
6249 if_down(ifp);
6250
6251 /* Disable forwarding cached route */
6252 lck_mtx_lock(&ifp->if_cached_route_lock);
6253 ifp->if_fwd_cacheok = 0;
6254 lck_mtx_unlock(&ifp->if_cached_route_lock);
6255
6256 /* Disable data threshold and wait for any pending event posting */
6257 ifp->if_data_threshold = 0;
6258 VERIFY(ifp->if_dt_tcall != NULL);
6259 (void) thread_call_cancel_wait(ifp->if_dt_tcall);
6260
6261 /*
6262 * Drain any deferred IGMPv3/MLDv2 query responses, but keep the
6263 * references to the info structures and leave them attached to
6264 * this ifnet.
6265 */
6266 #if INET
6267 igmp_domifdetach(ifp);
6268 #endif /* INET */
6269 #if INET6
6270 mld_domifdetach(ifp);
6271 #endif /* INET6 */
6272
6273 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHING, NULL, 0);
6274
6275 /* Let worker thread take care of the rest, to avoid reentrancy */
6276 dlil_if_lock();
6277 ifnet_detaching_enqueue(ifp);
6278 dlil_if_unlock();
6279
6280 return (0);
6281 }
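/*
 * Illustrative usage (not part of dlil.c): a driver initiates teardown
 * with a single call and must keep its softc alive until the detached/free
 * callback it supplied at allocation time has fired, since the remaining
 * work is finished asynchronously by the detacher thread below:
 *
 *	errno_t err = ifnet_detach(ifp);
 *	(EINVAL if the ifnet was never attached, ENXIO if already detaching)
 */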
6282
6283 static void
6284 ifnet_detaching_enqueue(struct ifnet *ifp)
6285 {
6286 dlil_if_lock_assert();
6287
6288 ++ifnet_detaching_cnt;
6289 VERIFY(ifnet_detaching_cnt != 0);
6290 TAILQ_INSERT_TAIL(&ifnet_detaching_head, ifp, if_detaching_link);
6291 wakeup((caddr_t)&ifnet_delayed_run);
6292 }
6293
6294 static struct ifnet *
6295 ifnet_detaching_dequeue(void)
6296 {
6297 struct ifnet *ifp;
6298
6299 dlil_if_lock_assert();
6300
6301 ifp = TAILQ_FIRST(&ifnet_detaching_head);
6302 VERIFY(ifnet_detaching_cnt != 0 || ifp == NULL);
6303 if (ifp != NULL) {
6304 VERIFY(ifnet_detaching_cnt != 0);
6305 --ifnet_detaching_cnt;
6306 TAILQ_REMOVE(&ifnet_detaching_head, ifp, if_detaching_link);
6307 ifp->if_detaching_link.tqe_next = NULL;
6308 ifp->if_detaching_link.tqe_prev = NULL;
6309 }
6310 return (ifp);
6311 }
6312
6313 static int
6314 ifnet_detacher_thread_cont(int err)
6315 {
6316 #pragma unused(err)
6317 struct ifnet *ifp;
6318
6319 for (;;) {
6320 dlil_if_lock_assert();
6321 while (ifnet_detaching_cnt == 0) {
6322 (void) msleep0(&ifnet_delayed_run, &dlil_ifnet_lock,
6323 (PZERO - 1), "ifnet_detacher_cont", 0,
6324 ifnet_detacher_thread_cont);
6325 /* NOTREACHED */
6326 }
6327
6328 VERIFY(TAILQ_FIRST(&ifnet_detaching_head) != NULL);
6329
6330 /* Take care of detaching ifnet */
6331 ifp = ifnet_detaching_dequeue();
6332 if (ifp != NULL) {
6333 dlil_if_unlock();
6334 ifnet_detach_final(ifp);
6335 dlil_if_lock();
6336 }
6337 }
6338 }
6339
6340 static void
6341 ifnet_detacher_thread_func(void *v, wait_result_t w)
6342 {
6343 #pragma unused(v, w)
6344 dlil_if_lock();
6345 (void) msleep0(&ifnet_delayed_run, &dlil_ifnet_lock,
6346 (PZERO - 1), "ifnet_detacher", 0, ifnet_detacher_thread_cont);
6347 /*
6348 * msleep0() shouldn't have returned as PCATCH was not set;
6349 * therefore assert in this case.
6350 */
6351 dlil_if_unlock();
6352 VERIFY(0);
6353 }
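/*
 * Note on the pattern above: because msleep0() is given a continuation,
 * it does not return once the thread blocks; the thread's stack is given
 * up and ifnet_detacher_thread_cont() is re-entered from the top on every
 * wakeup, with dlil_ifnet_lock held again (as the assert at the top of the
 * loop checks).  That is why the continuation loops forever, and why
 * falling out of msleep0() in ifnet_detacher_thread_func() trips the
 * VERIFY(0).
 */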
6354
6355 static void
6356 ifnet_detach_final(struct ifnet *ifp)
6357 {
6358 struct ifnet_filter *filter, *filter_next;
6359 struct ifnet_filter_head fhead;
6360 struct dlil_threading_info *inp;
6361 struct ifaddr *ifa;
6362 ifnet_detached_func if_free;
6363 int i;
6364
6365 lck_mtx_lock(&ifp->if_ref_lock);
6366 if (!(ifp->if_refflags & IFRF_DETACHING)) {
6367 panic("%s: flags mismatch (detaching not set) ifp=%p",
6368 __func__, ifp);
6369 /* NOTREACHED */
6370 }
6371
6372 /*
6373 * Wait until the existing IO references get released
6374 * before we proceed with ifnet_detach. This is not a
6375 * common case, so block without using a continuation.
6376 */
6377 while (ifp->if_refio > 0) {
6378 printf("%s: Waiting for IO references on %s interface "
6379 "to be released\n", __func__, if_name(ifp));
6380 (void) msleep(&(ifp->if_refio), &ifp->if_ref_lock,
6381 (PZERO - 1), "ifnet_ioref_wait", NULL);
6382 }
6383 lck_mtx_unlock(&ifp->if_ref_lock);
6384
6385 /* Drain and destroy send queue */
6386 ifclassq_teardown(ifp);
6387
6388 /* Detach interface filters */
6389 lck_mtx_lock(&ifp->if_flt_lock);
6390 if_flt_monitor_enter(ifp);
6391
6392 LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
6393 fhead = ifp->if_flt_head;
6394 TAILQ_INIT(&ifp->if_flt_head);
6395
6396 for (filter = TAILQ_FIRST(&fhead); filter; filter = filter_next) {
6397 filter_next = TAILQ_NEXT(filter, filt_next);
6398 lck_mtx_unlock(&ifp->if_flt_lock);
6399
6400 dlil_detach_filter_internal(filter, 1);
6401 lck_mtx_lock(&ifp->if_flt_lock);
6402 }
6403 if_flt_monitor_leave(ifp);
6404 lck_mtx_unlock(&ifp->if_flt_lock);
6405
6406 /* Tell upper layers to drop their network addresses */
6407 if_purgeaddrs(ifp);
6408
6409 ifnet_lock_exclusive(ifp);
6410
6411 	/* Unplumb all protocols */
6412 for (i = 0; i < PROTO_HASH_SLOTS; i++) {
6413 struct if_proto *proto;
6414
6415 proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
6416 while (proto != NULL) {
6417 protocol_family_t family = proto->protocol_family;
6418 ifnet_lock_done(ifp);
6419 proto_unplumb(family, ifp);
6420 ifnet_lock_exclusive(ifp);
6421 proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
6422 }
6423 /* There should not be any protocols left */
6424 VERIFY(SLIST_EMPTY(&ifp->if_proto_hash[i]));
6425 }
6426 zfree(dlif_phash_zone, ifp->if_proto_hash);
6427 ifp->if_proto_hash = NULL;
6428
6429 /* Detach (permanent) link address from if_addrhead */
6430 ifa = TAILQ_FIRST(&ifp->if_addrhead);
6431 VERIFY(ifnet_addrs[ifp->if_index - 1] == ifa);
6432 IFA_LOCK(ifa);
6433 if_detach_link_ifa(ifp, ifa);
6434 IFA_UNLOCK(ifa);
6435
6436 /* Remove (permanent) link address from ifnet_addrs[] */
6437 IFA_REMREF(ifa);
6438 ifnet_addrs[ifp->if_index - 1] = NULL;
6439
6440 /* This interface should not be on {ifnet_head,detaching} */
6441 VERIFY(ifp->if_link.tqe_next == NULL);
6442 VERIFY(ifp->if_link.tqe_prev == NULL);
6443 VERIFY(ifp->if_detaching_link.tqe_next == NULL);
6444 VERIFY(ifp->if_detaching_link.tqe_prev == NULL);
6445 VERIFY(ifp->if_ordered_link.tqe_next == NULL);
6446 VERIFY(ifp->if_ordered_link.tqe_prev == NULL);
6447
6448 /* The slot should have been emptied */
6449 VERIFY(ifindex2ifnet[ifp->if_index] == NULL);
6450
6451 /* There should not be any addresses left */
6452 VERIFY(TAILQ_EMPTY(&ifp->if_addrhead));
6453
6454 /*
6455 * Signal the starter thread to terminate itself.
6456 */
6457 if (ifp->if_start_thread != THREAD_NULL) {
6458 lck_mtx_lock_spin(&ifp->if_start_lock);
6459 ifp->if_start_flags = 0;
6460 ifp->if_start_thread = THREAD_NULL;
6461 wakeup_one((caddr_t)&ifp->if_start_thread);
6462 lck_mtx_unlock(&ifp->if_start_lock);
6463 }
6464
6465 /*
6466 * Signal the poller thread to terminate itself.
6467 */
6468 if (ifp->if_poll_thread != THREAD_NULL) {
6469 lck_mtx_lock_spin(&ifp->if_poll_lock);
6470 ifp->if_poll_thread = THREAD_NULL;
6471 wakeup_one((caddr_t)&ifp->if_poll_thread);
6472 lck_mtx_unlock(&ifp->if_poll_lock);
6473 }
6474
6475 /*
6476 * If thread affinity was set for the workloop thread, we will need
6477 * to tear down the affinity and release the extra reference count
6478 * taken at attach time. Does not apply to lo0 or other interfaces
6479 * without dedicated input threads.
6480 */
6481 if ((inp = ifp->if_inp) != NULL) {
6482 VERIFY(inp != dlil_main_input_thread);
6483
6484 if (inp->net_affinity) {
6485 struct thread *tp, *wtp, *ptp;
6486
6487 lck_mtx_lock_spin(&inp->input_lck);
6488 wtp = inp->wloop_thr;
6489 inp->wloop_thr = THREAD_NULL;
6490 ptp = inp->poll_thr;
6491 inp->poll_thr = THREAD_NULL;
6492 tp = inp->input_thr; /* don't nullify now */
6493 inp->tag = 0;
6494 inp->net_affinity = FALSE;
6495 lck_mtx_unlock(&inp->input_lck);
6496
6497 /* Tear down poll thread affinity */
6498 if (ptp != NULL) {
6499 VERIFY(ifp->if_eflags & IFEF_RXPOLL);
6500 (void) dlil_affinity_set(ptp,
6501 THREAD_AFFINITY_TAG_NULL);
6502 thread_deallocate(ptp);
6503 }
6504
6505 /* Tear down workloop thread affinity */
6506 if (wtp != NULL) {
6507 (void) dlil_affinity_set(wtp,
6508 THREAD_AFFINITY_TAG_NULL);
6509 thread_deallocate(wtp);
6510 }
6511
6512 /* Tear down DLIL input thread affinity */
6513 (void) dlil_affinity_set(tp, THREAD_AFFINITY_TAG_NULL);
6514 thread_deallocate(tp);
6515 }
6516
6517 /* disassociate ifp DLIL input thread */
6518 ifp->if_inp = NULL;
6519
6520 /* tell the input thread to terminate */
6521 lck_mtx_lock_spin(&inp->input_lck);
6522 inp->input_waiting |= DLIL_INPUT_TERMINATE;
6523 if (!(inp->input_waiting & DLIL_INPUT_RUNNING)) {
6524 wakeup_one((caddr_t)&inp->input_waiting);
6525 }
6526 lck_mtx_unlock(&inp->input_lck);
6527 ifnet_lock_done(ifp);
6528
6529 /* wait for the input thread to terminate */
6530 lck_mtx_lock_spin(&inp->input_lck);
6531 while ((inp->input_waiting & DLIL_INPUT_TERMINATE_COMPLETE)
6532 == 0) {
6533 (void) msleep(&inp->input_waiting, &inp->input_lck,
6534 (PZERO - 1) | PSPIN, inp->input_name, NULL);
6535 }
6536 lck_mtx_unlock(&inp->input_lck);
6537 ifnet_lock_exclusive(ifp);
6538
6539 /* clean-up input thread state */
6540 dlil_clean_threading_info(inp);
6541
6542 }
6543
6544 /* The driver might unload, so point these to ourselves */
6545 if_free = ifp->if_free;
6546 ifp->if_output_dlil = ifp_if_output;
6547 ifp->if_output = ifp_if_output;
6548 ifp->if_pre_enqueue = ifp_if_output;
6549 ifp->if_start = ifp_if_start;
6550 ifp->if_output_ctl = ifp_if_ctl;
6551 ifp->if_input_dlil = ifp_if_input;
6552 ifp->if_input_poll = ifp_if_input_poll;
6553 ifp->if_input_ctl = ifp_if_ctl;
6554 ifp->if_ioctl = ifp_if_ioctl;
6555 ifp->if_set_bpf_tap = ifp_if_set_bpf_tap;
6556 ifp->if_free = ifp_if_free;
6557 ifp->if_demux = ifp_if_demux;
6558 ifp->if_event = ifp_if_event;
6559 ifp->if_framer_legacy = ifp_if_framer;
6560 ifp->if_framer = ifp_if_framer_extended;
6561 ifp->if_add_proto = ifp_if_add_proto;
6562 ifp->if_del_proto = ifp_if_del_proto;
6563 ifp->if_check_multi = ifp_if_check_multi;
6564
6565 /* wipe out interface description */
6566 VERIFY(ifp->if_desc.ifd_maxlen == IF_DESCSIZE);
6567 ifp->if_desc.ifd_len = 0;
6568 VERIFY(ifp->if_desc.ifd_desc != NULL);
6569 bzero(ifp->if_desc.ifd_desc, IF_DESCSIZE);
6570
6571 /* there shouldn't be any delegation by now */
6572 VERIFY(ifp->if_delegated.ifp == NULL);
6573 VERIFY(ifp->if_delegated.type == 0);
6574 VERIFY(ifp->if_delegated.family == 0);
6575 VERIFY(ifp->if_delegated.subfamily == 0);
6576 VERIFY(ifp->if_delegated.expensive == 0);
6577
6578 	/* QoS marking gets cleared */
6579 ifp->if_eflags &= ~IFEF_QOSMARKING_ENABLED;
6580 if_set_qosmarking_mode(ifp, IFRTYPE_QOSMARKING_MODE_NONE);
6581
6582
6583 ifnet_lock_done(ifp);
6584
6585 #if PF
6586 /*
6587 * Detach this interface from packet filter, if enabled.
6588 */
6589 pf_ifnet_hook(ifp, 0);
6590 #endif /* PF */
6591
6592 /* Filter list should be empty */
6593 lck_mtx_lock_spin(&ifp->if_flt_lock);
6594 VERIFY(TAILQ_EMPTY(&ifp->if_flt_head));
6595 VERIFY(ifp->if_flt_busy == 0);
6596 VERIFY(ifp->if_flt_waiters == 0);
6597 lck_mtx_unlock(&ifp->if_flt_lock);
6598
6599 /* Last chance to drain send queue */
6600 if_qflush(ifp, 0);
6601
6602 /* Last chance to cleanup any cached route */
6603 lck_mtx_lock(&ifp->if_cached_route_lock);
6604 VERIFY(!ifp->if_fwd_cacheok);
6605 ROUTE_RELEASE(&ifp->if_fwd_route);
6606 bzero(&ifp->if_fwd_route, sizeof (ifp->if_fwd_route));
6607 ROUTE_RELEASE(&ifp->if_src_route);
6608 bzero(&ifp->if_src_route, sizeof (ifp->if_src_route));
6609 ROUTE_RELEASE(&ifp->if_src_route6);
6610 bzero(&ifp->if_src_route6, sizeof (ifp->if_src_route6));
6611 lck_mtx_unlock(&ifp->if_cached_route_lock);
6612
6613 VERIFY(ifp->if_data_threshold == 0);
6614 VERIFY(ifp->if_dt_tcall != NULL);
6615 VERIFY(!thread_call_isactive(ifp->if_dt_tcall));
6616
6617 ifnet_llreach_ifdetach(ifp);
6618
6619 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHED, NULL, 0);
6620
6621 /*
6622 * Finally, mark this ifnet as detached.
6623 */
6624 lck_mtx_lock_spin(&ifp->if_ref_lock);
6625 if (!(ifp->if_refflags & IFRF_DETACHING)) {
6626 panic("%s: flags mismatch (detaching not set) ifp=%p",
6627 __func__, ifp);
6628 /* NOTREACHED */
6629 }
6630 ifp->if_refflags &= ~IFRF_DETACHING;
6631 lck_mtx_unlock(&ifp->if_ref_lock);
6632 if (if_free != NULL)
6633 if_free(ifp);
6634
6635 if (dlil_verbose)
6636 printf("%s: detached\n", if_name(ifp));
6637
6638 /* Release reference held during ifnet attach */
6639 ifnet_release(ifp);
6640 }
6641
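/*
 * The ifp_if_* routines below are the neutral replacements installed by
 * ifnet_detach_final() once a driver detaches: outbound mbufs are simply
 * freed, input and control operations fail harmlessly, and framing drops
 * the packet, so stale references into the ifnet can no longer call into
 * driver code that may already have been unloaded.
 */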
6642 errno_t
6643 ifp_if_output(struct ifnet *ifp, struct mbuf *m)
6644 {
6645 #pragma unused(ifp)
6646 m_freem_list(m);
6647 return (0);
6648 }
6649
6650 void
6651 ifp_if_start(struct ifnet *ifp)
6652 {
6653 ifnet_purge(ifp);
6654 }
6655
6656 static errno_t
6657 ifp_if_input(struct ifnet *ifp, struct mbuf *m_head,
6658 struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
6659 boolean_t poll, struct thread *tp)
6660 {
6661 #pragma unused(ifp, m_tail, s, poll, tp)
6662 m_freem_list(m_head);
6663 return (ENXIO);
6664 }
6665
6666 static void
6667 ifp_if_input_poll(struct ifnet *ifp, u_int32_t flags, u_int32_t max_cnt,
6668 struct mbuf **m_head, struct mbuf **m_tail, u_int32_t *cnt, u_int32_t *len)
6669 {
6670 #pragma unused(ifp, flags, max_cnt)
6671 if (m_head != NULL)
6672 *m_head = NULL;
6673 if (m_tail != NULL)
6674 *m_tail = NULL;
6675 if (cnt != NULL)
6676 *cnt = 0;
6677 if (len != NULL)
6678 *len = 0;
6679 }
6680
6681 static errno_t
6682 ifp_if_ctl(struct ifnet *ifp, ifnet_ctl_cmd_t cmd, u_int32_t arglen, void *arg)
6683 {
6684 #pragma unused(ifp, cmd, arglen, arg)
6685 return (EOPNOTSUPP);
6686 }
6687
6688 static errno_t
6689 ifp_if_demux(struct ifnet *ifp, struct mbuf *m, char *fh, protocol_family_t *pf)
6690 {
6691 #pragma unused(ifp, fh, pf)
6692 m_freem(m);
6693 return (EJUSTRETURN);
6694 }
6695
6696 static errno_t
6697 ifp_if_add_proto(struct ifnet *ifp, protocol_family_t pf,
6698 const struct ifnet_demux_desc *da, u_int32_t dc)
6699 {
6700 #pragma unused(ifp, pf, da, dc)
6701 return (EINVAL);
6702 }
6703
6704 static errno_t
6705 ifp_if_del_proto(struct ifnet *ifp, protocol_family_t pf)
6706 {
6707 #pragma unused(ifp, pf)
6708 return (EINVAL);
6709 }
6710
6711 static errno_t
6712 ifp_if_check_multi(struct ifnet *ifp, const struct sockaddr *sa)
6713 {
6714 #pragma unused(ifp, sa)
6715 return (EOPNOTSUPP);
6716 }
6717
6718 #if CONFIG_EMBEDDED
6719 static errno_t
6720 ifp_if_framer(struct ifnet *ifp, struct mbuf **m,
6721 const struct sockaddr *sa, const char *ll, const char *t,
6722 u_int32_t *pre, u_int32_t *post)
6723 #else
6724 static errno_t
6725 ifp_if_framer(struct ifnet *ifp, struct mbuf **m,
6726 const struct sockaddr *sa, const char *ll, const char *t)
6727 #endif /* !CONFIG_EMBEDDED */
6728 {
6729 #pragma unused(ifp, m, sa, ll, t)
6730 #if CONFIG_EMBEDDED
6731 return (ifp_if_framer_extended(ifp, m, sa, ll, t, pre, post));
6732 #else
6733 return (ifp_if_framer_extended(ifp, m, sa, ll, t, NULL, NULL));
6734 #endif /* !CONFIG_EMBEDDED */
6735 }
6736
6737 static errno_t
6738 ifp_if_framer_extended(struct ifnet *ifp, struct mbuf **m,
6739 const struct sockaddr *sa, const char *ll, const char *t,
6740 u_int32_t *pre, u_int32_t *post)
6741 {
6742 #pragma unused(ifp, sa, ll, t)
6743 m_freem(*m);
6744 *m = NULL;
6745
6746 if (pre != NULL)
6747 *pre = 0;
6748 if (post != NULL)
6749 *post = 0;
6750
6751 return (EJUSTRETURN);
6752 }
6753
6754 errno_t
6755 ifp_if_ioctl(struct ifnet *ifp, unsigned long cmd, void *arg)
6756 {
6757 #pragma unused(ifp, cmd, arg)
6758 return (EOPNOTSUPP);
6759 }
6760
6761 static errno_t
6762 ifp_if_set_bpf_tap(struct ifnet *ifp, bpf_tap_mode tm, bpf_packet_func f)
6763 {
6764 #pragma unused(ifp, tm, f)
6765 /* XXX not sure what to do here */
6766 return (0);
6767 }
6768
6769 static void
6770 ifp_if_free(struct ifnet *ifp)
6771 {
6772 #pragma unused(ifp)
6773 }
6774
6775 static void
6776 ifp_if_event(struct ifnet *ifp, const struct kev_msg *e)
6777 {
6778 #pragma unused(ifp, e)
6779 }
6780
6781 int dlil_if_acquire(u_int32_t family, const void *uniqueid,
6782 size_t uniqueid_len, const char *ifxname, struct ifnet **ifp)
6783 {
6784 struct ifnet *ifp1 = NULL;
6785 struct dlil_ifnet *dlifp1 = NULL;
6786 void *buf, *base, **pbuf;
6787 int ret = 0;
6788
6789 VERIFY(*ifp == NULL);
6790 dlil_if_lock();
6791 /*
6792 * We absolutely can't have an interface with the same name
6793 	 * in the in-use state.
6794 	 * To make sure of that, the list has to be traversed completely.
6795 */
6796 TAILQ_FOREACH(dlifp1, &dlil_ifnet_head, dl_if_link) {
6797 ifp1 = (struct ifnet *)dlifp1;
6798
6799 if (ifp1->if_family != family)
6800 continue;
6801
6802 /*
6803 		 * If the interface is in use, return EBUSY if either the
6804 		 * unique id or the interface extended name is the same
6805 */
6806 lck_mtx_lock(&dlifp1->dl_if_lock);
6807 if (strncmp(ifxname, ifp1->if_xname, IFXNAMSIZ) == 0) {
6808 if (dlifp1->dl_if_flags & DLIF_INUSE) {
6809 lck_mtx_unlock(&dlifp1->dl_if_lock);
6810 ret = EBUSY;
6811 goto end;
6812 }
6813 }
6814
6815 if (uniqueid_len) {
6816 if (uniqueid_len == dlifp1->dl_if_uniqueid_len &&
6817 bcmp(uniqueid, dlifp1->dl_if_uniqueid, uniqueid_len) == 0) {
6818 if (dlifp1->dl_if_flags & DLIF_INUSE) {
6819 lck_mtx_unlock(&dlifp1->dl_if_lock);
6820 ret = EBUSY;
6821 goto end;
6822 } else {
6823 dlifp1->dl_if_flags |= (DLIF_INUSE|DLIF_REUSE);
6824 /* Cache the first interface that can be recycled */
6825 if (*ifp == NULL)
6826 *ifp = ifp1;
6827 /*
6828 * XXX Do not break or jump to end as we have to traverse
6829 * the whole list to ensure there are no name collisions
6830 */
6831 }
6832 }
6833 }
6834 lck_mtx_unlock(&dlifp1->dl_if_lock);
6835 }
6836
6837 /* If there's an interface that can be recycled, use that */
6838 if (*ifp != NULL)
6839 goto end;
6840
6841 /* no interface found, allocate a new one */
6842 buf = zalloc(dlif_zone);
6843 if (buf == NULL) {
6844 ret = ENOMEM;
6845 goto end;
6846 }
6847 bzero(buf, dlif_bufsize);
6848
6849 /* Get the 64-bit aligned base address for this object */
6850 base = (void *)P2ROUNDUP((intptr_t)buf + sizeof (u_int64_t),
6851 sizeof (u_int64_t));
6852 VERIFY(((intptr_t)base + dlif_size) <= ((intptr_t)buf + dlif_bufsize));
6853
6854 /*
6855 * Wind back a pointer size from the aligned base and
6856 * save the original address so we can free it later.
6857 */
6858 pbuf = (void **)((intptr_t)base - sizeof (void *));
6859 *pbuf = buf;
6860 dlifp1 = base;
6861
6862 if (uniqueid_len) {
6863 MALLOC(dlifp1->dl_if_uniqueid, void *, uniqueid_len,
6864 M_NKE, M_WAITOK);
6865 if (dlifp1->dl_if_uniqueid == NULL) {
6866 zfree(dlif_zone, buf);
6867 ret = ENOMEM;
6868 goto end;
6869 }
6870 bcopy(uniqueid, dlifp1->dl_if_uniqueid, uniqueid_len);
6871 dlifp1->dl_if_uniqueid_len = uniqueid_len;
6872 }
6873
6874 ifp1 = (struct ifnet *)dlifp1;
6875 dlifp1->dl_if_flags = DLIF_INUSE;
6876 if (ifnet_debug) {
6877 dlifp1->dl_if_flags |= DLIF_DEBUG;
6878 dlifp1->dl_if_trace = dlil_if_trace;
6879 }
6880 ifp1->if_name = dlifp1->dl_if_namestorage;
6881 ifp1->if_xname = dlifp1->dl_if_xnamestorage;
6882
6883 /* initialize interface description */
6884 ifp1->if_desc.ifd_maxlen = IF_DESCSIZE;
6885 ifp1->if_desc.ifd_len = 0;
6886 ifp1->if_desc.ifd_desc = dlifp1->dl_if_descstorage;
6887
6888
6889 #if CONFIG_MACF_NET
6890 mac_ifnet_label_init(ifp1);
6891 #endif
6892
6893 if ((ret = dlil_alloc_local_stats(ifp1)) != 0) {
6894 DLIL_PRINTF("%s: failed to allocate if local stats, "
6895 "error: %d\n", __func__, ret);
6896 /* This probably shouldn't be fatal */
6897 ret = 0;
6898 }
6899
6900 lck_mtx_init(&dlifp1->dl_if_lock, ifnet_lock_group, ifnet_lock_attr);
6901 lck_rw_init(&ifp1->if_lock, ifnet_lock_group, ifnet_lock_attr);
6902 lck_mtx_init(&ifp1->if_ref_lock, ifnet_lock_group, ifnet_lock_attr);
6903 lck_mtx_init(&ifp1->if_flt_lock, ifnet_lock_group, ifnet_lock_attr);
6904 lck_mtx_init(&ifp1->if_addrconfig_lock, ifnet_lock_group,
6905 ifnet_lock_attr);
6906 lck_rw_init(&ifp1->if_llreach_lock, ifnet_lock_group, ifnet_lock_attr);
6907 #if INET
6908 lck_rw_init(&ifp1->if_inetdata_lock, ifnet_lock_group,
6909 ifnet_lock_attr);
6910 ifp1->if_inetdata = NULL;
6911 #endif
6912 #if INET6
6913 lck_rw_init(&ifp1->if_inet6data_lock, ifnet_lock_group,
6914 ifnet_lock_attr);
6915 ifp1->if_inet6data = NULL;
6916 #endif
6917 lck_rw_init(&ifp1->if_link_status_lock, ifnet_lock_group,
6918 ifnet_lock_attr);
6919 ifp1->if_link_status = NULL;
6920
6921 /* for send data paths */
6922 lck_mtx_init(&ifp1->if_start_lock, ifnet_snd_lock_group,
6923 ifnet_lock_attr);
6924 lck_mtx_init(&ifp1->if_cached_route_lock, ifnet_snd_lock_group,
6925 ifnet_lock_attr);
6926 lck_mtx_init(&ifp1->if_snd.ifcq_lock, ifnet_snd_lock_group,
6927 ifnet_lock_attr);
6928
6929 /* for receive data paths */
6930 lck_mtx_init(&ifp1->if_poll_lock, ifnet_rcv_lock_group,
6931 ifnet_lock_attr);
6932
6933 /* thread call allocation is done with sleeping zalloc */
6934 ifp1->if_dt_tcall = thread_call_allocate_with_options(dlil_dt_tcall_fn,
6935 ifp1, THREAD_CALL_PRIORITY_KERNEL, THREAD_CALL_OPTIONS_ONCE);
6936 if (ifp1->if_dt_tcall == NULL) {
6937 panic_plain("%s: couldn't create if_dt_tcall", __func__);
6938 /* NOTREACHED */
6939 }
6940
6941 TAILQ_INSERT_TAIL(&dlil_ifnet_head, dlifp1, dl_if_link);
6942
6943 *ifp = ifp1;
6944
6945 end:
6946 dlil_if_unlock();
6947
6948 VERIFY(dlifp1 == NULL || (IS_P2ALIGNED(dlifp1, sizeof (u_int64_t)) &&
6949 IS_P2ALIGNED(&ifp1->if_data, sizeof (u_int64_t))));
6950
6951 return (ret);
6952 }
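/*
 * Layout note for the allocation above (illustrative): the zalloc()'d
 * buffer is carved up as
 *
 *	buf -> [ alignment slack ][ saved buf pointer ][ dlil_ifnet ... ]
 *	                           ^pbuf                ^base/dlifp1 (64-bit aligned)
 *
 * The original allocation address is stashed in the pointer-sized slot
 * immediately before the aligned base, so it can be recovered later when
 * the dlil_ifnet is freed; the VERIFY() confirms that dlif_bufsize left
 * enough slack for the alignment adjustment.
 */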
6953
6954 __private_extern__ void
6955 dlil_if_release(ifnet_t ifp)
6956 {
6957 struct dlil_ifnet *dlifp = (struct dlil_ifnet *)ifp;
6958
6959 VERIFY(OSDecrementAtomic64(&net_api_stats.nas_ifnet_alloc_count) > 0);
6960 if (!(ifp->if_xflags & IFXF_ALLOC_KPI)) {
6961 VERIFY(OSDecrementAtomic64(&net_api_stats.nas_ifnet_alloc_os_count) > 0);
6962 }
6963
6964 ifnet_lock_exclusive(ifp);
6965 lck_mtx_lock(&dlifp->dl_if_lock);
6966 dlifp->dl_if_flags &= ~DLIF_INUSE;
6967 strlcpy(dlifp->dl_if_namestorage, ifp->if_name, IFNAMSIZ);
6968 ifp->if_name = dlifp->dl_if_namestorage;
6969 /* Reset external name (name + unit) */
6970 ifp->if_xname = dlifp->dl_if_xnamestorage;
6971 snprintf(__DECONST(char *, ifp->if_xname), IFXNAMSIZ,
6972 "%s?", ifp->if_name);
6973 lck_mtx_unlock(&dlifp->dl_if_lock);
6974 #if CONFIG_MACF_NET
6975 /*
6976 * We can either recycle the MAC label here or in dlil_if_acquire().
6977 * It seems logical to do it here but this means that anything that
6978 * still has a handle on ifp will now see it as unlabeled.
6979 * Since the interface is "dead" that may be OK. Revisit later.
6980 */
6981 mac_ifnet_label_recycle(ifp);
6982 #endif
6983 ifnet_lock_done(ifp);
6984 }
6985
6986 __private_extern__ void
6987 dlil_if_lock(void)
6988 {
6989 lck_mtx_lock(&dlil_ifnet_lock);
6990 }
6991
6992 __private_extern__ void
6993 dlil_if_unlock(void)
6994 {
6995 lck_mtx_unlock(&dlil_ifnet_lock);
6996 }
6997
6998 __private_extern__ void
6999 dlil_if_lock_assert(void)
7000 {
7001 LCK_MTX_ASSERT(&dlil_ifnet_lock, LCK_MTX_ASSERT_OWNED);
7002 }
7003
7004 __private_extern__ void
7005 dlil_proto_unplumb_all(struct ifnet *ifp)
7006 {
7007 /*
7008 * if_proto_hash[0-2] are for PF_INET, PF_INET6 and PF_VLAN, where
7009 * each bucket contains exactly one entry; PF_VLAN does not need an
7010 * explicit unplumb.
7011 *
7012 * if_proto_hash[3] is for other protocols; we expect anything
7013 * in this bucket to respond to the DETACHING event (which would
7014 * have happened by now) and do the unplumb then.
7015 */
7016 (void) proto_unplumb(PF_INET, ifp);
7017 #if INET6
7018 (void) proto_unplumb(PF_INET6, ifp);
7019 #endif /* INET6 */
7020 }
7021
7022 static void
7023 ifp_src_route_copyout(struct ifnet *ifp, struct route *dst)
7024 {
7025 lck_mtx_lock_spin(&ifp->if_cached_route_lock);
7026 lck_mtx_convert_spin(&ifp->if_cached_route_lock);
7027
7028 route_copyout(dst, &ifp->if_src_route, sizeof (*dst));
7029
7030 lck_mtx_unlock(&ifp->if_cached_route_lock);
7031 }
7032
7033 static void
7034 ifp_src_route_copyin(struct ifnet *ifp, struct route *src)
7035 {
7036 lck_mtx_lock_spin(&ifp->if_cached_route_lock);
7037 lck_mtx_convert_spin(&ifp->if_cached_route_lock);
7038
7039 if (ifp->if_fwd_cacheok) {
7040 route_copyin(src, &ifp->if_src_route, sizeof (*src));
7041 } else {
7042 ROUTE_RELEASE(src);
7043 }
7044 lck_mtx_unlock(&ifp->if_cached_route_lock);
7045 }
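/*
 * The copyout/copyin pair above implements a borrow-validate-return
 * pattern for the per-ifnet cached forwarding/source route: a caller
 * copies the cached route out under the lock, validates or replaces it
 * without holding the lock, and then copies the result back in.  If the
 * cache was disabled in the meantime (if_fwd_cacheok is cleared during
 * ifnet_detach), the route being handed back is released instead of being
 * cached.  ifnet_cached_rtlookup_inet() below is a consumer of this
 * pattern.
 */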
7046
7047 #if INET6
7048 static void
7049 ifp_src_route6_copyout(struct ifnet *ifp, struct route_in6 *dst)
7050 {
7051 lck_mtx_lock_spin(&ifp->if_cached_route_lock);
7052 lck_mtx_convert_spin(&ifp->if_cached_route_lock);
7053
7054 route_copyout((struct route *)dst, (struct route *)&ifp->if_src_route6,
7055 sizeof (*dst));
7056
7057 lck_mtx_unlock(&ifp->if_cached_route_lock);
7058 }
7059
7060 static void
7061 ifp_src_route6_copyin(struct ifnet *ifp, struct route_in6 *src)
7062 {
7063 lck_mtx_lock_spin(&ifp->if_cached_route_lock);
7064 lck_mtx_convert_spin(&ifp->if_cached_route_lock);
7065
7066 if (ifp->if_fwd_cacheok) {
7067 route_copyin((struct route *)src,
7068 (struct route *)&ifp->if_src_route6, sizeof (*src));
7069 } else {
7070 ROUTE_RELEASE(src);
7071 }
7072 lck_mtx_unlock(&ifp->if_cached_route_lock);
7073 }
7074 #endif /* INET6 */
7075
7076 struct rtentry *
7077 ifnet_cached_rtlookup_inet(struct ifnet *ifp, struct in_addr src_ip)
7078 {
7079 struct route src_rt;
7080 struct sockaddr_in *dst;
7081
7082 dst = (struct sockaddr_in *)(void *)(&src_rt.ro_dst);
7083
7084 ifp_src_route_copyout(ifp, &src_rt);
7085
7086 if (ROUTE_UNUSABLE(&src_rt) || src_ip.s_addr != dst->sin_addr.s_addr) {
7087 ROUTE_RELEASE(&src_rt);
7088 if (dst->sin_family != AF_INET) {
7089 bzero(&src_rt.ro_dst, sizeof (src_rt.ro_dst));
7090 dst->sin_len = sizeof (src_rt.ro_dst);
7091 dst->sin_family = AF_INET;
7092 }
7093 dst->sin_addr = src_ip;
7094
7095 VERIFY(src_rt.ro_rt == NULL);
7096 src_rt.ro_rt = rtalloc1_scoped((struct sockaddr *)dst,
7097 0, 0, ifp->if_index);
7098
7099 if (src_rt.ro_rt != NULL) {
7100 /* retain a ref, copyin consumes one */
7101 struct rtentry *rte = src_rt.ro_rt;
7102 RT_ADDREF(rte);
7103 ifp_src_route_copyin(ifp, &src_rt);
7104 src_rt.ro_rt = rte;
7105 }
7106 }
7107
7108 return (src_rt.ro_rt);
7109 }
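/*
 * Illustrative usage (not part of dlil.c): the returned rtentry, when not
 * NULL, carries a reference that the caller is expected to drop once the
 * lookup result has been used, e.g.
 *
 *	rt = ifnet_cached_rtlookup_inet(ifp, ip_src);
 *	...
 *	if (rt != NULL)
 *		rtfree(rt);
 */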
7110
7111 #if INET6
7112 struct rtentry *
7113 ifnet_cached_rtlookup_inet6(struct ifnet *ifp, struct in6_addr *src_ip6)
7114 {
7115 struct route_in6 src_rt;
7116
7117 ifp_src_route6_copyout(ifp, &src_rt);
7118
7119 if (ROUTE_UNUSABLE(&src_rt) ||
7120 !IN6_ARE_ADDR_EQUAL(src_ip6, &src_rt.ro_dst.sin6_addr)) {
7121 ROUTE_RELEASE(&src_rt);
7122 if (src_rt.ro_dst.sin6_family != AF_INET6) {
7123 bzero(&src_rt.ro_dst, sizeof (src_rt.ro_dst));
7124 src_rt.ro_dst.sin6_len = sizeof (src_rt.ro_dst);
7125 src_rt.ro_dst.sin6_family = AF_INET6;
7126 }
7127 src_rt.ro_dst.sin6_scope_id = in6_addr2scopeid(ifp, src_ip6);
7128 bcopy(src_ip6, &src_rt.ro_dst.sin6_addr,
7129 sizeof (src_rt.ro_dst.sin6_addr));
7130
7131 if (src_rt.ro_rt == NULL) {
7132 src_rt.ro_rt = rtalloc1_scoped(
7133 (struct sockaddr *)&src_rt.ro_dst, 0, 0,
7134 ifp->if_index);
7135
7136 if (src_rt.ro_rt != NULL) {
7137 /* retain a ref, copyin consumes one */
7138 struct rtentry *rte = src_rt.ro_rt;
7139 RT_ADDREF(rte);
7140 ifp_src_route6_copyin(ifp, &src_rt);
7141 src_rt.ro_rt = rte;
7142 }
7143 }
7144 }
7145
7146 return (src_rt.ro_rt);
7147 }
7148 #endif /* INET6 */
7149
7150 void
7151 if_lqm_update(struct ifnet *ifp, int lqm, int locked)
7152 {
7153 struct kev_dl_link_quality_metric_data ev_lqm_data;
7154
7155 VERIFY(lqm >= IFNET_LQM_MIN && lqm <= IFNET_LQM_MAX);
7156
7157 /* Normalize to edge */
7158 if (lqm >= 0 && lqm <= IFNET_LQM_THRESH_ABORT) {
7159 lqm = IFNET_LQM_THRESH_ABORT;
7160 atomic_bitset_32(&tcbinfo.ipi_flags,
7161 INPCBINFO_HANDLE_LQM_ABORT);
7162 inpcb_timer_sched(&tcbinfo, INPCB_TIMER_FAST);
7163 } else if (lqm > IFNET_LQM_THRESH_ABORT &&
7164 lqm <= IFNET_LQM_THRESH_MINIMALLY_VIABLE) {
7165 lqm = IFNET_LQM_THRESH_MINIMALLY_VIABLE;
7166 } else if (lqm > IFNET_LQM_THRESH_MINIMALLY_VIABLE &&
7167 lqm <= IFNET_LQM_THRESH_POOR) {
7168 lqm = IFNET_LQM_THRESH_POOR;
7169 } else if (lqm > IFNET_LQM_THRESH_POOR &&
7170 lqm <= IFNET_LQM_THRESH_GOOD) {
7171 lqm = IFNET_LQM_THRESH_GOOD;
7172 }
7173
7174 /*
7175 * Take the lock if needed
7176 */
7177 if (!locked)
7178 ifnet_lock_exclusive(ifp);
7179
7180 if (lqm == ifp->if_interface_state.lqm_state &&
7181 (ifp->if_interface_state.valid_bitmask &
7182 IF_INTERFACE_STATE_LQM_STATE_VALID)) {
7183 /*
7184 * Release the lock if was not held by the caller
7185 */
7186 if (!locked)
7187 ifnet_lock_done(ifp);
7188 return; /* nothing to update */
7189 }
7190 ifp->if_interface_state.valid_bitmask |=
7191 IF_INTERFACE_STATE_LQM_STATE_VALID;
7192 ifp->if_interface_state.lqm_state = lqm;
7193
7194 /*
7195 * Don't want to hold the lock when issuing kernel events
7196 */
7197 ifnet_lock_done(ifp);
7198
7199 bzero(&ev_lqm_data, sizeof (ev_lqm_data));
7200 ev_lqm_data.link_quality_metric = lqm;
7201
7202 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_LINK_QUALITY_METRIC_CHANGED,
7203 (struct net_event_data *)&ev_lqm_data, sizeof (ev_lqm_data));
7204
7205 /*
7206 * Reacquire the lock for the caller
7207 */
7208 if (locked)
7209 ifnet_lock_exclusive(ifp);
7210 }
7211
7212 static void
7213 if_rrc_state_update(struct ifnet *ifp, unsigned int rrc_state)
7214 {
7215 struct kev_dl_rrc_state kev;
7216
7217 if (rrc_state == ifp->if_interface_state.rrc_state &&
7218 (ifp->if_interface_state.valid_bitmask &
7219 IF_INTERFACE_STATE_RRC_STATE_VALID))
7220 return;
7221
7222 ifp->if_interface_state.valid_bitmask |=
7223 IF_INTERFACE_STATE_RRC_STATE_VALID;
7224
7225 ifp->if_interface_state.rrc_state = rrc_state;
7226
7227 /*
7228 * Don't want to hold the lock when issuing kernel events
7229 */
7230 ifnet_lock_done(ifp);
7231
7232 bzero(&kev, sizeof(struct kev_dl_rrc_state));
7233 kev.rrc_state = rrc_state;
7234
7235 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_RRC_STATE_CHANGED,
7236 (struct net_event_data *)&kev, sizeof(struct kev_dl_rrc_state));
7237
7238 ifnet_lock_exclusive(ifp);
7239 }
7240
7241 errno_t
7242 if_state_update(struct ifnet *ifp,
7243 struct if_interface_state *if_interface_state)
7244 {
7245 u_short if_index_available = 0;
7246
7247 ifnet_lock_exclusive(ifp);
7248
7249 if ((ifp->if_type != IFT_CELLULAR) &&
7250 (if_interface_state->valid_bitmask &
7251 IF_INTERFACE_STATE_RRC_STATE_VALID)) {
7252 ifnet_lock_done(ifp);
7253 return (ENOTSUP);
7254 }
7255 if ((if_interface_state->valid_bitmask &
7256 IF_INTERFACE_STATE_LQM_STATE_VALID) &&
7257 (if_interface_state->lqm_state < IFNET_LQM_MIN ||
7258 if_interface_state->lqm_state > IFNET_LQM_MAX)) {
7259 ifnet_lock_done(ifp);
7260 return (EINVAL);
7261 }
7262 if ((if_interface_state->valid_bitmask &
7263 IF_INTERFACE_STATE_RRC_STATE_VALID) &&
7264 if_interface_state->rrc_state !=
7265 IF_INTERFACE_STATE_RRC_STATE_IDLE &&
7266 if_interface_state->rrc_state !=
7267 IF_INTERFACE_STATE_RRC_STATE_CONNECTED) {
7268 ifnet_lock_done(ifp);
7269 return (EINVAL);
7270 }
7271
7272 if (if_interface_state->valid_bitmask &
7273 IF_INTERFACE_STATE_LQM_STATE_VALID) {
7274 if_lqm_update(ifp, if_interface_state->lqm_state, 1);
7275 }
7276 if (if_interface_state->valid_bitmask &
7277 IF_INTERFACE_STATE_RRC_STATE_VALID) {
7278 if_rrc_state_update(ifp, if_interface_state->rrc_state);
7279 }
7280 if (if_interface_state->valid_bitmask &
7281 IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID) {
7282 ifp->if_interface_state.valid_bitmask |=
7283 IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
7284 ifp->if_interface_state.interface_availability =
7285 if_interface_state->interface_availability;
7286
7287 if (ifp->if_interface_state.interface_availability ==
7288 IF_INTERFACE_STATE_INTERFACE_AVAILABLE) {
7289 if_index_available = ifp->if_index;
7290 }
7291 }
7292 ifnet_lock_done(ifp);
7293
7294 /*
7295 * Check if the TCP connections going over this interface should be
7296 * forced to send probe packets instead of waiting for TCP timers
7297 * to fire. This will be done when there is an explicit
7298 * notification that the interface became available.
7299 */
7300 if (if_index_available > 0)
7301 tcp_interface_send_probe(if_index_available);
7302
7303 return (0);
7304 }
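/*
 * A minimal sketch of reporting a link-quality change through
 * if_state_update() (hypothetical caller; only the LQM bit is marked
 * valid, so the RRC and availability state above are left untouched):
 *
 *	struct if_interface_state state;
 *
 *	bzero(&state, sizeof (state));
 *	state.valid_bitmask = IF_INTERFACE_STATE_LQM_STATE_VALID;
 *	state.lqm_state = IFNET_LQM_THRESH_GOOD;
 *	(void) if_state_update(ifp, &state);
 */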
7305
7306 void
7307 if_get_state(struct ifnet *ifp,
7308 struct if_interface_state *if_interface_state)
7309 {
7310 ifnet_lock_shared(ifp);
7311
7312 if_interface_state->valid_bitmask = 0;
7313
7314 if (ifp->if_interface_state.valid_bitmask &
7315 IF_INTERFACE_STATE_RRC_STATE_VALID) {
7316 if_interface_state->valid_bitmask |=
7317 IF_INTERFACE_STATE_RRC_STATE_VALID;
7318 if_interface_state->rrc_state =
7319 ifp->if_interface_state.rrc_state;
7320 }
7321 if (ifp->if_interface_state.valid_bitmask &
7322 IF_INTERFACE_STATE_LQM_STATE_VALID) {
7323 if_interface_state->valid_bitmask |=
7324 IF_INTERFACE_STATE_LQM_STATE_VALID;
7325 if_interface_state->lqm_state =
7326 ifp->if_interface_state.lqm_state;
7327 }
7328 if (ifp->if_interface_state.valid_bitmask &
7329 IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID) {
7330 if_interface_state->valid_bitmask |=
7331 IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
7332 if_interface_state->interface_availability =
7333 ifp->if_interface_state.interface_availability;
7334 }
7335
7336 ifnet_lock_done(ifp);
7337 }
7338
7339 errno_t
7340 if_probe_connectivity(struct ifnet *ifp, u_int32_t conn_probe)
7341 {
7342 ifnet_lock_exclusive(ifp);
7343 if (conn_probe > 1) {
7344 ifnet_lock_done(ifp);
7345 return (EINVAL);
7346 }
7347 if (conn_probe == 0)
7348 ifp->if_eflags &= ~IFEF_PROBE_CONNECTIVITY;
7349 else
7350 ifp->if_eflags |= IFEF_PROBE_CONNECTIVITY;
7351 ifnet_lock_done(ifp);
7352
7353 #if NECP
7354 necp_update_all_clients();
7355 #endif /* NECP */
7356
7357 tcp_probe_connectivity(ifp, conn_probe);
7358 return (0);
7359 }
7360
7361 /* for uuid.c */
7362 int
7363 uuid_get_ethernet(u_int8_t *node)
7364 {
7365 struct ifnet *ifp;
7366 struct sockaddr_dl *sdl;
7367
7368 ifnet_head_lock_shared();
7369 TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
7370 ifnet_lock_shared(ifp);
7371 IFA_LOCK_SPIN(ifp->if_lladdr);
7372 sdl = (struct sockaddr_dl *)(void *)ifp->if_lladdr->ifa_addr;
7373 if (sdl->sdl_type == IFT_ETHER) {
7374 memcpy(node, LLADDR(sdl), ETHER_ADDR_LEN);
7375 IFA_UNLOCK(ifp->if_lladdr);
7376 ifnet_lock_done(ifp);
7377 ifnet_head_done();
7378 return (0);
7379 }
7380 IFA_UNLOCK(ifp->if_lladdr);
7381 ifnet_lock_done(ifp);
7382 }
7383 ifnet_head_done();
7384
7385 return (-1);
7386 }
7387
7388 static int
7389 sysctl_rxpoll SYSCTL_HANDLER_ARGS
7390 {
7391 #pragma unused(arg1, arg2)
7392 uint32_t i;
7393 int err;
7394
7395 i = if_rxpoll;
7396
7397 err = sysctl_handle_int(oidp, &i, 0, req);
7398 if (err != 0 || req->newptr == USER_ADDR_NULL)
7399 return (err);
7400
7401 if (net_rxpoll == 0)
7402 return (ENXIO);
7403
7404 if_rxpoll = i;
7405 return (err);
7406 }
7407
7408 static int
7409 sysctl_rxpoll_mode_holdtime SYSCTL_HANDLER_ARGS
7410 {
7411 #pragma unused(arg1, arg2)
7412 uint64_t q;
7413 int err;
7414
7415 q = if_rxpoll_mode_holdtime;
7416
7417 err = sysctl_handle_quad(oidp, &q, 0, req);
7418 if (err != 0 || req->newptr == USER_ADDR_NULL)
7419 return (err);
7420
7421 if (q < IF_RXPOLL_MODE_HOLDTIME_MIN)
7422 q = IF_RXPOLL_MODE_HOLDTIME_MIN;
7423
7424 if_rxpoll_mode_holdtime = q;
7425
7426 return (err);
7427 }
7428
7429 static int
7430 sysctl_rxpoll_sample_holdtime SYSCTL_HANDLER_ARGS
7431 {
7432 #pragma unused(arg1, arg2)
7433 uint64_t q;
7434 int err;
7435
7436 q = if_rxpoll_sample_holdtime;
7437
7438 err = sysctl_handle_quad(oidp, &q, 0, req);
7439 if (err != 0 || req->newptr == USER_ADDR_NULL)
7440 return (err);
7441
7442 if (q < IF_RXPOLL_SAMPLETIME_MIN)
7443 q = IF_RXPOLL_SAMPLETIME_MIN;
7444
7445 if_rxpoll_sample_holdtime = q;
7446
7447 return (err);
7448 }
7449
7450 static int
7451 sysctl_rxpoll_interval_time SYSCTL_HANDLER_ARGS
7452 {
7453 #pragma unused(arg1, arg2)
7454 uint64_t q;
7455 int err;
7456
7457 q = if_rxpoll_interval_time;
7458
7459 err = sysctl_handle_quad(oidp, &q, 0, req);
7460 if (err != 0 || req->newptr == USER_ADDR_NULL)
7461 return (err);
7462
7463 if (q < IF_RXPOLL_INTERVALTIME_MIN)
7464 q = IF_RXPOLL_INTERVALTIME_MIN;
7465
7466 if_rxpoll_interval_time = q;
7467
7468 return (err);
7469 }
7470
7471 static int
7472 sysctl_rxpoll_wlowat SYSCTL_HANDLER_ARGS
7473 {
7474 #pragma unused(arg1, arg2)
7475 uint32_t i;
7476 int err;
7477
7478 i = if_rxpoll_wlowat;
7479
7480 err = sysctl_handle_int(oidp, &i, 0, req);
7481 if (err != 0 || req->newptr == USER_ADDR_NULL)
7482 return (err);
7483
7484 if (i == 0 || i >= if_rxpoll_whiwat)
7485 return (EINVAL);
7486
7487 if_rxpoll_wlowat = i;
7488 return (err);
7489 }
7490
7491 static int
7492 sysctl_rxpoll_whiwat SYSCTL_HANDLER_ARGS
7493 {
7494 #pragma unused(arg1, arg2)
7495 uint32_t i;
7496 int err;
7497
7498 i = if_rxpoll_whiwat;
7499
7500 err = sysctl_handle_int(oidp, &i, 0, req);
7501 if (err != 0 || req->newptr == USER_ADDR_NULL)
7502 return (err);
7503
7504 if (i <= if_rxpoll_wlowat)
7505 return (EINVAL);
7506
7507 if_rxpoll_whiwat = i;
7508 return (err);
7509 }
7510
7511 static int
7512 sysctl_sndq_maxlen SYSCTL_HANDLER_ARGS
7513 {
7514 #pragma unused(arg1, arg2)
7515 int i, err;
7516
7517 i = if_sndq_maxlen;
7518
7519 err = sysctl_handle_int(oidp, &i, 0, req);
7520 if (err != 0 || req->newptr == USER_ADDR_NULL)
7521 return (err);
7522
7523 if (i < IF_SNDQ_MINLEN)
7524 i = IF_SNDQ_MINLEN;
7525
7526 if_sndq_maxlen = i;
7527 return (err);
7528 }
7529
7530 static int
7531 sysctl_rcvq_maxlen SYSCTL_HANDLER_ARGS
7532 {
7533 #pragma unused(arg1, arg2)
7534 int i, err;
7535
7536 i = if_rcvq_maxlen;
7537
7538 err = sysctl_handle_int(oidp, &i, 0, req);
7539 if (err != 0 || req->newptr == USER_ADDR_NULL)
7540 return (err);
7541
7542 if (i < IF_RCVQ_MINLEN)
7543 i = IF_RCVQ_MINLEN;
7544
7545 if_rcvq_maxlen = i;
7546 return (err);
7547 }
7548
7549 void
7550 dlil_node_present(struct ifnet *ifp, struct sockaddr *sa,
7551 int32_t rssi, int lqm, int npm, u_int8_t srvinfo[48])
7552 {
7553 struct kev_dl_node_presence kev;
7554 struct sockaddr_dl *sdl;
7555 struct sockaddr_in6 *sin6;
7556
7557 VERIFY(ifp);
7558 VERIFY(sa);
7559 VERIFY(sa->sa_family == AF_LINK || sa->sa_family == AF_INET6);
7560
7561 bzero(&kev, sizeof (kev));
7562 sin6 = &kev.sin6_node_address;
7563 sdl = &kev.sdl_node_address;
7564 nd6_alt_node_addr_decompose(ifp, sa, sdl, sin6);
7565 kev.rssi = rssi;
7566 kev.link_quality_metric = lqm;
7567 kev.node_proximity_metric = npm;
7568 bcopy(srvinfo, kev.node_service_info, sizeof (kev.node_service_info));
7569
7570 nd6_alt_node_present(ifp, sin6, sdl, rssi, lqm, npm);
7571 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_PRESENCE,
7572 &kev.link_data, sizeof (kev));
7573 }
7574
7575 void
7576 dlil_node_absent(struct ifnet *ifp, struct sockaddr *sa)
7577 {
7578 struct kev_dl_node_absence kev;
7579 struct sockaddr_in6 *sin6;
7580 struct sockaddr_dl *sdl;
7581
7582 VERIFY(ifp);
7583 VERIFY(sa);
7584 VERIFY(sa->sa_family == AF_LINK || sa->sa_family == AF_INET6);
7585
7586 bzero(&kev, sizeof (kev));
7587 sin6 = &kev.sin6_node_address;
7588 sdl = &kev.sdl_node_address;
7589 nd6_alt_node_addr_decompose(ifp, sa, sdl, sin6);
7590
7591 nd6_alt_node_absent(ifp, sin6);
7592 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_ABSENCE,
7593 &kev.link_data, sizeof (kev));
7594 }
7595
7596 const void *
7597 dlil_ifaddr_bytes(const struct sockaddr_dl *sdl, size_t *sizep,
7598 kauth_cred_t *credp)
7599 {
7600 const u_int8_t *bytes;
7601 size_t size;
7602
7603 bytes = CONST_LLADDR(sdl);
7604 size = sdl->sdl_alen;
7605
7606 #if CONFIG_MACF
7607 if (dlil_lladdr_ckreq) {
7608 switch (sdl->sdl_type) {
7609 case IFT_ETHER:
7610 case IFT_IEEE1394:
7611 break;
7612 default:
7613 credp = NULL;
7614 break;
7615 }
7616
7617 if (credp && mac_system_check_info(*credp, "net.link.addr")) {
7618 static const u_int8_t unspec[FIREWIRE_EUI64_LEN] = {
7619 [0] = 2
7620 };
7621
7622 bytes = unspec;
7623 }
7624 }
7625 #else
7626 #pragma unused(credp)
7627 #endif
7628
7629 if (sizep != NULL) *sizep = size;
7630 return (bytes);
7631 }
7632
7633 void
7634 dlil_report_issues(struct ifnet *ifp, u_int8_t modid[DLIL_MODIDLEN],
7635 u_int8_t info[DLIL_MODARGLEN])
7636 {
7637 struct kev_dl_issues kev;
7638 struct timeval tv;
7639
7640 VERIFY(ifp != NULL);
7641 VERIFY(modid != NULL);
7642 _CASSERT(sizeof (kev.modid) == DLIL_MODIDLEN);
7643 _CASSERT(sizeof (kev.info) == DLIL_MODARGLEN);
7644
7645 bzero(&kev, sizeof (kev));
7646
7647 microtime(&tv);
7648 kev.timestamp = tv.tv_sec;
7649 bcopy(modid, &kev.modid, DLIL_MODIDLEN);
7650 if (info != NULL)
7651 bcopy(info, &kev.info, DLIL_MODARGLEN);
7652
7653 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_ISSUES,
7654 &kev.link_data, sizeof (kev));
7655 }
7656
7657 errno_t
7658 ifnet_getset_opportunistic(ifnet_t ifp, u_long cmd, struct ifreq *ifr,
7659 struct proc *p)
7660 {
7661 u_int32_t level = IFNET_THROTTLE_OFF;
7662 errno_t result = 0;
7663
7664 VERIFY(cmd == SIOCSIFOPPORTUNISTIC || cmd == SIOCGIFOPPORTUNISTIC);
7665
7666 if (cmd == SIOCSIFOPPORTUNISTIC) {
7667 /*
7668 * XXX: Use priv_check_cred() instead of root check?
7669 */
7670 if ((result = proc_suser(p)) != 0)
7671 return (result);
7672
7673 if (ifr->ifr_opportunistic.ifo_flags ==
7674 IFRIFOF_BLOCK_OPPORTUNISTIC)
7675 level = IFNET_THROTTLE_OPPORTUNISTIC;
7676 else if (ifr->ifr_opportunistic.ifo_flags == 0)
7677 level = IFNET_THROTTLE_OFF;
7678 else
7679 result = EINVAL;
7680
7681 if (result == 0)
7682 result = ifnet_set_throttle(ifp, level);
7683 } else if ((result = ifnet_get_throttle(ifp, &level)) == 0) {
7684 ifr->ifr_opportunistic.ifo_flags = 0;
7685 if (level == IFNET_THROTTLE_OPPORTUNISTIC) {
7686 ifr->ifr_opportunistic.ifo_flags |=
7687 IFRIFOF_BLOCK_OPPORTUNISTIC;
7688 }
7689 }
7690
7691 /*
7692 * Return the count of current opportunistic connections
7693 * over the interface.
7694 */
7695 if (result == 0) {
7696 uint32_t flags = 0;
7697 flags |= (cmd == SIOCSIFOPPORTUNISTIC) ?
7698 INPCB_OPPORTUNISTIC_SETCMD : 0;
7699 flags |= (level == IFNET_THROTTLE_OPPORTUNISTIC) ?
7700 INPCB_OPPORTUNISTIC_THROTTLEON : 0;
7701 ifr->ifr_opportunistic.ifo_inuse =
7702 udp_count_opportunistic(ifp->if_index, flags) +
7703 tcp_count_opportunistic(ifp->if_index, flags);
7704 }
7705
7706 if (result == EALREADY)
7707 result = 0;
7708
7709 return (result);
7710 }
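/*
 * A minimal userland sketch of exercising the handler above
 * (hypothetical program; it assumes the private SIOCSIFOPPORTUNISTIC
 * and ifr_opportunistic definitions are visible to the caller, which
 * must be running as root for the set case):
 *
 *	struct ifreq ifr;
 *	int s = socket(AF_INET, SOCK_DGRAM, 0);
 *
 *	bzero(&ifr, sizeof (ifr));
 *	strlcpy(ifr.ifr_name, "en0", sizeof (ifr.ifr_name));
 *	ifr.ifr_opportunistic.ifo_flags = IFRIFOF_BLOCK_OPPORTUNISTIC;
 *	if (ioctl(s, SIOCSIFOPPORTUNISTIC, &ifr) == 0)
 *		printf("opportunistic connections in use: %u\n",
 *		    ifr.ifr_opportunistic.ifo_inuse);
 */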
7711
7712 int
7713 ifnet_get_throttle(struct ifnet *ifp, u_int32_t *level)
7714 {
7715 struct ifclassq *ifq;
7716 int err = 0;
7717
7718 if (!(ifp->if_eflags & IFEF_TXSTART))
7719 return (ENXIO);
7720
7721 *level = IFNET_THROTTLE_OFF;
7722
7723 ifq = &ifp->if_snd;
7724 IFCQ_LOCK(ifq);
7725 /* Throttling works only for IFCQ, not ALTQ instances */
7726 if (IFCQ_IS_ENABLED(ifq))
7727 IFCQ_GET_THROTTLE(ifq, *level, err);
7728 IFCQ_UNLOCK(ifq);
7729
7730 return (err);
7731 }
7732
7733 int
7734 ifnet_set_throttle(struct ifnet *ifp, u_int32_t level)
7735 {
7736 struct ifclassq *ifq;
7737 int err = 0;
7738
7739 if (!(ifp->if_eflags & IFEF_TXSTART))
7740 return (ENXIO);
7741
7742 ifq = &ifp->if_snd;
7743
7744 switch (level) {
7745 case IFNET_THROTTLE_OFF:
7746 case IFNET_THROTTLE_OPPORTUNISTIC:
7747 break;
7748 default:
7749 return (EINVAL);
7750 }
7751
7752 IFCQ_LOCK(ifq);
7753 if (IFCQ_IS_ENABLED(ifq))
7754 IFCQ_SET_THROTTLE(ifq, level, err);
7755 IFCQ_UNLOCK(ifq);
7756
7757 if (err == 0) {
7758 printf("%s: throttling level set to %d\n", if_name(ifp),
7759 level);
7760 if (level == IFNET_THROTTLE_OFF)
7761 ifnet_start(ifp);
7762 }
7763
7764 return (err);
7765 }
7766
7767 errno_t
7768 ifnet_getset_log(ifnet_t ifp, u_long cmd, struct ifreq *ifr,
7769 struct proc *p)
7770 {
7771 #pragma unused(p)
7772 errno_t result = 0;
7773 uint32_t flags;
7774 int level, category, subcategory;
7775
7776 VERIFY(cmd == SIOCSIFLOG || cmd == SIOCGIFLOG);
7777
7778 if (cmd == SIOCSIFLOG) {
7779 if ((result = priv_check_cred(kauth_cred_get(),
7780 PRIV_NET_INTERFACE_CONTROL, 0)) != 0)
7781 return (result);
7782
7783 level = ifr->ifr_log.ifl_level;
7784 if (level < IFNET_LOG_MIN || level > IFNET_LOG_MAX)
7785 result = EINVAL;
7786
7787 flags = ifr->ifr_log.ifl_flags;
7788 if ((flags &= IFNET_LOGF_MASK) == 0)
7789 result = EINVAL;
7790
7791 category = ifr->ifr_log.ifl_category;
7792 subcategory = ifr->ifr_log.ifl_subcategory;
7793
7794 if (result == 0)
7795 result = ifnet_set_log(ifp, level, flags,
7796 category, subcategory);
7797 } else {
7798 result = ifnet_get_log(ifp, &level, &flags, &category,
7799 &subcategory);
7800 if (result == 0) {
7801 ifr->ifr_log.ifl_level = level;
7802 ifr->ifr_log.ifl_flags = flags;
7803 ifr->ifr_log.ifl_category = category;
7804 ifr->ifr_log.ifl_subcategory = subcategory;
7805 }
7806 }
7807
7808 return (result);
7809 }
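/*
 * A minimal userland sketch for the logging ioctls above (hypothetical
 * program; it assumes the private SIOCGIFLOG and ifr_log definitions
 * are visible, and a setter additionally needs the
 * PRIV_NET_INTERFACE_CONTROL privilege checked above):
 *
 *	struct ifreq ifr;
 *
 *	bzero(&ifr, sizeof (ifr));
 *	strlcpy(ifr.ifr_name, "en0", sizeof (ifr.ifr_name));
 *	if (ioctl(s, SIOCGIFLOG, &ifr) == 0)
 *		printf("level %d flags 0x%x category %d subcategory %d\n",
 *		    ifr.ifr_log.ifl_level, ifr.ifr_log.ifl_flags,
 *		    ifr.ifr_log.ifl_category, ifr.ifr_log.ifl_subcategory);
 */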
7810
7811 int
7812 ifnet_set_log(struct ifnet *ifp, int32_t level, uint32_t flags,
7813 int32_t category, int32_t subcategory)
7814 {
7815 int err = 0;
7816
7817 VERIFY(level >= IFNET_LOG_MIN && level <= IFNET_LOG_MAX);
7818 VERIFY(flags & IFNET_LOGF_MASK);
7819
7820 /*
7821 * The logging level applies to all facilities; make sure to
7822 * update them all with the most current level.
7823 */
7824 flags |= ifp->if_log.flags;
7825
7826 if (ifp->if_output_ctl != NULL) {
7827 struct ifnet_log_params l;
7828
7829 bzero(&l, sizeof (l));
7830 l.level = level;
7831 l.flags = flags;
7832 l.flags &= ~IFNET_LOGF_DLIL;
7833 l.category = category;
7834 l.subcategory = subcategory;
7835
7836 /* Send this request to lower layers */
7837 if (l.flags != 0) {
7838 err = ifp->if_output_ctl(ifp, IFNET_CTL_SET_LOG,
7839 sizeof (l), &l);
7840 }
7841 } else if ((flags & ~IFNET_LOGF_DLIL) && ifp->if_output_ctl == NULL) {
7842 /*
7843 * If targeted to the lower layers without an output
7844 * control callback registered on the interface, just
7845 * silently ignore facilities other than ours.
7846 */
7847 flags &= IFNET_LOGF_DLIL;
7848 if (flags == 0 && (!(ifp->if_log.flags & IFNET_LOGF_DLIL)))
7849 level = 0;
7850 }
7851
7852 if (err == 0) {
7853 if ((ifp->if_log.level = level) == IFNET_LOG_DEFAULT)
7854 ifp->if_log.flags = 0;
7855 else
7856 ifp->if_log.flags |= flags;
7857
7858 log(LOG_INFO, "%s: logging level set to %d flags=%b "
7859 "arg=%b, category=%d subcategory=%d\n", if_name(ifp),
7860 ifp->if_log.level, ifp->if_log.flags,
7861 IFNET_LOGF_BITS, flags, IFNET_LOGF_BITS,
7862 category, subcategory);
7863 }
7864
7865 return (err);
7866 }
7867
7868 int
7869 ifnet_get_log(struct ifnet *ifp, int32_t *level, uint32_t *flags,
7870 int32_t *category, int32_t *subcategory)
7871 {
7872 if (level != NULL)
7873 *level = ifp->if_log.level;
7874 if (flags != NULL)
7875 *flags = ifp->if_log.flags;
7876 if (category != NULL)
7877 *category = ifp->if_log.category;
7878 if (subcategory != NULL)
7879 *subcategory = ifp->if_log.subcategory;
7880
7881 return (0);
7882 }
7883
7884 int
7885 ifnet_notify_address(struct ifnet *ifp, int af)
7886 {
7887 struct ifnet_notify_address_params na;
7888
7889 #if PF
7890 (void) pf_ifaddr_hook(ifp);
7891 #endif /* PF */
7892
7893 if (ifp->if_output_ctl == NULL)
7894 return (EOPNOTSUPP);
7895
7896 bzero(&na, sizeof (na));
7897 na.address_family = af;
7898
7899 return (ifp->if_output_ctl(ifp, IFNET_CTL_NOTIFY_ADDRESS,
7900 sizeof (na), &na));
7901 }
7902
7903 errno_t
7904 ifnet_flowid(struct ifnet *ifp, uint32_t *flowid)
7905 {
7906 if (ifp == NULL || flowid == NULL) {
7907 return (EINVAL);
7908 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
7909 !IF_FULLY_ATTACHED(ifp)) {
7910 return (ENXIO);
7911 }
7912
7913 *flowid = ifp->if_flowhash;
7914
7915 return (0);
7916 }
7917
7918 errno_t
7919 ifnet_disable_output(struct ifnet *ifp)
7920 {
7921 int err;
7922
7923 if (ifp == NULL) {
7924 return (EINVAL);
7925 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
7926 !IF_FULLY_ATTACHED(ifp)) {
7927 return (ENXIO);
7928 }
7929
7930 if ((err = ifnet_fc_add(ifp)) == 0) {
7931 lck_mtx_lock_spin(&ifp->if_start_lock);
7932 ifp->if_start_flags |= IFSF_FLOW_CONTROLLED;
7933 lck_mtx_unlock(&ifp->if_start_lock);
7934 }
7935 return (err);
7936 }
7937
7938 errno_t
7939 ifnet_enable_output(struct ifnet *ifp)
7940 {
7941 if (ifp == NULL) {
7942 return (EINVAL);
7943 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
7944 !IF_FULLY_ATTACHED(ifp)) {
7945 return (ENXIO);
7946 }
7947
7948 ifnet_start_common(ifp, TRUE);
7949 return (0);
7950 }
7951
7952 void
7953 ifnet_flowadv(uint32_t flowhash)
7954 {
7955 struct ifnet_fc_entry *ifce;
7956 struct ifnet *ifp;
7957
7958 ifce = ifnet_fc_get(flowhash);
7959 if (ifce == NULL)
7960 return;
7961
7962 VERIFY(ifce->ifce_ifp != NULL);
7963 ifp = ifce->ifce_ifp;
7964
7965 /* flow hash gets recalculated per attach, so check */
7966 if (ifnet_is_attached(ifp, 1)) {
7967 if (ifp->if_flowhash == flowhash)
7968 (void) ifnet_enable_output(ifp);
7969 ifnet_decr_iorefcnt(ifp);
7970 }
7971 ifnet_fc_entry_free(ifce);
7972 }
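/*
 * A minimal sketch of the flow-advisory round trip implemented above
 * (hypothetical driver code): a driver whose transmit ring fills up
 * asks DLIL to stop dequeueing for it, and once descriptors are
 * reclaimed the advisory path re-enables output by flow hash.
 *
 *	In the driver's start routine, on ring full:
 *		(void) ifnet_disable_output(ifp);
 *
 *	Later, when transmit resources free up:
 *		ifnet_flowadv(ifp->if_flowhash);
 */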
7973
7974 /*
7975 * Function to compare ifnet_fc_entries in ifnet flow control tree
7976 */
7977 static inline int
7978 ifce_cmp(const struct ifnet_fc_entry *fc1, const struct ifnet_fc_entry *fc2)
7979 {
7980 return (fc1->ifce_flowhash - fc2->ifce_flowhash);
7981 }
7982
7983 static int
7984 ifnet_fc_add(struct ifnet *ifp)
7985 {
7986 struct ifnet_fc_entry keyfc, *ifce;
7987 uint32_t flowhash;
7988
7989 VERIFY(ifp != NULL && (ifp->if_eflags & IFEF_TXSTART));
7990 VERIFY(ifp->if_flowhash != 0);
7991 flowhash = ifp->if_flowhash;
7992
7993 bzero(&keyfc, sizeof (keyfc));
7994 keyfc.ifce_flowhash = flowhash;
7995
7996 lck_mtx_lock_spin(&ifnet_fc_lock);
7997 ifce = RB_FIND(ifnet_fc_tree, &ifnet_fc_tree, &keyfc);
7998 if (ifce != NULL && ifce->ifce_ifp == ifp) {
7999 /* Entry is already in ifnet_fc_tree, return */
8000 lck_mtx_unlock(&ifnet_fc_lock);
8001 return (0);
8002 }
8003
8004 if (ifce != NULL) {
8005 /*
8006 * There is a different fc entry with the same flow hash
8007 * but different ifp pointer. There can be a collision
8008 * on flow hash but the probability is low. Let's just
8009 * avoid adding a second one when there is a collision.
8010 */
8011 lck_mtx_unlock(&ifnet_fc_lock);
8012 return (EAGAIN);
8013 }
8014
8015 /* become regular mutex */
8016 lck_mtx_convert_spin(&ifnet_fc_lock);
8017
8018 ifce = zalloc(ifnet_fc_zone);
8019 if (ifce == NULL) {
8020 /* memory allocation failed */
8021 lck_mtx_unlock(&ifnet_fc_lock);
8022 return (ENOMEM);
8023 }
8024 bzero(ifce, ifnet_fc_zone_size);
8025
8026 ifce->ifce_flowhash = flowhash;
8027 ifce->ifce_ifp = ifp;
8028
8029 RB_INSERT(ifnet_fc_tree, &ifnet_fc_tree, ifce);
8030 lck_mtx_unlock(&ifnet_fc_lock);
8031 return (0);
8032 }
8033
8034 static struct ifnet_fc_entry *
8035 ifnet_fc_get(uint32_t flowhash)
8036 {
8037 struct ifnet_fc_entry keyfc, *ifce;
8038 struct ifnet *ifp;
8039
8040 bzero(&keyfc, sizeof (keyfc));
8041 keyfc.ifce_flowhash = flowhash;
8042
8043 lck_mtx_lock_spin(&ifnet_fc_lock);
8044 ifce = RB_FIND(ifnet_fc_tree, &ifnet_fc_tree, &keyfc);
8045 if (ifce == NULL) {
8046 /* Entry is not present in ifnet_fc_tree, return */
8047 lck_mtx_unlock(&ifnet_fc_lock);
8048 return (NULL);
8049 }
8050
8051 RB_REMOVE(ifnet_fc_tree, &ifnet_fc_tree, ifce);
8052
8053 VERIFY(ifce->ifce_ifp != NULL);
8054 ifp = ifce->ifce_ifp;
8055
8056 /* become regular mutex */
8057 lck_mtx_convert_spin(&ifnet_fc_lock);
8058
8059 if (!ifnet_is_attached(ifp, 0)) {
8060 /*
8061 * This ifp is not attached or in the process of being
8062 * detached; just don't process it.
8063 */
8064 ifnet_fc_entry_free(ifce);
8065 ifce = NULL;
8066 }
8067 lck_mtx_unlock(&ifnet_fc_lock);
8068
8069 return (ifce);
8070 }
8071
8072 static void
8073 ifnet_fc_entry_free(struct ifnet_fc_entry *ifce)
8074 {
8075 zfree(ifnet_fc_zone, ifce);
8076 }
8077
8078 static uint32_t
8079 ifnet_calc_flowhash(struct ifnet *ifp)
8080 {
8081 struct ifnet_flowhash_key fh __attribute__((aligned(8)));
8082 uint32_t flowhash = 0;
8083
8084 if (ifnet_flowhash_seed == 0)
8085 ifnet_flowhash_seed = RandomULong();
8086
8087 bzero(&fh, sizeof (fh));
8088
8089 (void) snprintf(fh.ifk_name, sizeof (fh.ifk_name), "%s", ifp->if_name);
8090 fh.ifk_unit = ifp->if_unit;
8091 fh.ifk_flags = ifp->if_flags;
8092 fh.ifk_eflags = ifp->if_eflags;
8093 fh.ifk_capabilities = ifp->if_capabilities;
8094 fh.ifk_capenable = ifp->if_capenable;
8095 fh.ifk_output_sched_model = ifp->if_output_sched_model;
8096 fh.ifk_rand1 = RandomULong();
8097 fh.ifk_rand2 = RandomULong();
8098
8099 try_again:
8100 flowhash = net_flowhash(&fh, sizeof (fh), ifnet_flowhash_seed);
8101 if (flowhash == 0) {
8102 /* try to get a non-zero flowhash */
8103 ifnet_flowhash_seed = RandomULong();
8104 goto try_again;
8105 }
8106
8107 return (flowhash);
8108 }
8109
8110 int
8111 ifnet_set_netsignature(struct ifnet *ifp, uint8_t family, uint8_t len,
8112 uint16_t flags, uint8_t *data)
8113 {
8114 #pragma unused(flags)
8115 int error = 0;
8116
8117 switch (family) {
8118 case AF_INET:
8119 if_inetdata_lock_exclusive(ifp);
8120 if (IN_IFEXTRA(ifp) != NULL) {
8121 if (len == 0) {
8122 /* Allow clearing the signature */
8123 IN_IFEXTRA(ifp)->netsig_len = 0;
8124 bzero(IN_IFEXTRA(ifp)->netsig,
8125 sizeof (IN_IFEXTRA(ifp)->netsig));
8126 if_inetdata_lock_done(ifp);
8127 break;
8128 } else if (len > sizeof (IN_IFEXTRA(ifp)->netsig)) {
8129 error = EINVAL;
8130 if_inetdata_lock_done(ifp);
8131 break;
8132 }
8133 IN_IFEXTRA(ifp)->netsig_len = len;
8134 bcopy(data, IN_IFEXTRA(ifp)->netsig, len);
8135 } else {
8136 error = ENOMEM;
8137 }
8138 if_inetdata_lock_done(ifp);
8139 break;
8140
8141 case AF_INET6:
8142 if_inet6data_lock_exclusive(ifp);
8143 if (IN6_IFEXTRA(ifp) != NULL) {
8144 if (len == 0) {
8145 /* Allow clearing the signature */
8146 IN6_IFEXTRA(ifp)->netsig_len = 0;
8147 bzero(IN6_IFEXTRA(ifp)->netsig,
8148 sizeof (IN6_IFEXTRA(ifp)->netsig));
8149 if_inet6data_lock_done(ifp);
8150 break;
8151 } else if (len > sizeof (IN6_IFEXTRA(ifp)->netsig)) {
8152 error = EINVAL;
8153 if_inet6data_lock_done(ifp);
8154 break;
8155 }
8156 IN6_IFEXTRA(ifp)->netsig_len = len;
8157 bcopy(data, IN6_IFEXTRA(ifp)->netsig, len);
8158 } else {
8159 error = ENOMEM;
8160 }
8161 if_inet6data_lock_done(ifp);
8162 break;
8163
8164 default:
8165 error = EINVAL;
8166 break;
8167 }
8168
8169 return (error);
8170 }
8171
8172 int
8173 ifnet_get_netsignature(struct ifnet *ifp, uint8_t family, uint8_t *len,
8174 uint16_t *flags, uint8_t *data)
8175 {
8176 int error = 0;
8177
8178 if (ifp == NULL || len == NULL || data == NULL)
8179 return (EINVAL);
8180
8181 switch (family) {
8182 case AF_INET:
8183 if_inetdata_lock_shared(ifp);
8184 if (IN_IFEXTRA(ifp) != NULL) {
8185 if (*len == 0 || *len < IN_IFEXTRA(ifp)->netsig_len) {
8186 error = EINVAL;
8187 if_inetdata_lock_done(ifp);
8188 break;
8189 }
8190 if ((*len = IN_IFEXTRA(ifp)->netsig_len) > 0)
8191 bcopy(IN_IFEXTRA(ifp)->netsig, data, *len);
8192 else
8193 error = ENOENT;
8194 } else {
8195 error = ENOMEM;
8196 }
8197 if_inetdata_lock_done(ifp);
8198 break;
8199
8200 case AF_INET6:
8201 if_inet6data_lock_shared(ifp);
8202 if (IN6_IFEXTRA(ifp) != NULL) {
8203 if (*len == 0 || *len < IN6_IFEXTRA(ifp)->netsig_len) {
8204 error = EINVAL;
8205 if_inet6data_lock_done(ifp);
8206 break;
8207 }
8208 if ((*len = IN6_IFEXTRA(ifp)->netsig_len) > 0)
8209 bcopy(IN6_IFEXTRA(ifp)->netsig, data, *len);
8210 else
8211 error = ENOENT;
8212 } else {
8213 error = ENOMEM;
8214 }
8215 if_inet6data_lock_done(ifp);
8216 break;
8217
8218 default:
8219 error = EINVAL;
8220 break;
8221 }
8222
8223 if (error == 0 && flags != NULL)
8224 *flags = 0;
8225
8226 return (error);
8227 }
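/*
 * A minimal sketch of reading a network signature back out
 * (hypothetical caller; the 64-byte buffer is only an assumption --
 * the requirement visible above is that *len on input be no smaller
 * than the stored netsig_len):
 *
 *	uint8_t sig[64];
 *	uint8_t len = sizeof (sig);
 *	uint16_t flags;
 *
 *	if (ifnet_get_netsignature(ifp, AF_INET6, &len, &flags, sig) == 0)
 *		... len now holds the stored signature length ...
 */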
8228
8229 #if INET6
8230 int
8231 ifnet_set_nat64prefix(struct ifnet *ifp, struct ipv6_prefix *prefixes)
8232 {
8233 int i, error = 0, one_set = 0;
8234
8235 if_inet6data_lock_exclusive(ifp);
8236
8237 if (IN6_IFEXTRA(ifp) == NULL) {
8238 error = ENOMEM;
8239 goto out;
8240 }
8241
8242 for (i = 0; i < NAT64_MAX_NUM_PREFIXES; i++) {
8243 uint32_t prefix_len =
8244 prefixes[i].prefix_len;
8245 struct in6_addr *prefix =
8246 &prefixes[i].ipv6_prefix;
8247
8248 if (prefix_len == 0) {
8249 /* Allow clearing the prefix */
8250 IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len = 0;
8251 bzero(&IN6_IFEXTRA(ifp)->nat64_prefixes[i].ipv6_prefix,
8252 sizeof(struct in6_addr));
8253
8254 continue;
8255 } else if (prefix_len != NAT64_PREFIX_LEN_32 &&
8256 prefix_len != NAT64_PREFIX_LEN_40 &&
8257 prefix_len != NAT64_PREFIX_LEN_48 &&
8258 prefix_len != NAT64_PREFIX_LEN_56 &&
8259 prefix_len != NAT64_PREFIX_LEN_64 &&
8260 prefix_len != NAT64_PREFIX_LEN_96) {
8261 error = EINVAL;
8262 goto out;
8263 }
8264
8265 if (IN6_IS_SCOPE_EMBED(prefix)) {
8266 error = EINVAL;
8267 goto out;
8268 }
8269
8270 IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len = prefix_len;
8271 bcopy(prefix, &IN6_IFEXTRA(ifp)->nat64_prefixes[i].ipv6_prefix,
8272 sizeof(struct in6_addr));
8273 one_set = 1;
8274 }
8275
8276 out:
8277 if_inet6data_lock_done(ifp);
8278
8279 if (error == 0 && one_set != 0)
8280 necp_update_all_clients();
8281
8282 return (error);
8283 }
8284
8285 int
8286 ifnet_get_nat64prefix(struct ifnet *ifp, struct ipv6_prefix *prefixes)
8287 {
8288 int i, found_one = 0, error = 0;
8289
8290 if (ifp == NULL)
8291 return (EINVAL);
8292
8293 if_inet6data_lock_shared(ifp);
8294
8295 if (IN6_IFEXTRA(ifp) == NULL) {
8296 error = ENOMEM;
8297 goto out;
8298 }
8299
8300 for (i = 0; i < NAT64_MAX_NUM_PREFIXES; i++) {
8301 if (IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len != 0)
8302 found_one = 1;
8303 }
8304
8305 if (found_one == 0) {
8306 error = ENOENT;
8307 goto out;
8308 }
8309
8310 if (prefixes)
8311 bcopy(IN6_IFEXTRA(ifp)->nat64_prefixes, prefixes,
8312 sizeof(IN6_IFEXTRA(ifp)->nat64_prefixes));
8313
8314 out:
8315 if_inet6data_lock_done(ifp);
8316
8317 return (error);
8318 }
8319 #endif
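/*
 * A minimal sketch of installing a single /96 NAT64 prefix through the
 * setter above (hypothetical caller; slots left zeroed are treated as
 * cleared, and a successful call with at least one prefix set kicks
 * NECP clients as shown above):
 *
 *	struct ipv6_prefix prefixes[NAT64_MAX_NUM_PREFIXES];
 *
 *	bzero(prefixes, sizeof (prefixes));
 *	prefixes[0].prefix_len = NAT64_PREFIX_LEN_96;
 *	... fill prefixes[0].ipv6_prefix with the desired 96-bit prefix ...
 *	error = ifnet_set_nat64prefix(ifp, prefixes);
 */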
8320
8321 static void
8322 dlil_output_cksum_dbg(struct ifnet *ifp, struct mbuf *m, uint32_t hoff,
8323 protocol_family_t pf)
8324 {
8325 #pragma unused(ifp)
8326 uint32_t did_sw;
8327
8328 if (!(hwcksum_dbg_mode & HWCKSUM_DBG_FINALIZE_FORCED) ||
8329 (m->m_pkthdr.csum_flags & (CSUM_TSO_IPV4|CSUM_TSO_IPV6)))
8330 return;
8331
8332 switch (pf) {
8333 case PF_INET:
8334 did_sw = in_finalize_cksum(m, hoff, m->m_pkthdr.csum_flags);
8335 if (did_sw & CSUM_DELAY_IP)
8336 hwcksum_dbg_finalized_hdr++;
8337 if (did_sw & CSUM_DELAY_DATA)
8338 hwcksum_dbg_finalized_data++;
8339 break;
8340 #if INET6
8341 case PF_INET6:
8342 /*
8343 * Checksum offload should not have been enabled when
8344 * extension headers exist; that also means that we
8345 * cannot force-finalize packets with extension headers.
8346 * Indicate to the callee that it should skip such cases by
8347 * setting optlen to -1.
8348 */
8349 did_sw = in6_finalize_cksum(m, hoff, -1, -1,
8350 m->m_pkthdr.csum_flags);
8351 if (did_sw & CSUM_DELAY_IPV6_DATA)
8352 hwcksum_dbg_finalized_data++;
8353 break;
8354 #endif /* INET6 */
8355 default:
8356 return;
8357 }
8358 }
8359
8360 static void
8361 dlil_input_cksum_dbg(struct ifnet *ifp, struct mbuf *m, char *frame_header,
8362 protocol_family_t pf)
8363 {
8364 uint16_t sum = 0;
8365 uint32_t hlen;
8366
8367 if (frame_header == NULL ||
8368 frame_header < (char *)mbuf_datastart(m) ||
8369 frame_header > (char *)m->m_data) {
8370 printf("%s: frame header pointer 0x%llx out of range "
8371 "[0x%llx,0x%llx] for mbuf 0x%llx\n", if_name(ifp),
8372 (uint64_t)VM_KERNEL_ADDRPERM(frame_header),
8373 (uint64_t)VM_KERNEL_ADDRPERM(mbuf_datastart(m)),
8374 (uint64_t)VM_KERNEL_ADDRPERM(m->m_data),
8375 (uint64_t)VM_KERNEL_ADDRPERM(m));
8376 return;
8377 }
8378 hlen = (m->m_data - frame_header);
8379
8380 switch (pf) {
8381 case PF_INET:
8382 #if INET6
8383 case PF_INET6:
8384 #endif /* INET6 */
8385 break;
8386 default:
8387 return;
8388 }
8389
8390 /*
8391 * Force partial checksum offload; useful to simulate cases
8392 * where the hardware does not support partial checksum offload,
8393 * in order to validate correctness throughout the layers above.
8394 */
8395 if (hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED) {
8396 uint32_t foff = hwcksum_dbg_partial_rxoff_forced;
8397
8398 if (foff > (uint32_t)m->m_pkthdr.len)
8399 return;
8400
8401 m->m_pkthdr.csum_flags &= ~CSUM_RX_FLAGS;
8402
8403 /* Compute 16-bit 1's complement sum from forced offset */
8404 sum = m_sum16(m, foff, (m->m_pkthdr.len - foff));
8405
8406 m->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PARTIAL);
8407 m->m_pkthdr.csum_rx_val = sum;
8408 m->m_pkthdr.csum_rx_start = (foff + hlen);
8409
8410 hwcksum_dbg_partial_forced++;
8411 hwcksum_dbg_partial_forced_bytes += m->m_pkthdr.len;
8412 }
8413
8414 /*
8415 * Partial checksum offload verification (and adjustment);
8416 * useful to validate and test cases where the hardware
8417 * supports partial checksum offload.
8418 */
8419 if ((m->m_pkthdr.csum_flags &
8420 (CSUM_DATA_VALID | CSUM_PARTIAL | CSUM_PSEUDO_HDR)) ==
8421 (CSUM_DATA_VALID | CSUM_PARTIAL)) {
8422 uint32_t rxoff;
8423
8424 /* Start offset must begin after frame header */
8425 rxoff = m->m_pkthdr.csum_rx_start;
8426 if (hlen > rxoff) {
8427 hwcksum_dbg_bad_rxoff++;
8428 if (dlil_verbose) {
8429 printf("%s: partial cksum start offset %d "
8430 "is less than frame header length %d for "
8431 "mbuf 0x%llx\n", if_name(ifp), rxoff, hlen,
8432 (uint64_t)VM_KERNEL_ADDRPERM(m));
8433 }
8434 return;
8435 }
8436 rxoff -= hlen;
8437
8438 if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED)) {
8439 /*
8440 * Compute the expected 16-bit 1's complement sum;
8441 * skip this if we've already computed it above
8442 * when partial checksum offload is forced.
8443 */
8444 sum = m_sum16(m, rxoff, (m->m_pkthdr.len - rxoff));
8445
8446 /* Hardware or driver is buggy */
8447 if (sum != m->m_pkthdr.csum_rx_val) {
8448 hwcksum_dbg_bad_cksum++;
8449 if (dlil_verbose) {
8450 printf("%s: bad partial cksum value "
8451 "0x%x (expected 0x%x) for mbuf "
8452 "0x%llx [rx_start %d]\n",
8453 if_name(ifp),
8454 m->m_pkthdr.csum_rx_val, sum,
8455 (uint64_t)VM_KERNEL_ADDRPERM(m),
8456 m->m_pkthdr.csum_rx_start);
8457 }
8458 return;
8459 }
8460 }
8461 hwcksum_dbg_verified++;
8462
8463 /*
8464 * This code allows us to emulate various hardware implementations that
8465 * perform 16-bit 1's complement sum beginning at various
8466 * start offset values.
8467 */
8468 if (hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_RXOFF_ADJ) {
8469 uint32_t aoff = hwcksum_dbg_partial_rxoff_adj;
8470
8471 if (aoff == rxoff || aoff > (uint32_t)m->m_pkthdr.len)
8472 return;
8473
8474 sum = m_adj_sum16(m, rxoff, aoff,
8475 m_pktlen(m) - aoff, sum);
8476
8477 m->m_pkthdr.csum_rx_val = sum;
8478 m->m_pkthdr.csum_rx_start = (aoff + hlen);
8479
8480 hwcksum_dbg_adjusted++;
8481 }
8482 }
8483 }
8484
8485 static int
8486 sysctl_hwcksum_dbg_mode SYSCTL_HANDLER_ARGS
8487 {
8488 #pragma unused(arg1, arg2)
8489 u_int32_t i;
8490 int err;
8491
8492 i = hwcksum_dbg_mode;
8493
8494 err = sysctl_handle_int(oidp, &i, 0, req);
8495 if (err != 0 || req->newptr == USER_ADDR_NULL)
8496 return (err);
8497
8498 if (hwcksum_dbg == 0)
8499 return (ENODEV);
8500
8501 if ((i & ~HWCKSUM_DBG_MASK) != 0)
8502 return (EINVAL);
8503
8504 hwcksum_dbg_mode = (i & HWCKSUM_DBG_MASK);
8505
8506 return (err);
8507 }
8508
8509 static int
8510 sysctl_hwcksum_dbg_partial_rxoff_forced SYSCTL_HANDLER_ARGS
8511 {
8512 #pragma unused(arg1, arg2)
8513 u_int32_t i;
8514 int err;
8515
8516 i = hwcksum_dbg_partial_rxoff_forced;
8517
8518 err = sysctl_handle_int(oidp, &i, 0, req);
8519 if (err != 0 || req->newptr == USER_ADDR_NULL)
8520 return (err);
8521
8522 if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED))
8523 return (ENODEV);
8524
8525 hwcksum_dbg_partial_rxoff_forced = i;
8526
8527 return (err);
8528 }
8529
8530 static int
8531 sysctl_hwcksum_dbg_partial_rxoff_adj SYSCTL_HANDLER_ARGS
8532 {
8533 #pragma unused(arg1, arg2)
8534 u_int32_t i;
8535 int err;
8536
8537 i = hwcksum_dbg_partial_rxoff_adj;
8538
8539 err = sysctl_handle_int(oidp, &i, 0, req);
8540 if (err != 0 || req->newptr == USER_ADDR_NULL)
8541 return (err);
8542
8543 if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_RXOFF_ADJ))
8544 return (ENODEV);
8545
8546 hwcksum_dbg_partial_rxoff_adj = i;
8547
8548 return (err);
8549 }
8550
8551 static int
8552 sysctl_tx_chain_len_stats SYSCTL_HANDLER_ARGS
8553 {
8554 #pragma unused(oidp, arg1, arg2)
8555 int err;
8556
8557 if (req->oldptr == USER_ADDR_NULL) {
8558
8559 }
8560 if (req->newptr != USER_ADDR_NULL) {
8561 return (EPERM);
8562 }
8563 err = SYSCTL_OUT(req, &tx_chain_len_stats,
8564 sizeof(struct chain_len_stats));
8565
8566 return (err);
8567 }
8568
8569
8570 #if DEBUG || DEVELOPMENT
8571 /* Blob for sum16 verification */
8572 static uint8_t sumdata[] = {
8573 0x1f, 0x8b, 0x08, 0x08, 0x4c, 0xe5, 0x9a, 0x4f, 0x00, 0x03,
8574 0x5f, 0x00, 0x5d, 0x91, 0x41, 0x4e, 0xc4, 0x30, 0x0c, 0x45,
8575 0xf7, 0x9c, 0xc2, 0x07, 0x18, 0xf5, 0x0e, 0xb0, 0xe2, 0x00,
8576 0x48, 0x88, 0xa5, 0xdb, 0xba, 0x49, 0x34, 0x69, 0xdc, 0x71,
8577 0x92, 0xa9, 0xc2, 0x8a, 0x6b, 0x70, 0x3d, 0x4e, 0x82, 0x93,
8578 0xb4, 0x08, 0xd8, 0xc5, 0xb1, 0xfd, 0xff, 0xb3, 0xfd, 0x4c,
8579 0x42, 0x5f, 0x1f, 0x9f, 0x11, 0x12, 0x43, 0xb2, 0x04, 0x93,
8580 0xe0, 0x7b, 0x01, 0x0e, 0x14, 0x07, 0x78, 0xd1, 0x78, 0x75,
8581 0x71, 0x71, 0xe9, 0x08, 0x84, 0x46, 0xf2, 0xc7, 0x3b, 0x09,
8582 0xe7, 0xd1, 0xd3, 0x8a, 0x57, 0x92, 0x33, 0xcd, 0x39, 0xcc,
8583 0xb0, 0x91, 0x89, 0xe0, 0x42, 0x53, 0x8b, 0xb7, 0x8c, 0x42,
8584 0x60, 0xd9, 0x9f, 0x7a, 0x55, 0x19, 0x76, 0xcb, 0x10, 0x49,
8585 0x35, 0xac, 0x0b, 0x5a, 0x3c, 0xbb, 0x65, 0x51, 0x8c, 0x90,
8586 0x7c, 0x69, 0x45, 0x45, 0x81, 0xb4, 0x2b, 0x70, 0x82, 0x85,
8587 0x55, 0x91, 0x17, 0x90, 0xdc, 0x14, 0x1e, 0x35, 0x52, 0xdd,
8588 0x02, 0x16, 0xef, 0xb5, 0x40, 0x89, 0xe2, 0x46, 0x53, 0xad,
8589 0x93, 0x6e, 0x98, 0x30, 0xe5, 0x08, 0xb7, 0xcc, 0x03, 0xbc,
8590 0x71, 0x86, 0x09, 0x43, 0x0d, 0x52, 0xf5, 0xa2, 0xf5, 0xa2,
8591 0x56, 0x11, 0x8d, 0xa8, 0xf5, 0xee, 0x92, 0x3d, 0xfe, 0x8c,
8592 0x67, 0x71, 0x8b, 0x0e, 0x2d, 0x70, 0x77, 0xbe, 0xbe, 0xea,
8593 0xbf, 0x9a, 0x8d, 0x9c, 0x53, 0x53, 0xe5, 0xe0, 0x4b, 0x87,
8594 0x85, 0xd2, 0x45, 0x95, 0x30, 0xc1, 0xcc, 0xe0, 0x74, 0x54,
8595 0x13, 0x58, 0xe8, 0xe8, 0x79, 0xa2, 0x09, 0x73, 0xa4, 0x0e,
8596 0x39, 0x59, 0x0c, 0xe6, 0x9c, 0xb2, 0x4f, 0x06, 0x5b, 0x8e,
8597 0xcd, 0x17, 0x6c, 0x5e, 0x95, 0x4d, 0x70, 0xa2, 0x0a, 0xbf,
8598 0xa3, 0xcc, 0x03, 0xbc, 0x5a, 0xe7, 0x75, 0x06, 0x5e, 0x75,
8599 0xef, 0x58, 0x8e, 0x15, 0xd1, 0x0a, 0x18, 0xff, 0xdd, 0xe6,
8600 0x02, 0x3b, 0xb5, 0xb4, 0xa1, 0xe0, 0x72, 0xfc, 0xe3, 0xab,
8601 0x07, 0xe0, 0x4d, 0x65, 0xea, 0x92, 0xeb, 0xf2, 0x7b, 0x17,
8602 0x05, 0xce, 0xc6, 0xf6, 0x2b, 0xbb, 0x70, 0x3d, 0x00, 0x95,
8603 0xe0, 0x07, 0x52, 0x3b, 0x58, 0xfc, 0x7c, 0x69, 0x4d, 0xe9,
8604 0xf7, 0xa9, 0x66, 0x1e, 0x1e, 0xbe, 0x01, 0x69, 0x98, 0xfe,
8605 0xc8, 0x28, 0x02, 0x00, 0x00
8606 };
8607
8608 /* Precomputed 16-bit 1's complement sums for various spans of the above data */
8609 static struct {
8610 boolean_t init;
8611 uint16_t len;
8612 uint16_t sumr; /* reference */
8613 uint16_t sumrp; /* reference, precomputed */
8614 } sumtbl[] = {
8615 { FALSE, 0, 0, 0x0000 },
8616 { FALSE, 1, 0, 0x001f },
8617 { FALSE, 2, 0, 0x8b1f },
8618 { FALSE, 3, 0, 0x8b27 },
8619 { FALSE, 7, 0, 0x790e },
8620 { FALSE, 11, 0, 0xcb6d },
8621 { FALSE, 20, 0, 0x20dd },
8622 { FALSE, 27, 0, 0xbabd },
8623 { FALSE, 32, 0, 0xf3e8 },
8624 { FALSE, 37, 0, 0x197d },
8625 { FALSE, 43, 0, 0x9eae },
8626 { FALSE, 64, 0, 0x4678 },
8627 { FALSE, 127, 0, 0x9399 },
8628 { FALSE, 256, 0, 0xd147 },
8629 { FALSE, 325, 0, 0x0358 },
8630 };
8631 #define SUMTBL_MAX ((int)sizeof (sumtbl) / (int)sizeof (sumtbl[0]))
8632
8633 static void
8634 dlil_verify_sum16(void)
8635 {
8636 struct mbuf *m;
8637 uint8_t *buf;
8638 int n;
8639
8640 /* Make sure test data plus extra room for alignment fits in cluster */
8641 _CASSERT((sizeof (sumdata) + (sizeof (uint64_t) * 2)) <= MCLBYTES);
8642
8643 kprintf("DLIL: running SUM16 self-tests ... ");
8644
8645 m = m_getcl(M_WAITOK, MT_DATA, M_PKTHDR);
8646 MH_ALIGN(m, sizeof (uint32_t)); /* 32-bit starting alignment */
8647 buf = mtod(m, uint8_t *); /* base address */
8648
8649 for (n = 0; n < SUMTBL_MAX; n++) {
8650 uint16_t len = sumtbl[n].len;
8651 int i;
8652
8653 /* Verify for all possible alignments */
8654 for (i = 0; i < (int)sizeof (uint64_t); i++) {
8655 uint16_t sum, sumr;
8656 uint8_t *c;
8657
8658 /* Copy over test data to mbuf */
8659 VERIFY(len <= sizeof (sumdata));
8660 c = buf + i;
8661 bcopy(sumdata, c, len);
8662
8663 /* Zero-offset test (align by data pointer) */
8664 m->m_data = (caddr_t)c;
8665 m->m_len = len;
8666 sum = m_sum16(m, 0, len);
8667
8668 if (!sumtbl[n].init) {
8669 sumr = in_cksum_mbuf_ref(m, len, 0, 0);
8670 sumtbl[n].sumr = sumr;
8671 sumtbl[n].init = TRUE;
8672 } else {
8673 sumr = sumtbl[n].sumr;
8674 }
8675
8676 /* Something is horribly broken; stop now */
8677 if (sumr != sumtbl[n].sumrp) {
8678 panic_plain("\n%s: broken in_cksum_mbuf_ref() "
8679 "for len=%d align=%d sum=0x%04x "
8680 "[expected=0x%04x]\n", __func__,
8681 len, i, sumr, sumtbl[n].sumrp);
8682 /* NOTREACHED */
8683 } else if (sum != sumr) {
8684 panic_plain("\n%s: broken m_sum16() for len=%d "
8685 "align=%d sum=0x%04x [expected=0x%04x]\n",
8686 __func__, len, i, sum, sumr);
8687 /* NOTREACHED */
8688 }
8689
8690 /* Alignment test by offset (fixed data pointer) */
8691 m->m_data = (caddr_t)buf;
8692 m->m_len = i + len;
8693 sum = m_sum16(m, i, len);
8694
8695 /* Something is horribly broken; stop now */
8696 if (sum != sumr) {
8697 panic_plain("\n%s: broken m_sum16() for len=%d "
8698 "offset=%d sum=0x%04x [expected=0x%04x]\n",
8699 __func__, len, i, sum, sumr);
8700 /* NOTREACHED */
8701 }
8702 #if INET
8703 /* Simple sum16 contiguous buffer test by alignment */
8704 sum = b_sum16(c, len);
8705
8706 /* Something is horribly broken; stop now */
8707 if (sum != sumr) {
8708 panic_plain("\n%s: broken b_sum16() for len=%d "
8709 "align=%d sum=0x%04x [expected=0x%04x]\n",
8710 __func__, len, i, sum, sumr);
8711 /* NOTREACHED */
8712 }
8713 #endif /* INET */
8714 }
8715 }
8716 m_freem(m);
8717
8718 kprintf("PASSED\n");
8719 }
8720 #endif /* DEBUG || DEVELOPMENT */
8721
8722 #define CASE_STRINGIFY(x) case x: return #x
8723
8724 __private_extern__ const char *
8725 dlil_kev_dl_code_str(u_int32_t event_code)
8726 {
8727 switch (event_code) {
8728 CASE_STRINGIFY(KEV_DL_SIFFLAGS);
8729 CASE_STRINGIFY(KEV_DL_SIFMETRICS);
8730 CASE_STRINGIFY(KEV_DL_SIFMTU);
8731 CASE_STRINGIFY(KEV_DL_SIFPHYS);
8732 CASE_STRINGIFY(KEV_DL_SIFMEDIA);
8733 CASE_STRINGIFY(KEV_DL_SIFGENERIC);
8734 CASE_STRINGIFY(KEV_DL_ADDMULTI);
8735 CASE_STRINGIFY(KEV_DL_DELMULTI);
8736 CASE_STRINGIFY(KEV_DL_IF_ATTACHED);
8737 CASE_STRINGIFY(KEV_DL_IF_DETACHING);
8738 CASE_STRINGIFY(KEV_DL_IF_DETACHED);
8739 CASE_STRINGIFY(KEV_DL_LINK_OFF);
8740 CASE_STRINGIFY(KEV_DL_LINK_ON);
8741 CASE_STRINGIFY(KEV_DL_PROTO_ATTACHED);
8742 CASE_STRINGIFY(KEV_DL_PROTO_DETACHED);
8743 CASE_STRINGIFY(KEV_DL_LINK_ADDRESS_CHANGED);
8744 CASE_STRINGIFY(KEV_DL_WAKEFLAGS_CHANGED);
8745 CASE_STRINGIFY(KEV_DL_IF_IDLE_ROUTE_REFCNT);
8746 CASE_STRINGIFY(KEV_DL_IFCAP_CHANGED);
8747 CASE_STRINGIFY(KEV_DL_LINK_QUALITY_METRIC_CHANGED);
8748 CASE_STRINGIFY(KEV_DL_NODE_PRESENCE);
8749 CASE_STRINGIFY(KEV_DL_NODE_ABSENCE);
8750 CASE_STRINGIFY(KEV_DL_MASTER_ELECTED);
8751 CASE_STRINGIFY(KEV_DL_ISSUES);
8752 CASE_STRINGIFY(KEV_DL_IFDELEGATE_CHANGED);
8753 default:
8754 break;
8755 }
8756 return ("");
8757 }
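/*
 * A small usage sketch for the helper above (hypothetical call site):
 * each CASE_STRINGIFY(x) arm expands to "case x: return #x", so
 *
 *	printf("%s\n", dlil_kev_dl_code_str(KEV_DL_LINK_ON));
 *
 * prints "KEV_DL_LINK_ON", while unrecognized event codes fall through
 * to the empty string.
 */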
8758
8759 static void
8760 dlil_dt_tcall_fn(thread_call_param_t arg0, thread_call_param_t arg1)
8761 {
8762 #pragma unused(arg1)
8763 struct ifnet *ifp = arg0;
8764
8765 if (ifnet_is_attached(ifp, 1)) {
8766 nstat_ifnet_threshold_reached(ifp->if_index);
8767 ifnet_decr_iorefcnt(ifp);
8768 }
8769 }
8770
8771 void
8772 ifnet_notify_data_threshold(struct ifnet *ifp)
8773 {
8774 uint64_t bytes = (ifp->if_ibytes + ifp->if_obytes);
8775 uint64_t oldbytes = ifp->if_dt_bytes;
8776
8777 ASSERT(ifp->if_dt_tcall != NULL);
8778
8779 /*
8780 * If we went over the threshold, notify NetworkStatistics.
8781 * We rate-limit it based on the threshold interval value.
8782 */
8783 if (threshold_notify && (bytes - oldbytes) > ifp->if_data_threshold &&
8784 OSCompareAndSwap64(oldbytes, bytes, &ifp->if_dt_bytes) &&
8785 !thread_call_isactive(ifp->if_dt_tcall)) {
8786 uint64_t tival = (threshold_interval * NSEC_PER_SEC);
8787 uint64_t now = mach_absolute_time(), deadline = now;
8788 uint64_t ival;
8789
8790 if (tival != 0) {
8791 nanoseconds_to_absolutetime(tival, &ival);
8792 clock_deadline_for_periodic_event(ival, now, &deadline);
8793 (void) thread_call_enter_delayed(ifp->if_dt_tcall,
8794 deadline);
8795 } else {
8796 (void) thread_call_enter(ifp->if_dt_tcall);
8797 }
8798 }
8799 }
8800
8801 #if (DEVELOPMENT || DEBUG)
8802 /*
8803 * The sysctl variable name contains the input parameters of
8804 * ifnet_get_keepalive_offload_frames()
8805 * ifp (interface index): name[0]
8806 * frames_array_count: name[1]
8807 * frame_data_offset: name[2]
8808 * The return length gives used_frames_count
8809 */
8810 static int
8811 sysctl_get_kao_frames SYSCTL_HANDLER_ARGS
8812 {
8813 #pragma unused(oidp)
8814 int *name = (int *)arg1;
8815 u_int namelen = arg2;
8816 int idx;
8817 ifnet_t ifp = NULL;
8818 u_int32_t frames_array_count;
8819 size_t frame_data_offset;
8820 u_int32_t used_frames_count;
8821 struct ifnet_keepalive_offload_frame *frames_array = NULL;
8822 int error = 0;
8823 u_int32_t i;
8824
8825 /*
8826 * Only root can look at other people's TCP frames
8827 */
8828 error = proc_suser(current_proc());
8829 if (error != 0)
8830 goto done;
8831 /*
8832 * Validate the input parameters
8833 */
8834 if (req->newptr != USER_ADDR_NULL) {
8835 error = EPERM;
8836 goto done;
8837 }
8838 if (namelen != 3) {
8839 error = EINVAL;
8840 goto done;
8841 }
8842 if (req->oldptr == USER_ADDR_NULL) {
8843 error = EINVAL;
8844 goto done;
8845 }
8846 if (req->oldlen == 0) {
8847 error = EINVAL;
8848 goto done;
8849 }
8850 idx = name[0];
8851 frames_array_count = name[1];
8852 frame_data_offset = name[2];
8853
8854 /* Make sure the passed buffer is large enough */
8855 if (frames_array_count * sizeof(struct ifnet_keepalive_offload_frame) >
8856 req->oldlen) {
8857 error = ENOMEM;
8858 goto done;
8859 }
8860
8861 ifnet_head_lock_shared();
8862 if (!IF_INDEX_IN_RANGE(idx)) {
8863 ifnet_head_done();
8864 error = ENOENT;
8865 goto done;
8866 }
8867 ifp = ifindex2ifnet[idx];
8868 ifnet_head_done();
8869
8870 frames_array = _MALLOC(frames_array_count *
8871 sizeof(struct ifnet_keepalive_offload_frame), M_TEMP, M_WAITOK);
8872 if (frames_array == NULL) {
8873 error = ENOMEM;
8874 goto done;
8875 }
8876
8877 error = ifnet_get_keepalive_offload_frames(ifp, frames_array,
8878 frames_array_count, frame_data_offset, &used_frames_count);
8879 if (error != 0) {
8880 printf("%s: ifnet_get_keepalive_offload_frames error %d\n",
8881 __func__, error);
8882 goto done;
8883 }
8884
8885 for (i = 0; i < used_frames_count; i++) {
8886 error = SYSCTL_OUT(req, frames_array + i,
8887 sizeof(struct ifnet_keepalive_offload_frame));
8888 if (error != 0) {
8889 goto done;
8890 }
8891 }
8892 done:
8893 if (frames_array != NULL)
8894 _FREE(frames_array, M_TEMP);
8895 return (error);
8896 }
8897 #endif /* DEVELOPMENT || DEBUG */
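/*
 * A minimal userland sketch for the DEVELOPMENT/DEBUG handler above
 * (hypothetical program; the "net.link.generic.system.get_kao_frames"
 * OID name is an assumption since the SYSCTL_PROC declaration is not
 * part of this excerpt, and the caller must be root):
 *
 *	int mib[8];
 *	size_t miblen = sizeof (mib) / sizeof (mib[0]), len;
 *	struct ifnet_keepalive_offload_frame frames[8];
 *
 *	sysctlnametomib("net.link.generic.system.get_kao_frames",
 *	    mib, &miblen);
 *	mib[miblen] = if_nametoindex("en0");	-- name[0], interface index
 *	mib[miblen + 1] = 8;			-- name[1], frames_array_count
 *	mib[miblen + 2] = 0;			-- name[2], frame_data_offset
 *	len = sizeof (frames);
 *	if (sysctl(mib, miblen + 3, frames, &len, NULL, 0) == 0)
 *		... len / sizeof (frames[0]) frames were returned ...
 */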
8898
8899 void
8900 ifnet_update_stats_per_flow(struct ifnet_stats_per_flow *ifs,
8901 struct ifnet *ifp)
8902 {
8903 tcp_update_stats_per_flow(ifs, ifp);
8904 }
8905
8906 static void
8907 dlil_mit_tcall_fn(thread_call_param_t arg0, thread_call_param_t arg1)
8908 {
8909 #pragma unused(arg1)
8910 struct ifnet *ifp = (struct ifnet *)arg0;
8911 struct dlil_threading_info *inp = ifp->if_inp;
8912
8913 ifnet_lock_shared(ifp);
8914 if (!IF_FULLY_ATTACHED(ifp) || inp == NULL) {
8915 ifnet_lock_done(ifp);
8916 return;
8917 }
8918
8919 lck_mtx_lock_spin(&inp->input_lck);
8920 inp->input_waiting |= DLIL_INPUT_WAITING;
8921 if (!(inp->input_waiting & DLIL_INPUT_RUNNING) ||
8922 !qempty(&inp->rcvq_pkts)) {
8923 inp->wtot++;
8924 wakeup_one((caddr_t)&inp->input_waiting);
8925 }
8926 lck_mtx_unlock(&inp->input_lck);
8927 ifnet_lock_done(ifp);
8928 }