2 * Copyright (c) 1999-2009 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
29 * Data Link Interface Layer
33 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
34 * support for mandatory and extensible security protections. This notice
35 * is included in support of clause 2.2 (b) of the Apple Public License,
39 #include <sys/param.h>
40 #include <sys/systm.h>
41 #include <sys/kernel.h>
42 #include <sys/malloc.h>
44 #include <sys/socket.h>
45 #include <sys/domain.h>
47 #include <sys/random.h>
48 #include <net/if_dl.h>
50 #include <net/route.h>
51 #include <net/if_var.h>
53 #include <net/if_arp.h>
54 #include <sys/kern_event.h>
55 #include <sys/kdebug.h>
57 #include <kern/assert.h>
58 #include <kern/task.h>
59 #include <kern/thread.h>
60 #include <kern/sched_prim.h>
61 #include <kern/locks.h>
62 #include <net/kpi_protocol.h>
64 #include <net/if_types.h>
65 #include <net/kpi_interfacefilter.h>
67 #include <libkern/OSAtomic.h>
69 #include <machine/machine_routines.h>
71 #include <mach/thread_act.h>
74 #include <security/mac_framework.h>
78 #include <net/pfvar.h>
81 #define DBG_LAYER_BEG DLILDBG_CODE(DBG_DLIL_STATIC, 0)
82 #define DBG_LAYER_END DLILDBG_CODE(DBG_DLIL_STATIC, 2)
83 #define DBG_FNC_DLIL_INPUT DLILDBG_CODE(DBG_DLIL_STATIC, (1 << 8))
84 #define DBG_FNC_DLIL_OUTPUT DLILDBG_CODE(DBG_DLIL_STATIC, (2 << 8))
85 #define DBG_FNC_DLIL_IFOUT DLILDBG_CODE(DBG_DLIL_STATIC, (3 << 8))
88 #define MAX_FRAME_TYPE_SIZE 4 /* LONGWORDS */
89 #define MAX_LINKADDR 4 /* LONGWORDS */
90 #define M_NKE M_IFADDR
93 #define DLIL_PRINTF printf
95 #define DLIL_PRINTF kprintf
98 #define atomic_add_32(a, n) \
99 ((void) OSAddAtomic(n, (volatile SInt32 *)a))
102 #define _CASSERT(x) \
103 switch (0) { case 0: case (x): ; }
105 #define IF_DATA_REQUIRE_ALIGNED_32(f) \
106 _CASSERT(!(offsetof(struct if_data_internal, f) % sizeof (u_int32_t)))
108 #define IFNET_IF_DATA_REQUIRE_ALIGNED_32(f) \
109 _CASSERT(!(offsetof(struct ifnet, if_data.f) % sizeof (u_int32_t)))
110 #endif /* PKT_PRIORITY */
118 SLIST_ENTRY(if_proto
) next_hash
;
122 struct domain
*dl_domain
;
123 protocol_family_t protocol_family
;
127 proto_media_input input
;
128 proto_media_preout pre_output
;
129 proto_media_event event
;
130 proto_media_ioctl ioctl
;
131 proto_media_detached detached
;
132 proto_media_resolve_multi resolve_multi
;
133 proto_media_send_arp send_arp
;
136 proto_media_input_v2 input
;
137 proto_media_preout pre_output
;
138 proto_media_event event
;
139 proto_media_ioctl ioctl
;
140 proto_media_detached detached
;
141 proto_media_resolve_multi resolve_multi
;
142 proto_media_send_arp send_arp
;
147 SLIST_HEAD(proto_hash_entry
, if_proto
);
151 /* ifnet and drvr_ext are used by the stack and drivers
152 drvr_ext extends the public ifnet and must follow dl_if */
153 struct ifnet dl_if
; /* public ifnet */
155 /* dlil private fields */
156 TAILQ_ENTRY(dlil_ifnet
) dl_if_link
; /* dlil_ifnet are link together */
157 /* it is not the ifnet list */
158 void *if_uniqueid
; /* unique id identifying the interface */
159 size_t if_uniqueid_len
;/* length of the unique id */
160 char if_namestorage
[IFNAMSIZ
]; /* interface name storage */
163 struct ifnet_filter
{
164 TAILQ_ENTRY(ifnet_filter
) filt_next
;
168 const char *filt_name
;
170 protocol_family_t filt_protocol
;
171 iff_input_func filt_input
;
172 iff_output_func filt_output
;
173 iff_event_func filt_event
;
174 iff_ioctl_func filt_ioctl
;
175 iff_detached_func filt_detached
;
178 struct proto_input_entry
;
180 static TAILQ_HEAD(, dlil_ifnet
) dlil_ifnet_head
;
181 static lck_grp_t
*dlil_lock_group
;
182 static lck_grp_t
*ifnet_lock_group
;
183 static lck_grp_t
*ifnet_head_lock_group
;
184 static lck_attr_t
*ifnet_lock_attr
;
185 static lck_rw_t
*ifnet_head_mutex
;
186 static lck_mtx_t
*dlil_ifnet_mutex
;
187 static lck_mtx_t
*dlil_mutex
;
188 static u_int32_t dlil_read_count
= 0;
189 static u_int32_t dlil_detach_waiting
= 0;
190 u_int32_t dlil_filter_count
= 0;
191 extern u_int32_t ipv4_ll_arp_aware
;
193 #if IFNET_ROUTE_REFCNT
195 * Updating this variable should be done by first acquiring the global
196 * radix node head (rnh_lock), in tandem with setting/clearing the
197 * PR_AGGDRAIN for routedomain.
199 u_int32_t ifnet_aggressive_drainers
;
200 static u_int32_t net_rtref
;
201 #endif /* IFNET_ROUTE_REFCNT */
203 static struct dlil_threading_info dlil_lo_thread
;
204 __private_extern__
struct dlil_threading_info
*dlil_lo_thread_ptr
= &dlil_lo_thread
;
206 static struct mbuf
*dlil_lo_input_mbuf_head
= NULL
;
207 static struct mbuf
*dlil_lo_input_mbuf_tail
= NULL
;
209 #if IFNET_INPUT_SANITY_CHK
210 static int dlil_lo_input_mbuf_count
= 0;
211 int dlil_input_sanity_check
= 0; /* sanity checking of input packet lists received */
213 int dlil_multithreaded_input
= 1;
214 static int cur_dlil_input_threads
= 0;
216 static int dlil_event_internal(struct ifnet
*ifp
, struct kev_msg
*msg
);
217 static int dlil_detach_filter_internal(interface_filter_t filter
, int detached
);
218 static void dlil_call_delayed_detach_thread(void);
220 static void dlil_read_begin(void);
221 static __inline__
void dlil_read_end(void);
222 static int dlil_write_begin(void);
223 static void dlil_write_end(void);
226 __private_extern__
int dlil_verbose
= 1;
228 __private_extern__
int dlil_verbose
= 0;
231 unsigned int net_affinity
= 1;
232 static kern_return_t
dlil_affinity_set(struct thread
*, u_int32_t
);
234 extern void bpfdetach(struct ifnet
*);
235 extern void proto_input_run(void); // new run_netisr
237 void dlil_input_packet_list(struct ifnet
*ifp
, struct mbuf
*m
);
238 static void dlil_input_thread_func(struct dlil_threading_info
*inpthread
);
239 __private_extern__
int dlil_create_input_thread(
240 ifnet_t
, struct dlil_threading_info
*);
241 __private_extern__
void dlil_terminate_input_thread(
242 struct dlil_threading_info
*);
244 __private_extern__
void link_rtrequest(int, struct rtentry
*, struct sockaddr
*);
248 extern u_int32_t inject_buckets
;
250 static const u_int32_t dlil_writer_waiting
= 0x80000000;
251 static lck_grp_attr_t
*dlil_grp_attributes
= NULL
;
252 static lck_attr_t
*dlil_lck_attributes
= NULL
;
253 static lck_grp_t
*dlil_input_lock_grp
= NULL
;
256 _cast_non_const(const void * ptr
) {
266 /* Should these be inline? */
268 dlil_read_begin(void)
272 struct uthread
*uth
= get_bsdthread_info(current_thread());
274 if (uth
->dlil_incremented_read
== dlil_writer_waiting
)
275 panic("dlil_read_begin - thread is already a writer");
279 old_value
= dlil_read_count
;
281 if ((old_value
& dlil_writer_waiting
) != 0 && uth
->dlil_incremented_read
== 0)
283 tsleep(&dlil_read_count
, PRIBIO
, "dlil_read_count", 1);
287 new_value
= old_value
+ 1;
288 } while (!OSCompareAndSwap((UInt32
)old_value
, (UInt32
)new_value
, (UInt32
*)&dlil_read_count
));
290 uth
->dlil_incremented_read
++;
296 struct uthread
*uth
= get_bsdthread_info(current_thread());
298 OSDecrementAtomic(&dlil_read_count
);
299 uth
->dlil_incremented_read
--;
300 if (dlil_read_count
== dlil_writer_waiting
)
301 wakeup(_cast_non_const(&dlil_writer_waiting
));
305 dlil_write_begin(void)
307 struct uthread
*uth
= get_bsdthread_info(current_thread());
309 if (uth
->dlil_incremented_read
!= 0) {
312 lck_mtx_lock(dlil_mutex
);
313 OSBitOrAtomic((UInt32
)dlil_writer_waiting
, &dlil_read_count
);
315 if (dlil_read_count
== dlil_writer_waiting
) {
316 uth
->dlil_incremented_read
= dlil_writer_waiting
;
320 tsleep(_cast_non_const(&dlil_writer_waiting
), PRIBIO
, "dlil_writer_waiting", 1);
328 struct uthread
*uth
= get_bsdthread_info(current_thread());
330 if (uth
->dlil_incremented_read
!= dlil_writer_waiting
)
331 panic("dlil_write_end - thread is not a writer");
332 OSBitAndAtomic((UInt32
)~dlil_writer_waiting
, &dlil_read_count
);
333 lck_mtx_unlock(dlil_mutex
);
334 uth
->dlil_incremented_read
= 0;
335 wakeup(&dlil_read_count
);
338 #define PROTO_HASH_SLOTS 0x5
341 * Internal functions.
345 proto_hash_value(u_int32_t protocol_family
)
348 * dlil_proto_unplumb_all() depends on the mapping between
349 * the hash bucket index and the protocol family defined
350 * here; future changes must be applied there as well.
352 switch(protocol_family
) {
366 static struct if_proto
*
367 find_attached_proto(struct ifnet
*ifp
, u_int32_t protocol_family
)
369 struct if_proto
*proto
= NULL
;
370 u_int32_t i
= proto_hash_value(protocol_family
);
371 if (ifp
->if_proto_hash
) {
372 proto
= SLIST_FIRST(&ifp
->if_proto_hash
[i
]);
375 while(proto
&& proto
->protocol_family
!= protocol_family
) {
376 proto
= SLIST_NEXT(proto
, next_hash
);
383 if_proto_ref(struct if_proto
*proto
)
385 OSAddAtomic(1, &proto
->refcount
);
389 if_proto_free(struct if_proto
*proto
)
391 int oldval
= OSAddAtomic(-1, &proto
->refcount
);
393 if (oldval
== 1) { /* This was the last reference */
394 FREE(proto
, M_IFADDR
);
398 __private_extern__
void
400 __unused
struct ifnet
*ifp
,
405 * Not implemented for rw locks.
407 * Function exists so when/if we use mutex we can
411 lck_mtx_assert(ifp
->if_lock
, what
);
415 __private_extern__
void
420 lck_rw_lock_shared(ifp
->if_lock
);
422 lck_mtx_assert(ifp
->if_lock
, LCK_MTX_ASSERT_NOTOWNED
);
423 lck_mtx_lock(ifp
->if_lock
);
427 __private_extern__
void
428 ifnet_lock_exclusive(
432 lck_rw_lock_exclusive(ifp
->if_lock
);
434 lck_mtx_assert(ifp
->if_lock
, LCK_MTX_ASSERT_NOTOWNED
);
435 lck_mtx_lock(ifp
->if_lock
);
439 __private_extern__
void
444 lck_rw_done(ifp
->if_lock
);
446 lck_mtx_assert(ifp
->if_lock
, LCK_MTX_ASSERT_OWNED
);
447 lck_mtx_unlock(ifp
->if_lock
);
451 __private_extern__
void
452 ifnet_head_lock_shared(void)
454 lck_rw_lock_shared(ifnet_head_mutex
);
457 __private_extern__
void
458 ifnet_head_lock_exclusive(void)
460 lck_rw_lock_exclusive(ifnet_head_mutex
);
463 __private_extern__
void
464 ifnet_head_done(void)
466 lck_rw_done(ifnet_head_mutex
);
469 static int dlil_ifp_proto_count(struct ifnet
* ifp
)
474 if (ifp
->if_proto_hash
!= NULL
) {
475 for (i
= 0; i
< PROTO_HASH_SLOTS
; i
++) {
476 struct if_proto
*proto
;
477 SLIST_FOREACH(proto
, &ifp
->if_proto_hash
[i
], next_hash
) {
486 __private_extern__
void
487 dlil_post_msg(struct ifnet
*ifp
, u_int32_t event_subclass
, u_int32_t event_code
,
488 struct net_event_data
*event_data
, u_int32_t event_data_len
)
490 struct net_event_data ev_data
;
491 struct kev_msg ev_msg
;
494 * a net event always starts with a net_event_data structure
495 * but the caller can generate a simple net event or
496 * provide a longer event structure to post
499 ev_msg
.vendor_code
= KEV_VENDOR_APPLE
;
500 ev_msg
.kev_class
= KEV_NETWORK_CLASS
;
501 ev_msg
.kev_subclass
= event_subclass
;
502 ev_msg
.event_code
= event_code
;
504 if (event_data
== 0) {
505 event_data
= &ev_data
;
506 event_data_len
= sizeof(struct net_event_data
);
509 strncpy(&event_data
->if_name
[0], ifp
->if_name
, IFNAMSIZ
);
510 event_data
->if_family
= ifp
->if_family
;
511 event_data
->if_unit
= (u_int32_t
) ifp
->if_unit
;
513 ev_msg
.dv
[0].data_length
= event_data_len
;
514 ev_msg
.dv
[0].data_ptr
= event_data
;
515 ev_msg
.dv
[1].data_length
= 0;
517 dlil_event_internal(ifp
, &ev_msg
);
520 __private_extern__
int
521 dlil_create_input_thread(
522 ifnet_t ifp
, struct dlil_threading_info
*inputthread
)
526 bzero(inputthread
, sizeof(*inputthread
));
527 // loopback ifp may not be configured at dlil_init time.
529 strlcat(inputthread
->input_name
, "dlil_input_main_thread_mtx", 32);
531 snprintf(inputthread
->input_name
, 32, "dlil_input_%s%d_mtx", ifp
->if_name
, ifp
->if_unit
);
533 inputthread
->lck_grp
= lck_grp_alloc_init(inputthread
->input_name
, dlil_grp_attributes
);
534 inputthread
->input_lck
= lck_mtx_alloc_init(inputthread
->lck_grp
, dlil_lck_attributes
);
536 error
= kernel_thread_start((thread_continue_t
)dlil_input_thread_func
, inputthread
, &inputthread
->input_thread
);
538 ml_thread_policy(inputthread
->input_thread
, MACHINE_GROUP
,
539 (MACHINE_NETWORK_GROUP
|MACHINE_NETWORK_NETISR
));
541 * Except for the loopback dlil input thread, we create
542 * an affinity set so that the matching workloop thread
543 * can be scheduled on the same processor set.
545 if (net_affinity
&& inputthread
!= dlil_lo_thread_ptr
) {
546 struct thread
*tp
= inputthread
->input_thread
;
549 * Randomize to reduce the probability
550 * of affinity tag namespace collision.
552 read_random(&tag
, sizeof (tag
));
553 if (dlil_affinity_set(tp
, tag
) == KERN_SUCCESS
) {
554 thread_reference(tp
);
555 inputthread
->tag
= tag
;
556 inputthread
->net_affinity
= TRUE
;
560 panic("dlil_create_input_thread: couldn't create thread\n");
562 OSAddAtomic(1, &cur_dlil_input_threads
);
564 printf("dlil_create_input_thread: threadinfo: %p input_thread=%p threads: cur=%d max=%d\n",
565 inputthread
, inputthread
->input_thread
, dlil_multithreaded_input
, cur_dlil_input_threads
);
569 __private_extern__
void
570 dlil_terminate_input_thread(
571 struct dlil_threading_info
*inputthread
)
573 OSAddAtomic(-1, &cur_dlil_input_threads
);
575 lck_mtx_unlock(inputthread
->input_lck
);
576 lck_mtx_free(inputthread
->input_lck
, inputthread
->lck_grp
);
577 lck_grp_free(inputthread
->lck_grp
);
579 FREE(inputthread
, M_NKE
);
581 /* For the extra reference count from kernel_thread_start() */
582 thread_deallocate(current_thread());
584 thread_terminate(current_thread());
588 dlil_affinity_set(struct thread
*tp
, u_int32_t tag
)
590 thread_affinity_policy_data_t policy
;
592 bzero(&policy
, sizeof (policy
));
593 policy
.affinity_tag
= tag
;
594 return (thread_policy_set(tp
, THREAD_AFFINITY_POLICY
,
595 (thread_policy_t
)&policy
, THREAD_AFFINITY_POLICY_COUNT
));
601 thread_t thread
= THREAD_NULL
;
605 * The following fields must be 32-bit aligned for atomic operations.
607 IF_DATA_REQUIRE_ALIGNED_32(ifi_obgpackets
);
608 IF_DATA_REQUIRE_ALIGNED_32(ifi_obgbytes
)
610 IFNET_IF_DATA_REQUIRE_ALIGNED_32(ifi_obgpackets
);
611 IFNET_IF_DATA_REQUIRE_ALIGNED_32(ifi_obgbytes
)
612 #endif /* PKT_PRIORITY */
614 PE_parse_boot_argn("net_affinity", &net_affinity
, sizeof (net_affinity
));
615 #if IFNET_ROUTE_REFCNT
616 PE_parse_boot_argn("net_rtref", &net_rtref
, sizeof (net_rtref
));
617 #endif /* IFNET_ROUTE_REFCNT */
619 TAILQ_INIT(&dlil_ifnet_head
);
620 TAILQ_INIT(&ifnet_head
);
622 /* Setup the lock groups we will use */
623 dlil_grp_attributes
= lck_grp_attr_alloc_init();
625 dlil_lock_group
= lck_grp_alloc_init("dlil internal locks", dlil_grp_attributes
);
626 ifnet_lock_group
= lck_grp_alloc_init("ifnet locks", dlil_grp_attributes
);
627 ifnet_head_lock_group
= lck_grp_alloc_init("ifnet head lock", dlil_grp_attributes
);
628 dlil_input_lock_grp
= lck_grp_alloc_init("dlil input lock", dlil_grp_attributes
);
630 /* Setup the lock attributes we will use */
631 dlil_lck_attributes
= lck_attr_alloc_init();
633 ifnet_lock_attr
= lck_attr_alloc_init();
636 ifnet_head_mutex
= lck_rw_alloc_init(ifnet_head_lock_group
, dlil_lck_attributes
);
637 dlil_ifnet_mutex
= lck_mtx_alloc_init(dlil_lock_group
, dlil_lck_attributes
);
638 dlil_mutex
= lck_mtx_alloc_init(dlil_lock_group
, dlil_lck_attributes
);
640 lck_attr_free(dlil_lck_attributes
);
641 dlil_lck_attributes
= NULL
;
644 * Create and start up the first dlil input thread once everything is initialized
646 dlil_create_input_thread(0, dlil_lo_thread_ptr
);
648 (void) kernel_thread_start((thread_continue_t
)dlil_call_delayed_detach_thread
, NULL
, &thread
);
649 thread_deallocate(thread
);
651 /* Initialize the packet filter */
656 __private_extern__
int
659 const struct iff_filter
*if_filter
,
660 interface_filter_t
*filter_ref
)
663 struct ifnet_filter
*filter
;
665 MALLOC(filter
, struct ifnet_filter
*, sizeof(*filter
), M_NKE
, M_WAITOK
);
668 bzero(filter
, sizeof(*filter
));
671 filter
->filt_ifp
= ifp
;
672 filter
->filt_cookie
= if_filter
->iff_cookie
;
673 filter
->filt_name
= if_filter
->iff_name
;
674 filter
->filt_protocol
= if_filter
->iff_protocol
;
675 filter
->filt_input
= if_filter
->iff_input
;
676 filter
->filt_output
= if_filter
->iff_output
;
677 filter
->filt_event
= if_filter
->iff_event
;
678 filter
->filt_ioctl
= if_filter
->iff_ioctl
;
679 filter
->filt_detached
= if_filter
->iff_detached
;
681 if ((retval
= dlil_write_begin()) != 0) {
682 /* Failed to acquire the write lock */
686 TAILQ_INSERT_TAIL(&ifp
->if_flt_head
, filter
, filt_next
);
688 *filter_ref
= filter
;
691 * Bump filter count and route_generation ID to let TCP
692 * know it shouldn't do TSO on this connection
694 OSAddAtomic(1, &dlil_filter_count
);
702 dlil_detach_filter_internal(
703 interface_filter_t filter
,
710 interface_filter_t entry
= NULL
;
712 /* Take the write lock */
713 retval
= dlil_write_begin();
714 if (retval
!= 0 && retval
!= EDEADLK
)
718 * At this point either we have the write lock (retval == 0)
719 * or we couldn't get it (retval == EDEADLK) because someone
720 * else up the stack is holding the read lock. It is safe to
721 * read, since either the read or write lock is held. Verify the filter
722 * parameter before proceeding.
724 ifnet_head_lock_shared();
725 TAILQ_FOREACH(ifp
, &ifnet_head
, if_link
) {
726 TAILQ_FOREACH(entry
, &ifp
->if_flt_head
, filt_next
) {
735 if (entry
!= filter
) {
736 /* filter parameter is not a valid filter ref */
743 if (retval
== EDEADLK
) {
744 /* Perform a delayed detach */
745 filter
->filt_detaching
= 1;
746 dlil_detach_waiting
= 1;
747 wakeup(&dlil_detach_waiting
);
751 /* Remove the filter from the list */
752 TAILQ_REMOVE(&ifp
->if_flt_head
, filter
, filt_next
);
756 /* Call the detached function if there is one */
757 if (filter
->filt_detached
)
758 filter
->filt_detached(filter
->filt_cookie
, filter
->filt_ifp
);
760 /* Free the filter */
764 * Decrease filter count and route_generation ID to let TCP
765 * know it should reevaluate doing TSO or not
767 OSAddAtomic(-1, &dlil_filter_count
);
774 __private_extern__
void
775 dlil_detach_filter(interface_filter_t filter
)
779 dlil_detach_filter_internal(filter
, 0);
783 dlil_input_thread_func(
784 struct dlil_threading_info
*inputthread
)
787 struct mbuf
*m
= NULL
, *m_loop
= NULL
;
788 #if IFNET_INPUT_SANITY_CHK
789 int loop_cnt
= 0, mbuf_cnt
;
792 #endif /* IFNET_INPUT_SANITY_CHK */
794 lck_mtx_lock(inputthread
->input_lck
);
796 /* Wait until there is work to be done */
797 while ((inputthread
->input_waiting
& ~DLIL_INPUT_RUNNING
) == 0) {
798 inputthread
->input_waiting
&= ~DLIL_INPUT_RUNNING
;
799 msleep(&inputthread
->input_waiting
, inputthread
->input_lck
, 0, inputthread
->input_name
, 0);
803 lck_mtx_assert(inputthread
->input_lck
, LCK_MTX_ASSERT_OWNED
);
805 m
= inputthread
->mbuf_head
;
806 inputthread
->mbuf_head
= NULL
;
807 inputthread
->mbuf_tail
= NULL
;
809 if (inputthread
->input_waiting
& DLIL_INPUT_TERMINATE
) {
812 /* this is the end */
813 dlil_terminate_input_thread(inputthread
);
817 inputthread
->input_waiting
|= DLIL_INPUT_RUNNING
;
818 inputthread
->input_waiting
&= ~DLIL_INPUT_WAITING
;
820 if (inputthread
== dlil_lo_thread_ptr
) {
821 m_loop
= dlil_lo_input_mbuf_head
;
822 dlil_lo_input_mbuf_head
= NULL
;
823 dlil_lo_input_mbuf_tail
= NULL
;
826 #if IFNET_INPUT_SANITY_CHK
827 if (dlil_input_sanity_check
!= 0) {
828 mbuf_cnt
= inputthread
->mbuf_count
;
829 inputthread
->mbuf_count
= 0;
830 if (inputthread
== dlil_lo_thread_ptr
) {
831 loop_cnt
= dlil_lo_input_mbuf_count
;
832 dlil_lo_input_mbuf_count
= 0;
835 lck_mtx_unlock(inputthread
->input_lck
);
837 for (m1
= m
, count
= 0; m1
; m1
= mbuf_nextpkt(m1
)) {
840 if (count
!= mbuf_cnt
) {
841 panic("dlil_input_func - thread=%p reg. loop queue has %d packets, should have %d\n",
842 inputthread
, count
, mbuf_cnt
);
845 if (inputthread
== dlil_lo_thread_ptr
) {
846 for (m1
= m_loop
, count
= 0; m1
; m1
= mbuf_nextpkt(m1
)) {
849 if (count
!= loop_cnt
) {
850 panic("dlil_input_func - thread=%p loop queue has %d packets, should have %d\n",
851 inputthread
, count
, loop_cnt
);
855 #endif /* IFNET_INPUT_SANITY_CHK */
857 lck_mtx_unlock(inputthread
->input_lck
);
862 * NOTE warning %%% attention !!!!
863 * We should think about putting some thread starvation safeguards if
864 * we deal with long chains of packets.
867 if (inputthread
== dlil_lo_thread_ptr
)
868 dlil_input_packet_list(lo_ifp
, m_loop
);
869 #if IFNET_INPUT_SANITY_CHK
871 panic("dlil_input_func - thread=%p loop queue has %d packets, should have none!\n",
872 inputthread
, loop_cnt
);
873 #endif /* IFNET_INPUT_SANITY_CHK */
878 dlil_input_packet_list(0, m
);
881 lck_mtx_lock(inputthread
->input_lck
);
883 if ((inputthread
->input_waiting
& (DLIL_PROTO_WAITING
| DLIL_PROTO_REGISTER
)) != 0) {
884 lck_mtx_unlock(inputthread
->input_lck
);
888 lck_mtx_unlock(inputthread
->input_lck
);
896 const struct ifnet_stat_increment_param
*stats
)
898 struct thread
*tp
= current_thread();
900 struct dlil_threading_info
*inp
;
901 #if IFNET_INPUT_SANITY_CHK
902 u_int32_t pkt_count
= 0;
903 #endif /* IFNET_INPUT_SANITY_CHK */
905 if (ifp
== NULL
|| m_head
== NULL
) {
907 mbuf_freem_list(m_head
);
913 #if IFNET_INPUT_SANITY_CHK
914 if (dlil_input_sanity_check
!= 0) {
917 rcvif
= mbuf_pkthdr_rcvif(m_tail
);
921 (ifp
->if_type
!= IFT_LOOP
&& rcvif
!= ifp
) ||
922 (mbuf_flags(m_head
) & MBUF_PKTHDR
) == 0) {
923 panic("ifnet_input - invalid mbuf %p\n", m_tail
);
926 #endif /* IFNET_INPUT_SANITY_CHK */
927 if (mbuf_nextpkt(m_tail
) == NULL
)
929 m_tail
= mbuf_nextpkt(m_tail
);
932 inp
= ifp
->if_input_thread
;
934 if (dlil_multithreaded_input
== 0 || inp
== NULL
)
935 inp
= dlil_lo_thread_ptr
;
938 * If there is a matching dlil input thread associated with an
939 * affinity set, associate this workloop thread with the same set.
940 * We will only do this once.
942 lck_mtx_lock(inp
->input_lck
);
943 if (inp
->net_affinity
&& inp
->workloop_thread
== NULL
) {
944 u_int32_t tag
= inp
->tag
;
945 inp
->workloop_thread
= tp
;
946 lck_mtx_unlock(inp
->input_lck
);
948 /* Associate the current thread with the new affinity tag */
949 (void) dlil_affinity_set(tp
, tag
);
952 * Take a reference on the workloop (current) thread; during
953 * detach, we will need to refer to it in order to tear down
956 thread_reference(tp
);
957 lck_mtx_lock(inp
->input_lck
);
961 * Because of loopbacked multicast we cannot stuff the ifp in
962 * the rcvif of the packet header: loopback has its own dlil
966 if (inp
== dlil_lo_thread_ptr
&& ifp
->if_type
== IFT_LOOP
) {
967 if (dlil_lo_input_mbuf_head
== NULL
)
968 dlil_lo_input_mbuf_head
= m_head
;
969 else if (dlil_lo_input_mbuf_tail
!= NULL
)
970 dlil_lo_input_mbuf_tail
->m_nextpkt
= m_head
;
971 dlil_lo_input_mbuf_tail
= m_tail
;
972 #if IFNET_INPUT_SANITY_CHK
973 if (dlil_input_sanity_check
!= 0) {
974 dlil_lo_input_mbuf_count
+= pkt_count
;
975 inp
->input_mbuf_cnt
+= pkt_count
;
976 inp
->input_wake_cnt
++;
978 lck_mtx_assert(inp
->input_lck
, LCK_MTX_ASSERT_OWNED
);
983 if (inp
->mbuf_head
== NULL
)
984 inp
->mbuf_head
= m_head
;
985 else if (inp
->mbuf_tail
!= NULL
)
986 inp
->mbuf_tail
->m_nextpkt
= m_head
;
987 inp
->mbuf_tail
= m_tail
;
988 #if IFNET_INPUT_SANITY_CHK
989 if (dlil_input_sanity_check
!= 0) {
990 inp
->mbuf_count
+= pkt_count
;
991 inp
->input_mbuf_cnt
+= pkt_count
;
992 inp
->input_wake_cnt
++;
994 lck_mtx_assert(inp
->input_lck
, LCK_MTX_ASSERT_OWNED
);
1000 inp
->input_waiting
|= DLIL_INPUT_WAITING
;
1001 if ((inp
->input_waiting
& DLIL_INPUT_RUNNING
) == 0) {
1002 wakeup((caddr_t
)&inp
->input_waiting
);
1005 ifp
->if_data
.ifi_ipackets
+= stats
->packets_in
;
1006 ifp
->if_data
.ifi_ibytes
+= stats
->bytes_in
;
1007 ifp
->if_data
.ifi_ierrors
+= stats
->errors_in
;
1009 ifp
->if_data
.ifi_opackets
+= stats
->packets_out
;
1010 ifp
->if_data
.ifi_obytes
+= stats
->bytes_out
;
1011 ifp
->if_data
.ifi_oerrors
+= stats
->errors_out
;
1013 ifp
->if_data
.ifi_collisions
+= stats
->collisions
;
1014 ifp
->if_data
.ifi_iqdrops
+= stats
->dropped
;
1017 lck_mtx_unlock(inp
->input_lck
);
1023 dlil_interface_filters_input(struct ifnet
* ifp
, struct mbuf
* * m_p
,
1024 char * * frame_header_p
,
1025 protocol_family_t protocol_family
)
1027 struct ifnet_filter
* filter
;
1029 TAILQ_FOREACH(filter
, &ifp
->if_flt_head
, filt_next
) {
1032 if (filter
->filt_input
1033 && (filter
->filt_protocol
== 0
1034 || filter
->filt_protocol
== protocol_family
)) {
1035 result
= (*filter
->filt_input
)(filter
->filt_cookie
,
1036 ifp
, protocol_family
,
1037 m_p
, frame_header_p
);
1045 * Strip away M_PROTO1 bit prior to sending packet up the stack as
1046 * it is meant to be local to a subsystem -- if_bridge for M_PROTO1
1049 (*m_p
)->m_flags
&= ~M_PROTO1
;
1055 dlil_ifproto_input(struct if_proto
* ifproto
, mbuf_t m
)
1059 if (ifproto
->proto_kpi
== kProtoKPI_v1
) {
1060 /* Version 1 protocols get one packet at a time */
1062 char * frame_header
;
1065 next_packet
= m
->m_nextpkt
;
1066 m
->m_nextpkt
= NULL
;
1067 frame_header
= m
->m_pkthdr
.header
;
1068 m
->m_pkthdr
.header
= NULL
;
1069 error
= (*ifproto
->kpi
.v1
.input
)(ifproto
->ifp
,
1070 ifproto
->protocol_family
,
1072 if (error
!= 0 && error
!= EJUSTRETURN
)
1077 else if (ifproto
->proto_kpi
== kProtoKPI_v2
) {
1078 /* Version 2 protocols support packet lists */
1079 error
= (*ifproto
->kpi
.v2
.input
)(ifproto
->ifp
,
1080 ifproto
->protocol_family
,
1082 if (error
!= 0 && error
!= EJUSTRETURN
)
1088 __private_extern__
void
1089 dlil_input_packet_list(struct ifnet
* ifp_param
, struct mbuf
*m
)
1093 protocol_family_t protocol_family
;
1095 ifnet_t ifp
= ifp_param
;
1096 char * frame_header
;
1097 struct if_proto
* last_ifproto
= NULL
;
1098 mbuf_t pkt_first
= NULL
;
1099 mbuf_t
* pkt_next
= NULL
;
1101 KERNEL_DEBUG(DBG_FNC_DLIL_INPUT
| DBG_FUNC_START
,0,0,0,0,0);
1104 struct if_proto
* ifproto
= NULL
;
1106 next_packet
= m
->m_nextpkt
;
1107 m
->m_nextpkt
= NULL
;
1108 if (ifp_param
== NULL
)
1109 ifp
= m
->m_pkthdr
.rcvif
;
1110 frame_header
= m
->m_pkthdr
.header
;
1111 m
->m_pkthdr
.header
= NULL
;
1114 /* dlil lock protects the demux and interface filters */
1118 /* find which protocol family this packet is for */
1119 error
= (*ifp
->if_demux
)(ifp
, m
, frame_header
,
1122 if (error
== EJUSTRETURN
) {
1125 protocol_family
= 0;
1129 if (m
->m_flags
& (M_BCAST
|M_MCAST
))
1132 /* run interface filters, exclude VLAN packets PR-3586856 */
1133 if ((m
->m_pkthdr
.csum_flags
& CSUM_VLAN_TAG_VALID
) == 0) {
1136 filter_result
= dlil_interface_filters_input(ifp
, &m
,
1139 if (filter_result
!= 0) {
1140 if (filter_result
!= EJUSTRETURN
) {
1146 if (error
!= 0 || ((m
->m_flags
& M_PROMISC
) != 0) ) {
1151 /* Lookup the protocol attachment to this interface */
1152 if (protocol_family
== 0) {
1155 else if (last_ifproto
!= NULL
1156 && last_ifproto
->ifp
== ifp
1157 && (last_ifproto
->protocol_family
1158 == protocol_family
)) {
1159 ifproto
= last_ifproto
;
1162 ifproto
= find_attached_proto(ifp
, protocol_family
);
1164 if (ifproto
== NULL
) {
1165 /* no protocol for this packet, discard */
1169 if (ifproto
!= last_ifproto
) {
1170 /* make sure ifproto can't go away during input */
1171 if_proto_ref(ifproto
);
1172 if (last_ifproto
!= NULL
) {
1173 /* pass up the list for the previous protocol */
1176 dlil_ifproto_input(last_ifproto
, pkt_first
);
1178 if_proto_free(last_ifproto
);
1181 last_ifproto
= ifproto
;
1183 /* extend the list */
1184 m
->m_pkthdr
.header
= frame_header
;
1185 if (pkt_first
== NULL
) {
1190 pkt_next
= &m
->m_nextpkt
;
1193 if (next_packet
== NULL
&& last_ifproto
!= NULL
) {
1194 /* pass up the last list of packets */
1197 dlil_ifproto_input(last_ifproto
, pkt_first
);
1198 if_proto_free(last_ifproto
);
1207 KERNEL_DEBUG(DBG_FNC_DLIL_INPUT
| DBG_FUNC_END
,0,0,0,0,0);
1212 dlil_event_internal(struct ifnet
*ifp
, struct kev_msg
*event
)
1214 struct ifnet_filter
*filter
;
1216 if (ifp_use(ifp
, kIfNetUseCount_MustNotBeZero
) == 0) {
1219 /* Pass the event to the interface filters */
1220 TAILQ_FOREACH(filter
, &ifp
->if_flt_head
, filt_next
) {
1221 if (filter
->filt_event
)
1222 filter
->filt_event(filter
->filt_cookie
, ifp
, filter
->filt_protocol
, event
);
1225 if (ifp
->if_proto_hash
) {
1228 for (i
= 0; i
< PROTO_HASH_SLOTS
; i
++) {
1229 struct if_proto
*proto
;
1231 SLIST_FOREACH(proto
, &ifp
->if_proto_hash
[i
], next_hash
) {
1232 proto_media_event eventp
= proto
->proto_kpi
== kProtoKPI_v1
1233 ? proto
->kpi
.v1
.event
: proto
->kpi
.v2
.event
;
1236 eventp(ifp
, proto
->protocol_family
, event
);
1243 /* Pass the event to the interface */
1245 ifp
->if_event(ifp
, event
);
1248 ifp_use_reached_zero(ifp
);
1251 return kev_post_msg(event
);
1257 struct kern_event_msg
*event
)
1259 struct kev_msg kev_msg
;
1262 if (ifp
== NULL
|| event
== NULL
) return EINVAL
;
1264 kev_msg
.vendor_code
= event
->vendor_code
;
1265 kev_msg
.kev_class
= event
->kev_class
;
1266 kev_msg
.kev_subclass
= event
->kev_subclass
;
1267 kev_msg
.event_code
= event
->event_code
;
1268 kev_msg
.dv
[0].data_ptr
= &event
->event_data
[0];
1269 kev_msg
.dv
[0].data_length
= event
->total_size
- KEV_MSG_HEADER_SIZE
;
1270 kev_msg
.dv
[1].data_length
= 0;
1272 result
= dlil_event_internal(ifp
, &kev_msg
);
1278 #include <netinet/ip6.h>
1279 #include <netinet/ip.h>
1280 static int dlil_get_socket_type(struct mbuf
**mp
, int family
, int raw
)
1284 struct ip6_hdr
*ip6
;
1285 int type
= SOCK_RAW
;
1290 m
= m_pullup(*mp
, sizeof(struct ip
));
1294 ip
= mtod(m
, struct ip
*);
1295 if (ip
->ip_p
== IPPROTO_TCP
)
1297 else if (ip
->ip_p
== IPPROTO_UDP
)
1301 m
= m_pullup(*mp
, sizeof(struct ip6_hdr
));
1305 ip6
= mtod(m
, struct ip6_hdr
*);
1306 if (ip6
->ip6_nxt
== IPPROTO_TCP
)
1308 else if (ip6
->ip6_nxt
== IPPROTO_UDP
)
1322 u_long proto_family
,
1323 struct mbuf
*packetlist
,
1325 const struct sockaddr
*dest
,
1328 char *frame_type
= NULL
;
1329 char *dst_linkaddr
= NULL
;
1331 char frame_type_buffer
[MAX_FRAME_TYPE_SIZE
* 4];
1332 char dst_linkaddr_buffer
[MAX_LINKADDR
* 4];
1333 struct ifnet_filter
*filter
;
1334 struct if_proto
*proto
= 0;
1336 mbuf_t send_head
= NULL
;
1337 mbuf_t
*send_tail
= &send_head
;
1339 KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT
| DBG_FUNC_START
,0,0,0,0,0);
1343 frame_type
= frame_type_buffer
;
1344 dst_linkaddr
= dst_linkaddr_buffer
;
1347 proto
= find_attached_proto(ifp
, proto_family
);
1348 if (proto
== NULL
) {
1355 if (packetlist
== NULL
)
1358 packetlist
= packetlist
->m_nextpkt
;
1359 m
->m_nextpkt
= NULL
;
1362 proto_media_preout preoutp
= proto
->proto_kpi
== kProtoKPI_v1
1363 ? proto
->kpi
.v1
.pre_output
: proto
->kpi
.v2
.pre_output
;
1366 retval
= preoutp(ifp
, proto_family
, &m
, dest
, route
, frame_type
, dst_linkaddr
);
1369 if (retval
== EJUSTRETURN
) {
1380 retval
= mac_ifnet_check_transmit(ifp
, m
, proto_family
,
1381 dlil_get_socket_type(&m
, proto_family
, raw
));
1388 if (raw
== 0 && ifp
->if_framer
) {
1389 retval
= ifp
->if_framer(ifp
, &m
, dest
, dst_linkaddr
, frame_type
);
1391 if (retval
!= EJUSTRETURN
) {
1399 * Let interface filters (if any) do their thing ...
1401 /* Do not pass VLAN tagged packets to filters PR-3586856 */
1402 if ((m
->m_pkthdr
.csum_flags
& CSUM_VLAN_TAG_VALID
) == 0) {
1403 TAILQ_FOREACH(filter
, &ifp
->if_flt_head
, filt_next
) {
1404 if ((filter
->filt_protocol
== 0 || (filter
->filt_protocol
== proto_family
)) &&
1405 filter
->filt_output
) {
1406 retval
= filter
->filt_output(filter
->filt_cookie
, ifp
, proto_family
, &m
);
1408 if (retval
!= EJUSTRETURN
)
1416 * Strip away M_PROTO1 bit prior to sending packet to the driver
1417 * as this field may be used by the driver
1419 m
->m_flags
&= ~M_PROTO1
;
1422 * Finally, call the driver.
1425 if ((ifp
->if_eflags
& IFEF_SENDLIST
) != 0) {
1427 send_tail
= &m
->m_nextpkt
;
1430 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT
| DBG_FUNC_START
, 0,0,0,0,0);
1431 retval
= ifp
->if_output(ifp
, m
);
1432 if (retval
&& dlil_verbose
) {
1433 printf("dlil_output: output error on %s%d retval = %d\n",
1434 ifp
->if_name
, ifp
->if_unit
, retval
);
1436 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT
| DBG_FUNC_END
, 0,0,0,0,0);
1438 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT
| DBG_FUNC_END
, 0,0,0,0,0);
1443 packetlist
= packetlist
->m_nextpkt
;
1444 m
->m_nextpkt
= NULL
;
1449 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT
| DBG_FUNC_START
, 0,0,0,0,0);
1450 retval
= ifp
->if_output(ifp
, send_head
);
1451 if (retval
&& dlil_verbose
) {
1452 printf("dlil_output: output error on %s%d retval = %d\n",
1453 ifp
->if_name
, ifp
->if_unit
, retval
);
1455 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT
| DBG_FUNC_END
, 0,0,0,0,0);
1458 KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT
| DBG_FUNC_END
,0,0,0,0,0);
1462 if (packetlist
) /* if any packet left, clean up */
1463 mbuf_freem_list(packetlist
);
1464 if (retval
== EJUSTRETURN
)
1473 * Caller should have a lock on the protocol domain if the protocol
1474 * doesn't support finer grained locking. In most cases, the lock
1475 * will be held from the socket layer and won't be released until
1476 * we return back to the socket layer.
1478 * This does mean that we must take a protocol lock before we take
1479 * an interface lock if we're going to take both. This makes sense
1480 * because a protocol is likely to interact with an ifp while it
1481 * is under the protocol lock.
1483 __private_extern__ errno_t
1486 protocol_family_t proto_family
,
1489 const struct sockaddr
*dest
,
1492 char *frame_type
= NULL
;
1493 char *dst_linkaddr
= NULL
;
1495 char frame_type_buffer
[MAX_FRAME_TYPE_SIZE
* 4];
1496 char dst_linkaddr_buffer
[MAX_LINKADDR
* 4];
1497 struct ifnet_filter
*filter
;
1498 struct if_proto
*proto
= 0;
1500 mbuf_t send_head
= NULL
;
1501 mbuf_t
*send_tail
= &send_head
;
1503 KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT
| DBG_FUNC_START
,0,0,0,0,0);
1507 frame_type
= frame_type_buffer
;
1508 dst_linkaddr
= dst_linkaddr_buffer
;
1511 proto
= find_attached_proto(ifp
, proto_family
);
1512 if (proto
== NULL
) {
1519 if (packetlist
== NULL
)
1522 packetlist
= packetlist
->m_nextpkt
;
1523 m
->m_nextpkt
= NULL
;
1526 proto_media_preout preoutp
= proto
->proto_kpi
== kProtoKPI_v1
1527 ? proto
->kpi
.v1
.pre_output
: proto
->kpi
.v2
.pre_output
;
1530 retval
= preoutp(ifp
, proto_family
, &m
, dest
, route
, frame_type
, dst_linkaddr
);
1533 if (retval
== EJUSTRETURN
) {
1543 retval
= mac_ifnet_check_transmit(ifp
, m
, proto_family
,
1544 dlil_get_socket_type(&m
, proto_family
, raw
));
1552 if (raw
== 0 && ifp
->if_framer
) {
1556 * If this is a broadcast packet that needs to be
1557 * looped back into the system, set the inbound ifp
1558 * to that of the outbound ifp. This will allow
1559 * us to determine that it is a legitimate packet
1560 * for the system. Only set the ifp if it's not
1561 * already set, just to be safe.
1563 if ((m
->m_flags
& (M_BCAST
| M_LOOP
)) &&
1564 m
->m_pkthdr
.rcvif
== NULL
) {
1565 m
->m_pkthdr
.rcvif
= ifp
;
1569 retval
= ifp
->if_framer(ifp
, &m
, dest
, dst_linkaddr
, frame_type
);
1571 if (retval
!= EJUSTRETURN
) {
1578 * Clear the ifp if it was set above, and to be
1579 * safe, only if it is still the same as the
1580 * outbound ifp we have in context. If it was
1581 * looped back, then a copy of it was sent to the
1582 * loopback interface with the rcvif set, and we
1583 * are clearing the one that will go down to the
1586 if (rcvif_set
&& m
->m_pkthdr
.rcvif
== ifp
)
1587 m
->m_pkthdr
.rcvif
= NULL
;
1591 * Let interface filters (if any) do their thing ...
1593 /* Do not pass VLAN tagged packets to filters PR-3586856 */
1594 if ((m
->m_pkthdr
.csum_flags
& CSUM_VLAN_TAG_VALID
) == 0) {
1595 TAILQ_FOREACH(filter
, &ifp
->if_flt_head
, filt_next
) {
1596 if ((filter
->filt_protocol
== 0 || (filter
->filt_protocol
== proto_family
)) &&
1597 filter
->filt_output
) {
1598 retval
= filter
->filt_output(filter
->filt_cookie
, ifp
, proto_family
, &m
);
1600 if (retval
!= EJUSTRETURN
)
1609 * Strip away M_PROTO1 bit prior to sending packet to the driver
1610 * as this field may be used by the driver
1612 m
->m_flags
&= ~M_PROTO1
;
1615 * If the underlying interface is not capable of handling a
1616 * packet whose data portion spans across physically disjoint
1617 * pages, we need to "normalize" the packet so that we pass
1618 * down a chain of mbufs where each mbuf points to a span that
1619 * resides in the system page boundary. If the packet does
1620 * not cross page(s), the following is a no-op.
1622 if (!(ifp
->if_hwassist
& IFNET_MULTIPAGES
)) {
1623 if ((m
= m_normalize(m
)) == NULL
)
1628 * If this is a TSO packet, make sure the interface still advertises TSO capability
1631 if ((m
->m_pkthdr
.csum_flags
& CSUM_TSO_IPV4
) && !(ifp
->if_hwassist
& IFNET_TSO_IPV4
)) {
1637 if ((m
->m_pkthdr
.csum_flags
& CSUM_TSO_IPV6
) && !(ifp
->if_hwassist
& IFNET_TSO_IPV6
)) {
1643 * Finally, call the driver.
1646 if ((ifp
->if_eflags
& IFEF_SENDLIST
) != 0) {
1648 send_tail
= &m
->m_nextpkt
;
1651 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT
| DBG_FUNC_START
, 0,0,0,0,0);
1653 if (mbuf_get_priority(m
) == MBUF_PRIORITY_BACKGROUND
) {
1654 atomic_add_32(&ifp
->if_obgpackets
, 1);
1655 atomic_add_32(&ifp
->if_obgbytes
,
1658 #endif /* PKT_PRIORITY */
1659 retval
= ifp
->if_output(ifp
, m
);
1660 if (retval
&& dlil_verbose
) {
1661 printf("dlil_output: output error on %s%d retval = %d\n",
1662 ifp
->if_name
, ifp
->if_unit
, retval
);
1664 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT
| DBG_FUNC_END
, 0,0,0,0,0);
1666 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT
| DBG_FUNC_END
, 0,0,0,0,0);
1671 packetlist
= packetlist
->m_nextpkt
;
1672 m
->m_nextpkt
= NULL
;
1677 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT
| DBG_FUNC_START
, 0,0,0,0,0);
1679 if (mbuf_get_priority(send_head
) == MBUF_PRIORITY_BACKGROUND
) {
1680 atomic_add_32(&ifp
->if_obgpackets
, 1);
1681 atomic_add_32(&ifp
->if_obgbytes
,
1682 send_head
->m_pkthdr
.len
);
1684 #endif /* PKT_PRIORITY */
1685 retval
= ifp
->if_output(ifp
, send_head
);
1686 if (retval
&& dlil_verbose
) {
1687 printf("dlil_output: output error on %s%d retval = %d\n",
1688 ifp
->if_name
, ifp
->if_unit
, retval
);
1690 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT
| DBG_FUNC_END
, 0,0,0,0,0);
1693 KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT
| DBG_FUNC_END
,0,0,0,0,0);
1697 if (packetlist
) /* if any packet left, clean up */
1698 mbuf_freem_list(packetlist
);
1699 if (retval
== EJUSTRETURN
)
1707 protocol_family_t proto_fam
,
1711 struct ifnet_filter
*filter
;
1712 int retval
= EOPNOTSUPP
;
1714 int holding_read
= 0;
1716 if (ifp
== NULL
|| ioctl_code
== 0)
1719 /* Attempt to increment the use count. If it's zero, bail out, the ifp is invalid */
1720 result
= ifp_use(ifp
, kIfNetUseCount_MustNotBeZero
);
1727 /* Run the interface filters first.
1728 * We want to run all filters before calling the protocol,
1729 * interface family, or interface.
1731 TAILQ_FOREACH(filter
, &ifp
->if_flt_head
, filt_next
) {
1732 if ((filter
->filt_protocol
== 0 || (filter
->filt_protocol
== proto_fam
)) &&
1733 filter
->filt_ioctl
!= NULL
) {
1734 result
= filter
->filt_ioctl(filter
->filt_cookie
, ifp
, proto_fam
, ioctl_code
, ioctl_arg
);
1735 /* Only update retval if no one has handled the ioctl */
1736 if (retval
== EOPNOTSUPP
|| result
== EJUSTRETURN
) {
1737 if (result
== ENOTSUP
)
1738 result
= EOPNOTSUPP
;
1740 if (retval
&& retval
!= EOPNOTSUPP
) {
1747 /* Allow the protocol to handle the ioctl */
1749 struct if_proto
*proto
= find_attached_proto(ifp
, proto_fam
);
1752 proto_media_ioctl ioctlp
= proto
->proto_kpi
== kProtoKPI_v1
1753 ? proto
->kpi
.v1
.ioctl
: proto
->kpi
.v2
.ioctl
;
1754 result
= EOPNOTSUPP
;
1756 result
= ioctlp(ifp
, proto_fam
, ioctl_code
, ioctl_arg
);
1758 /* Only update retval if no one has handled the ioctl */
1759 if (retval
== EOPNOTSUPP
|| result
== EJUSTRETURN
) {
1760 if (result
== ENOTSUP
)
1761 result
= EOPNOTSUPP
;
1763 if (retval
&& retval
!= EOPNOTSUPP
) {
1771 * Since we have incremented the use count on the ifp, we are guaranteed
1772 * that the ifp will not go away (the function pointers may not be changed).
1773 * We release the dlil read lock so the interface ioctl may trigger a
1774 * protocol attach. This happens with vlan and may occur with other virtual
1780 /* retval is either 0 or EOPNOTSUPP */
1783 * Let the interface handle this ioctl.
1784 * If it returns EOPNOTSUPP, ignore that, we may have
1785 * already handled this in the protocol or family.
1788 result
= (*ifp
->if_ioctl
)(ifp
, ioctl_code
, ioctl_arg
);
1790 /* Only update retval if no one has handled the ioctl */
1791 if (retval
== EOPNOTSUPP
|| result
== EJUSTRETURN
) {
1792 if (result
== ENOTSUP
)
1793 result
= EOPNOTSUPP
;
1795 if (retval
&& retval
!= EOPNOTSUPP
) {
1804 ifp_use_reached_zero(ifp
);
1806 if (retval
== EJUSTRETURN
)
1811 __private_extern__ errno_t
1815 bpf_packet_func callback
)
1820 if (ifp
->if_set_bpf_tap
)
1821 error
= ifp
->if_set_bpf_tap(ifp
, mode
, callback
);
1830 const struct sockaddr
*proto_addr
,
1831 struct sockaddr
*ll_addr
,
1834 errno_t result
= EOPNOTSUPP
;
1835 struct if_proto
*proto
;
1836 const struct sockaddr
*verify
;
1837 proto_media_resolve_multi resolvep
;
1841 bzero(ll_addr
, ll_len
);
1843 /* Call the protocol first */
1844 proto
= find_attached_proto(ifp
, proto_addr
->sa_family
);
1845 if (proto
!= NULL
) {
1846 resolvep
= proto
->proto_kpi
== kProtoKPI_v1
1847 ? proto
->kpi
.v1
.resolve_multi
: proto
->kpi
.v2
.resolve_multi
;
1848 if (resolvep
!= NULL
)
1849 result
= resolvep(ifp
, proto_addr
,(struct sockaddr_dl
*)ll_addr
,
1853 /* Let the interface verify the multicast address */
1854 if ((result
== EOPNOTSUPP
|| result
== 0) && ifp
->if_check_multi
) {
1858 verify
= proto_addr
;
1859 result
= ifp
->if_check_multi(ifp
, verify
);
1867 __private_extern__ errno_t
1868 dlil_send_arp_internal(
1871 const struct sockaddr_dl
* sender_hw
,
1872 const struct sockaddr
* sender_proto
,
1873 const struct sockaddr_dl
* target_hw
,
1874 const struct sockaddr
* target_proto
)
1876 struct if_proto
*proto
;
1881 proto
= find_attached_proto(ifp
, target_proto
->sa_family
);
1882 if (proto
== NULL
) {
1886 proto_media_send_arp arpp
;
1887 arpp
= proto
->proto_kpi
== kProtoKPI_v1
1888 ? proto
->kpi
.v1
.send_arp
: proto
->kpi
.v2
.send_arp
;
1892 result
= arpp(ifp
, arpop
, sender_hw
, sender_proto
, target_hw
,
/*
 * Report whether an ARP packet is an "announcement", i.e. the sender and
 * target protocol addresses are the same IPv4 address.
 *
 * sender_sin: sender protocol address, may be NULL.
 * target_sin: target protocol address, may be NULL.
 *
 * Returns non-zero when both addresses are present and equal, 0 otherwise.
 *
 * Both pointers are checked, not just the first: the caller in this file
 * passes its arguments as (target_sin, sender_sin) and permits a NULL
 * sender, so a NULL can arrive in either position.  The comparison itself
 * is symmetric, so argument order does not affect the result.
 */
static __inline__ int
_is_announcement(const struct sockaddr_in * sender_sin,
    const struct sockaddr_in * target_sin)
{
	if (sender_sin == NULL || target_sin == NULL) {
		return 0;
	}
	return (sender_sin->sin_addr.s_addr == target_sin->sin_addr.s_addr);
}
1911 __private_extern__ errno_t
1915 const struct sockaddr_dl
* sender_hw
,
1916 const struct sockaddr
* sender_proto
,
1917 const struct sockaddr_dl
* target_hw
,
1918 const struct sockaddr
* target_proto
)
1921 const struct sockaddr_in
* sender_sin
;
1922 const struct sockaddr_in
* target_sin
;
1924 if (target_proto
== NULL
|| (sender_proto
&&
1925 sender_proto
->sa_family
!= target_proto
->sa_family
))
1929 * If this is an ARP request and the target IP is IPv4LL,
1930 * send the request on all interfaces. The exception is
1931 * an announcement, which must only appear on the specific
1934 sender_sin
= (const struct sockaddr_in
*)sender_proto
;
1935 target_sin
= (const struct sockaddr_in
*)target_proto
;
1936 if (target_proto
->sa_family
== AF_INET
1937 && IN_LINKLOCAL(ntohl(target_sin
->sin_addr
.s_addr
))
1938 && ipv4_ll_arp_aware
!= 0
1939 && arpop
== ARPOP_REQUEST
1940 && !_is_announcement(target_sin
, sender_sin
)) {
1947 if (ifnet_list_get(IFNET_FAMILY_ANY
, &ifp_list
, &count
) == 0) {
1948 for (ifp_on
= 0; ifp_on
< count
; ifp_on
++) {
1950 ifaddr_t source_hw
= NULL
;
1951 ifaddr_t source_ip
= NULL
;
1952 struct sockaddr_in source_ip_copy
;
1955 * Only arp on interfaces marked for IPv4LL ARPing. This may
1956 * mean that we don't ARP on the interface the subnet route
1959 if ((ifp_list
[ifp_on
]->if_eflags
& IFEF_ARPLL
) == 0) {
1963 /* Find the source IP address */
1964 ifnet_lock_shared(ifp_list
[ifp_on
]);
1965 source_hw
= TAILQ_FIRST(&ifp_list
[ifp_on
]->if_addrhead
);
1966 TAILQ_FOREACH(source_ip
, &ifp_list
[ifp_on
]->if_addrhead
,
1968 if (source_ip
->ifa_addr
&&
1969 source_ip
->ifa_addr
->sa_family
== AF_INET
) {
1974 /* No IP Source, don't arp */
1975 if (source_ip
== NULL
) {
1976 ifnet_lock_done(ifp_list
[ifp_on
]);
1980 /* Copy the source IP address */
1981 source_ip_copy
= *(struct sockaddr_in
*)source_ip
->ifa_addr
;
1983 ifnet_lock_done(ifp_list
[ifp_on
]);
1986 new_result
= dlil_send_arp_internal(ifp_list
[ifp_on
], arpop
,
1987 (struct sockaddr_dl
*)source_hw
->ifa_addr
,
1988 (struct sockaddr
*)&source_ip_copy
, NULL
,
1992 if (result
== ENOTSUP
) {
1993 result
= new_result
;
1998 ifnet_list_free(ifp_list
);
2001 result
= dlil_send_arp_internal(ifp
, arpop
, sender_hw
, sender_proto
,
2002 target_hw
, target_proto
);
2008 __private_extern__
int
2017 old_value
= ifp
->if_usecnt
;
2018 if (old_value
== 0 && handle_zero
== kIfNetUseCount_MustNotBeZero
) {
2019 retval
= ENXIO
; // ifp is invalid
2022 } while (!OSCompareAndSwap((UInt32
)old_value
, (UInt32
)old_value
+ 1, (UInt32
*)&ifp
->if_usecnt
));
2027 /* ifp_unuse is broken into two pieces.
2029 * ifp_use and ifp_unuse must be called between when the caller calls
2030 * dlil_write_begin and dlil_write_end. ifp_unuse needs to perform some
2031 * operations after dlil_write_end has been called. For this reason,
2032 * anyone calling ifp_unuse must call ifp_use_reached_zero if ifp_unuse
2033 * returns a non-zero value. The caller must call ifp_use_reached_zero
2034 * after the caller has called dlil_write_end.
2036 __private_extern__
void
2037 ifp_use_reached_zero(
2040 ifnet_detached_func free_func
;
2044 if (ifp
->if_usecnt
!= 0)
2045 panic("ifp_use_reached_zero: ifp->if_usecnt != 0");
2047 ifnet_head_lock_exclusive();
2048 ifnet_lock_exclusive(ifp
);
2050 /* Remove ourselves from the list */
2051 TAILQ_REMOVE(&ifnet_head
, ifp
, if_link
);
2052 ifnet_addrs
[ifp
->if_index
- 1] = NULL
;
2054 /* ifp should be removed from the interface list */
2055 while (ifp
->if_multiaddrs
.lh_first
) {
2056 struct ifmultiaddr
*ifma
= ifp
->if_multiaddrs
.lh_first
;
2059 * When the interface is gone, we will no longer
2060 * be listening on these multicasts. Various bits
2061 * of the stack may be referencing these multicasts,
2062 * release only our reference.
2064 LIST_REMOVE(ifma
, ifma_link
);
2065 ifma
->ifma_ifp
= NULL
;
2069 ifp
->if_eflags
&= ~IFEF_DETACHING
; // clear the detaching flag
2070 ifnet_lock_done(ifp
);
2073 free_func
= ifp
->if_free
;
2075 dlil_post_msg(ifp
, KEV_DL_SUBCLASS
, KEV_DL_IF_DETACHED
, NULL
, 0);
2081 __private_extern__
int
2086 oldval
= OSDecrementAtomic(&ifp
->if_usecnt
);
2088 panic("ifp_unuse: ifp(%s%d)->if_usecnt was zero\n", ifp
->if_name
, ifp
->if_unit
);
2093 if ((ifp
->if_eflags
& IFEF_DETACHING
) == 0)
2094 panic("ifp_unuse: use count reached zero but detching flag is not set!");
2096 return 1; /* caller must call ifp_use_reached_zero */
2099 extern lck_mtx_t
*domain_proto_mtx
;
2102 dlil_attach_protocol_internal(
2103 struct if_proto
*proto
,
2104 const struct ifnet_demux_desc
*demux_list
,
2105 u_int32_t demux_count
)
2107 struct kev_dl_proto_data ev_pr_data
;
2108 struct ifnet
*ifp
= proto
->ifp
;
2110 u_int32_t hash_value
= proto_hash_value(proto
->protocol_family
);
2112 /* setup some of the common values */
2115 lck_mtx_lock(domain_proto_mtx
);
2117 while (dp
&& (protocol_family_t
)dp
->dom_family
!= proto
->protocol_family
)
2119 proto
->dl_domain
= dp
;
2120 lck_mtx_unlock(domain_proto_mtx
);
2124 * Take the write lock to protect readers and exclude other writers.
2126 if ((retval
= dlil_write_begin()) != 0) {
2127 printf("dlil_attach_protocol_internal - dlil_write_begin returned %d\n", retval
);
2131 /* Check that the interface isn't currently detaching */
2132 ifnet_lock_shared(ifp
);
2133 if ((ifp
->if_eflags
& IFEF_DETACHING
) != 0) {
2134 ifnet_lock_done(ifp
);
2138 ifnet_lock_done(ifp
);
2140 if (find_attached_proto(ifp
, proto
->protocol_family
) != NULL
) {
2146 * Call family module add_proto routine so it can refine the
2147 * demux descriptors as it wishes.
2149 retval
= ifp
->if_add_proto(ifp
, proto
->protocol_family
, demux_list
, demux_count
);
2156 * We can't fail from this point on.
2157 * Increment the number of uses (protocol attachments + interface attached).
2159 ifp_use(ifp
, kIfNetUseCount_MustNotBeZero
);
2162 * Insert the protocol in the hash
2165 struct if_proto
* prev_proto
= SLIST_FIRST(&ifp
->if_proto_hash
[hash_value
]);
2166 while (prev_proto
&& SLIST_NEXT(prev_proto
, next_hash
) != NULL
)
2167 prev_proto
= SLIST_NEXT(prev_proto
, next_hash
);
2169 SLIST_INSERT_AFTER(prev_proto
, proto
, next_hash
);
2171 SLIST_INSERT_HEAD(&ifp
->if_proto_hash
[hash_value
], proto
, next_hash
);
2175 * Add to if_proto list for this interface
2177 if_proto_ref(proto
);
2180 /* the reserved field carries the number of protocols still attached (subject to change) */
2181 ev_pr_data
.proto_family
= proto
->protocol_family
;
2182 ev_pr_data
.proto_remaining_count
= dlil_ifp_proto_count(ifp
);
2183 dlil_post_msg(ifp
, KEV_DL_SUBCLASS
, KEV_DL_PROTO_ATTACHED
,
2184 (struct net_event_data
*)&ev_pr_data
,
2185 sizeof(struct kev_dl_proto_data
));
2187 DLIL_PRINTF("dlil. Attached protocol %d to %s%d - %d\n", proto
->protocol_family
,
2188 ifp
->if_name
, ifp
->if_unit
, retval
);
2194 ifnet_attach_protocol(ifnet_t ifp
, protocol_family_t protocol
,
2195 const struct ifnet_attach_proto_param
*proto_details
)
2198 struct if_proto
*ifproto
= NULL
;
2200 if (ifp
== NULL
|| protocol
== 0 || proto_details
== NULL
)
2203 ifproto
= _MALLOC(sizeof(struct if_proto
), M_IFADDR
, M_WAITOK
);
2205 DLIL_PRINTF("ERROR - dlil failed if_proto allocation\n");
2209 bzero(ifproto
, sizeof(*ifproto
));
2212 ifproto
->protocol_family
= protocol
;
2213 ifproto
->proto_kpi
= kProtoKPI_v1
;
2214 ifproto
->kpi
.v1
.input
= proto_details
->input
;
2215 ifproto
->kpi
.v1
.pre_output
= proto_details
->pre_output
;
2216 ifproto
->kpi
.v1
.event
= proto_details
->event
;
2217 ifproto
->kpi
.v1
.ioctl
= proto_details
->ioctl
;
2218 ifproto
->kpi
.v1
.detached
= proto_details
->detached
;
2219 ifproto
->kpi
.v1
.resolve_multi
= proto_details
->resolve
;
2220 ifproto
->kpi
.v1
.send_arp
= proto_details
->send_arp
;
2222 retval
= dlil_attach_protocol_internal(ifproto
,
2223 proto_details
->demux_list
, proto_details
->demux_count
);
2226 if (retval
&& ifproto
)
2227 FREE(ifproto
, M_IFADDR
);
2232 ifnet_attach_protocol_v2(ifnet_t ifp
, protocol_family_t protocol
,
2233 const struct ifnet_attach_proto_param_v2
*proto_details
)
2236 struct if_proto
*ifproto
= NULL
;
2238 if (ifp
== NULL
|| protocol
== 0 || proto_details
== NULL
)
2241 ifproto
= _MALLOC(sizeof(struct if_proto
), M_IFADDR
, M_WAITOK
);
2243 DLIL_PRINTF("ERROR - dlil failed if_proto allocation\n");
2247 bzero(ifproto
, sizeof(*ifproto
));
2250 ifproto
->protocol_family
= protocol
;
2251 ifproto
->proto_kpi
= kProtoKPI_v2
;
2252 ifproto
->kpi
.v2
.input
= proto_details
->input
;
2253 ifproto
->kpi
.v2
.pre_output
= proto_details
->pre_output
;
2254 ifproto
->kpi
.v2
.event
= proto_details
->event
;
2255 ifproto
->kpi
.v2
.ioctl
= proto_details
->ioctl
;
2256 ifproto
->kpi
.v2
.detached
= proto_details
->detached
;
2257 ifproto
->kpi
.v2
.resolve_multi
= proto_details
->resolve
;
2258 ifproto
->kpi
.v2
.send_arp
= proto_details
->send_arp
;
2260 retval
= dlil_attach_protocol_internal(ifproto
,
2261 proto_details
->demux_list
, proto_details
->demux_count
);
2264 if (retval
&& ifproto
)
2265 FREE(ifproto
, M_IFADDR
);
2269 extern void if_rtproto_del(struct ifnet
*ifp
, int protocol
);
2272 dlil_detach_protocol_internal(
2273 struct if_proto
*proto
)
2275 struct ifnet
*ifp
= proto
->ifp
;
2276 u_int32_t proto_family
= proto
->protocol_family
;
2277 struct kev_dl_proto_data ev_pr_data
;
2279 if (proto
->proto_kpi
== kProtoKPI_v1
) {
2280 if (proto
->kpi
.v1
.detached
)
2281 proto
->kpi
.v1
.detached(ifp
, proto
->protocol_family
);
2283 if (proto
->proto_kpi
== kProtoKPI_v2
) {
2284 if (proto
->kpi
.v2
.detached
)
2285 proto
->kpi
.v2
.detached(ifp
, proto
->protocol_family
);
2287 if_proto_free(proto
);
2290 * Cleanup routes that may still be in the routing table for that interface/protocol pair.
2293 if_rtproto_del(ifp
, proto_family
);
2295 /* the reserved field carries the number of protocols still attached (subject to change) */
2296 ev_pr_data
.proto_family
= proto_family
;
2297 ev_pr_data
.proto_remaining_count
= dlil_ifp_proto_count(ifp
);
2298 dlil_post_msg(ifp
, KEV_DL_SUBCLASS
, KEV_DL_PROTO_DETACHED
,
2299 (struct net_event_data
*)&ev_pr_data
,
2300 sizeof(struct kev_dl_proto_data
));
2305 ifnet_detach_protocol(ifnet_t ifp
, protocol_family_t proto_family
)
2307 struct if_proto
*proto
= NULL
;
2309 int use_reached_zero
= 0;
2311 if (ifp
== NULL
|| proto_family
== 0) return EINVAL
;
2313 if ((retval
= dlil_write_begin()) != 0) {
2314 if (retval
== EDEADLK
) {
2317 proto
= find_attached_proto(ifp
, proto_family
);
2322 proto
->detaching
= 1;
2323 dlil_detach_waiting
= 1;
2324 wakeup(&dlil_detach_waiting
);
2331 proto
= find_attached_proto(ifp
, proto_family
);
2333 if (proto
== NULL
) {
2340 * Call family module del_proto
2343 if (ifp
->if_del_proto
)
2344 ifp
->if_del_proto(ifp
, proto
->protocol_family
);
2346 SLIST_REMOVE(&ifp
->if_proto_hash
[proto_hash_value(proto_family
)], proto
, if_proto
, next_hash
);
2349 * We can do the rest of the work outside of the write lock.
2351 use_reached_zero
= ifp_unuse(ifp
);
2354 dlil_detach_protocol_internal(proto
);
2357 * Only handle the case where the interface will go away after
2358 * we've sent the message. This way post message can send the
2359 * message to the interface safely.
2362 if (use_reached_zero
)
2363 ifp_use_reached_zero(ifp
);
2370 * dlil_delayed_detach_thread is responsible for detaching
2371 * protocols, protocol filters, and interface filters after
2372 * an attempt was made to detach one of those items while
2373 * it was not safe to do so (i.e. called dlil_read_begin).
2375 * This function will take the dlil write lock and walk
2376 * through each of the interfaces looking for items with
2377 * the detaching flag set. When an item is found, it is
2378 * detached from the interface and placed on a local list.
2379 * After all of the items have been collected, we drop the
2380 * write lock and perform the post detach. This is done
2381 * so we only have to take the write lock once.
2383 * When detaching a protocol filter, if we find that we
2384 * have detached the very last protocol and we need to call
2385 * ifp_use_reached_zero, we have to break out of our work
2386 * to drop the write lock so we can call ifp_use_reached_zero.
2390 dlil_delayed_detach_thread(__unused
void* foo
, __unused wait_result_t wait
)
2392 thread_t self
= current_thread();
2395 ml_thread_policy(self
, MACHINE_GROUP
,
2396 (MACHINE_NETWORK_GROUP
|MACHINE_NETWORK_NETISR
));
2400 if (dlil_detach_waiting
!= 0 && dlil_write_begin() == 0) {
2402 struct proto_hash_entry detached_protos
;
2403 struct ifnet_filter_head detached_filters
;
2404 struct if_proto
*proto
;
2405 struct if_proto
*next_proto
;
2406 struct ifnet_filter
*filt
;
2407 struct ifnet_filter
*next_filt
;
2412 /* Clear the detach waiting flag */
2413 dlil_detach_waiting
= 0;
2414 TAILQ_INIT(&detached_filters
);
2415 SLIST_INIT(&detached_protos
);
2417 ifnet_head_lock_shared();
2418 TAILQ_FOREACH(ifp
, &ifnet_head
, if_link
) {
2421 // Look for protocols and protocol filters
2422 for (i
= 0; i
< PROTO_HASH_SLOTS
&& !reached_zero
; i
++) {
2423 struct if_proto
**prev_nextptr
= &SLIST_FIRST(&ifp
->if_proto_hash
[i
]);
2424 for (proto
= *prev_nextptr
; proto
; proto
= *prev_nextptr
) {
2426 // Detach this protocol
2427 if (proto
->detaching
) {
2428 if (ifp
->if_del_proto
)
2429 ifp
->if_del_proto(ifp
, proto
->protocol_family
);
2430 *prev_nextptr
= SLIST_NEXT(proto
, next_hash
);
2431 SLIST_INSERT_HEAD(&detached_protos
, proto
, next_hash
);
2432 reached_zero
= ifp_unuse(ifp
);
2438 // Update prev_nextptr to point to our next ptr
2439 prev_nextptr
= &SLIST_NEXT(proto
, next_hash
);
2444 // look for interface filters that need to be detached
2445 for (filt
= TAILQ_FIRST(&ifp
->if_flt_head
); filt
; filt
= next_filt
) {
2446 next_filt
= TAILQ_NEXT(filt
, filt_next
);
2447 if (filt
->filt_detaching
!= 0) {
2448 // take this interface filter off the interface filter list
2449 TAILQ_REMOVE(&ifp
->if_flt_head
, filt
, filt_next
);
2451 // put this interface filter on the detached filters list
2452 TAILQ_INSERT_TAIL(&detached_filters
, filt
, filt_next
);
2456 if (ifp
->if_delayed_detach
) {
2457 ifp
->if_delayed_detach
= 0;
2458 reached_zero
= ifp_unuse(ifp
);
2467 for (filt
= TAILQ_FIRST(&detached_filters
); filt
; filt
= next_filt
) {
2468 next_filt
= TAILQ_NEXT(filt
, filt_next
);
2470 * dlil_detach_filter_internal won't remove an item from
2471 * the list if it is already detached (second parameter).
2472 * The item will be freed though.
2474 dlil_detach_filter_internal(filt
, 1);
2477 for (proto
= SLIST_FIRST(&detached_protos
); proto
; proto
= next_proto
) {
2478 next_proto
= SLIST_NEXT(proto
, next_hash
);
2479 dlil_detach_protocol_internal(proto
);
2483 ifp_use_reached_zero(ifp
);
2484 dlil_detach_waiting
= 1; // we may have missed something
2488 if (!asserted
&& dlil_detach_waiting
== 0) {
2490 assert_wait(&dlil_detach_waiting
, THREAD_UNINT
);
2493 if (dlil_detach_waiting
== 0) {
2495 thread_block(dlil_delayed_detach_thread
);
2501 dlil_call_delayed_detach_thread(void) {
2502 dlil_delayed_detach_thread(NULL
, THREAD_RESTART
);
2505 extern int if_next_index(void);
2510 const struct sockaddr_dl
*ll_addr
)
2512 u_int32_t interface_family
;
2513 struct ifnet
*tmp_if
;
2514 struct proto_hash_entry
*new_proto_list
= NULL
;
2517 if (ifp
== NULL
) return EINVAL
;
2518 if (ll_addr
&& ifp
->if_addrlen
== 0) {
2519 ifp
->if_addrlen
= ll_addr
->sdl_alen
;
2521 else if (ll_addr
&& ll_addr
->sdl_alen
!= ifp
->if_addrlen
) {
2525 interface_family
= ifp
->if_family
;
2527 ifnet_head_lock_shared();
2529 /* Verify we aren't already on the list */
2530 TAILQ_FOREACH(tmp_if
, &ifnet_head
, if_link
) {
2531 if (tmp_if
== ifp
) {
2539 if ((ifp
->if_eflags
& IFEF_REUSE
) == 0 || ifp
->if_lock
== 0)
2541 ifp
->if_lock
= lck_rw_alloc_init(ifnet_lock_group
, ifnet_lock_attr
);
2543 ifp
->if_lock
= lck_mtx_alloc_init(ifnet_lock_group
, ifnet_lock_attr
);
2546 if (ifp
->if_lock
== 0) {
2550 if (!(ifp
->if_eflags
& IFEF_REUSE
) || ifp
->if_fwd_route_lock
== NULL
) {
2551 if (ifp
->if_fwd_route_lock
== NULL
)
2552 ifp
->if_fwd_route_lock
= lck_mtx_alloc_init(
2553 ifnet_lock_group
, ifnet_lock_attr
);
2555 if (ifp
->if_fwd_route_lock
== NULL
) {
2557 lck_rw_free(ifp
->if_lock
, ifnet_lock_group
);
2559 lck_mtx_free(ifp
->if_lock
, ifnet_lock_group
);
2561 ifp
->if_lock
= NULL
;
2567 * Allow interfaces without protocol families to attach
2568 * only if they have the necessary fields filled out.
2571 if (ifp
->if_add_proto
== 0 || ifp
->if_del_proto
== 0) {
2572 DLIL_PRINTF("dlil Attempt to attach interface without family module - %d\n",
2577 if ((ifp
->if_eflags
& IFEF_REUSE
) == 0 || ifp
->if_proto_hash
== NULL
) {
2578 MALLOC(new_proto_list
, struct proto_hash_entry
*, sizeof(struct proto_hash_entry
) * PROTO_HASH_SLOTS
,
2581 if (new_proto_list
== 0) {
2589 TAILQ_INIT(&ifp
->if_flt_head
);
2592 if (new_proto_list
) {
2593 bzero(new_proto_list
, (PROTO_HASH_SLOTS
* sizeof(struct proto_hash_entry
)));
2594 ifp
->if_proto_hash
= new_proto_list
;
2595 new_proto_list
= NULL
;
2601 int namelen
, masklen
, socksize
, ifasize
;
2602 struct ifaddr
*ifa
= NULL
;
2604 if (ifp
->if_snd
.ifq_maxlen
== 0)
2605 ifp
->if_snd
.ifq_maxlen
= ifqmaxlen
;
2606 TAILQ_INIT(&ifp
->if_prefixhead
);
2607 LIST_INIT(&ifp
->if_multiaddrs
);
2608 ifnet_touch_lastchange(ifp
);
2610 /* usecount to track attachment to the ifnet list */
2611 ifp_use(ifp
, kIfNetUseCount_MayBeZero
);
2613 /* Lock the list of interfaces */
2614 ifnet_head_lock_exclusive();
2615 ifnet_lock_exclusive(ifp
);
2617 if ((ifp
->if_eflags
& IFEF_REUSE
) == 0 || ifp
->if_index
== 0) {
2618 int idx
= if_next_index();
2621 ifnet_lock_done(ifp
);
2628 ifp
->if_index
= idx
;
2630 ifa
= TAILQ_FIRST(&ifp
->if_addrhead
);
2632 namelen
= snprintf(workbuf
, sizeof(workbuf
), "%s%d", ifp
->if_name
, ifp
->if_unit
);
2633 #define _offsetof(t, m) ((uintptr_t)((caddr_t)&((t *)0)->m))
2634 masklen
= _offsetof(struct sockaddr_dl
, sdl_data
[0]) + namelen
;
2635 socksize
= masklen
+ ifp
->if_addrlen
;
2636 #define ROUNDUP(a) (1 + (((a) - 1) | (sizeof(u_int32_t) - 1)))
2637 if ((u_int32_t
)socksize
< sizeof(struct sockaddr_dl
))
2638 socksize
= sizeof(struct sockaddr_dl
);
2639 socksize
= ROUNDUP(socksize
);
2640 ifasize
= sizeof(struct ifaddr
) + 2 * socksize
;
2643 * Allocate a new ifa if we don't have one
2644 * or the old one is too small.
2646 if (ifa
== NULL
|| socksize
> ifa
->ifa_addr
->sa_len
) {
2648 if_detach_ifa(ifp
, ifa
);
2649 ifa
= (struct ifaddr
*)_MALLOC(ifasize
, M_IFADDR
, M_WAITOK
);
2653 struct sockaddr_dl
*sdl
= (struct sockaddr_dl
*)(ifa
+ 1);
2654 ifnet_addrs
[ifp
->if_index
- 1] = ifa
;
2655 bzero(ifa
, ifasize
);
2656 ifa
->ifa_debug
|= IFD_ALLOC
;
2657 sdl
->sdl_len
= socksize
;
2658 sdl
->sdl_family
= AF_LINK
;
2659 bcopy(workbuf
, sdl
->sdl_data
, namelen
);
2660 sdl
->sdl_nlen
= namelen
;
2661 sdl
->sdl_index
= ifp
->if_index
;
2662 sdl
->sdl_type
= ifp
->if_type
;
2664 sdl
->sdl_alen
= ll_addr
->sdl_alen
;
2665 if (ll_addr
->sdl_alen
!= ifp
->if_addrlen
)
2666 panic("ifnet_attach - ll_addr->sdl_alen != ifp->if_addrlen");
2667 bcopy(CONST_LLADDR(ll_addr
), LLADDR(sdl
), sdl
->sdl_alen
);
2670 ifa
->ifa_rtrequest
= link_rtrequest
;
2671 ifa
->ifa_addr
= (struct sockaddr
*)sdl
;
2672 sdl
= (struct sockaddr_dl
*)(socksize
+ (caddr_t
)sdl
);
2673 ifa
->ifa_netmask
= (struct sockaddr
*)sdl
;
2674 sdl
->sdl_len
= masklen
;
2675 while (namelen
!= 0)
2676 sdl
->sdl_data
[--namelen
] = 0xff;
2679 TAILQ_INIT(&ifp
->if_addrhead
);
2680 ifa
= ifnet_addrs
[ifp
->if_index
- 1];
2684 * We don't use if_attach_ifa because we want
2685 * this address to be first on the list.
2688 ifa
->ifa_debug
|= IFD_ATTACHED
;
2689 TAILQ_INSERT_HEAD(&ifp
->if_addrhead
, ifa
, ifa_link
);
2692 mac_ifnet_label_associate(ifp
);
2695 TAILQ_INSERT_TAIL(&ifnet_head
, ifp
, if_link
);
2696 ifindex2ifnet
[ifp
->if_index
] = ifp
;
2700 * A specific dlil input thread is created per Ethernet/PDP interface.
2701 * pseudo interfaces or other types of interfaces use the main ("loopback") thread.
2702 * If the sysctl "net.link.generic.system.multi_threaded_input" is set to zero, all packets will
2703 * be handled by the main loopback thread, reverting to 10.4.x behaviour.
2707 if (ifp
->if_type
== IFT_ETHER
|| ifp
->if_type
== IFT_PDP
) {
2710 if (dlil_multithreaded_input
> 0) {
2711 ifp
->if_input_thread
= _MALLOC(sizeof(struct dlil_threading_info
), M_NKE
, M_WAITOK
);
2712 if (ifp
->if_input_thread
== NULL
)
2713 panic("ifnet_attach ifp=%p couldn't alloc threading\n", ifp
);
2714 if ((err
= dlil_create_input_thread(ifp
, ifp
->if_input_thread
)) != 0)
2715 panic("ifnet_attach ifp=%p couldn't get a thread. err=%d\n", ifp
, err
);
2717 printf("ifnet_attach: dlil thread for ifp=%p if_index=%d\n", ifp
, ifp
->if_index
);
2721 ifnet_lock_done(ifp
);
2725 * Attach packet filter to this interface, if enabled.
2727 pf_ifnet_hook(ifp
, 1);
2731 #if IFNET_ROUTE_REFCNT
2733 (void) ifnet_set_idle_flags(ifp
, IFRF_IDLE_NOTIFY
,
2736 #endif /* IFNET_ROUTE_REFCNT */
2738 dlil_post_msg(ifp
, KEV_DL_SUBCLASS
, KEV_DL_IF_ATTACHED
, NULL
, 0);
2747 struct ifnet_filter
*filter
;
2748 struct ifnet_filter
*filter_next
;
2751 struct ifnet_filter_head fhead
;
2752 struct dlil_threading_info
*inputthread
;
2754 if (ifp
== NULL
) return EINVAL
;
2756 ifnet_lock_exclusive(ifp
);
2758 if ((ifp
->if_eflags
& IFEF_DETACHING
) != 0) {
2759 /* Interface has already been detached */
2760 ifnet_lock_done(ifp
);
2765 * Indicate this interface is being detached.
2767 * This should prevent protocols from attaching
2768 * from this point on. Interface will remain on
2769 * the list until all of the protocols are detached.
2771 ifp
->if_eflags
|= IFEF_DETACHING
;
2772 ifnet_lock_done(ifp
);
2774 dlil_post_msg(ifp
, KEV_DL_SUBCLASS
, KEV_DL_IF_DETACHING
, NULL
, 0);
2776 /* Let BPF know we're detaching */
2779 #if IFNET_ROUTE_REFCNT
2781 * Check to see if this interface has previously triggered
2782 * aggressive protocol draining; if so, decrement the global
2783 * refcnt and clear PR_AGGDRAIN on the route domain if
2784 * there are no more of such an interface around.
2786 if (ifp
->if_want_aggressive_drain
!= 0)
2787 (void) ifnet_set_idle_flags(ifp
, 0, ~0);
2788 #endif /* IFNET_ROUTE_REFCNT */
2790 if ((retval
= dlil_write_begin()) != 0) {
2791 if (retval
== EDEADLK
) {
2794 /* We need to perform a delayed detach */
2795 ifp
->if_delayed_detach
= 1;
2796 dlil_detach_waiting
= 1;
2797 wakeup(&dlil_detach_waiting
);
2804 * Detach this interface from packet filter, if enabled.
2806 pf_ifnet_hook(ifp
, 0);
2809 /* Steal the list of interface filters */
2810 fhead
= ifp
->if_flt_head
;
2811 TAILQ_INIT(&ifp
->if_flt_head
);
2813 /* unuse the interface */
2814 zeroed
= ifp_unuse(ifp
);
2817 * If thread affinity was set for the workloop thread, we will need
2818 * to tear down the affinity and release the extra reference count
2819 * taken at attach time;
2821 if ((inputthread
= ifp
->if_input_thread
) != NULL
) {
2822 if (inputthread
->net_affinity
) {
2825 if (inputthread
== dlil_lo_thread_ptr
)
2826 panic("Thread affinity should not be enabled "
2827 "on the loopback dlil input thread\n");
2829 lck_mtx_lock(inputthread
->input_lck
);
2830 tp
= inputthread
->workloop_thread
;
2831 inputthread
->workloop_thread
= NULL
;
2832 inputthread
->tag
= 0;
2833 inputthread
->net_affinity
= FALSE
;
2834 lck_mtx_unlock(inputthread
->input_lck
);
2836 /* Tear down workloop thread affinity */
2838 (void) dlil_affinity_set(tp
,
2839 THREAD_AFFINITY_TAG_NULL
);
2840 thread_deallocate(tp
);
2843 /* Tear down dlil input thread affinity */
2844 tp
= inputthread
->input_thread
;
2845 (void) dlil_affinity_set(tp
, THREAD_AFFINITY_TAG_NULL
);
2846 thread_deallocate(tp
);
2849 /* cleanup ifp dlil input thread, if any */
2850 ifp
->if_input_thread
= NULL
;
2852 if (inputthread
!= dlil_lo_thread_ptr
) {
2854 printf("ifnet_detach: wakeup thread threadinfo: %p "
2855 "input_thread=%p threads: cur=%d max=%d\n",
2856 inputthread
, inputthread
->input_thread
,
2857 dlil_multithreaded_input
, cur_dlil_input_threads
);
2859 lck_mtx_lock(inputthread
->input_lck
);
2861 inputthread
->input_waiting
|= DLIL_INPUT_TERMINATE
;
2862 if ((inputthread
->input_waiting
& DLIL_INPUT_RUNNING
) == 0) {
2863 wakeup((caddr_t
)&inputthread
->input_waiting
);
2865 lck_mtx_unlock(inputthread
->input_lck
);
2868 /* last chance to clean up IPv4 forwarding cached route */
2869 lck_mtx_lock(ifp
->if_fwd_route_lock
);
2870 if (ifp
->if_fwd_route
.ro_rt
!= NULL
) {
2871 rtfree(ifp
->if_fwd_route
.ro_rt
);
2872 ifp
->if_fwd_route
.ro_rt
= NULL
;
2874 lck_mtx_unlock(ifp
->if_fwd_route_lock
);
2877 for (filter
= TAILQ_FIRST(&fhead
); filter
; filter
= filter_next
) {
2878 filter_next
= TAILQ_NEXT(filter
, filt_next
);
2879 dlil_detach_filter_internal(filter
, 1);
2883 ifp_use_reached_zero(ifp
);
2891 __unused ifnet_t ifnet_ptr
,
2892 __unused u_long ioctl_code
,
2893 __unused
void *ioctl_arg
)
2899 dlil_recycle_output(
2900 __unused
struct ifnet
*ifnet_ptr
,
2909 __unused ifnet_t ifnet_ptr
)
2914 dlil_recycle_set_bpf_tap(
2915 __unused ifnet_t ifp
,
2916 __unused bpf_tap_mode mode
,
2917 __unused bpf_packet_func callback
)
2919 /* XXX not sure what to do here */
2924 int dlil_if_acquire(
2926 const void *uniqueid
,
2927 size_t uniqueid_len
,
2930 struct ifnet
*ifp1
= NULL
;
2931 struct dlil_ifnet
*dlifp1
= NULL
;
2934 lck_mtx_lock(dlil_ifnet_mutex
);
2935 TAILQ_FOREACH(dlifp1
, &dlil_ifnet_head
, dl_if_link
) {
2937 ifp1
= (struct ifnet
*)dlifp1
;
2939 if (ifp1
->if_family
== family
) {
2941 /* same uniqueid and same len or no unique id specified */
2942 if ((uniqueid_len
== dlifp1
->if_uniqueid_len
)
2943 && !bcmp(uniqueid
, dlifp1
->if_uniqueid
, uniqueid_len
)) {
2945 /* check for matching interface in use */
2946 if (ifp1
->if_eflags
& IFEF_INUSE
) {
2954 panic("ifp's lock is gone\n");
2955 ifnet_lock_exclusive(ifp1
);
2956 ifp1
->if_eflags
|= (IFEF_INUSE
| IFEF_REUSE
);
2957 ifnet_lock_done(ifp1
);
2965 /* no interface found, allocate a new one */
2966 MALLOC(dlifp1
, struct dlil_ifnet
*, sizeof(*dlifp1
), M_NKE
, M_WAITOK
);
2972 bzero(dlifp1
, sizeof(*dlifp1
));
2975 MALLOC(dlifp1
->if_uniqueid
, void *, uniqueid_len
, M_NKE
, M_WAITOK
);
2976 if (dlifp1
->if_uniqueid
== 0) {
2977 FREE(dlifp1
, M_NKE
);
2981 bcopy(uniqueid
, dlifp1
->if_uniqueid
, uniqueid_len
);
2982 dlifp1
->if_uniqueid_len
= uniqueid_len
;
2985 ifp1
= (struct ifnet
*)dlifp1
;
2986 ifp1
->if_eflags
|= IFEF_INUSE
;
2987 ifp1
->if_name
= dlifp1
->if_namestorage
;
2989 mac_ifnet_label_init(ifp1
);
2992 TAILQ_INSERT_TAIL(&dlil_ifnet_head
, dlifp1
, dl_if_link
);
2997 lck_mtx_unlock(dlil_ifnet_mutex
);
3002 __private_extern__
void
3006 struct dlil_ifnet
*dlifp
= (struct dlil_ifnet
*)ifp
;
3008 /* Interface does not have a lock until it is attached - radar 3713951 */
3010 ifnet_lock_exclusive(ifp
);
3011 ifp
->if_eflags
&= ~IFEF_INUSE
;
3012 ifp
->if_ioctl
= dlil_recycle_ioctl
;
3013 ifp
->if_output
= dlil_recycle_output
;
3014 ifp
->if_free
= dlil_recycle_free
;
3015 ifp
->if_set_bpf_tap
= dlil_recycle_set_bpf_tap
;
3017 strncpy(dlifp
->if_namestorage
, ifp
->if_name
, IFNAMSIZ
);
3018 ifp
->if_name
= dlifp
->if_namestorage
;
3021 * We can either recycle the MAC label here or in dlil_if_acquire().
3022 * It seems logical to do it here but this means that anything that
3023 * still has a handle on ifp will now see it as unlabeled.
3024 * Since the interface is "dead" that may be OK. Revisit later.
3026 mac_ifnet_label_recycle(ifp
);
3029 ifnet_lock_done(ifp
);
3033 __private_extern__
void
3034 dlil_proto_unplumb_all(struct ifnet
*ifp
)
3037 * if_proto_hash[0-3] are for PF_INET, PF_INET6, PF_APPLETALK
3038 * and PF_VLAN, where each bucket contains exactly one entry;
3039 * PF_VLAN does not need an explicit unplumb.
3041 * if_proto_hash[4] is for other protocols; we expect anything
3042 * in this bucket to respond to the DETACHING event (which would
3043 * have happened by now) and do the unplumb then.
3045 (void) proto_unplumb(PF_INET
, ifp
);
3047 (void) proto_unplumb(PF_INET6
, ifp
);
3050 (void) proto_unplumb(PF_APPLETALK
, ifp
);